diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp
index 1fadd8039462..321c11e55c14 100644
--- a/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1,4742 +1,4746 @@
//===- ASTReaderDecl.cpp - Decl Deserialization ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ASTReader::readDeclRecord method, which is the
// entrypoint for loading a decl.
//
//===----------------------------------------------------------------------===//

#include "ASTCommon.h"
#include "ASTReaderInternals.h"
#include "clang/AST/ASTConcept.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTStructuralEquivalence.h"
#include "clang/AST/Attr.h"
#include "clang/AST/AttrIterator.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclFriend.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/LambdaCapture.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Redeclarable.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/Type.h"
#include "clang/AST/UnresolvedSet.h"
#include "clang/Basic/AttrKinds.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/ExceptionSpecificationType.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/Lambda.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Linkage.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/PragmaKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Sema/IdentifierResolver.h"
#include "clang/Serialization/ASTBitCodes.h"
#include "clang/Serialization/ASTRecordReader.h"
#include "clang/Serialization/ContinuousRangeMap.h"
#include "clang/Serialization/ModuleFile.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>

using namespace clang;
using namespace serialization;

//===----------------------------------------------------------------------===//
// Declaration deserialization
//===----------------------------------------------------------------------===//

namespace clang {

  class ASTDeclReader : public DeclVisitor<ASTDeclReader> {
    ASTReader &Reader;
    ASTRecordReader &Record;
    ASTReader::RecordLocation Loc;
    const DeclID ThisDeclID;
    const SourceLocation ThisDeclLoc;

    using RecordData = ASTReader::RecordData;

    TypeID DeferredTypeID = 0;
    unsigned AnonymousDeclNumber = 0;
    GlobalDeclID NamedDeclForTagDecl = 0;
    IdentifierInfo *TypedefNameForLinkage = nullptr;

    bool HasPendingBody = false;

    ///A flag
to carry the information for a decl from the entity is /// used. We use it to delay the marking of the canonical decl as used until /// the entire declaration is deserialized and merged. bool IsDeclMarkedUsed = false; uint64_t GetCurrentCursorOffset(); uint64_t ReadLocalOffset() { uint64_t LocalOffset = Record.readInt(); assert(LocalOffset < Loc.Offset && "offset point after current record"); return LocalOffset ? Loc.Offset - LocalOffset : 0; } uint64_t ReadGlobalOffset() { uint64_t Local = ReadLocalOffset(); return Local ? Record.getGlobalBitOffset(Local) : 0; } SourceLocation readSourceLocation() { return Record.readSourceLocation(); } SourceRange readSourceRange() { return Record.readSourceRange(); } TypeSourceInfo *readTypeSourceInfo() { return Record.readTypeSourceInfo(); } serialization::DeclID readDeclID() { return Record.readDeclID(); } std::string readString() { return Record.readString(); } void readDeclIDList(SmallVectorImpl &IDs) { for (unsigned I = 0, Size = Record.readInt(); I != Size; ++I) IDs.push_back(readDeclID()); } Decl *readDecl() { return Record.readDecl(); } template T *readDeclAs() { return Record.readDeclAs(); } serialization::SubmoduleID readSubmoduleID() { if (Record.getIdx() == Record.size()) return 0; return Record.getGlobalSubmoduleID(Record.readInt()); } Module *readModule() { return Record.getSubmodule(readSubmoduleID()); } void ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update, Decl *LambdaContext = nullptr, unsigned IndexInLambdaContext = 0); void ReadCXXDefinitionData(struct CXXRecordDecl::DefinitionData &Data, const CXXRecordDecl *D, Decl *LambdaContext, unsigned IndexInLambdaContext); void MergeDefinitionData(CXXRecordDecl *D, struct CXXRecordDecl::DefinitionData &&NewDD); void ReadObjCDefinitionData(struct ObjCInterfaceDecl::DefinitionData &Data); void MergeDefinitionData(ObjCInterfaceDecl *D, struct ObjCInterfaceDecl::DefinitionData &&NewDD); void ReadObjCDefinitionData(struct ObjCProtocolDecl::DefinitionData &Data); void MergeDefinitionData(ObjCProtocolDecl *D, struct ObjCProtocolDecl::DefinitionData &&NewDD); static DeclContext *getPrimaryDCForAnonymousDecl(DeclContext *LexicalDC); static NamedDecl *getAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index); static void setAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index, NamedDecl *D); /// Commit to a primary definition of the class RD, which is known to be /// a definition of the class. We might not have read the definition data /// for it yet. If we haven't then allocate placeholder definition data /// now too. static CXXRecordDecl *getOrFakePrimaryClassDefinition(ASTReader &Reader, CXXRecordDecl *RD); /// Results from loading a RedeclarableDecl. class RedeclarableResult { Decl *MergeWith; GlobalDeclID FirstID; bool IsKeyDecl; public: RedeclarableResult(Decl *MergeWith, GlobalDeclID FirstID, bool IsKeyDecl) : MergeWith(MergeWith), FirstID(FirstID), IsKeyDecl(IsKeyDecl) {} /// Retrieve the first ID. GlobalDeclID getFirstID() const { return FirstID; } /// Is this declaration a key declaration? bool isKeyDecl() const { return IsKeyDecl; } /// Get a known declaration that this should be merged with, if /// any. 
Decl *getKnownMergeTarget() const { return MergeWith; } }; /// Class used to capture the result of searching for an existing /// declaration of a specific kind and name, along with the ability /// to update the place where this result was found (the declaration /// chain hanging off an identifier or the DeclContext we searched in) /// if requested. class FindExistingResult { ASTReader &Reader; NamedDecl *New = nullptr; NamedDecl *Existing = nullptr; bool AddResult = false; unsigned AnonymousDeclNumber = 0; IdentifierInfo *TypedefNameForLinkage = nullptr; public: FindExistingResult(ASTReader &Reader) : Reader(Reader) {} FindExistingResult(ASTReader &Reader, NamedDecl *New, NamedDecl *Existing, unsigned AnonymousDeclNumber, IdentifierInfo *TypedefNameForLinkage) : Reader(Reader), New(New), Existing(Existing), AddResult(true), AnonymousDeclNumber(AnonymousDeclNumber), TypedefNameForLinkage(TypedefNameForLinkage) {} FindExistingResult(FindExistingResult &&Other) : Reader(Other.Reader), New(Other.New), Existing(Other.Existing), AddResult(Other.AddResult), AnonymousDeclNumber(Other.AnonymousDeclNumber), TypedefNameForLinkage(Other.TypedefNameForLinkage) { Other.AddResult = false; } FindExistingResult &operator=(FindExistingResult &&) = delete; ~FindExistingResult(); /// Suppress the addition of this result into the known set of /// names. void suppress() { AddResult = false; } operator NamedDecl*() const { return Existing; } template operator T*() const { return dyn_cast_or_null(Existing); } }; static DeclContext *getPrimaryContextForMerging(ASTReader &Reader, DeclContext *DC); FindExistingResult findExisting(NamedDecl *D); public: ASTDeclReader(ASTReader &Reader, ASTRecordReader &Record, ASTReader::RecordLocation Loc, DeclID thisDeclID, SourceLocation ThisDeclLoc) : Reader(Reader), Record(Record), Loc(Loc), ThisDeclID(thisDeclID), ThisDeclLoc(ThisDeclLoc) {} template static void AddLazySpecializations(T *D, SmallVectorImpl& IDs) { if (IDs.empty()) return; // FIXME: We should avoid this pattern of getting the ASTContext. ASTContext &C = D->getASTContext(); auto *&LazySpecializations = D->getCommonPtr()->LazySpecializations; if (auto &Old = LazySpecializations) { IDs.insert(IDs.end(), Old + 1, Old + 1 + Old[0]); llvm::sort(IDs); IDs.erase(std::unique(IDs.begin(), IDs.end()), IDs.end()); } auto *Result = new (C) serialization::DeclID[1 + IDs.size()]; *Result = IDs.size(); std::copy(IDs.begin(), IDs.end(), Result + 1); LazySpecializations = Result; } template static Decl *getMostRecentDeclImpl(Redeclarable *D); static Decl *getMostRecentDeclImpl(...); static Decl *getMostRecentDecl(Decl *D); static void mergeInheritableAttributes(ASTReader &Reader, Decl *D, Decl *Previous); template static void attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon); static void attachPreviousDeclImpl(ASTReader &Reader, ...); static void attachPreviousDecl(ASTReader &Reader, Decl *D, Decl *Previous, Decl *Canon); template static void attachLatestDeclImpl(Redeclarable *D, Decl *Latest); static void attachLatestDeclImpl(...); static void attachLatestDecl(Decl *D, Decl *latest); template static void markIncompleteDeclChainImpl(Redeclarable *D); static void markIncompleteDeclChainImpl(...); /// Determine whether this declaration has a pending body. 
bool hasPendingBody() const { return HasPendingBody; } void ReadFunctionDefinition(FunctionDecl *FD); void Visit(Decl *D); void UpdateDecl(Decl *D, SmallVectorImpl &); static void setNextObjCCategory(ObjCCategoryDecl *Cat, ObjCCategoryDecl *Next) { Cat->NextClassCategory = Next; } void VisitDecl(Decl *D); void VisitPragmaCommentDecl(PragmaCommentDecl *D); void VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D); void VisitTranslationUnitDecl(TranslationUnitDecl *TU); void VisitNamedDecl(NamedDecl *ND); void VisitLabelDecl(LabelDecl *LD); void VisitNamespaceDecl(NamespaceDecl *D); void VisitHLSLBufferDecl(HLSLBufferDecl *D); void VisitUsingDirectiveDecl(UsingDirectiveDecl *D); void VisitNamespaceAliasDecl(NamespaceAliasDecl *D); void VisitTypeDecl(TypeDecl *TD); RedeclarableResult VisitTypedefNameDecl(TypedefNameDecl *TD); void VisitTypedefDecl(TypedefDecl *TD); void VisitTypeAliasDecl(TypeAliasDecl *TD); void VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D); void VisitUnresolvedUsingIfExistsDecl(UnresolvedUsingIfExistsDecl *D); RedeclarableResult VisitTagDecl(TagDecl *TD); void VisitEnumDecl(EnumDecl *ED); RedeclarableResult VisitRecordDeclImpl(RecordDecl *RD); void VisitRecordDecl(RecordDecl *RD); RedeclarableResult VisitCXXRecordDeclImpl(CXXRecordDecl *D); void VisitCXXRecordDecl(CXXRecordDecl *D) { VisitCXXRecordDeclImpl(D); } RedeclarableResult VisitClassTemplateSpecializationDeclImpl( ClassTemplateSpecializationDecl *D); void VisitClassTemplateSpecializationDecl( ClassTemplateSpecializationDecl *D) { VisitClassTemplateSpecializationDeclImpl(D); } void VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D); RedeclarableResult VisitVarTemplateSpecializationDeclImpl(VarTemplateSpecializationDecl *D); void VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D) { VisitVarTemplateSpecializationDeclImpl(D); } void VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D); void VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D); void VisitValueDecl(ValueDecl *VD); void VisitEnumConstantDecl(EnumConstantDecl *ECD); void VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D); void VisitDeclaratorDecl(DeclaratorDecl *DD); void VisitFunctionDecl(FunctionDecl *FD); void VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *GD); void VisitCXXMethodDecl(CXXMethodDecl *D); void VisitCXXConstructorDecl(CXXConstructorDecl *D); void VisitCXXDestructorDecl(CXXDestructorDecl *D); void VisitCXXConversionDecl(CXXConversionDecl *D); void VisitFieldDecl(FieldDecl *FD); void VisitMSPropertyDecl(MSPropertyDecl *FD); void VisitMSGuidDecl(MSGuidDecl *D); void VisitUnnamedGlobalConstantDecl(UnnamedGlobalConstantDecl *D); void VisitTemplateParamObjectDecl(TemplateParamObjectDecl *D); void VisitIndirectFieldDecl(IndirectFieldDecl *FD); RedeclarableResult VisitVarDeclImpl(VarDecl *D); void ReadVarDeclInit(VarDecl *VD); void VisitVarDecl(VarDecl *VD) { VisitVarDeclImpl(VD); } void VisitImplicitParamDecl(ImplicitParamDecl *PD); void VisitParmVarDecl(ParmVarDecl *PD); void VisitDecompositionDecl(DecompositionDecl *DD); void VisitBindingDecl(BindingDecl *BD); void VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D); void VisitTemplateDecl(TemplateDecl *D); void VisitConceptDecl(ConceptDecl *D); void VisitImplicitConceptSpecializationDecl( ImplicitConceptSpecializationDecl *D); void VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D); RedeclarableResult VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D); void 
VisitClassTemplateDecl(ClassTemplateDecl *D); void VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D); void VisitVarTemplateDecl(VarTemplateDecl *D); void VisitFunctionTemplateDecl(FunctionTemplateDecl *D); void VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D); void VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D); void VisitUsingDecl(UsingDecl *D); void VisitUsingEnumDecl(UsingEnumDecl *D); void VisitUsingPackDecl(UsingPackDecl *D); void VisitUsingShadowDecl(UsingShadowDecl *D); void VisitConstructorUsingShadowDecl(ConstructorUsingShadowDecl *D); void VisitLinkageSpecDecl(LinkageSpecDecl *D); void VisitExportDecl(ExportDecl *D); void VisitFileScopeAsmDecl(FileScopeAsmDecl *AD); void VisitTopLevelStmtDecl(TopLevelStmtDecl *D); void VisitImportDecl(ImportDecl *D); void VisitAccessSpecDecl(AccessSpecDecl *D); void VisitFriendDecl(FriendDecl *D); void VisitFriendTemplateDecl(FriendTemplateDecl *D); void VisitStaticAssertDecl(StaticAssertDecl *D); void VisitBlockDecl(BlockDecl *BD); void VisitCapturedDecl(CapturedDecl *CD); void VisitEmptyDecl(EmptyDecl *D); void VisitLifetimeExtendedTemporaryDecl(LifetimeExtendedTemporaryDecl *D); std::pair VisitDeclContext(DeclContext *DC); template RedeclarableResult VisitRedeclarable(Redeclarable *D); template void mergeRedeclarable(Redeclarable *D, RedeclarableResult &Redecl); void mergeLambda(CXXRecordDecl *D, RedeclarableResult &Redecl, Decl *Context, unsigned Number); void mergeRedeclarableTemplate(RedeclarableTemplateDecl *D, RedeclarableResult &Redecl); template void mergeRedeclarable(Redeclarable *D, T *Existing, RedeclarableResult &Redecl); template void mergeMergeable(Mergeable *D); void mergeMergeable(LifetimeExtendedTemporaryDecl *D); void mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, bool IsKeyDecl); ObjCTypeParamList *ReadObjCTypeParamList(); // FIXME: Reorder according to DeclNodes.td? void VisitObjCMethodDecl(ObjCMethodDecl *D); void VisitObjCTypeParamDecl(ObjCTypeParamDecl *D); void VisitObjCContainerDecl(ObjCContainerDecl *D); void VisitObjCInterfaceDecl(ObjCInterfaceDecl *D); void VisitObjCIvarDecl(ObjCIvarDecl *D); void VisitObjCProtocolDecl(ObjCProtocolDecl *D); void VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D); void VisitObjCCategoryDecl(ObjCCategoryDecl *D); void VisitObjCImplDecl(ObjCImplDecl *D); void VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D); void VisitObjCImplementationDecl(ObjCImplementationDecl *D); void VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D); void VisitObjCPropertyDecl(ObjCPropertyDecl *D); void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D); void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D); void VisitOMPAllocateDecl(OMPAllocateDecl *D); void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D); void VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D); void VisitOMPRequiresDecl(OMPRequiresDecl *D); void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D); }; } // namespace clang namespace { /// Iterator over the redeclarations of a declaration that have already /// been merged into the same redeclaration chain. 
template class MergedRedeclIterator { DeclT *Start = nullptr; DeclT *Canonical = nullptr; DeclT *Current = nullptr; public: MergedRedeclIterator() = default; MergedRedeclIterator(DeclT *Start) : Start(Start), Current(Start) {} DeclT *operator*() { return Current; } MergedRedeclIterator &operator++() { if (Current->isFirstDecl()) { Canonical = Current; Current = Current->getMostRecentDecl(); } else Current = Current->getPreviousDecl(); // If we started in the merged portion, we'll reach our start position // eventually. Otherwise, we'll never reach it, but the second declaration // we reached was the canonical declaration, so stop when we see that one // again. if (Current == Start || Current == Canonical) Current = nullptr; return *this; } friend bool operator!=(const MergedRedeclIterator &A, const MergedRedeclIterator &B) { return A.Current != B.Current; } }; } // namespace template static llvm::iterator_range> merged_redecls(DeclT *D) { return llvm::make_range(MergedRedeclIterator(D), MergedRedeclIterator()); } uint64_t ASTDeclReader::GetCurrentCursorOffset() { return Loc.F->DeclsCursor.GetCurrentBitNo() + Loc.F->GlobalBitOffset; } void ASTDeclReader::ReadFunctionDefinition(FunctionDecl *FD) { if (Record.readInt()) { Reader.DefinitionSource[FD] = Loc.F->Kind == ModuleKind::MK_MainFile || Reader.getContext().getLangOpts().BuildingPCHWithObjectFile; } if (auto *CD = dyn_cast(FD)) { CD->setNumCtorInitializers(Record.readInt()); if (CD->getNumCtorInitializers()) CD->CtorInitializers = ReadGlobalOffset(); } // Store the offset of the body so we can lazily load it later. Reader.PendingBodies[FD] = GetCurrentCursorOffset(); HasPendingBody = true; } void ASTDeclReader::Visit(Decl *D) { DeclVisitor::Visit(D); // At this point we have deserialized and merged the decl and it is safe to // update its canonical decl to signal that the entire entity is used. D->getCanonicalDecl()->Used |= IsDeclMarkedUsed; IsDeclMarkedUsed = false; if (auto *DD = dyn_cast(D)) { if (auto *TInfo = DD->getTypeSourceInfo()) Record.readTypeLoc(TInfo->getTypeLoc()); } if (auto *TD = dyn_cast(D)) { // We have a fully initialized TypeDecl. Read its type now. TD->setTypeForDecl(Reader.GetType(DeferredTypeID).getTypePtrOrNull()); // If this is a tag declaration with a typedef name for linkage, it's safe // to load that typedef now. if (NamedDeclForTagDecl) cast(D)->TypedefNameDeclOrQualifier = cast(Reader.GetDecl(NamedDeclForTagDecl)); } else if (auto *ID = dyn_cast(D)) { // if we have a fully initialized TypeDecl, we can safely read its type now. ID->TypeForDecl = Reader.GetType(DeferredTypeID).getTypePtrOrNull(); } else if (auto *FD = dyn_cast(D)) { // FunctionDecl's body was written last after all other Stmts/Exprs. 
if (Record.readInt()) ReadFunctionDefinition(FD); } else if (auto *VD = dyn_cast(D)) { ReadVarDeclInit(VD); } else if (auto *FD = dyn_cast(D)) { if (FD->hasInClassInitializer() && Record.readInt()) { FD->setLazyInClassInitializer(LazyDeclStmtPtr(GetCurrentCursorOffset())); } } } void ASTDeclReader::VisitDecl(Decl *D) { BitsUnpacker DeclBits(Record.readInt()); auto ModuleOwnership = (Decl::ModuleOwnershipKind)DeclBits.getNextBits(/*Width=*/3); D->setReferenced(DeclBits.getNextBit()); D->Used = DeclBits.getNextBit(); IsDeclMarkedUsed |= D->Used; D->setAccess((AccessSpecifier)DeclBits.getNextBits(/*Width=*/2)); D->setImplicit(DeclBits.getNextBit()); bool HasStandaloneLexicalDC = DeclBits.getNextBit(); bool HasAttrs = DeclBits.getNextBit(); D->setTopLevelDeclInObjCContainer(DeclBits.getNextBit()); D->InvalidDecl = DeclBits.getNextBit(); D->FromASTFile = true; if (D->isTemplateParameter() || D->isTemplateParameterPack() || isa(D)) { // We don't want to deserialize the DeclContext of a template // parameter or of a parameter of a function template immediately. These // entities might be used in the formulation of its DeclContext (for // example, a function parameter can be used in decltype() in trailing // return type of the function). Use the translation unit DeclContext as a // placeholder. GlobalDeclID SemaDCIDForTemplateParmDecl = readDeclID(); GlobalDeclID LexicalDCIDForTemplateParmDecl = HasStandaloneLexicalDC ? readDeclID() : 0; if (!LexicalDCIDForTemplateParmDecl) LexicalDCIDForTemplateParmDecl = SemaDCIDForTemplateParmDecl; Reader.addPendingDeclContextInfo(D, SemaDCIDForTemplateParmDecl, LexicalDCIDForTemplateParmDecl); D->setDeclContext(Reader.getContext().getTranslationUnitDecl()); } else { auto *SemaDC = readDeclAs(); auto *LexicalDC = HasStandaloneLexicalDC ? readDeclAs() : nullptr; if (!LexicalDC) LexicalDC = SemaDC; // If the context is a class, we might not have actually merged it yet, in // the case where the definition comes from an update record. DeclContext *MergedSemaDC; if (auto *RD = dyn_cast(SemaDC)) MergedSemaDC = getOrFakePrimaryClassDefinition(Reader, RD); else MergedSemaDC = Reader.MergedDeclContexts.lookup(SemaDC); // Avoid calling setLexicalDeclContext() directly because it uses // Decl::getASTContext() internally which is unsafe during derialization. D->setDeclContextsImpl(MergedSemaDC ? MergedSemaDC : SemaDC, LexicalDC, Reader.getContext()); } D->setLocation(ThisDeclLoc); if (HasAttrs) { AttrVec Attrs; Record.readAttributes(Attrs); // Avoid calling setAttrs() directly because it uses Decl::getASTContext() // internally which is unsafe during derialization. D->setAttrsImpl(Attrs, Reader.getContext()); } // Determine whether this declaration is part of a (sub)module. If so, it // may not yet be visible. bool ModulePrivate = (ModuleOwnership == Decl::ModuleOwnershipKind::ModulePrivate); if (unsigned SubmoduleID = readSubmoduleID()) { switch (ModuleOwnership) { case Decl::ModuleOwnershipKind::Visible: ModuleOwnership = Decl::ModuleOwnershipKind::VisibleWhenImported; break; case Decl::ModuleOwnershipKind::Unowned: case Decl::ModuleOwnershipKind::VisibleWhenImported: case Decl::ModuleOwnershipKind::ReachableWhenImported: case Decl::ModuleOwnershipKind::ModulePrivate: break; } D->setModuleOwnershipKind(ModuleOwnership); // Store the owning submodule ID in the declaration. D->setOwningModuleID(SubmoduleID); if (ModulePrivate) { // Module-private declarations are never visible, so there is no work to // do. 
} else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) { // If local visibility is being tracked, this declaration will become // hidden and visible as the owning module does. } else if (Module *Owner = Reader.getSubmodule(SubmoduleID)) { // Mark the declaration as visible when its owning module becomes visible. if (Owner->NameVisibility == Module::AllVisible) D->setVisibleDespiteOwningModule(); else Reader.HiddenNamesMap[Owner].push_back(D); } } else if (ModulePrivate) { D->setModuleOwnershipKind(Decl::ModuleOwnershipKind::ModulePrivate); } } void ASTDeclReader::VisitPragmaCommentDecl(PragmaCommentDecl *D) { VisitDecl(D); D->setLocation(readSourceLocation()); D->CommentKind = (PragmaMSCommentKind)Record.readInt(); std::string Arg = readString(); memcpy(D->getTrailingObjects(), Arg.data(), Arg.size()); D->getTrailingObjects()[Arg.size()] = '\0'; } void ASTDeclReader::VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D) { VisitDecl(D); D->setLocation(readSourceLocation()); std::string Name = readString(); memcpy(D->getTrailingObjects(), Name.data(), Name.size()); D->getTrailingObjects()[Name.size()] = '\0'; D->ValueStart = Name.size() + 1; std::string Value = readString(); memcpy(D->getTrailingObjects() + D->ValueStart, Value.data(), Value.size()); D->getTrailingObjects()[D->ValueStart + Value.size()] = '\0'; } void ASTDeclReader::VisitTranslationUnitDecl(TranslationUnitDecl *TU) { llvm_unreachable("Translation units are not serialized"); } void ASTDeclReader::VisitNamedDecl(NamedDecl *ND) { VisitDecl(ND); ND->setDeclName(Record.readDeclarationName()); AnonymousDeclNumber = Record.readInt(); } void ASTDeclReader::VisitTypeDecl(TypeDecl *TD) { VisitNamedDecl(TD); TD->setLocStart(readSourceLocation()); // Delay type reading until after we have fully initialized the decl. DeferredTypeID = Record.getGlobalTypeID(Record.readInt()); } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitTypedefNameDecl(TypedefNameDecl *TD) { RedeclarableResult Redecl = VisitRedeclarable(TD); VisitTypeDecl(TD); TypeSourceInfo *TInfo = readTypeSourceInfo(); if (Record.readInt()) { // isModed QualType modedT = Record.readType(); TD->setModedTypeSourceInfo(TInfo, modedT); } else TD->setTypeSourceInfo(TInfo); // Read and discard the declaration for which this is a typedef name for // linkage, if it exists. We cannot rely on our type to pull in this decl, // because it might have been merged with a type from another module and // thus might not refer to our version of the declaration. readDecl(); return Redecl; } void ASTDeclReader::VisitTypedefDecl(TypedefDecl *TD) { RedeclarableResult Redecl = VisitTypedefNameDecl(TD); mergeRedeclarable(TD, Redecl); } void ASTDeclReader::VisitTypeAliasDecl(TypeAliasDecl *TD) { RedeclarableResult Redecl = VisitTypedefNameDecl(TD); if (auto *Template = readDeclAs()) // Merged when we merge the template. 
    TD->setDescribedAliasTemplate(Template);
  else
    mergeRedeclarable(TD, Redecl);
}

ASTDeclReader::RedeclarableResult ASTDeclReader::VisitTagDecl(TagDecl *TD) {
  RedeclarableResult Redecl = VisitRedeclarable(TD);
  VisitTypeDecl(TD);

  TD->IdentifierNamespace = Record.readInt();

  BitsUnpacker TagDeclBits(Record.readInt());
  TD->setTagKind(
      static_cast<TagDecl::TagKind>(TagDeclBits.getNextBits(/*Width=*/3)));
  TD->setCompleteDefinition(TagDeclBits.getNextBit());
  TD->setEmbeddedInDeclarator(TagDeclBits.getNextBit());
  TD->setFreeStanding(TagDeclBits.getNextBit());
  TD->setCompleteDefinitionRequired(TagDeclBits.getNextBit());
  TD->setBraceRange(readSourceRange());

  switch (TagDeclBits.getNextBits(/*Width=*/2)) {
  case 0:
    break;
  case 1: { // ExtInfo
    auto *Info = new (Reader.getContext()) TagDecl::ExtInfo();
    Record.readQualifierInfo(*Info);
    TD->TypedefNameDeclOrQualifier = Info;
    break;
  }
  case 2: // TypedefNameForAnonDecl
    NamedDeclForTagDecl = readDeclID();
    TypedefNameForLinkage = Record.readIdentifier();
    break;
  default:
    llvm_unreachable("unexpected tag info kind");
  }

  if (!isa<CXXRecordDecl>(TD))
    mergeRedeclarable(TD, Redecl);
  return Redecl;
}

void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) {
  VisitTagDecl(ED);
  if (TypeSourceInfo *TI = readTypeSourceInfo())
    ED->setIntegerTypeSourceInfo(TI);
  else
    ED->setIntegerType(Record.readType());
  ED->setPromotionType(Record.readType());

  BitsUnpacker EnumDeclBits(Record.readInt());
  ED->setNumPositiveBits(EnumDeclBits.getNextBits(/*Width=*/8));
  ED->setNumNegativeBits(EnumDeclBits.getNextBits(/*Width=*/8));
+  bool ShouldSkipCheckingODR = EnumDeclBits.getNextBit();
  ED->setScoped(EnumDeclBits.getNextBit());
  ED->setScopedUsingClassTag(EnumDeclBits.getNextBit());
  ED->setFixed(EnumDeclBits.getNextBit());

-  if (!shouldSkipCheckingODR(ED)) {
+  if (!ShouldSkipCheckingODR) {
    ED->setHasODRHash(true);
    ED->ODRHash = Record.readInt();
  }

  // If this is a definition subject to the ODR, and we already have a
  // definition, merge this one into it.
  if (ED->isCompleteDefinition() &&
      Reader.getContext().getLangOpts().Modules &&
      Reader.getContext().getLangOpts().CPlusPlus) {
    EnumDecl *&OldDef = Reader.EnumDefinitions[ED->getCanonicalDecl()];
    if (!OldDef) {
      // This is the first time we've seen an imported definition. Look for a
      // local definition before deciding that we are the first definition.
      for (auto *D : merged_redecls(ED->getCanonicalDecl())) {
        if (!D->isFromASTFile() && D->isCompleteDefinition()) {
          OldDef = D;
          break;
        }
      }
    }
    if (OldDef) {
      Reader.MergedDeclContexts.insert(std::make_pair(ED, OldDef));
      ED->demoteThisDefinitionToDeclaration();
      Reader.mergeDefinitionVisibility(OldDef, ED);
      // We don't want to check the ODR hash value for declarations from global
      // module fragment.
if (!shouldSkipCheckingODR(ED) && OldDef->getODRHash() != ED->getODRHash()) Reader.PendingEnumOdrMergeFailures[OldDef].push_back(ED); } else { OldDef = ED; } } if (auto *InstED = readDeclAs()) { auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); ED->setInstantiationOfMemberEnum(Reader.getContext(), InstED, TSK); ED->getMemberSpecializationInfo()->setPointOfInstantiation(POI); } } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) { RedeclarableResult Redecl = VisitTagDecl(RD); BitsUnpacker RecordDeclBits(Record.readInt()); RD->setHasFlexibleArrayMember(RecordDeclBits.getNextBit()); RD->setAnonymousStructOrUnion(RecordDeclBits.getNextBit()); RD->setHasObjectMember(RecordDeclBits.getNextBit()); RD->setHasVolatileMember(RecordDeclBits.getNextBit()); RD->setNonTrivialToPrimitiveDefaultInitialize(RecordDeclBits.getNextBit()); RD->setNonTrivialToPrimitiveCopy(RecordDeclBits.getNextBit()); RD->setNonTrivialToPrimitiveDestroy(RecordDeclBits.getNextBit()); RD->setHasNonTrivialToPrimitiveDefaultInitializeCUnion( RecordDeclBits.getNextBit()); RD->setHasNonTrivialToPrimitiveDestructCUnion(RecordDeclBits.getNextBit()); RD->setHasNonTrivialToPrimitiveCopyCUnion(RecordDeclBits.getNextBit()); RD->setParamDestroyedInCallee(RecordDeclBits.getNextBit()); RD->setArgPassingRestrictions( (RecordArgPassingKind)RecordDeclBits.getNextBits(/*Width=*/2)); return Redecl; } void ASTDeclReader::VisitRecordDecl(RecordDecl *RD) { VisitRecordDeclImpl(RD); // We should only reach here if we're in C/Objective-C. There is no // global module fragment. assert(!shouldSkipCheckingODR(RD)); RD->setODRHash(Record.readInt()); // Maintain the invariant of a redeclaration chain containing only // a single definition. if (RD->isCompleteDefinition()) { RecordDecl *Canon = static_cast(RD->getCanonicalDecl()); RecordDecl *&OldDef = Reader.RecordDefinitions[Canon]; if (!OldDef) { // This is the first time we've seen an imported definition. Look for a // local definition before deciding that we are the first definition. for (auto *D : merged_redecls(Canon)) { if (!D->isFromASTFile() && D->isCompleteDefinition()) { OldDef = D; break; } } } if (OldDef) { Reader.MergedDeclContexts.insert(std::make_pair(RD, OldDef)); RD->demoteThisDefinitionToDeclaration(); Reader.mergeDefinitionVisibility(OldDef, RD); if (OldDef->getODRHash() != RD->getODRHash()) Reader.PendingRecordOdrMergeFailures[OldDef].push_back(RD); } else { OldDef = RD; } } } void ASTDeclReader::VisitValueDecl(ValueDecl *VD) { VisitNamedDecl(VD); // For function or variable declarations, defer reading the type in case the // declaration has a deduced type that references an entity declared within // the function definition or variable initializer. if (isa(VD)) DeferredTypeID = Record.getGlobalTypeID(Record.readInt()); else VD->setType(Record.readType()); } void ASTDeclReader::VisitEnumConstantDecl(EnumConstantDecl *ECD) { VisitValueDecl(ECD); if (Record.readInt()) ECD->setInitExpr(Record.readExpr()); ECD->setInitVal(Reader.getContext(), Record.readAPSInt()); mergeMergeable(ECD); } void ASTDeclReader::VisitDeclaratorDecl(DeclaratorDecl *DD) { VisitValueDecl(DD); DD->setInnerLocStart(readSourceLocation()); if (Record.readInt()) { // hasExtInfo auto *Info = new (Reader.getContext()) DeclaratorDecl::ExtInfo(); Record.readQualifierInfo(*Info); Info->TrailingRequiresClause = Record.readExpr(); DD->DeclInfo = Info; } QualType TSIType = Record.readType(); DD->setTypeSourceInfo( TSIType.isNull() ? 
nullptr : Reader.getContext().CreateTypeSourceInfo(TSIType)); } void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { RedeclarableResult Redecl = VisitRedeclarable(FD); FunctionDecl *Existing = nullptr; switch ((FunctionDecl::TemplatedKind)Record.readInt()) { case FunctionDecl::TK_NonTemplate: break; case FunctionDecl::TK_DependentNonTemplate: FD->setInstantiatedFromDecl(readDeclAs()); break; case FunctionDecl::TK_FunctionTemplate: { auto *Template = readDeclAs(); Template->init(FD); FD->setDescribedFunctionTemplate(Template); break; } case FunctionDecl::TK_MemberSpecialization: { auto *InstFD = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); FD->setInstantiationOfMemberFunction(Reader.getContext(), InstFD, TSK); FD->getMemberSpecializationInfo()->setPointOfInstantiation(POI); break; } case FunctionDecl::TK_FunctionTemplateSpecialization: { auto *Template = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); // Template arguments. SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true); // Template args as written. TemplateArgumentListInfo TemplArgsWritten; bool HasTemplateArgumentsAsWritten = Record.readBool(); if (HasTemplateArgumentsAsWritten) Record.readTemplateArgumentListInfo(TemplArgsWritten); SourceLocation POI = readSourceLocation(); ASTContext &C = Reader.getContext(); TemplateArgumentList *TemplArgList = TemplateArgumentList::CreateCopy(C, TemplArgs); MemberSpecializationInfo *MSInfo = nullptr; if (Record.readInt()) { auto *FD = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); MSInfo = new (C) MemberSpecializationInfo(FD, TSK); MSInfo->setPointOfInstantiation(POI); } FunctionTemplateSpecializationInfo *FTInfo = FunctionTemplateSpecializationInfo::Create( C, FD, Template, TSK, TemplArgList, HasTemplateArgumentsAsWritten ? &TemplArgsWritten : nullptr, POI, MSInfo); FD->TemplateOrSpecialization = FTInfo; if (FD->isCanonicalDecl()) { // if canonical add to template's set. // The template that contains the specializations set. It's not safe to // use getCanonicalDecl on Template since it may still be initializing. auto *CanonTemplate = readDeclAs(); // Get the InsertPos by FindNodeOrInsertPos() instead of calling // InsertNode(FTInfo) directly to avoid the getASTContext() call in // FunctionTemplateSpecializationInfo's Profile(). // We avoid getASTContext because a decl in the parent hierarchy may // be initializing. llvm::FoldingSetNodeID ID; FunctionTemplateSpecializationInfo::Profile(ID, TemplArgs, C); void *InsertPos = nullptr; FunctionTemplateDecl::Common *CommonPtr = CanonTemplate->getCommonPtr(); FunctionTemplateSpecializationInfo *ExistingInfo = CommonPtr->Specializations.FindNodeOrInsertPos(ID, InsertPos); if (InsertPos) CommonPtr->Specializations.InsertNode(FTInfo, InsertPos); else { assert(Reader.getContext().getLangOpts().Modules && "already deserialized this template specialization"); Existing = ExistingInfo->getFunction(); } } break; } case FunctionDecl::TK_DependentFunctionTemplateSpecialization: { // Templates. UnresolvedSet<8> Candidates; unsigned NumCandidates = Record.readInt(); while (NumCandidates--) Candidates.addDecl(readDeclAs()); // Templates args. 
    TemplateArgumentListInfo TemplArgsWritten;
    bool HasTemplateArgumentsAsWritten = Record.readBool();
    if (HasTemplateArgumentsAsWritten)
      Record.readTemplateArgumentListInfo(TemplArgsWritten);

    FD->setDependentTemplateSpecialization(
        Reader.getContext(), Candidates,
        HasTemplateArgumentsAsWritten ? &TemplArgsWritten : nullptr);
    // These are not merged; we don't need to merge redeclarations of dependent
    // template friends.
    break;
  }
  }

  VisitDeclaratorDecl(FD);

  // Attach a type to this function. Use the real type if possible, but fall
  // back to the type as written if it involves a deduced return type.
  if (FD->getTypeSourceInfo() && FD->getTypeSourceInfo()
                                     ->getType()
                                     ->castAs<FunctionType>()
                                     ->getReturnType()
                                     ->getContainedAutoType()) {
    // We'll set up the real type in Visit, once we've finished loading the
    // function.
    FD->setType(FD->getTypeSourceInfo()->getType());
    Reader.PendingDeducedFunctionTypes.push_back({FD, DeferredTypeID});
  } else {
    FD->setType(Reader.GetType(DeferredTypeID));
  }
  DeferredTypeID = 0;

  FD->DNLoc = Record.readDeclarationNameLoc(FD->getDeclName());
  FD->IdentifierNamespace = Record.readInt();

  // FunctionDecl's body is handled last at ASTDeclReader::Visit,
  // after everything else is read.
  BitsUnpacker FunctionDeclBits(Record.readInt());

  FD->setCachedLinkage((Linkage)FunctionDeclBits.getNextBits(/*Width=*/3));
  FD->setStorageClass((StorageClass)FunctionDeclBits.getNextBits(/*Width=*/3));
+  bool ShouldSkipCheckingODR = FunctionDeclBits.getNextBit();
  FD->setInlineSpecified(FunctionDeclBits.getNextBit());
  FD->setImplicitlyInline(FunctionDeclBits.getNextBit());
  FD->setHasSkippedBody(FunctionDeclBits.getNextBit());
  FD->setVirtualAsWritten(FunctionDeclBits.getNextBit());
  // We defer calling `FunctionDecl::setPure()` here as for methods of
  // `CXXTemplateSpecializationDecl`s, we may not have connected up the
  // definition (which is required for `setPure`).
  const bool Pure = FunctionDeclBits.getNextBit();
  FD->setHasInheritedPrototype(FunctionDeclBits.getNextBit());
  FD->setHasWrittenPrototype(FunctionDeclBits.getNextBit());
  FD->setDeletedAsWritten(FunctionDeclBits.getNextBit());
  FD->setTrivial(FunctionDeclBits.getNextBit());
  FD->setTrivialForCall(FunctionDeclBits.getNextBit());
  FD->setDefaulted(FunctionDeclBits.getNextBit());
  FD->setExplicitlyDefaulted(FunctionDeclBits.getNextBit());
  FD->setIneligibleOrNotSelected(FunctionDeclBits.getNextBit());
  FD->setConstexprKind(
      (ConstexprSpecKind)FunctionDeclBits.getNextBits(/*Width=*/2));
  FD->setHasImplicitReturnZero(FunctionDeclBits.getNextBit());
  FD->setIsMultiVersion(FunctionDeclBits.getNextBit());
  FD->setLateTemplateParsed(FunctionDeclBits.getNextBit());
  FD->setFriendConstraintRefersToEnclosingTemplate(
      FunctionDeclBits.getNextBit());
  FD->setUsesSEHTry(FunctionDeclBits.getNextBit());

  FD->EndRangeLoc = readSourceLocation();
  if (FD->isExplicitlyDefaulted())
    FD->setDefaultLoc(readSourceLocation());

-  if (!shouldSkipCheckingODR(FD)) {
+  if (!ShouldSkipCheckingODR) {
    FD->ODRHash = Record.readInt();
    FD->setHasODRHash(true);
  }

  if (FD->isDefaulted()) {
    if (unsigned NumLookups = Record.readInt()) {
      SmallVector<DeclAccessPair, 8> Lookups;
      for (unsigned I = 0; I != NumLookups; ++I) {
        NamedDecl *ND = Record.readDeclAs<NamedDecl>();
        AccessSpecifier AS = (AccessSpecifier)Record.readInt();
        Lookups.push_back(DeclAccessPair::make(ND, AS));
      }
      FD->setDefaultedFunctionInfo(FunctionDecl::DefaultedFunctionInfo::Create(
          Reader.getContext(), Lookups));
    }
  }

  if (Existing)
    mergeRedeclarable(FD, Existing, Redecl);
  else if (auto Kind = FD->getTemplatedKind();
           Kind == FunctionDecl::TK_FunctionTemplate ||
           Kind == FunctionDecl::TK_FunctionTemplateSpecialization) {
    // Function Templates have their FunctionTemplateDecls merged instead of
    // their FunctionDecls.
    auto merge = [this, &Redecl, FD](auto &&F) {
      auto *Existing = cast_or_null<FunctionDecl>(Redecl.getKnownMergeTarget());
      RedeclarableResult NewRedecl(Existing ? F(Existing) : nullptr,
                                   Redecl.getFirstID(), Redecl.isKeyDecl());
      mergeRedeclarableTemplate(F(FD), NewRedecl);
    };
    if (Kind == FunctionDecl::TK_FunctionTemplate)
      merge(
          [](FunctionDecl *FD) { return FD->getDescribedFunctionTemplate(); });
    else
      merge([](FunctionDecl *FD) {
        return FD->getTemplateSpecializationInfo()->getTemplate();
      });
  } else
    mergeRedeclarable(FD, Redecl);

  // Defer calling `setPure` until merging above has guaranteed we've set
  // `DefinitionData` (as this will need to access it).
  FD->setIsPureVirtual(Pure);

  // Read in the parameters.
  unsigned NumParams = Record.readInt();
  SmallVector<ParmVarDecl *, 16> Params;
  Params.reserve(NumParams);
  for (unsigned I = 0; I != NumParams; ++I)
    Params.push_back(readDeclAs<ParmVarDecl>());
  FD->setParams(Reader.getContext(), Params);
}

void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) {
  VisitNamedDecl(MD);
  if (Record.readInt()) {
    // Load the body on-demand. Most clients won't care, because method
    // definitions rarely show up in headers.
Reader.PendingBodies[MD] = GetCurrentCursorOffset(); HasPendingBody = true; } MD->setSelfDecl(readDeclAs()); MD->setCmdDecl(readDeclAs()); MD->setInstanceMethod(Record.readInt()); MD->setVariadic(Record.readInt()); MD->setPropertyAccessor(Record.readInt()); MD->setSynthesizedAccessorStub(Record.readInt()); MD->setDefined(Record.readInt()); MD->setOverriding(Record.readInt()); MD->setHasSkippedBody(Record.readInt()); MD->setIsRedeclaration(Record.readInt()); MD->setHasRedeclaration(Record.readInt()); if (MD->hasRedeclaration()) Reader.getContext().setObjCMethodRedeclaration(MD, readDeclAs()); MD->setDeclImplementation( static_cast(Record.readInt())); MD->setObjCDeclQualifier((Decl::ObjCDeclQualifier)Record.readInt()); MD->setRelatedResultType(Record.readInt()); MD->setReturnType(Record.readType()); MD->setReturnTypeSourceInfo(readTypeSourceInfo()); MD->DeclEndLoc = readSourceLocation(); unsigned NumParams = Record.readInt(); SmallVector Params; Params.reserve(NumParams); for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); MD->setSelLocsKind((SelectorLocationsKind)Record.readInt()); unsigned NumStoredSelLocs = Record.readInt(); SmallVector SelLocs; SelLocs.reserve(NumStoredSelLocs); for (unsigned i = 0; i != NumStoredSelLocs; ++i) SelLocs.push_back(readSourceLocation()); MD->setParamsAndSelLocs(Reader.getContext(), Params, SelLocs); } void ASTDeclReader::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) { VisitTypedefNameDecl(D); D->Variance = Record.readInt(); D->Index = Record.readInt(); D->VarianceLoc = readSourceLocation(); D->ColonLoc = readSourceLocation(); } void ASTDeclReader::VisitObjCContainerDecl(ObjCContainerDecl *CD) { VisitNamedDecl(CD); CD->setAtStartLoc(readSourceLocation()); CD->setAtEndRange(readSourceRange()); } ObjCTypeParamList *ASTDeclReader::ReadObjCTypeParamList() { unsigned numParams = Record.readInt(); if (numParams == 0) return nullptr; SmallVector typeParams; typeParams.reserve(numParams); for (unsigned i = 0; i != numParams; ++i) { auto *typeParam = readDeclAs(); if (!typeParam) return nullptr; typeParams.push_back(typeParam); } SourceLocation lAngleLoc = readSourceLocation(); SourceLocation rAngleLoc = readSourceLocation(); return ObjCTypeParamList::create(Reader.getContext(), lAngleLoc, typeParams, rAngleLoc); } void ASTDeclReader::ReadObjCDefinitionData( struct ObjCInterfaceDecl::DefinitionData &Data) { // Read the superclass. Data.SuperClassTInfo = readTypeSourceInfo(); Data.EndLoc = readSourceLocation(); Data.HasDesignatedInitializers = Record.readInt(); Data.ODRHash = Record.readInt(); Data.HasODRHash = true; // Read the directly referenced protocols and their SourceLocations. unsigned NumProtocols = Record.readInt(); SmallVector Protocols; Protocols.reserve(NumProtocols); for (unsigned I = 0; I != NumProtocols; ++I) Protocols.push_back(readDeclAs()); SmallVector ProtoLocs; ProtoLocs.reserve(NumProtocols); for (unsigned I = 0; I != NumProtocols; ++I) ProtoLocs.push_back(readSourceLocation()); Data.ReferencedProtocols.set(Protocols.data(), NumProtocols, ProtoLocs.data(), Reader.getContext()); // Read the transitive closure of protocols referenced by this class. 
NumProtocols = Record.readInt(); Protocols.clear(); Protocols.reserve(NumProtocols); for (unsigned I = 0; I != NumProtocols; ++I) Protocols.push_back(readDeclAs()); Data.AllReferencedProtocols.set(Protocols.data(), NumProtocols, Reader.getContext()); } void ASTDeclReader::MergeDefinitionData(ObjCInterfaceDecl *D, struct ObjCInterfaceDecl::DefinitionData &&NewDD) { struct ObjCInterfaceDecl::DefinitionData &DD = D->data(); if (DD.Definition == NewDD.Definition) return; Reader.MergedDeclContexts.insert( std::make_pair(NewDD.Definition, DD.Definition)); Reader.mergeDefinitionVisibility(DD.Definition, NewDD.Definition); if (D->getODRHash() != NewDD.ODRHash) Reader.PendingObjCInterfaceOdrMergeFailures[DD.Definition].push_back( {NewDD.Definition, &NewDD}); } void ASTDeclReader::VisitObjCInterfaceDecl(ObjCInterfaceDecl *ID) { RedeclarableResult Redecl = VisitRedeclarable(ID); VisitObjCContainerDecl(ID); DeferredTypeID = Record.getGlobalTypeID(Record.readInt()); mergeRedeclarable(ID, Redecl); ID->TypeParamList = ReadObjCTypeParamList(); if (Record.readInt()) { // Read the definition. ID->allocateDefinitionData(); ReadObjCDefinitionData(ID->data()); ObjCInterfaceDecl *Canon = ID->getCanonicalDecl(); if (Canon->Data.getPointer()) { // If we already have a definition, keep the definition invariant and // merge the data. MergeDefinitionData(Canon, std::move(ID->data())); ID->Data = Canon->Data; } else { // Set the definition data of the canonical declaration, so other // redeclarations will see it. ID->getCanonicalDecl()->Data = ID->Data; // We will rebuild this list lazily. ID->setIvarList(nullptr); } // Note that we have deserialized a definition. Reader.PendingDefinitions.insert(ID); // Note that we've loaded this Objective-C class. Reader.ObjCClassesLoaded.push_back(ID); } else { ID->Data = ID->getCanonicalDecl()->Data; } } void ASTDeclReader::VisitObjCIvarDecl(ObjCIvarDecl *IVD) { VisitFieldDecl(IVD); IVD->setAccessControl((ObjCIvarDecl::AccessControl)Record.readInt()); // This field will be built lazily. IVD->setNextIvar(nullptr); bool synth = Record.readInt(); IVD->setSynthesize(synth); // Check ivar redeclaration. if (IVD->isInvalidDecl()) return; // Don't check ObjCInterfaceDecl as interfaces are named and mismatches can be // detected in VisitObjCInterfaceDecl. Here we are looking for redeclarations // in extensions. if (isa(IVD->getDeclContext())) return; ObjCInterfaceDecl *CanonIntf = IVD->getContainingInterface()->getCanonicalDecl(); IdentifierInfo *II = IVD->getIdentifier(); ObjCIvarDecl *PrevIvar = CanonIntf->lookupInstanceVariable(II); if (PrevIvar && PrevIvar != IVD) { auto *ParentExt = dyn_cast(IVD->getDeclContext()); auto *PrevParentExt = dyn_cast(PrevIvar->getDeclContext()); if (ParentExt && PrevParentExt) { // Postpone diagnostic as we should merge identical extensions from // different modules. Reader .PendingObjCExtensionIvarRedeclarations[std::make_pair(ParentExt, PrevParentExt)] .push_back(std::make_pair(IVD, PrevIvar)); } else if (ParentExt || PrevParentExt) { // Duplicate ivars in extension + implementation are never compatible. // Compatibility of implementation + implementation should be handled in // VisitObjCImplementationDecl. 
Reader.Diag(IVD->getLocation(), diag::err_duplicate_ivar_declaration) << II; Reader.Diag(PrevIvar->getLocation(), diag::note_previous_definition); } } } void ASTDeclReader::ReadObjCDefinitionData( struct ObjCProtocolDecl::DefinitionData &Data) { unsigned NumProtoRefs = Record.readInt(); SmallVector ProtoRefs; ProtoRefs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoRefs.push_back(readDeclAs()); SmallVector ProtoLocs; ProtoLocs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoLocs.push_back(readSourceLocation()); Data.ReferencedProtocols.set(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(), Reader.getContext()); Data.ODRHash = Record.readInt(); Data.HasODRHash = true; } void ASTDeclReader::MergeDefinitionData( ObjCProtocolDecl *D, struct ObjCProtocolDecl::DefinitionData &&NewDD) { struct ObjCProtocolDecl::DefinitionData &DD = D->data(); if (DD.Definition == NewDD.Definition) return; Reader.MergedDeclContexts.insert( std::make_pair(NewDD.Definition, DD.Definition)); Reader.mergeDefinitionVisibility(DD.Definition, NewDD.Definition); if (D->getODRHash() != NewDD.ODRHash) Reader.PendingObjCProtocolOdrMergeFailures[DD.Definition].push_back( {NewDD.Definition, &NewDD}); } void ASTDeclReader::VisitObjCProtocolDecl(ObjCProtocolDecl *PD) { RedeclarableResult Redecl = VisitRedeclarable(PD); VisitObjCContainerDecl(PD); mergeRedeclarable(PD, Redecl); if (Record.readInt()) { // Read the definition. PD->allocateDefinitionData(); ReadObjCDefinitionData(PD->data()); ObjCProtocolDecl *Canon = PD->getCanonicalDecl(); if (Canon->Data.getPointer()) { // If we already have a definition, keep the definition invariant and // merge the data. MergeDefinitionData(Canon, std::move(PD->data())); PD->Data = Canon->Data; } else { // Set the definition data of the canonical declaration, so other // redeclarations will see it. PD->getCanonicalDecl()->Data = PD->Data; } // Note that we have deserialized a definition. Reader.PendingDefinitions.insert(PD); } else { PD->Data = PD->getCanonicalDecl()->Data; } } void ASTDeclReader::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *FD) { VisitFieldDecl(FD); } void ASTDeclReader::VisitObjCCategoryDecl(ObjCCategoryDecl *CD) { VisitObjCContainerDecl(CD); CD->setCategoryNameLoc(readSourceLocation()); CD->setIvarLBraceLoc(readSourceLocation()); CD->setIvarRBraceLoc(readSourceLocation()); // Note that this category has been deserialized. We do this before // deserializing the interface declaration, so that it will consider this /// category. Reader.CategoriesDeserialized.insert(CD); CD->ClassInterface = readDeclAs(); CD->TypeParamList = ReadObjCTypeParamList(); unsigned NumProtoRefs = Record.readInt(); SmallVector ProtoRefs; ProtoRefs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoRefs.push_back(readDeclAs()); SmallVector ProtoLocs; ProtoLocs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoLocs.push_back(readSourceLocation()); CD->setProtocolList(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(), Reader.getContext()); // Protocols in the class extension belong to the class. 
if (NumProtoRefs > 0 && CD->ClassInterface && CD->IsClassExtension()) CD->ClassInterface->mergeClassExtensionProtocolList( (ObjCProtocolDecl *const *)ProtoRefs.data(), NumProtoRefs, Reader.getContext()); } void ASTDeclReader::VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *CAD) { VisitNamedDecl(CAD); CAD->setClassInterface(readDeclAs()); } void ASTDeclReader::VisitObjCPropertyDecl(ObjCPropertyDecl *D) { VisitNamedDecl(D); D->setAtLoc(readSourceLocation()); D->setLParenLoc(readSourceLocation()); QualType T = Record.readType(); TypeSourceInfo *TSI = readTypeSourceInfo(); D->setType(T, TSI); D->setPropertyAttributes((ObjCPropertyAttribute::Kind)Record.readInt()); D->setPropertyAttributesAsWritten( (ObjCPropertyAttribute::Kind)Record.readInt()); D->setPropertyImplementation( (ObjCPropertyDecl::PropertyControl)Record.readInt()); DeclarationName GetterName = Record.readDeclarationName(); SourceLocation GetterLoc = readSourceLocation(); D->setGetterName(GetterName.getObjCSelector(), GetterLoc); DeclarationName SetterName = Record.readDeclarationName(); SourceLocation SetterLoc = readSourceLocation(); D->setSetterName(SetterName.getObjCSelector(), SetterLoc); D->setGetterMethodDecl(readDeclAs()); D->setSetterMethodDecl(readDeclAs()); D->setPropertyIvarDecl(readDeclAs()); } void ASTDeclReader::VisitObjCImplDecl(ObjCImplDecl *D) { VisitObjCContainerDecl(D); D->setClassInterface(readDeclAs()); } void ASTDeclReader::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) { VisitObjCImplDecl(D); D->CategoryNameLoc = readSourceLocation(); } void ASTDeclReader::VisitObjCImplementationDecl(ObjCImplementationDecl *D) { VisitObjCImplDecl(D); D->setSuperClass(readDeclAs()); D->SuperLoc = readSourceLocation(); D->setIvarLBraceLoc(readSourceLocation()); D->setIvarRBraceLoc(readSourceLocation()); D->setHasNonZeroConstructors(Record.readInt()); D->setHasDestructors(Record.readInt()); D->NumIvarInitializers = Record.readInt(); if (D->NumIvarInitializers) D->IvarInitializers = ReadGlobalOffset(); } void ASTDeclReader::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) { VisitDecl(D); D->setAtLoc(readSourceLocation()); D->setPropertyDecl(readDeclAs()); D->PropertyIvarDecl = readDeclAs(); D->IvarLoc = readSourceLocation(); D->setGetterMethodDecl(readDeclAs()); D->setSetterMethodDecl(readDeclAs()); D->setGetterCXXConstructor(Record.readExpr()); D->setSetterCXXAssignment(Record.readExpr()); } void ASTDeclReader::VisitFieldDecl(FieldDecl *FD) { VisitDeclaratorDecl(FD); FD->Mutable = Record.readInt(); unsigned Bits = Record.readInt(); FD->StorageKind = Bits >> 1; if (FD->StorageKind == FieldDecl::ISK_CapturedVLAType) FD->CapturedVLAType = cast(Record.readType().getTypePtr()); else if (Bits & 1) FD->setBitWidth(Record.readExpr()); if (!FD->getDeclName()) { if (auto *Tmpl = readDeclAs()) Reader.getContext().setInstantiatedFromUnnamedFieldDecl(FD, Tmpl); } mergeMergeable(FD); } void ASTDeclReader::VisitMSPropertyDecl(MSPropertyDecl *PD) { VisitDeclaratorDecl(PD); PD->GetterId = Record.readIdentifier(); PD->SetterId = Record.readIdentifier(); } void ASTDeclReader::VisitMSGuidDecl(MSGuidDecl *D) { VisitValueDecl(D); D->PartVal.Part1 = Record.readInt(); D->PartVal.Part2 = Record.readInt(); D->PartVal.Part3 = Record.readInt(); for (auto &C : D->PartVal.Part4And5) C = Record.readInt(); // Add this GUID to the AST context's lookup structure, and merge if needed. 
if (MSGuidDecl *Existing = Reader.getContext().MSGuidDecls.GetOrInsertNode(D)) Reader.getContext().setPrimaryMergedDecl(D, Existing->getCanonicalDecl()); } void ASTDeclReader::VisitUnnamedGlobalConstantDecl( UnnamedGlobalConstantDecl *D) { VisitValueDecl(D); D->Value = Record.readAPValue(); // Add this to the AST context's lookup structure, and merge if needed. if (UnnamedGlobalConstantDecl *Existing = Reader.getContext().UnnamedGlobalConstantDecls.GetOrInsertNode(D)) Reader.getContext().setPrimaryMergedDecl(D, Existing->getCanonicalDecl()); } void ASTDeclReader::VisitTemplateParamObjectDecl(TemplateParamObjectDecl *D) { VisitValueDecl(D); D->Value = Record.readAPValue(); // Add this template parameter object to the AST context's lookup structure, // and merge if needed. if (TemplateParamObjectDecl *Existing = Reader.getContext().TemplateParamObjectDecls.GetOrInsertNode(D)) Reader.getContext().setPrimaryMergedDecl(D, Existing->getCanonicalDecl()); } void ASTDeclReader::VisitIndirectFieldDecl(IndirectFieldDecl *FD) { VisitValueDecl(FD); FD->ChainingSize = Record.readInt(); assert(FD->ChainingSize >= 2 && "Anonymous chaining must be >= 2"); FD->Chaining = new (Reader.getContext())NamedDecl*[FD->ChainingSize]; for (unsigned I = 0; I != FD->ChainingSize; ++I) FD->Chaining[I] = readDeclAs(); mergeMergeable(FD); } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarDeclImpl(VarDecl *VD) { RedeclarableResult Redecl = VisitRedeclarable(VD); VisitDeclaratorDecl(VD); BitsUnpacker VarDeclBits(Record.readInt()); auto VarLinkage = Linkage(VarDeclBits.getNextBits(/*Width=*/3)); bool DefGeneratedInModule = VarDeclBits.getNextBit(); VD->VarDeclBits.SClass = (StorageClass)VarDeclBits.getNextBits(/*Width=*/3); VD->VarDeclBits.TSCSpec = VarDeclBits.getNextBits(/*Width=*/2); VD->VarDeclBits.InitStyle = VarDeclBits.getNextBits(/*Width=*/2); VD->VarDeclBits.ARCPseudoStrong = VarDeclBits.getNextBit(); bool HasDeducedType = false; if (!isa(VD)) { VD->NonParmVarDeclBits.IsThisDeclarationADemotedDefinition = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.ExceptionVar = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.NRVOVariable = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.CXXForRangeDecl = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.IsInline = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.IsInlineSpecified = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.IsConstexpr = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.IsInitCapture = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.PreviousDeclInSameBlockScope = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.EscapingByref = VarDeclBits.getNextBit(); HasDeducedType = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.ImplicitParamKind = VarDeclBits.getNextBits(/*Width*/ 3); VD->NonParmVarDeclBits.ObjCForDecl = VarDeclBits.getNextBit(); } // If this variable has a deduced type, defer reading that type until we are // done deserializing this variable, because the type might refer back to the // variable. if (HasDeducedType) Reader.PendingDeducedVarTypes.push_back({VD, DeferredTypeID}); else VD->setType(Reader.GetType(DeferredTypeID)); DeferredTypeID = 0; VD->setCachedLinkage(VarLinkage); // Reconstruct the one piece of the IdentifierNamespace that we need. 
if (VD->getStorageClass() == SC_Extern && VarLinkage != Linkage::None && VD->getLexicalDeclContext()->isFunctionOrMethod()) VD->setLocalExternDecl(); if (DefGeneratedInModule) { Reader.DefinitionSource[VD] = Loc.F->Kind == ModuleKind::MK_MainFile || Reader.getContext().getLangOpts().BuildingPCHWithObjectFile; } if (VD->hasAttr()) { Expr *CopyExpr = Record.readExpr(); if (CopyExpr) Reader.getContext().setBlockVarCopyInit(VD, CopyExpr, Record.readInt()); } enum VarKind { VarNotTemplate = 0, VarTemplate, StaticDataMemberSpecialization }; switch ((VarKind)Record.readInt()) { case VarNotTemplate: // Only true variables (not parameters or implicit parameters) can be // merged; the other kinds are not really redeclarable at all. if (!isa(VD) && !isa(VD) && !isa(VD)) mergeRedeclarable(VD, Redecl); break; case VarTemplate: // Merged when we merge the template. VD->setDescribedVarTemplate(readDeclAs()); break; case StaticDataMemberSpecialization: { // HasMemberSpecializationInfo. auto *Tmpl = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); Reader.getContext().setInstantiatedFromStaticDataMember(VD, Tmpl, TSK,POI); mergeRedeclarable(VD, Redecl); break; } } return Redecl; } void ASTDeclReader::ReadVarDeclInit(VarDecl *VD) { if (uint64_t Val = Record.readInt()) { EvaluatedStmt *Eval = VD->ensureEvaluatedStmt(); Eval->HasConstantInitialization = (Val & 2) != 0; Eval->HasConstantDestruction = (Val & 4) != 0; Eval->WasEvaluated = (Val & 8) != 0; if (Eval->WasEvaluated) { Eval->Evaluated = Record.readAPValue(); if (Eval->Evaluated.needsCleanup()) Reader.getContext().addDestruction(&Eval->Evaluated); } // Store the offset of the initializer. Don't deserialize it yet: it might // not be needed, and might refer back to the variable, for example if it // contains a lambda. Eval->Value = GetCurrentCursorOffset(); } } void ASTDeclReader::VisitImplicitParamDecl(ImplicitParamDecl *PD) { VisitVarDecl(PD); } void ASTDeclReader::VisitParmVarDecl(ParmVarDecl *PD) { VisitVarDecl(PD); unsigned scopeIndex = Record.readInt(); BitsUnpacker ParmVarDeclBits(Record.readInt()); unsigned isObjCMethodParam = ParmVarDeclBits.getNextBit(); unsigned scopeDepth = ParmVarDeclBits.getNextBits(/*Width=*/7); unsigned declQualifier = ParmVarDeclBits.getNextBits(/*Width=*/7); if (isObjCMethodParam) { assert(scopeDepth == 0); PD->setObjCMethodScopeInfo(scopeIndex); PD->ParmVarDeclBits.ScopeDepthOrObjCQuals = declQualifier; } else { PD->setScopeInfo(scopeDepth, scopeIndex); } PD->ParmVarDeclBits.IsKNRPromoted = ParmVarDeclBits.getNextBit(); PD->ParmVarDeclBits.HasInheritedDefaultArg = ParmVarDeclBits.getNextBit(); if (ParmVarDeclBits.getNextBit()) // hasUninstantiatedDefaultArg. PD->setUninstantiatedDefaultArg(Record.readExpr()); if (ParmVarDeclBits.getNextBit()) // Valid explicit object parameter PD->ExplicitObjectParameterIntroducerLoc = Record.readSourceLocation(); // FIXME: If this is a redeclaration of a function from another module, handle // inheritance of default arguments. 
} void ASTDeclReader::VisitDecompositionDecl(DecompositionDecl *DD) { VisitVarDecl(DD); auto **BDs = DD->getTrailingObjects(); for (unsigned I = 0; I != DD->NumBindings; ++I) { BDs[I] = readDeclAs(); BDs[I]->setDecomposedDecl(DD); } } void ASTDeclReader::VisitBindingDecl(BindingDecl *BD) { VisitValueDecl(BD); BD->Binding = Record.readExpr(); } void ASTDeclReader::VisitFileScopeAsmDecl(FileScopeAsmDecl *AD) { VisitDecl(AD); AD->setAsmString(cast(Record.readExpr())); AD->setRParenLoc(readSourceLocation()); } void ASTDeclReader::VisitTopLevelStmtDecl(TopLevelStmtDecl *D) { VisitDecl(D); D->Statement = Record.readStmt(); } void ASTDeclReader::VisitBlockDecl(BlockDecl *BD) { VisitDecl(BD); BD->setBody(cast_or_null(Record.readStmt())); BD->setSignatureAsWritten(readTypeSourceInfo()); unsigned NumParams = Record.readInt(); SmallVector Params; Params.reserve(NumParams); for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); BD->setParams(Params); BD->setIsVariadic(Record.readInt()); BD->setBlockMissingReturnType(Record.readInt()); BD->setIsConversionFromLambda(Record.readInt()); BD->setDoesNotEscape(Record.readInt()); BD->setCanAvoidCopyToHeap(Record.readInt()); bool capturesCXXThis = Record.readInt(); unsigned numCaptures = Record.readInt(); SmallVector captures; captures.reserve(numCaptures); for (unsigned i = 0; i != numCaptures; ++i) { auto *decl = readDeclAs(); unsigned flags = Record.readInt(); bool byRef = (flags & 1); bool nested = (flags & 2); Expr *copyExpr = ((flags & 4) ? Record.readExpr() : nullptr); captures.push_back(BlockDecl::Capture(decl, byRef, nested, copyExpr)); } BD->setCaptures(Reader.getContext(), captures, capturesCXXThis); } void ASTDeclReader::VisitCapturedDecl(CapturedDecl *CD) { VisitDecl(CD); unsigned ContextParamPos = Record.readInt(); CD->setNothrow(Record.readInt() != 0); // Body is set by VisitCapturedStmt. for (unsigned I = 0; I < CD->NumParams; ++I) { if (I != ContextParamPos) CD->setParam(I, readDeclAs()); else CD->setContextParam(I, readDeclAs()); } } void ASTDeclReader::VisitLinkageSpecDecl(LinkageSpecDecl *D) { VisitDecl(D); D->setLanguage(static_cast(Record.readInt())); D->setExternLoc(readSourceLocation()); D->setRBraceLoc(readSourceLocation()); } void ASTDeclReader::VisitExportDecl(ExportDecl *D) { VisitDecl(D); D->RBraceLoc = readSourceLocation(); } void ASTDeclReader::VisitLabelDecl(LabelDecl *D) { VisitNamedDecl(D); D->setLocStart(readSourceLocation()); } void ASTDeclReader::VisitNamespaceDecl(NamespaceDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); VisitNamedDecl(D); BitsUnpacker NamespaceDeclBits(Record.readInt()); D->setInline(NamespaceDeclBits.getNextBit()); D->setNested(NamespaceDeclBits.getNextBit()); D->LocStart = readSourceLocation(); D->RBraceLoc = readSourceLocation(); // Defer loading the anonymous namespace until we've finished merging // this namespace; loading it might load a later declaration of the // same namespace, and we have an invariant that older declarations // get merged before newer ones try to merge. GlobalDeclID AnonNamespace = 0; if (Redecl.getFirstID() == ThisDeclID) { AnonNamespace = readDeclID(); } else { // Link this namespace back to the first declaration, which has already // been deserialized. D->AnonOrFirstNamespaceAndFlags.setPointer(D->getFirstDecl()); } mergeRedeclarable(D, Redecl); if (AnonNamespace) { // Each module has its own anonymous namespace, which is disjoint from // any other module's anonymous namespaces, so don't attach the anonymous // namespace at all. 
auto *Anon = cast(Reader.GetDecl(AnonNamespace)); if (!Record.isModule()) D->setAnonymousNamespace(Anon); } } void ASTDeclReader::VisitHLSLBufferDecl(HLSLBufferDecl *D) { VisitNamedDecl(D); VisitDeclContext(D); D->IsCBuffer = Record.readBool(); D->KwLoc = readSourceLocation(); D->LBraceLoc = readSourceLocation(); D->RBraceLoc = readSourceLocation(); } void ASTDeclReader::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); VisitNamedDecl(D); D->NamespaceLoc = readSourceLocation(); D->IdentLoc = readSourceLocation(); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->Namespace = readDeclAs(); mergeRedeclarable(D, Redecl); } void ASTDeclReader::VisitUsingDecl(UsingDecl *D) { VisitNamedDecl(D); D->setUsingLoc(readSourceLocation()); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->DNLoc = Record.readDeclarationNameLoc(D->getDeclName()); D->FirstUsingShadow.setPointer(readDeclAs()); D->setTypename(Record.readInt()); if (auto *Pattern = readDeclAs()) Reader.getContext().setInstantiatedFromUsingDecl(D, Pattern); mergeMergeable(D); } void ASTDeclReader::VisitUsingEnumDecl(UsingEnumDecl *D) { VisitNamedDecl(D); D->setUsingLoc(readSourceLocation()); D->setEnumLoc(readSourceLocation()); D->setEnumType(Record.readTypeSourceInfo()); D->FirstUsingShadow.setPointer(readDeclAs()); if (auto *Pattern = readDeclAs()) Reader.getContext().setInstantiatedFromUsingEnumDecl(D, Pattern); mergeMergeable(D); } void ASTDeclReader::VisitUsingPackDecl(UsingPackDecl *D) { VisitNamedDecl(D); D->InstantiatedFrom = readDeclAs(); auto **Expansions = D->getTrailingObjects(); for (unsigned I = 0; I != D->NumExpansions; ++I) Expansions[I] = readDeclAs(); mergeMergeable(D); } void ASTDeclReader::VisitUsingShadowDecl(UsingShadowDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); VisitNamedDecl(D); D->Underlying = readDeclAs(); D->IdentifierNamespace = Record.readInt(); D->UsingOrNextShadow = readDeclAs(); auto *Pattern = readDeclAs(); if (Pattern) Reader.getContext().setInstantiatedFromUsingShadowDecl(D, Pattern); mergeRedeclarable(D, Redecl); } void ASTDeclReader::VisitConstructorUsingShadowDecl( ConstructorUsingShadowDecl *D) { VisitUsingShadowDecl(D); D->NominatedBaseClassShadowDecl = readDeclAs(); D->ConstructedBaseClassShadowDecl = readDeclAs(); D->IsVirtual = Record.readInt(); } void ASTDeclReader::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) { VisitNamedDecl(D); D->UsingLoc = readSourceLocation(); D->NamespaceLoc = readSourceLocation(); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->NominatedNamespace = readDeclAs(); D->CommonAncestor = readDeclAs(); } void ASTDeclReader::VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D) { VisitValueDecl(D); D->setUsingLoc(readSourceLocation()); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->DNLoc = Record.readDeclarationNameLoc(D->getDeclName()); D->EllipsisLoc = readSourceLocation(); mergeMergeable(D); } void ASTDeclReader::VisitUnresolvedUsingTypenameDecl( UnresolvedUsingTypenameDecl *D) { VisitTypeDecl(D); D->TypenameLocation = readSourceLocation(); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->EllipsisLoc = readSourceLocation(); mergeMergeable(D); } void ASTDeclReader::VisitUnresolvedUsingIfExistsDecl( UnresolvedUsingIfExistsDecl *D) { VisitNamedDecl(D); } void ASTDeclReader::ReadCXXDefinitionData( struct CXXRecordDecl::DefinitionData &Data, const CXXRecordDecl *D, Decl *LambdaContext, unsigned IndexInLambdaContext) { BitsUnpacker CXXRecordDeclBits = Record.readInt(); + 
  bool ShouldSkipCheckingODR = CXXRecordDeclBits.getNextBit();
+
#define FIELD(Name, Width, Merge)                                              \
  if (!CXXRecordDeclBits.canGetNextNBits(Width))                               \
    CXXRecordDeclBits.updateValue(Record.readInt());                           \
  Data.Name = CXXRecordDeclBits.getNextBits(Width);

#include "clang/AST/CXXRecordDeclDefinitionBits.def"
#undef FIELD

  // We only perform ODR checks for decls not in GMF.
-  if (!shouldSkipCheckingODR(D)) {
+  if (!ShouldSkipCheckingODR) {
    // Note: the caller has deserialized the IsLambda bit already.
    Data.ODRHash = Record.readInt();
    Data.HasODRHash = true;
  }

  if (Record.readInt()) {
    Reader.DefinitionSource[D] =
        Loc.F->Kind == ModuleKind::MK_MainFile ||
        Reader.getContext().getLangOpts().BuildingPCHWithObjectFile;
  }

  Record.readUnresolvedSet(Data.Conversions);
  Data.ComputedVisibleConversions = Record.readInt();
  if (Data.ComputedVisibleConversions)
    Record.readUnresolvedSet(Data.VisibleConversions);
  assert(Data.Definition && "Data.Definition should be already set!");

  if (!Data.IsLambda) {
    assert(!LambdaContext && !IndexInLambdaContext &&
           "given lambda context for non-lambda");

    Data.NumBases = Record.readInt();
    if (Data.NumBases)
      Data.Bases = ReadGlobalOffset();

    Data.NumVBases = Record.readInt();
    if (Data.NumVBases)
      Data.VBases = ReadGlobalOffset();

    Data.FirstFriend = readDeclID();
  } else {
    using Capture = LambdaCapture;

    auto &Lambda = static_cast<CXXRecordDecl::LambdaDefinitionData &>(Data);

    BitsUnpacker LambdaBits(Record.readInt());
    Lambda.DependencyKind = LambdaBits.getNextBits(/*Width=*/2);
    Lambda.IsGenericLambda = LambdaBits.getNextBit();
    Lambda.CaptureDefault = LambdaBits.getNextBits(/*Width=*/2);
    Lambda.NumCaptures = LambdaBits.getNextBits(/*Width=*/15);
    Lambda.HasKnownInternalLinkage = LambdaBits.getNextBit();

    Lambda.NumExplicitCaptures = Record.readInt();
    Lambda.ManglingNumber = Record.readInt();
    if (unsigned DeviceManglingNumber = Record.readInt())
      Reader.getContext().DeviceLambdaManglingNumbers[D] = DeviceManglingNumber;
    Lambda.IndexInContext = IndexInLambdaContext;
    Lambda.ContextDecl = LambdaContext;
    Capture *ToCapture = nullptr;
    if (Lambda.NumCaptures) {
      ToCapture = (Capture *)Reader.getContext().Allocate(sizeof(Capture) *
                                                          Lambda.NumCaptures);
      Lambda.AddCaptureList(Reader.getContext(), ToCapture);
    }
    Lambda.MethodTyInfo = readTypeSourceInfo();
    for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
      SourceLocation Loc = readSourceLocation();
      BitsUnpacker CaptureBits(Record.readInt());
      bool IsImplicit = CaptureBits.getNextBit();
      auto Kind =
          static_cast<LambdaCaptureKind>(CaptureBits.getNextBits(/*Width=*/3));
      switch (Kind) {
      case LCK_StarThis:
      case LCK_This:
      case LCK_VLAType:
        new (ToCapture)
            Capture(Loc, IsImplicit, Kind, nullptr, SourceLocation());
        ToCapture++;
        break;
      case LCK_ByCopy:
      case LCK_ByRef:
        auto *Var = readDeclAs<ValueDecl>();
        SourceLocation EllipsisLoc = readSourceLocation();
        new (ToCapture) Capture(Loc, IsImplicit, Kind, Var, EllipsisLoc);
        ToCapture++;
        break;
      }
    }
  }
}

void ASTDeclReader::MergeDefinitionData(
    CXXRecordDecl *D, struct CXXRecordDecl::DefinitionData &&MergeDD) {
  assert(D->DefinitionData &&
         "merging class definition into non-definition");
  auto &DD = *D->DefinitionData;

  if (DD.Definition != MergeDD.Definition) {
    // Track that we merged the definitions.
    Reader.MergedDeclContexts.insert(std::make_pair(MergeDD.Definition,
                                                    DD.Definition));
    Reader.PendingDefinitions.erase(MergeDD.Definition);
    MergeDD.Definition->setCompleteDefinition(false);
    Reader.mergeDefinitionVisibility(DD.Definition, MergeDD.Definition);
    assert(!Reader.Lookups.contains(MergeDD.Definition) &&
           "already loaded pending lookups for merged definition");
  }

  auto PFDI = Reader.PendingFakeDefinitionData.find(&DD);
  if (PFDI != Reader.PendingFakeDefinitionData.end() &&
      PFDI->second == ASTReader::PendingFakeDefinitionKind::Fake) {
    // We faked up this definition data because we found a class for which we'd
    // not yet loaded the definition. Replace it with the real thing now.
    assert(!DD.IsLambda && !MergeDD.IsLambda && "faked up lambda definition?");
    PFDI->second = ASTReader::PendingFakeDefinitionKind::FakeLoaded;

    // Don't change which declaration is the definition; that is required
    // to be invariant once we select it.
    auto *Def = DD.Definition;
    DD = std::move(MergeDD);
    DD.Definition = Def;
    return;
  }

  bool DetectedOdrViolation = false;

#define FIELD(Name, Width, Merge) Merge(Name)
#define MERGE_OR(Field) DD.Field |= MergeDD.Field;
#define NO_MERGE(Field)                                                        \
  DetectedOdrViolation |= DD.Field != MergeDD.Field;                           \
  MERGE_OR(Field)
#include "clang/AST/CXXRecordDeclDefinitionBits.def"
NO_MERGE(IsLambda)
#undef NO_MERGE
#undef MERGE_OR

  if (DD.NumBases != MergeDD.NumBases || DD.NumVBases != MergeDD.NumVBases)
    DetectedOdrViolation = true;
  // FIXME: Issue a diagnostic if the base classes don't match when we come
  // to lazily load them.

  // FIXME: Issue a diagnostic if the list of conversion functions doesn't
  // match when we come to lazily load them.
  if (MergeDD.ComputedVisibleConversions && !DD.ComputedVisibleConversions) {
    DD.VisibleConversions = std::move(MergeDD.VisibleConversions);
    DD.ComputedVisibleConversions = true;
  }

  // FIXME: Issue a diagnostic if FirstFriend doesn't match when we come to
  // lazily load it.

  if (DD.IsLambda) {
    auto &Lambda1 = static_cast<CXXRecordDecl::LambdaDefinitionData &>(DD);
    auto &Lambda2 = static_cast<CXXRecordDecl::LambdaDefinitionData &>(MergeDD);
    DetectedOdrViolation |= Lambda1.DependencyKind != Lambda2.DependencyKind;
    DetectedOdrViolation |= Lambda1.IsGenericLambda != Lambda2.IsGenericLambda;
    DetectedOdrViolation |= Lambda1.CaptureDefault != Lambda2.CaptureDefault;
    DetectedOdrViolation |= Lambda1.NumCaptures != Lambda2.NumCaptures;
    DetectedOdrViolation |=
        Lambda1.NumExplicitCaptures != Lambda2.NumExplicitCaptures;
    DetectedOdrViolation |=
        Lambda1.HasKnownInternalLinkage != Lambda2.HasKnownInternalLinkage;
    DetectedOdrViolation |= Lambda1.ManglingNumber != Lambda2.ManglingNumber;

    if (Lambda1.NumCaptures && Lambda1.NumCaptures == Lambda2.NumCaptures) {
      for (unsigned I = 0, N = Lambda1.NumCaptures; I != N; ++I) {
        LambdaCapture &Cap1 = Lambda1.Captures.front()[I];
        LambdaCapture &Cap2 = Lambda2.Captures.front()[I];
        DetectedOdrViolation |= Cap1.getCaptureKind() != Cap2.getCaptureKind();
      }
      Lambda1.AddCaptureList(Reader.getContext(), Lambda2.Captures.front());
    }
  }

  // We don't want to check ODR for decls in the global module fragment.
  if (shouldSkipCheckingODR(MergeDD.Definition))
    return;

  if (D->getODRHash() != MergeDD.ODRHash) {
    DetectedOdrViolation = true;
  }

  if (DetectedOdrViolation)
    Reader.PendingOdrMergeFailures[DD.Definition].push_back(
        {MergeDD.Definition, &MergeDD});
}

void ASTDeclReader::ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update,
                                            Decl *LambdaContext,
                                            unsigned IndexInLambdaContext) {
  struct CXXRecordDecl::DefinitionData *DD;
  ASTContext &C = Reader.getContext();

  // Determine whether this is a lambda closure type, so that we can
  // allocate the appropriate DefinitionData structure.
  bool IsLambda = Record.readInt();
  assert(!(IsLambda && Update) &&
         "lambda definition should not be added by update record");
  if (IsLambda)
    DD = new (C) CXXRecordDecl::LambdaDefinitionData(
        D, nullptr, CXXRecordDecl::LDK_Unknown, false, LCD_None);
  else
    DD = new (C) struct CXXRecordDecl::DefinitionData(D);

  CXXRecordDecl *Canon = D->getCanonicalDecl();
  // Set decl definition data before reading it, so that during deserialization
  // when we read CXXRecordDecl, it already has definition data and we don't
  // set fake one.
  if (!Canon->DefinitionData)
    Canon->DefinitionData = DD;
  D->DefinitionData = Canon->DefinitionData;
  ReadCXXDefinitionData(*DD, D, LambdaContext, IndexInLambdaContext);

  // We might already have a different definition for this record. This can
  // happen either because we're reading an update record, or because we've
  // already done some merging. Either way, just merge into it.
  if (Canon->DefinitionData != DD) {
    MergeDefinitionData(Canon, std::move(*DD));
    return;
  }

  // Mark this declaration as being a definition.
  D->setCompleteDefinition(true);

  // If this is not the first declaration or is an update record, we can have
  // other redeclarations already. Make a note that we need to propagate the
  // DefinitionData pointer onto them.
  if (Update || Canon != D)
    Reader.PendingDefinitions.insert(D);
}

ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitCXXRecordDeclImpl(CXXRecordDecl *D) {
  RedeclarableResult Redecl = VisitRecordDeclImpl(D);

  ASTContext &C = Reader.getContext();

  enum CXXRecKind {
    CXXRecNotTemplate = 0, CXXRecTemplate, CXXRecMemberSpecialization,
    CXXLambda
  };

  Decl *LambdaContext = nullptr;
  unsigned IndexInLambdaContext = 0;

  switch ((CXXRecKind)Record.readInt()) {
  case CXXRecNotTemplate:
    // Merged when we merge the folding set entry in the primary template.
    if (!isa<ClassTemplateSpecializationDecl>(D))
      mergeRedeclarable(D, Redecl);
    break;
  case CXXRecTemplate: {
    // Merged when we merge the template.
    auto *Template = readDeclAs<ClassTemplateDecl>();
    D->TemplateOrInstantiation = Template;
    if (!Template->getTemplatedDecl()) {
      // We've not actually loaded the ClassTemplateDecl yet, because we're
      // currently being loaded as its pattern. Rely on it to set up our
      // TypeForDecl (see VisitClassTemplateDecl).
      //
      // Beware: we do not yet know our canonical declaration, and may still
      // get merged once the surrounding class template has got off the ground.
DeferredTypeID = 0; } break; } case CXXRecMemberSpecialization: { auto *RD = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); MemberSpecializationInfo *MSI = new (C) MemberSpecializationInfo(RD, TSK); MSI->setPointOfInstantiation(POI); D->TemplateOrInstantiation = MSI; mergeRedeclarable(D, Redecl); break; } case CXXLambda: { LambdaContext = readDecl(); if (LambdaContext) IndexInLambdaContext = Record.readInt(); mergeLambda(D, Redecl, LambdaContext, IndexInLambdaContext); break; } } bool WasDefinition = Record.readInt(); if (WasDefinition) ReadCXXRecordDefinition(D, /*Update=*/false, LambdaContext, IndexInLambdaContext); else // Propagate DefinitionData pointer from the canonical declaration. D->DefinitionData = D->getCanonicalDecl()->DefinitionData; // Lazily load the key function to avoid deserializing every method so we can // compute it. if (WasDefinition) { DeclID KeyFn = readDeclID(); if (KeyFn && D->isCompleteDefinition()) // FIXME: This is wrong for the ARM ABI, where some other module may have // made this function no longer be a key function. We need an update // record or similar for that case. C.KeyFunctions[D] = KeyFn; } return Redecl; } void ASTDeclReader::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) { D->setExplicitSpecifier(Record.readExplicitSpec()); D->Ctor = readDeclAs(); VisitFunctionDecl(D); D->setDeductionCandidateKind( static_cast(Record.readInt())); } void ASTDeclReader::VisitCXXMethodDecl(CXXMethodDecl *D) { VisitFunctionDecl(D); unsigned NumOverridenMethods = Record.readInt(); if (D->isCanonicalDecl()) { while (NumOverridenMethods--) { // Avoid invariant checking of CXXMethodDecl::addOverriddenMethod, // MD may be initializing. if (auto *MD = readDeclAs()) Reader.getContext().addOverriddenMethod(D, MD->getCanonicalDecl()); } } else { // We don't care about which declarations this used to override; we get // the relevant information from the canonical declaration. Record.skipInts(NumOverridenMethods); } } void ASTDeclReader::VisitCXXConstructorDecl(CXXConstructorDecl *D) { // We need the inherited constructor information to merge the declaration, // so we have to read it before we call VisitCXXMethodDecl. D->setExplicitSpecifier(Record.readExplicitSpec()); if (D->isInheritingConstructor()) { auto *Shadow = readDeclAs(); auto *Ctor = readDeclAs(); *D->getTrailingObjects() = InheritedConstructor(Shadow, Ctor); } VisitCXXMethodDecl(D); } void ASTDeclReader::VisitCXXDestructorDecl(CXXDestructorDecl *D) { VisitCXXMethodDecl(D); if (auto *OperatorDelete = readDeclAs()) { CXXDestructorDecl *Canon = D->getCanonicalDecl(); auto *ThisArg = Record.readExpr(); // FIXME: Check consistency if we have an old and new operator delete. if (!Canon->OperatorDelete) { Canon->OperatorDelete = OperatorDelete; Canon->OperatorDeleteThisArg = ThisArg; } } } void ASTDeclReader::VisitCXXConversionDecl(CXXConversionDecl *D) { D->setExplicitSpecifier(Record.readExplicitSpec()); VisitCXXMethodDecl(D); } void ASTDeclReader::VisitImportDecl(ImportDecl *D) { VisitDecl(D); D->ImportedModule = readModule(); D->setImportComplete(Record.readInt()); auto *StoredLocs = D->getTrailingObjects(); for (unsigned I = 0, N = Record.back(); I != N; ++I) StoredLocs[I] = readSourceLocation(); Record.skipInts(1); // The number of stored source locations. 
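  // Note on the layout read above: the trailing import locations are followed
  // in the record by their element count, which is why the loop bound comes
  // from Record.back() and the count is skipped only after the locations have
  // been read.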
} void ASTDeclReader::VisitAccessSpecDecl(AccessSpecDecl *D) { VisitDecl(D); D->setColonLoc(readSourceLocation()); } void ASTDeclReader::VisitFriendDecl(FriendDecl *D) { VisitDecl(D); if (Record.readInt()) // hasFriendDecl D->Friend = readDeclAs(); else D->Friend = readTypeSourceInfo(); for (unsigned i = 0; i != D->NumTPLists; ++i) D->getTrailingObjects()[i] = Record.readTemplateParameterList(); D->NextFriend = readDeclID(); D->UnsupportedFriend = (Record.readInt() != 0); D->FriendLoc = readSourceLocation(); } void ASTDeclReader::VisitFriendTemplateDecl(FriendTemplateDecl *D) { VisitDecl(D); unsigned NumParams = Record.readInt(); D->NumParams = NumParams; D->Params = new (Reader.getContext()) TemplateParameterList *[NumParams]; for (unsigned i = 0; i != NumParams; ++i) D->Params[i] = Record.readTemplateParameterList(); if (Record.readInt()) // HasFriendDecl D->Friend = readDeclAs(); else D->Friend = readTypeSourceInfo(); D->FriendLoc = readSourceLocation(); } void ASTDeclReader::VisitTemplateDecl(TemplateDecl *D) { VisitNamedDecl(D); assert(!D->TemplateParams && "TemplateParams already set!"); D->TemplateParams = Record.readTemplateParameterList(); D->init(readDeclAs()); } void ASTDeclReader::VisitConceptDecl(ConceptDecl *D) { VisitTemplateDecl(D); D->ConstraintExpr = Record.readExpr(); mergeMergeable(D); } void ASTDeclReader::VisitImplicitConceptSpecializationDecl( ImplicitConceptSpecializationDecl *D) { // The size of the template list was read during creation of the Decl, so we // don't have to re-read it here. VisitDecl(D); llvm::SmallVector Args; for (unsigned I = 0; I < D->NumTemplateArgs; ++I) Args.push_back(Record.readTemplateArgument(/*Canonicalize=*/true)); D->setTemplateArguments(Args); } void ASTDeclReader::VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D) { } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); // Make sure we've allocated the Common pointer first. We do this before // VisitTemplateDecl so that getCommonPtr() can be used during initialization. RedeclarableTemplateDecl *CanonD = D->getCanonicalDecl(); if (!CanonD->Common) { CanonD->Common = CanonD->newCommon(Reader.getContext()); Reader.PendingDefinitions.insert(CanonD); } D->Common = CanonD->Common; // If this is the first declaration of the template, fill in the information // for the 'common' pointer. if (ThisDeclID == Redecl.getFirstID()) { if (auto *RTD = readDeclAs()) { assert(RTD->getKind() == D->getKind() && "InstantiatedFromMemberTemplate kind mismatch"); D->setInstantiatedFromMemberTemplate(RTD); if (Record.readInt()) D->setMemberSpecialization(); } } VisitTemplateDecl(D); D->IdentifierNamespace = Record.readInt(); return Redecl; } void ASTDeclReader::VisitClassTemplateDecl(ClassTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); mergeRedeclarableTemplate(D, Redecl); if (ThisDeclID == Redecl.getFirstID()) { // This ClassTemplateDecl owns a CommonPtr; read it to keep track of all of // the specializations. SmallVector SpecIDs; readDeclIDList(SpecIDs); ASTDeclReader::AddLazySpecializations(D, SpecIDs); } if (D->getTemplatedDecl()->TemplateOrInstantiation) { // We were loaded before our templated declaration was. We've not set up // its corresponding type yet (see VisitCXXRecordDeclImpl), so reconstruct // it now. 
Reader.getContext().getInjectedClassNameType( D->getTemplatedDecl(), D->getInjectedClassNameSpecialization()); } } void ASTDeclReader::VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D) { llvm_unreachable("BuiltinTemplates are not serialized"); } /// TODO: Unify with ClassTemplateDecl version? /// May require unifying ClassTemplateDecl and /// VarTemplateDecl beyond TemplateDecl... void ASTDeclReader::VisitVarTemplateDecl(VarTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); mergeRedeclarableTemplate(D, Redecl); if (ThisDeclID == Redecl.getFirstID()) { // This VarTemplateDecl owns a CommonPtr; read it to keep track of all of // the specializations. SmallVector SpecIDs; readDeclIDList(SpecIDs); ASTDeclReader::AddLazySpecializations(D, SpecIDs); } } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitClassTemplateSpecializationDeclImpl( ClassTemplateSpecializationDecl *D) { RedeclarableResult Redecl = VisitCXXRecordDeclImpl(D); ASTContext &C = Reader.getContext(); if (Decl *InstD = readDecl()) { if (auto *CTD = dyn_cast(InstD)) { D->SpecializedTemplate = CTD; } else { SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs); TemplateArgumentList *ArgList = TemplateArgumentList::CreateCopy(C, TemplArgs); auto *PS = new (C) ClassTemplateSpecializationDecl:: SpecializedPartialSpecialization(); PS->PartialSpecialization = cast(InstD); PS->TemplateArgs = ArgList; D->SpecializedTemplate = PS; } } SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true); D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs); D->PointOfInstantiation = readSourceLocation(); D->SpecializationKind = (TemplateSpecializationKind)Record.readInt(); bool writtenAsCanonicalDecl = Record.readInt(); if (writtenAsCanonicalDecl) { auto *CanonPattern = readDeclAs(); if (D->isCanonicalDecl()) { // It's kept in the folding set. // Set this as, or find, the canonical declaration for this specialization ClassTemplateSpecializationDecl *CanonSpec; if (auto *Partial = dyn_cast(D)) { CanonSpec = CanonPattern->getCommonPtr()->PartialSpecializations .GetOrInsertNode(Partial); } else { CanonSpec = CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D); } // If there was already a canonical specialization, merge into it. if (CanonSpec != D) { mergeRedeclarable(D, CanonSpec, Redecl); // This declaration might be a definition. Merge with any existing // definition. if (auto *DDD = D->DefinitionData) { if (CanonSpec->DefinitionData) MergeDefinitionData(CanonSpec, std::move(*DDD)); else CanonSpec->DefinitionData = D->DefinitionData; } D->DefinitionData = CanonSpec->DefinitionData; } } } // Explicit info. if (TypeSourceInfo *TyInfo = readTypeSourceInfo()) { auto *ExplicitInfo = new (C) ClassTemplateSpecializationDecl::ExplicitSpecializationInfo; ExplicitInfo->TypeAsWritten = TyInfo; ExplicitInfo->ExternLoc = readSourceLocation(); ExplicitInfo->TemplateKeywordLoc = readSourceLocation(); D->ExplicitInfo = ExplicitInfo; } return Redecl; } void ASTDeclReader::VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D) { // We need to read the template params first because redeclarable is going to // need them for profiling TemplateParameterList *Params = Record.readTemplateParameterList(); D->TemplateParams = Params; D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo(); RedeclarableResult Redecl = VisitClassTemplateSpecializationDeclImpl(D); // These are read/set from/to the first declaration. 
if (ThisDeclID == Redecl.getFirstID()) { D->InstantiatedFromMember.setPointer( readDeclAs()); D->InstantiatedFromMember.setInt(Record.readInt()); } } void ASTDeclReader::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); if (ThisDeclID == Redecl.getFirstID()) { // This FunctionTemplateDecl owns a CommonPtr; read it. SmallVector SpecIDs; readDeclIDList(SpecIDs); ASTDeclReader::AddLazySpecializations(D, SpecIDs); } } /// TODO: Unify with ClassTemplateSpecializationDecl version? /// May require unifying ClassTemplate(Partial)SpecializationDecl and /// VarTemplate(Partial)SpecializationDecl with a new data /// structure Template(Partial)SpecializationDecl, and /// using Template(Partial)SpecializationDecl as input type. ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarTemplateSpecializationDeclImpl( VarTemplateSpecializationDecl *D) { ASTContext &C = Reader.getContext(); if (Decl *InstD = readDecl()) { if (auto *VTD = dyn_cast(InstD)) { D->SpecializedTemplate = VTD; } else { SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs); TemplateArgumentList *ArgList = TemplateArgumentList::CreateCopy( C, TemplArgs); auto *PS = new (C) VarTemplateSpecializationDecl::SpecializedPartialSpecialization(); PS->PartialSpecialization = cast(InstD); PS->TemplateArgs = ArgList; D->SpecializedTemplate = PS; } } // Explicit info. if (TypeSourceInfo *TyInfo = readTypeSourceInfo()) { auto *ExplicitInfo = new (C) VarTemplateSpecializationDecl::ExplicitSpecializationInfo; ExplicitInfo->TypeAsWritten = TyInfo; ExplicitInfo->ExternLoc = readSourceLocation(); ExplicitInfo->TemplateKeywordLoc = readSourceLocation(); D->ExplicitInfo = ExplicitInfo; } SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true); D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs); D->PointOfInstantiation = readSourceLocation(); D->SpecializationKind = (TemplateSpecializationKind)Record.readInt(); D->IsCompleteDefinition = Record.readInt(); RedeclarableResult Redecl = VisitVarDeclImpl(D); bool writtenAsCanonicalDecl = Record.readInt(); if (writtenAsCanonicalDecl) { auto *CanonPattern = readDeclAs(); if (D->isCanonicalDecl()) { // It's kept in the folding set. VarTemplateSpecializationDecl *CanonSpec; if (auto *Partial = dyn_cast(D)) { CanonSpec = CanonPattern->getCommonPtr() ->PartialSpecializations.GetOrInsertNode(Partial); } else { CanonSpec = CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D); } // If we already have a matching specialization, merge it. if (CanonSpec != D) mergeRedeclarable(D, CanonSpec, Redecl); } } return Redecl; } /// TODO: Unify with ClassTemplatePartialSpecializationDecl version? /// May require unifying ClassTemplate(Partial)SpecializationDecl and /// VarTemplate(Partial)SpecializationDecl with a new data /// structure Template(Partial)SpecializationDecl, and /// using Template(Partial)SpecializationDecl as input type. void ASTDeclReader::VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D) { TemplateParameterList *Params = Record.readTemplateParameterList(); D->TemplateParams = Params; D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo(); RedeclarableResult Redecl = VisitVarTemplateSpecializationDeclImpl(D); // These are read/set from/to the first declaration. 
if (ThisDeclID == Redecl.getFirstID()) { D->InstantiatedFromMember.setPointer( readDeclAs()); D->InstantiatedFromMember.setInt(Record.readInt()); } } void ASTDeclReader::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) { VisitTypeDecl(D); D->setDeclaredWithTypename(Record.readInt()); if (D->hasTypeConstraint()) { ConceptReference *CR = nullptr; if (Record.readBool()) CR = Record.readConceptReference(); Expr *ImmediatelyDeclaredConstraint = Record.readExpr(); D->setTypeConstraint(CR, ImmediatelyDeclaredConstraint); if ((D->ExpandedParameterPack = Record.readInt())) D->NumExpanded = Record.readInt(); } if (Record.readInt()) D->setDefaultArgument(readTypeSourceInfo()); } void ASTDeclReader::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) { VisitDeclaratorDecl(D); // TemplateParmPosition. D->setDepth(Record.readInt()); D->setPosition(Record.readInt()); if (D->hasPlaceholderTypeConstraint()) D->setPlaceholderTypeConstraint(Record.readExpr()); if (D->isExpandedParameterPack()) { auto TypesAndInfos = D->getTrailingObjects>(); for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) { new (&TypesAndInfos[I].first) QualType(Record.readType()); TypesAndInfos[I].second = readTypeSourceInfo(); } } else { // Rest of NonTypeTemplateParmDecl. D->ParameterPack = Record.readInt(); if (Record.readInt()) D->setDefaultArgument(Record.readExpr()); } } void ASTDeclReader::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) { VisitTemplateDecl(D); // TemplateParmPosition. D->setDepth(Record.readInt()); D->setPosition(Record.readInt()); if (D->isExpandedParameterPack()) { auto **Data = D->getTrailingObjects(); for (unsigned I = 0, N = D->getNumExpansionTemplateParameters(); I != N; ++I) Data[I] = Record.readTemplateParameterList(); } else { // Rest of TemplateTemplateParmDecl. D->ParameterPack = Record.readInt(); if (Record.readInt()) D->setDefaultArgument(Reader.getContext(), Record.readTemplateArgumentLoc()); } } void ASTDeclReader::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); mergeRedeclarableTemplate(D, Redecl); } void ASTDeclReader::VisitStaticAssertDecl(StaticAssertDecl *D) { VisitDecl(D); D->AssertExprAndFailed.setPointer(Record.readExpr()); D->AssertExprAndFailed.setInt(Record.readInt()); D->Message = cast_or_null(Record.readExpr()); D->RParenLoc = readSourceLocation(); } void ASTDeclReader::VisitEmptyDecl(EmptyDecl *D) { VisitDecl(D); } void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl( LifetimeExtendedTemporaryDecl *D) { VisitDecl(D); D->ExtendingDecl = readDeclAs(); D->ExprWithTemporary = Record.readStmt(); if (Record.readInt()) { D->Value = new (D->getASTContext()) APValue(Record.readAPValue()); D->getASTContext().addDestruction(D->Value); } D->ManglingNumber = Record.readInt(); mergeMergeable(D); } std::pair ASTDeclReader::VisitDeclContext(DeclContext *DC) { uint64_t LexicalOffset = ReadLocalOffset(); uint64_t VisibleOffset = ReadLocalOffset(); return std::make_pair(LexicalOffset, VisibleOffset); } template ASTDeclReader::RedeclarableResult ASTDeclReader::VisitRedeclarable(Redeclarable *D) { DeclID FirstDeclID = readDeclID(); Decl *MergeWith = nullptr; bool IsKeyDecl = ThisDeclID == FirstDeclID; bool IsFirstLocalDecl = false; uint64_t RedeclOffset = 0; // 0 indicates that this declaration was the only declaration of its entity, // and is used for space optimization. 
if (FirstDeclID == 0) { FirstDeclID = ThisDeclID; IsKeyDecl = true; IsFirstLocalDecl = true; } else if (unsigned N = Record.readInt()) { // This declaration was the first local declaration, but may have imported // other declarations. IsKeyDecl = N == 1; IsFirstLocalDecl = true; // We have some declarations that must be before us in our redeclaration // chain. Read them now, and remember that we ought to merge with one of // them. // FIXME: Provide a known merge target to the second and subsequent such // declaration. for (unsigned I = 0; I != N - 1; ++I) MergeWith = readDecl(); RedeclOffset = ReadLocalOffset(); } else { // This declaration was not the first local declaration. Read the first // local declaration now, to trigger the import of other redeclarations. (void)readDecl(); } auto *FirstDecl = cast_or_null(Reader.GetDecl(FirstDeclID)); if (FirstDecl != D) { // We delay loading of the redeclaration chain to avoid deeply nested calls. // We temporarily set the first (canonical) declaration as the previous one // which is the one that matters and mark the real previous DeclID to be // loaded & attached later on. D->RedeclLink = Redeclarable::PreviousDeclLink(FirstDecl); D->First = FirstDecl->getCanonicalDecl(); } auto *DAsT = static_cast(D); // Note that we need to load local redeclarations of this decl and build a // decl chain for them. This must happen *after* we perform the preloading // above; this ensures that the redeclaration chain is built in the correct // order. if (IsFirstLocalDecl) Reader.PendingDeclChains.push_back(std::make_pair(DAsT, RedeclOffset)); return RedeclarableResult(MergeWith, FirstDeclID, IsKeyDecl); } /// Attempts to merge the given declaration (D) with another declaration /// of the same entity. template void ASTDeclReader::mergeRedeclarable(Redeclarable *DBase, RedeclarableResult &Redecl) { // If modules are not available, there is no reason to perform this merge. if (!Reader.getContext().getLangOpts().Modules) return; // If we're not the canonical declaration, we don't need to merge. if (!DBase->isFirstDecl()) return; auto *D = static_cast(DBase); if (auto *Existing = Redecl.getKnownMergeTarget()) // We already know of an existing declaration we should merge with. mergeRedeclarable(D, cast(Existing), Redecl); else if (FindExistingResult ExistingRes = findExisting(D)) if (T *Existing = ExistingRes) mergeRedeclarable(D, Existing, Redecl); } /// Attempt to merge D with a previous declaration of the same lambda, which is /// found by its index within its context declaration, if it has one. /// /// We can't look up lambdas in their enclosing lexical or semantic context in /// general, because for lambdas in variables, both of those might be a /// namespace or the translation unit. void ASTDeclReader::mergeLambda(CXXRecordDecl *D, RedeclarableResult &Redecl, Decl *Context, unsigned IndexInContext) { // If we don't have a mangling context, treat this like any other // declaration. if (!Context) return mergeRedeclarable(D, Redecl); // If modules are not available, there is no reason to perform this merge. if (!Reader.getContext().getLangOpts().Modules) return; // If we're not the canonical declaration, we don't need to merge. if (!D->isFirstDecl()) return; if (auto *Existing = Redecl.getKnownMergeTarget()) // We already know of an existing declaration we should merge with. mergeRedeclarable(D, cast(Existing), Redecl); // Look up this lambda to see if we've seen it before. If so, merge with the // one we already loaded. 
NamedDecl *&Slot = Reader.LambdaDeclarationsForMerging[{ Context->getCanonicalDecl(), IndexInContext}]; if (Slot) mergeRedeclarable(D, cast(Slot), Redecl); else Slot = D; } void ASTDeclReader::mergeRedeclarableTemplate(RedeclarableTemplateDecl *D, RedeclarableResult &Redecl) { mergeRedeclarable(D, Redecl); // If we merged the template with a prior declaration chain, merge the // common pointer. // FIXME: Actually merge here, don't just overwrite. D->Common = D->getCanonicalDecl()->Common; } /// "Cast" to type T, asserting if we don't have an implicit conversion. /// We use this to put code in a template that will only be valid for certain /// instantiations. template static T assert_cast(T t) { return t; } template static T assert_cast(...) { llvm_unreachable("bad assert_cast"); } /// Merge together the pattern declarations from two template /// declarations. void ASTDeclReader::mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, bool IsKeyDecl) { auto *DPattern = D->getTemplatedDecl(); auto *ExistingPattern = Existing->getTemplatedDecl(); RedeclarableResult Result(/*MergeWith*/ ExistingPattern, DPattern->getCanonicalDecl()->getGlobalID(), IsKeyDecl); if (auto *DClass = dyn_cast(DPattern)) { // Merge with any existing definition. // FIXME: This is duplicated in several places. Refactor. auto *ExistingClass = cast(ExistingPattern)->getCanonicalDecl(); if (auto *DDD = DClass->DefinitionData) { if (ExistingClass->DefinitionData) { MergeDefinitionData(ExistingClass, std::move(*DDD)); } else { ExistingClass->DefinitionData = DClass->DefinitionData; // We may have skipped this before because we thought that DClass // was the canonical declaration. Reader.PendingDefinitions.insert(DClass); } } DClass->DefinitionData = ExistingClass->DefinitionData; return mergeRedeclarable(DClass, cast(ExistingPattern), Result); } if (auto *DFunction = dyn_cast(DPattern)) return mergeRedeclarable(DFunction, cast(ExistingPattern), Result); if (auto *DVar = dyn_cast(DPattern)) return mergeRedeclarable(DVar, cast(ExistingPattern), Result); if (auto *DAlias = dyn_cast(DPattern)) return mergeRedeclarable(DAlias, cast(ExistingPattern), Result); llvm_unreachable("merged an unknown kind of redeclarable template"); } /// Attempts to merge the given declaration (D) with another declaration /// of the same entity. template void ASTDeclReader::mergeRedeclarable(Redeclarable *DBase, T *Existing, RedeclarableResult &Redecl) { auto *D = static_cast(DBase); T *ExistingCanon = Existing->getCanonicalDecl(); T *DCanon = D->getCanonicalDecl(); if (ExistingCanon != DCanon) { // Have our redeclaration link point back at the canonical declaration // of the existing declaration, so that this declaration has the // appropriate canonical declaration. D->RedeclLink = Redeclarable::PreviousDeclLink(ExistingCanon); D->First = ExistingCanon; ExistingCanon->Used |= D->Used; D->Used = false; // When we merge a namespace, update its pointer to the first namespace. // We cannot have loaded any redeclarations of this declaration yet, so // there's nothing else that needs to be updated. if (auto *Namespace = dyn_cast(D)) Namespace->AnonOrFirstNamespaceAndFlags.setPointer( assert_cast(ExistingCanon)); // When we merge a template, merge its pattern. if (auto *DTemplate = dyn_cast(D)) mergeTemplatePattern( DTemplate, assert_cast(ExistingCanon), Redecl.isKeyDecl()); // If this declaration is a key declaration, make a note of that. 
if (Redecl.isKeyDecl()) Reader.KeyDecls[ExistingCanon].push_back(Redecl.getFirstID()); } } /// ODR-like semantics for C/ObjC allow us to merge tag types and a structural /// check in Sema guarantees the types can be merged (see C11 6.2.7/1 or C89 /// 6.1.2.6/1). Although most merging is done in Sema, we need to guarantee /// that some types are mergeable during deserialization, otherwise name /// lookup fails. This is the case for EnumConstantDecl. static bool allowODRLikeMergeInC(NamedDecl *ND) { if (!ND) return false; // TODO: implement merge for other necessary decls. if (isa(ND)) return true; return false; } /// Attempts to merge LifetimeExtendedTemporaryDecl with /// identical class definitions from two different modules. void ASTDeclReader::mergeMergeable(LifetimeExtendedTemporaryDecl *D) { // If modules are not available, there is no reason to perform this merge. if (!Reader.getContext().getLangOpts().Modules) return; LifetimeExtendedTemporaryDecl *LETDecl = D; LifetimeExtendedTemporaryDecl *&LookupResult = Reader.LETemporaryForMerging[std::make_pair( LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())]; if (LookupResult) Reader.getContext().setPrimaryMergedDecl(LETDecl, LookupResult->getCanonicalDecl()); else LookupResult = LETDecl; } /// Attempts to merge the given declaration (D) with another declaration /// of the same entity, for the case where the entity is not actually /// redeclarable. This happens, for instance, when merging the fields of /// identical class definitions from two different modules. template void ASTDeclReader::mergeMergeable(Mergeable *D) { // If modules are not available, there is no reason to perform this merge. if (!Reader.getContext().getLangOpts().Modules) return; // ODR-based merging is performed in C++ and in some cases (tag types) in C. // Note that C identically-named things in different translation units are // not redeclarations, but may still have compatible types, where ODR-like // semantics may apply. 
if (!Reader.getContext().getLangOpts().CPlusPlus && !allowODRLikeMergeInC(dyn_cast(static_cast(D)))) return; if (FindExistingResult ExistingRes = findExisting(static_cast(D))) if (T *Existing = ExistingRes) Reader.getContext().setPrimaryMergedDecl(static_cast(D), Existing->getCanonicalDecl()); } void ASTDeclReader::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) { Record.readOMPChildren(D->Data); VisitDecl(D); } void ASTDeclReader::VisitOMPAllocateDecl(OMPAllocateDecl *D) { Record.readOMPChildren(D->Data); VisitDecl(D); } void ASTDeclReader::VisitOMPRequiresDecl(OMPRequiresDecl * D) { Record.readOMPChildren(D->Data); VisitDecl(D); } void ASTDeclReader::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) { VisitValueDecl(D); D->setLocation(readSourceLocation()); Expr *In = Record.readExpr(); Expr *Out = Record.readExpr(); D->setCombinerData(In, Out); Expr *Combiner = Record.readExpr(); D->setCombiner(Combiner); Expr *Orig = Record.readExpr(); Expr *Priv = Record.readExpr(); D->setInitializerData(Orig, Priv); Expr *Init = Record.readExpr(); auto IK = static_cast(Record.readInt()); D->setInitializer(Init, IK); D->PrevDeclInScope = readDeclID(); } void ASTDeclReader::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { Record.readOMPChildren(D->Data); VisitValueDecl(D); D->VarName = Record.readDeclarationName(); D->PrevDeclInScope = readDeclID(); } void ASTDeclReader::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) { VisitVarDecl(D); } //===----------------------------------------------------------------------===// // Attribute Reading //===----------------------------------------------------------------------===// namespace { class AttrReader { ASTRecordReader &Reader; public: AttrReader(ASTRecordReader &Reader) : Reader(Reader) {} uint64_t readInt() { return Reader.readInt(); } bool readBool() { return Reader.readBool(); } SourceRange readSourceRange() { return Reader.readSourceRange(); } SourceLocation readSourceLocation() { return Reader.readSourceLocation(); } Expr *readExpr() { return Reader.readExpr(); } Attr *readAttr() { return Reader.readAttr(); } std::string readString() { return Reader.readString(); } TypeSourceInfo *readTypeSourceInfo() { return Reader.readTypeSourceInfo(); } IdentifierInfo *readIdentifier() { return Reader.readIdentifier(); } VersionTuple readVersionTuple() { return Reader.readVersionTuple(); } OMPTraitInfo *readOMPTraitInfo() { return Reader.readOMPTraitInfo(); } template T *GetLocalDeclAs(uint32_t LocalID) { return Reader.GetLocalDeclAs(LocalID); } }; } Attr *ASTRecordReader::readAttr() { AttrReader Record(*this); auto V = Record.readInt(); if (!V) return nullptr; Attr *New = nullptr; // Kind is stored as a 1-based integer because 0 is used to indicate a null // Attr pointer. 
auto Kind = static_cast(V - 1); ASTContext &Context = getContext(); IdentifierInfo *AttrName = Record.readIdentifier(); IdentifierInfo *ScopeName = Record.readIdentifier(); SourceRange AttrRange = Record.readSourceRange(); SourceLocation ScopeLoc = Record.readSourceLocation(); unsigned ParsedKind = Record.readInt(); unsigned Syntax = Record.readInt(); unsigned SpellingIndex = Record.readInt(); bool IsAlignas = (ParsedKind == AttributeCommonInfo::AT_Aligned && Syntax == AttributeCommonInfo::AS_Keyword && SpellingIndex == AlignedAttr::Keyword_alignas); bool IsRegularKeywordAttribute = Record.readBool(); AttributeCommonInfo Info(AttrName, ScopeName, AttrRange, ScopeLoc, AttributeCommonInfo::Kind(ParsedKind), {AttributeCommonInfo::Syntax(Syntax), SpellingIndex, IsAlignas, IsRegularKeywordAttribute}); #include "clang/Serialization/AttrPCHRead.inc" assert(New && "Unable to decode attribute?"); return New; } /// Reads attributes from the current stream position. void ASTRecordReader::readAttributes(AttrVec &Attrs) { for (unsigned I = 0, E = readInt(); I != E; ++I) if (auto *A = readAttr()) Attrs.push_back(A); } //===----------------------------------------------------------------------===// // ASTReader Implementation //===----------------------------------------------------------------------===// /// Note that we have loaded the declaration with the given /// Index. /// /// This routine notes that this declaration has already been loaded, /// so that future GetDecl calls will return this declaration rather /// than trying to load a new declaration. inline void ASTReader::LoadedDecl(unsigned Index, Decl *D) { assert(!DeclsLoaded[Index] && "Decl loaded twice?"); DeclsLoaded[Index] = D; } /// Determine whether the consumer will be interested in seeing /// this declaration (via HandleTopLevelDecl). /// /// This routine should return true for anything that might affect /// code generation, e.g., inline function definitions, Objective-C /// declarations with metadata, etc. static bool isConsumerInterestedIn(ASTContext &Ctx, Decl *D, bool HasBody) { // An ObjCMethodDecl is never considered as "interesting" because its // implementation container always is. // An ImportDecl or VarDecl imported from a module map module will get // emitted when we import the relevant module. if (isPartOfPerModuleInitializer(D)) { auto *M = D->getImportedOwningModule(); if (M && M->Kind == Module::ModuleMapModule && Ctx.DeclMustBeEmitted(D)) return false; } if (isa(D)) return true; if (isa(D)) return !D->getDeclContext()->isFunctionOrMethod(); if (const auto *Var = dyn_cast(D)) return Var->isFileVarDecl() && (Var->isThisDeclarationADefinition() == VarDecl::Definition || OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Var)); if (const auto *Func = dyn_cast(D)) return Func->doesThisDeclarationHaveABody() || HasBody; if (auto *ES = D->getASTContext().getExternalSource()) if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) return true; return false; } /// Get the correct cursor and offset for loading a declaration. 
ASTReader::RecordLocation ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) { GlobalDeclMapType::iterator I = GlobalDeclMap.find(ID); assert(I != GlobalDeclMap.end() && "Corrupted global declaration map"); ModuleFile *M = I->second; const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; Loc = TranslateSourceLocation(*M, DOffs.getLocation()); return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset)); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { auto I = GlobalBitOffsetsMap.find(GlobalOffset); assert(I != GlobalBitOffsetsMap.end() && "Corrupted global bit offsets map"); return RecordLocation(I->second, GlobalOffset - I->second->GlobalBitOffset); } uint64_t ASTReader::getGlobalBitOffset(ModuleFile &M, uint64_t LocalOffset) { return LocalOffset + M.GlobalBitOffset; } CXXRecordDecl * ASTDeclReader::getOrFakePrimaryClassDefinition(ASTReader &Reader, CXXRecordDecl *RD) { // Try to dig out the definition. auto *DD = RD->DefinitionData; if (!DD) DD = RD->getCanonicalDecl()->DefinitionData; // If there's no definition yet, then DC's definition is added by an update // record, but we've not yet loaded that update record. In this case, we // commit to DC being the canonical definition now, and will fix this when // we load the update record. if (!DD) { DD = new (Reader.getContext()) struct CXXRecordDecl::DefinitionData(RD); RD->setCompleteDefinition(true); RD->DefinitionData = DD; RD->getCanonicalDecl()->DefinitionData = DD; // Track that we did this horrible thing so that we can fix it later. Reader.PendingFakeDefinitionData.insert( std::make_pair(DD, ASTReader::PendingFakeDefinitionKind::Fake)); } return DD->Definition; } /// Find the context in which we should search for previous declarations when /// looking for declarations to merge. DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader, DeclContext *DC) { if (auto *ND = dyn_cast(DC)) return ND->getOriginalNamespace(); if (auto *RD = dyn_cast(DC)) return getOrFakePrimaryClassDefinition(Reader, RD); if (auto *RD = dyn_cast(DC)) return RD->getDefinition(); if (auto *ED = dyn_cast(DC)) return ED->getASTContext().getLangOpts().CPlusPlus? ED->getDefinition() : nullptr; if (auto *OID = dyn_cast(DC)) return OID->getDefinition(); // We can see the TU here only if we have no Sema object. In that case, // there's no TU scope to look in, so using the DC alone is sufficient. if (auto *TU = dyn_cast(DC)) return TU; return nullptr; } ASTDeclReader::FindExistingResult::~FindExistingResult() { // Record that we had a typedef name for linkage whether or not we merge // with that declaration. if (TypedefNameForLinkage) { DeclContext *DC = New->getDeclContext()->getRedeclContext(); Reader.ImportedTypedefNamesForLinkage.insert( std::make_pair(std::make_pair(DC, TypedefNameForLinkage), New)); return; } if (!AddResult || Existing) return; DeclarationName Name = New->getDeclName(); DeclContext *DC = New->getDeclContext()->getRedeclContext(); if (needsAnonymousDeclarationNumber(New)) { setAnonymousDeclForMerging(Reader, New->getLexicalDeclContext(), AnonymousDeclNumber, New); } else if (DC->isTranslationUnit() && !Reader.getContext().getLangOpts().CPlusPlus) { if (Reader.getIdResolver().tryAddTopLevelDecl(New, Name)) Reader.PendingFakeLookupResults[Name.getAsIdentifierInfo()] .push_back(New); } else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) { // Add the declaration to its redeclaration context so later merging // lookups will find it. 
MergeDC->makeDeclVisibleInContextImpl(New, /*Internal*/true); } } /// Find the declaration that should be merged into, given the declaration found /// by name lookup. If we're merging an anonymous declaration within a typedef, /// we need a matching typedef, and we merge with the type inside it. static NamedDecl *getDeclForMerging(NamedDecl *Found, bool IsTypedefNameForLinkage) { if (!IsTypedefNameForLinkage) return Found; // If we found a typedef declaration that gives a name to some other // declaration, then we want that inner declaration. Declarations from // AST files are handled via ImportedTypedefNamesForLinkage. if (Found->isFromASTFile()) return nullptr; if (auto *TND = dyn_cast(Found)) return TND->getAnonDeclWithTypedefName(/*AnyRedecl*/true); return nullptr; } /// Find the declaration to use to populate the anonymous declaration table /// for the given lexical DeclContext. We only care about finding local /// definitions of the context; we'll merge imported ones as we go. DeclContext * ASTDeclReader::getPrimaryDCForAnonymousDecl(DeclContext *LexicalDC) { // For classes, we track the definition as we merge. if (auto *RD = dyn_cast(LexicalDC)) { auto *DD = RD->getCanonicalDecl()->DefinitionData; return DD ? DD->Definition : nullptr; } else if (auto *OID = dyn_cast(LexicalDC)) { return OID->getCanonicalDecl()->getDefinition(); } // For anything else, walk its merged redeclarations looking for a definition. // Note that we can't just call getDefinition here because the redeclaration // chain isn't wired up. for (auto *D : merged_redecls(cast(LexicalDC))) { if (auto *FD = dyn_cast(D)) if (FD->isThisDeclarationADefinition()) return FD; if (auto *MD = dyn_cast(D)) if (MD->isThisDeclarationADefinition()) return MD; if (auto *RD = dyn_cast(D)) if (RD->isThisDeclarationADefinition()) return RD; } // No merged definition yet. return nullptr; } NamedDecl *ASTDeclReader::getAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index) { // If the lexical context has been merged, look into the now-canonical // definition. auto *CanonDC = cast(DC)->getCanonicalDecl(); // If we've seen this before, return the canonical declaration. auto &Previous = Reader.AnonymousDeclarationsForMerging[CanonDC]; if (Index < Previous.size() && Previous[Index]) return Previous[Index]; // If this is the first time, but we have parsed a declaration of the context, // build the anonymous declaration list from the parsed declaration. auto *PrimaryDC = getPrimaryDCForAnonymousDecl(DC); if (PrimaryDC && !cast(PrimaryDC)->isFromASTFile()) { numberAnonymousDeclsWithin(PrimaryDC, [&](NamedDecl *ND, unsigned Number) { if (Previous.size() == Number) Previous.push_back(cast(ND->getCanonicalDecl())); else Previous[Number] = cast(ND->getCanonicalDecl()); }); } return Index < Previous.size() ? Previous[Index] : nullptr; } void ASTDeclReader::setAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index, NamedDecl *D) { auto *CanonDC = cast(DC)->getCanonicalDecl(); auto &Previous = Reader.AnonymousDeclarationsForMerging[CanonDC]; if (Index >= Previous.size()) Previous.resize(Index + 1); if (!Previous[Index]) Previous[Index] = D; } ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) { DeclarationName Name = TypedefNameForLinkage ? TypedefNameForLinkage : D->getDeclName(); if (!Name && !needsAnonymousDeclarationNumber(D)) { // Don't bother trying to find unnamed declarations that are in // unmergeable contexts. 
FindExistingResult Result(Reader, D, /*Existing=*/nullptr, AnonymousDeclNumber, TypedefNameForLinkage); Result.suppress(); return Result; } ASTContext &C = Reader.getContext(); DeclContext *DC = D->getDeclContext()->getRedeclContext(); if (TypedefNameForLinkage) { auto It = Reader.ImportedTypedefNamesForLinkage.find( std::make_pair(DC, TypedefNameForLinkage)); if (It != Reader.ImportedTypedefNamesForLinkage.end()) if (C.isSameEntity(It->second, D)) return FindExistingResult(Reader, D, It->second, AnonymousDeclNumber, TypedefNameForLinkage); // Go on to check in other places in case an existing typedef name // was not imported. } if (needsAnonymousDeclarationNumber(D)) { // This is an anonymous declaration that we may need to merge. Look it up // in its context by number. if (auto *Existing = getAnonymousDeclForMerging( Reader, D->getLexicalDeclContext(), AnonymousDeclNumber)) if (C.isSameEntity(Existing, D)) return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber, TypedefNameForLinkage); } else if (DC->isTranslationUnit() && !Reader.getContext().getLangOpts().CPlusPlus) { IdentifierResolver &IdResolver = Reader.getIdResolver(); // Temporarily consider the identifier to be up-to-date. We don't want to // cause additional lookups here. class UpToDateIdentifierRAII { IdentifierInfo *II; bool WasOutToDate = false; public: explicit UpToDateIdentifierRAII(IdentifierInfo *II) : II(II) { if (II) { WasOutToDate = II->isOutOfDate(); if (WasOutToDate) II->setOutOfDate(false); } } ~UpToDateIdentifierRAII() { if (WasOutToDate) II->setOutOfDate(true); } } UpToDate(Name.getAsIdentifierInfo()); for (IdentifierResolver::iterator I = IdResolver.begin(Name), IEnd = IdResolver.end(); I != IEnd; ++I) { if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage)) if (C.isSameEntity(Existing, D)) return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber, TypedefNameForLinkage); } } else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) { DeclContext::lookup_result R = MergeDC->noload_lookup(Name); for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) { if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage)) if (C.isSameEntity(Existing, D)) return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber, TypedefNameForLinkage); } } else { // Not in a mergeable context. return FindExistingResult(Reader); } // If this declaration is from a merged context, make a note that we need to // check that the canonical definition of that context contains the decl. // // Note that we don't perform ODR checks for decls from the global module // fragment. // // FIXME: We should do something similar if we merge two definitions of the // same template specialization into the same CXXRecordDecl. auto MergedDCIt = Reader.MergedDeclContexts.find(D->getLexicalDeclContext()); if (MergedDCIt != Reader.MergedDeclContexts.end() && !shouldSkipCheckingODR(D) && MergedDCIt->second == D->getDeclContext()) Reader.PendingOdrMergeChecks.push_back(D); return FindExistingResult(Reader, D, /*Existing=*/nullptr, AnonymousDeclNumber, TypedefNameForLinkage); } template Decl *ASTDeclReader::getMostRecentDeclImpl(Redeclarable *D) { return D->RedeclLink.getLatestNotUpdated(); } Decl *ASTDeclReader::getMostRecentDeclImpl(...) 
{ llvm_unreachable("getMostRecentDecl on non-redeclarable declaration"); } Decl *ASTDeclReader::getMostRecentDecl(Decl *D) { assert(D); switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ return getMostRecentDeclImpl(cast(D)); #include "clang/AST/DeclNodes.inc" } llvm_unreachable("unknown decl kind"); } Decl *ASTReader::getMostRecentExistingDecl(Decl *D) { return ASTDeclReader::getMostRecentDecl(D->getCanonicalDecl()); } void ASTDeclReader::mergeInheritableAttributes(ASTReader &Reader, Decl *D, Decl *Previous) { InheritableAttr *NewAttr = nullptr; ASTContext &Context = Reader.getContext(); const auto *IA = Previous->getAttr(); if (IA && !D->hasAttr()) { NewAttr = cast(IA->clone(Context)); NewAttr->setInherited(true); D->addAttr(NewAttr); } const auto *AA = Previous->getAttr(); if (AA && !D->hasAttr()) { NewAttr = AA->clone(Context); NewAttr->setInherited(true); D->addAttr(NewAttr); } } template void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon) { D->RedeclLink.setPrevious(cast(Previous)); D->First = cast(Previous)->First; } namespace clang { template<> void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon) { auto *VD = static_cast(D); auto *PrevVD = cast(Previous); D->RedeclLink.setPrevious(PrevVD); D->First = PrevVD->First; // We should keep at most one definition on the chain. // FIXME: Cache the definition once we've found it. Building a chain with // N definitions currently takes O(N^2) time here. if (VD->isThisDeclarationADefinition() == VarDecl::Definition) { for (VarDecl *CurD = PrevVD; CurD; CurD = CurD->getPreviousDecl()) { if (CurD->isThisDeclarationADefinition() == VarDecl::Definition) { Reader.mergeDefinitionVisibility(CurD, VD); VD->demoteThisDefinitionToDeclaration(); break; } } } } static bool isUndeducedReturnType(QualType T) { auto *DT = T->getContainedDeducedType(); return DT && !DT->isDeduced(); } template<> void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon) { auto *FD = static_cast(D); auto *PrevFD = cast(Previous); FD->RedeclLink.setPrevious(PrevFD); FD->First = PrevFD->First; // If the previous declaration is an inline function declaration, then this // declaration is too. if (PrevFD->isInlined() != FD->isInlined()) { // FIXME: [dcl.fct.spec]p4: // If a function with external linkage is declared inline in one // translation unit, it shall be declared inline in all translation // units in which it appears. // // Be careful of this case: // // module A: // template struct X { void f(); }; // template inline void X::f() {} // // module B instantiates the declaration of X::f // module C instantiates the definition of X::f // // If module B and C are merged, we do not have a violation of this rule. FD->setImplicitlyInline(true); } auto *FPT = FD->getType()->getAs(); auto *PrevFPT = PrevFD->getType()->getAs(); if (FPT && PrevFPT) { // If we need to propagate an exception specification along the redecl // chain, make a note of that so that we can do so later. bool IsUnresolved = isUnresolvedExceptionSpec(FPT->getExceptionSpecType()); bool WasUnresolved = isUnresolvedExceptionSpec(PrevFPT->getExceptionSpecType()); if (IsUnresolved != WasUnresolved) Reader.PendingExceptionSpecUpdates.insert( {Canon, IsUnresolved ? PrevFD : FD}); // If we need to propagate a deduced return type along the redecl chain, // make a note of that so that we can do it later. 
bool IsUndeduced = isUndeducedReturnType(FPT->getReturnType()); bool WasUndeduced = isUndeducedReturnType(PrevFPT->getReturnType()); if (IsUndeduced != WasUndeduced) Reader.PendingDeducedTypeUpdates.insert( {cast(Canon), (IsUndeduced ? PrevFPT : FPT)->getReturnType()}); } } } // namespace clang void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, ...) { llvm_unreachable("attachPreviousDecl on non-redeclarable declaration"); } /// Inherit the default template argument from \p From to \p To. Returns /// \c false if there is no default template for \p From. template static bool inheritDefaultTemplateArgument(ASTContext &Context, ParmDecl *From, Decl *ToD) { auto *To = cast(ToD); if (!From->hasDefaultArgument()) return false; To->setInheritedDefaultArgument(Context, From); return true; } static void inheritDefaultTemplateArguments(ASTContext &Context, TemplateDecl *From, TemplateDecl *To) { auto *FromTP = From->getTemplateParameters(); auto *ToTP = To->getTemplateParameters(); assert(FromTP->size() == ToTP->size() && "merged mismatched templates?"); for (unsigned I = 0, N = FromTP->size(); I != N; ++I) { NamedDecl *FromParam = FromTP->getParam(I); NamedDecl *ToParam = ToTP->getParam(I); if (auto *FTTP = dyn_cast(FromParam)) inheritDefaultTemplateArgument(Context, FTTP, ToParam); else if (auto *FNTTP = dyn_cast(FromParam)) inheritDefaultTemplateArgument(Context, FNTTP, ToParam); else inheritDefaultTemplateArgument( Context, cast(FromParam), ToParam); } } void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D, Decl *Previous, Decl *Canon) { assert(D && Previous); switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ attachPreviousDeclImpl(Reader, cast(D), Previous, Canon); \ break; #include "clang/AST/DeclNodes.inc" } // If the declaration was visible in one module, a redeclaration of it in // another module remains visible even if it wouldn't be visible by itself. // // FIXME: In this case, the declaration should only be visible if a module // that makes it visible has been imported. D->IdentifierNamespace |= Previous->IdentifierNamespace & (Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Type); // If the declaration declares a template, it may inherit default arguments // from the previous declaration. if (auto *TD = dyn_cast(D)) inheritDefaultTemplateArguments(Reader.getContext(), cast(Previous), TD); // If any of the declaration in the chain contains an Inheritable attribute, // it needs to be added to all the declarations in the redeclarable chain. // FIXME: Only the logic of merging MSInheritableAttr is present, it should // be extended for all inheritable attributes. mergeInheritableAttributes(Reader, D, Previous); } template void ASTDeclReader::attachLatestDeclImpl(Redeclarable *D, Decl *Latest) { D->RedeclLink.setLatest(cast(Latest)); } void ASTDeclReader::attachLatestDeclImpl(...) { llvm_unreachable("attachLatestDecl on non-redeclarable declaration"); } void ASTDeclReader::attachLatestDecl(Decl *D, Decl *Latest) { assert(D && Latest); switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ attachLatestDeclImpl(cast(D), Latest); \ break; #include "clang/AST/DeclNodes.inc" } } template void ASTDeclReader::markIncompleteDeclChainImpl(Redeclarable *D) { D->RedeclLink.markIncomplete(); } void ASTDeclReader::markIncompleteDeclChainImpl(...) 
{ llvm_unreachable("markIncompleteDeclChain on non-redeclarable declaration"); } void ASTReader::markIncompleteDeclChain(Decl *D) { switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ ASTDeclReader::markIncompleteDeclChainImpl(cast(D)); \ break; #include "clang/AST/DeclNodes.inc" } } /// Read the declaration at the given offset from the AST file. Decl *ASTReader::ReadDeclRecord(DeclID ID) { unsigned Index = ID - NUM_PREDEF_DECL_IDS; SourceLocation DeclLoc; RecordLocation Loc = DeclCursorForID(ID, DeclLoc); llvm::BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor; // Keep track of where we are in the stream, then jump back there // after reading this declaration. SavedStreamPosition SavedPosition(DeclsCursor); ReadingKindTracker ReadingKind(Read_Decl, *this); // Note that we are loading a declaration record. Deserializing ADecl(this); auto Fail = [](const char *what, llvm::Error &&Err) { llvm::report_fatal_error(Twine("ASTReader::readDeclRecord failed ") + what + ": " + toString(std::move(Err))); }; if (llvm::Error JumpFailed = DeclsCursor.JumpToBit(Loc.Offset)) Fail("jumping", std::move(JumpFailed)); ASTRecordReader Record(*this, *Loc.F); ASTDeclReader Reader(*this, Record, Loc, ID, DeclLoc); Expected MaybeCode = DeclsCursor.ReadCode(); if (!MaybeCode) Fail("reading code", MaybeCode.takeError()); unsigned Code = MaybeCode.get(); ASTContext &Context = getContext(); Decl *D = nullptr; Expected MaybeDeclCode = Record.readRecord(DeclsCursor, Code); if (!MaybeDeclCode) llvm::report_fatal_error( Twine("ASTReader::readDeclRecord failed reading decl code: ") + toString(MaybeDeclCode.takeError())); switch ((DeclCode)MaybeDeclCode.get()) { case DECL_CONTEXT_LEXICAL: case DECL_CONTEXT_VISIBLE: llvm_unreachable("Record cannot be de-serialized with readDeclRecord"); case DECL_TYPEDEF: D = TypedefDecl::CreateDeserialized(Context, ID); break; case DECL_TYPEALIAS: D = TypeAliasDecl::CreateDeserialized(Context, ID); break; case DECL_ENUM: D = EnumDecl::CreateDeserialized(Context, ID); break; case DECL_RECORD: D = RecordDecl::CreateDeserialized(Context, ID); break; case DECL_ENUM_CONSTANT: D = EnumConstantDecl::CreateDeserialized(Context, ID); break; case DECL_FUNCTION: D = FunctionDecl::CreateDeserialized(Context, ID); break; case DECL_LINKAGE_SPEC: D = LinkageSpecDecl::CreateDeserialized(Context, ID); break; case DECL_EXPORT: D = ExportDecl::CreateDeserialized(Context, ID); break; case DECL_LABEL: D = LabelDecl::CreateDeserialized(Context, ID); break; case DECL_NAMESPACE: D = NamespaceDecl::CreateDeserialized(Context, ID); break; case DECL_NAMESPACE_ALIAS: D = NamespaceAliasDecl::CreateDeserialized(Context, ID); break; case DECL_USING: D = UsingDecl::CreateDeserialized(Context, ID); break; case DECL_USING_PACK: D = UsingPackDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_USING_SHADOW: D = UsingShadowDecl::CreateDeserialized(Context, ID); break; case DECL_USING_ENUM: D = UsingEnumDecl::CreateDeserialized(Context, ID); break; case DECL_CONSTRUCTOR_USING_SHADOW: D = ConstructorUsingShadowDecl::CreateDeserialized(Context, ID); break; case DECL_USING_DIRECTIVE: D = UsingDirectiveDecl::CreateDeserialized(Context, ID); break; case DECL_UNRESOLVED_USING_VALUE: D = UnresolvedUsingValueDecl::CreateDeserialized(Context, ID); break; case DECL_UNRESOLVED_USING_TYPENAME: D = UnresolvedUsingTypenameDecl::CreateDeserialized(Context, ID); break; case DECL_UNRESOLVED_USING_IF_EXISTS: D = UnresolvedUsingIfExistsDecl::CreateDeserialized(Context, ID); 
break; case DECL_CXX_RECORD: D = CXXRecordDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_DEDUCTION_GUIDE: D = CXXDeductionGuideDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_METHOD: D = CXXMethodDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_CONSTRUCTOR: D = CXXConstructorDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_CXX_DESTRUCTOR: D = CXXDestructorDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_CONVERSION: D = CXXConversionDecl::CreateDeserialized(Context, ID); break; case DECL_ACCESS_SPEC: D = AccessSpecDecl::CreateDeserialized(Context, ID); break; case DECL_FRIEND: D = FriendDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_FRIEND_TEMPLATE: D = FriendTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_CLASS_TEMPLATE: D = ClassTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_CLASS_TEMPLATE_SPECIALIZATION: D = ClassTemplateSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION: D = ClassTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_VAR_TEMPLATE: D = VarTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_VAR_TEMPLATE_SPECIALIZATION: D = VarTemplateSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION: D = VarTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_FUNCTION_TEMPLATE: D = FunctionTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_TEMPLATE_TYPE_PARM: { bool HasTypeConstraint = Record.readInt(); D = TemplateTypeParmDecl::CreateDeserialized(Context, ID, HasTypeConstraint); break; } case DECL_NON_TYPE_TEMPLATE_PARM: { bool HasTypeConstraint = Record.readInt(); D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID, HasTypeConstraint); break; } case DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK: { bool HasTypeConstraint = Record.readInt(); D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID, Record.readInt(), HasTypeConstraint); break; } case DECL_TEMPLATE_TEMPLATE_PARM: D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID); break; case DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK: D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_TYPE_ALIAS_TEMPLATE: D = TypeAliasTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_CONCEPT: D = ConceptDecl::CreateDeserialized(Context, ID); break; case DECL_REQUIRES_EXPR_BODY: D = RequiresExprBodyDecl::CreateDeserialized(Context, ID); break; case DECL_STATIC_ASSERT: D = StaticAssertDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_METHOD: D = ObjCMethodDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_INTERFACE: D = ObjCInterfaceDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_IVAR: D = ObjCIvarDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_PROTOCOL: D = ObjCProtocolDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_AT_DEFS_FIELD: D = ObjCAtDefsFieldDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_CATEGORY: D = ObjCCategoryDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_CATEGORY_IMPL: D = ObjCCategoryImplDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_IMPLEMENTATION: D = ObjCImplementationDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_COMPATIBLE_ALIAS: D = ObjCCompatibleAliasDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_PROPERTY: 
    D = ObjCPropertyDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_OBJC_PROPERTY_IMPL:
    D = ObjCPropertyImplDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_FIELD:
    D = FieldDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_INDIRECTFIELD:
    D = IndirectFieldDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_VAR:
    D = VarDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_IMPLICIT_PARAM:
    D = ImplicitParamDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_PARM_VAR:
    D = ParmVarDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_DECOMPOSITION:
    D = DecompositionDecl::CreateDeserialized(Context, ID, Record.readInt());
    break;
  case DECL_BINDING:
    D = BindingDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_FILE_SCOPE_ASM:
    D = FileScopeAsmDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_TOP_LEVEL_STMT_DECL:
    D = TopLevelStmtDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_BLOCK:
    D = BlockDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_MS_PROPERTY:
    D = MSPropertyDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_MS_GUID:
    D = MSGuidDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_UNNAMED_GLOBAL_CONSTANT:
    D = UnnamedGlobalConstantDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_TEMPLATE_PARAM_OBJECT:
    D = TemplateParamObjectDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_CAPTURED:
    D = CapturedDecl::CreateDeserialized(Context, ID, Record.readInt());
    break;
  case DECL_CXX_BASE_SPECIFIERS:
    Error("attempt to read a C++ base-specifier record as a declaration");
    return nullptr;
  case DECL_CXX_CTOR_INITIALIZERS:
    Error("attempt to read a C++ ctor initializer record as a declaration");
    return nullptr;
  case DECL_IMPORT:
    // Note: last entry of the ImportDecl record is the number of stored source
    // locations.
    D = ImportDecl::CreateDeserialized(Context, ID, Record.back());
    break;
  case DECL_OMP_THREADPRIVATE: {
    Record.skipInts(1);
    unsigned NumChildren = Record.readInt();
    Record.skipInts(1);
    D = OMPThreadPrivateDecl::CreateDeserialized(Context, ID, NumChildren);
    break;
  }
  case DECL_OMP_ALLOCATE: {
    unsigned NumClauses = Record.readInt();
    unsigned NumVars = Record.readInt();
    Record.skipInts(1);
    D = OMPAllocateDecl::CreateDeserialized(Context, ID, NumVars, NumClauses);
    break;
  }
  case DECL_OMP_REQUIRES: {
    unsigned NumClauses = Record.readInt();
    Record.skipInts(2);
    D = OMPRequiresDecl::CreateDeserialized(Context, ID, NumClauses);
    break;
  }
  case DECL_OMP_DECLARE_REDUCTION:
    D = OMPDeclareReductionDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_OMP_DECLARE_MAPPER: {
    unsigned NumClauses = Record.readInt();
    Record.skipInts(2);
    D = OMPDeclareMapperDecl::CreateDeserialized(Context, ID, NumClauses);
    break;
  }
  case DECL_OMP_CAPTUREDEXPR:
    D = OMPCapturedExprDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_PRAGMA_COMMENT:
    D = PragmaCommentDecl::CreateDeserialized(Context, ID, Record.readInt());
    break;
  case DECL_PRAGMA_DETECT_MISMATCH:
    D = PragmaDetectMismatchDecl::CreateDeserialized(Context, ID,
                                                     Record.readInt());
    break;
  case DECL_EMPTY:
    D = EmptyDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_LIFETIME_EXTENDED_TEMPORARY:
    D = LifetimeExtendedTemporaryDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_OBJC_TYPE_PARAM:
    D = ObjCTypeParamDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_HLSL_BUFFER:
    D = HLSLBufferDecl::CreateDeserialized(Context, ID);
    break;
  case DECL_IMPLICIT_CONCEPT_SPECIALIZATION:
    D = ImplicitConceptSpecializationDecl::CreateDeserialized(Context, ID,
                                                              Record.readInt());
    break;
  }

  assert(D && "Unknown declaration reading AST file");

  LoadedDecl(Index, D);
  // Set the DeclContext before doing any deserialization, to make sure internal
  // calls to Decl::getASTContext() by Decl's methods will find the
  // TranslationUnitDecl without crashing.
  D->setDeclContext(Context.getTranslationUnitDecl());
  Reader.Visit(D);

  // If this declaration is also a declaration context, get the
  // offsets for its tables of lexical and visible declarations.
  if (auto *DC = dyn_cast<DeclContext>(D)) {
    std::pair<uint64_t, uint64_t> Offsets = Reader.VisitDeclContext(DC);
    if (Offsets.first &&
        ReadLexicalDeclContextStorage(*Loc.F, DeclsCursor, Offsets.first, DC))
      return nullptr;
    if (Offsets.second &&
        ReadVisibleDeclContextStorage(*Loc.F, DeclsCursor, Offsets.second, ID))
      return nullptr;
  }
  assert(Record.getIdx() == Record.size());

  // Load any relevant update records.
  PendingUpdateRecords.push_back(
      PendingUpdateRecord(ID, D, /*JustLoaded=*/true));

  // Load the categories after recursive loading is finished.
  if (auto *Class = dyn_cast<ObjCInterfaceDecl>(D))
    // If we already have a definition when deserializing the ObjCInterfaceDecl,
    // we put the Decl in PendingDefinitions so we can pull the categories here.
    if (Class->isThisDeclarationADefinition() ||
        PendingDefinitions.count(Class))
      loadObjCCategories(ID, Class);

  // If we have deserialized a declaration that has a definition the
  // AST consumer might need to know about, queue it.
  // We don't pass it to the consumer immediately because we may be in recursive
  // loading, and some declarations may still be initializing.
  PotentiallyInterestingDecls.push_back(
      InterestingDecl(D, Reader.hasPendingBody()));

  return D;
}

void ASTReader::PassInterestingDeclsToConsumer() {
  assert(Consumer);

  if (PassingDeclsToConsumer)
    return;

  // Guard variable to avoid recursively redoing the process of passing
  // decls to consumer.
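  //
  // Illustrative sketch of the guard idiom used below (simplified; not the
  // actual llvm/Support/SaveAndRestore.h definition): the guard remembers the
  // current value of the flag, overwrites it for its own lifetime, and puts
  // the remembered value back in its destructor, roughly:
  //
  //   template <typename T> struct SaveAndRestore {
  //     SaveAndRestore(T &X, T NewValue) : X(X), Old(X) { X = NewValue; }
  //     ~SaveAndRestore() { X = Old; }
  //     T &X;
  //     T Old;
  //   };
  //
  // so PassingDeclsToConsumer is reset even if we return early below.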
SaveAndRestore GuardPassingDeclsToConsumer(PassingDeclsToConsumer, true); // Ensure that we've loaded all potentially-interesting declarations // that need to be eagerly loaded. for (auto ID : EagerlyDeserializedDecls) GetDecl(ID); EagerlyDeserializedDecls.clear(); while (!PotentiallyInterestingDecls.empty()) { InterestingDecl D = PotentiallyInterestingDecls.front(); PotentiallyInterestingDecls.pop_front(); if (isConsumerInterestedIn(getContext(), D.getDecl(), D.hasPendingBody())) PassInterestingDeclToConsumer(D.getDecl()); } } void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { // The declaration may have been modified by files later in the chain. // If this is the case, read the record containing the updates from each file // and pass it to ASTDeclReader to make the modifications. serialization::GlobalDeclID ID = Record.ID; Decl *D = Record.D; ProcessingUpdatesRAIIObj ProcessingUpdates(*this); DeclUpdateOffsetsMap::iterator UpdI = DeclUpdateOffsets.find(ID); SmallVector PendingLazySpecializationIDs; if (UpdI != DeclUpdateOffsets.end()) { auto UpdateOffsets = std::move(UpdI->second); DeclUpdateOffsets.erase(UpdI); // Check if this decl was interesting to the consumer. If we just loaded // the declaration, then we know it was interesting and we skip the call // to isConsumerInterestedIn because it is unsafe to call in the // current ASTReader state. bool WasInteresting = Record.JustLoaded || isConsumerInterestedIn(getContext(), D, false); for (auto &FileAndOffset : UpdateOffsets) { ModuleFile *F = FileAndOffset.first; uint64_t Offset = FileAndOffset.second; llvm::BitstreamCursor &Cursor = F->DeclsCursor; SavedStreamPosition SavedPosition(Cursor); if (llvm::Error JumpFailed = Cursor.JumpToBit(Offset)) // FIXME don't do a fatal error. llvm::report_fatal_error( Twine("ASTReader::loadDeclUpdateRecords failed jumping: ") + toString(std::move(JumpFailed))); Expected MaybeCode = Cursor.ReadCode(); if (!MaybeCode) llvm::report_fatal_error( Twine("ASTReader::loadDeclUpdateRecords failed reading code: ") + toString(MaybeCode.takeError())); unsigned Code = MaybeCode.get(); ASTRecordReader Record(*this, *F); if (Expected MaybeRecCode = Record.readRecord(Cursor, Code)) assert(MaybeRecCode.get() == DECL_UPDATES && "Expected DECL_UPDATES record!"); else llvm::report_fatal_error( Twine("ASTReader::loadDeclUpdateRecords failed reading rec code: ") + toString(MaybeCode.takeError())); ASTDeclReader Reader(*this, Record, RecordLocation(F, Offset), ID, SourceLocation()); Reader.UpdateDecl(D, PendingLazySpecializationIDs); // We might have made this declaration interesting. If so, remember that // we need to hand it off to the consumer. if (!WasInteresting && isConsumerInterestedIn(getContext(), D, Reader.hasPendingBody())) { PotentiallyInterestingDecls.push_back( InterestingDecl(D, Reader.hasPendingBody())); WasInteresting = true; } } } // Add the lazy specializations to the template. assert((PendingLazySpecializationIDs.empty() || isa(D) || isa(D)) && "Must not have pending specializations"); if (auto *CTD = dyn_cast(D)) ASTDeclReader::AddLazySpecializations(CTD, PendingLazySpecializationIDs); else if (auto *FTD = dyn_cast(D)) ASTDeclReader::AddLazySpecializations(FTD, PendingLazySpecializationIDs); else if (auto *VTD = dyn_cast(D)) ASTDeclReader::AddLazySpecializations(VTD, PendingLazySpecializationIDs); PendingLazySpecializationIDs.clear(); // Load the pending visible updates for this decl context, if it has any. 
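  // A typical source of such an update is a DeclContext that a later module
  // reopens, for example (illustrative sketch only):
  //
  //   // module A:               // module B, which imports A:
  //   namespace N { int x; }     namespace N { int y; }
  //
  // Module B then records a visible-lookup update for A's namespace N, so
  // that name lookup into N also finds 'y' once B has been loaded.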
auto I = PendingVisibleUpdates.find(ID); if (I != PendingVisibleUpdates.end()) { auto VisibleUpdates = std::move(I->second); PendingVisibleUpdates.erase(I); auto *DC = cast(D)->getPrimaryContext(); for (const auto &Update : VisibleUpdates) Lookups[DC].Table.add( Update.Mod, Update.Data, reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod)); DC->setHasExternalVisibleStorage(true); } } void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) { // Attach FirstLocal to the end of the decl chain. Decl *CanonDecl = FirstLocal->getCanonicalDecl(); if (FirstLocal != CanonDecl) { Decl *PrevMostRecent = ASTDeclReader::getMostRecentDecl(CanonDecl); ASTDeclReader::attachPreviousDecl( *this, FirstLocal, PrevMostRecent ? PrevMostRecent : CanonDecl, CanonDecl); } if (!LocalOffset) { ASTDeclReader::attachLatestDecl(CanonDecl, FirstLocal); return; } // Load the list of other redeclarations from this module file. ModuleFile *M = getOwningModuleFile(FirstLocal); assert(M && "imported decl from no module file"); llvm::BitstreamCursor &Cursor = M->DeclsCursor; SavedStreamPosition SavedPosition(Cursor); if (llvm::Error JumpFailed = Cursor.JumpToBit(LocalOffset)) llvm::report_fatal_error( Twine("ASTReader::loadPendingDeclChain failed jumping: ") + toString(std::move(JumpFailed))); RecordData Record; Expected MaybeCode = Cursor.ReadCode(); if (!MaybeCode) llvm::report_fatal_error( Twine("ASTReader::loadPendingDeclChain failed reading code: ") + toString(MaybeCode.takeError())); unsigned Code = MaybeCode.get(); if (Expected MaybeRecCode = Cursor.readRecord(Code, Record)) assert(MaybeRecCode.get() == LOCAL_REDECLARATIONS && "expected LOCAL_REDECLARATIONS record!"); else llvm::report_fatal_error( Twine("ASTReader::loadPendingDeclChain failed reading rec code: ") + toString(MaybeCode.takeError())); // FIXME: We have several different dispatches on decl kind here; maybe // we should instead generate one loop per kind and dispatch up-front? Decl *MostRecent = FirstLocal; for (unsigned I = 0, N = Record.size(); I != N; ++I) { auto *D = GetLocalDecl(*M, Record[N - I - 1]); ASTDeclReader::attachPreviousDecl(*this, D, MostRecent, CanonDecl); MostRecent = D; } ASTDeclReader::attachLatestDecl(CanonDecl, MostRecent); } namespace { /// Given an ObjC interface, goes through the modules and links to the /// interface all the categories for it. class ObjCCategoriesVisitor { ASTReader &Reader; ObjCInterfaceDecl *Interface; llvm::SmallPtrSetImpl &Deserialized; ObjCCategoryDecl *Tail = nullptr; llvm::DenseMap NameCategoryMap; serialization::GlobalDeclID InterfaceID; unsigned PreviousGeneration; void add(ObjCCategoryDecl *Cat) { // Only process each category once. if (!Deserialized.erase(Cat)) return; // Check for duplicate categories. if (Cat->getDeclName()) { ObjCCategoryDecl *&Existing = NameCategoryMap[Cat->getDeclName()]; if (Existing && Reader.getOwningModuleFile(Existing) != Reader.getOwningModuleFile(Cat)) { llvm::DenseSet> NonEquivalentDecls; StructuralEquivalenceContext Ctx( Cat->getASTContext(), Existing->getASTContext(), NonEquivalentDecls, StructuralEquivalenceKind::Default, /*StrictTypeSpelling =*/false, /*Complain =*/false, /*ErrorOnTagTypeMismatch =*/true); if (!Ctx.IsEquivalent(Cat, Existing)) { // Warn only if the categories with the same name are different. Reader.Diag(Cat->getLocation(), diag::warn_dup_category_def) << Interface->getDeclName() << Cat->getDeclName(); Reader.Diag(Existing->getLocation(), diag::note_previous_definition); } } else if (!Existing) { // Record this category. 
Existing = Cat; } } // Add this category to the end of the chain. if (Tail) ASTDeclReader::setNextObjCCategory(Tail, Cat); else Interface->setCategoryListRaw(Cat); Tail = Cat; } public: ObjCCategoriesVisitor(ASTReader &Reader, ObjCInterfaceDecl *Interface, llvm::SmallPtrSetImpl &Deserialized, serialization::GlobalDeclID InterfaceID, unsigned PreviousGeneration) : Reader(Reader), Interface(Interface), Deserialized(Deserialized), InterfaceID(InterfaceID), PreviousGeneration(PreviousGeneration) { // Populate the name -> category map with the set of known categories. for (auto *Cat : Interface->known_categories()) { if (Cat->getDeclName()) NameCategoryMap[Cat->getDeclName()] = Cat; // Keep track of the tail of the category list. Tail = Cat; } } bool operator()(ModuleFile &M) { // If we've loaded all of the category information we care about from // this module file, we're done. if (M.Generation <= PreviousGeneration) return true; // Map global ID of the definition down to the local ID used in this // module file. If there is no such mapping, we'll find nothing here // (or in any module it imports). DeclID LocalID = Reader.mapGlobalIDToModuleFileGlobalID(M, InterfaceID); if (!LocalID) return true; // Perform a binary search to find the local redeclarations for this // declaration (if any). const ObjCCategoriesInfo Compare = { LocalID, 0 }; const ObjCCategoriesInfo *Result = std::lower_bound(M.ObjCCategoriesMap, M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap, Compare); if (Result == M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap || Result->DefinitionID != LocalID) { // We didn't find anything. If the class definition is in this module // file, then the module files it depends on cannot have any categories, // so suppress further lookup. return Reader.isDeclIDFromModule(InterfaceID, M); } // We found something. Dig out all of the categories. unsigned Offset = Result->Offset; unsigned N = M.ObjCCategories[Offset]; M.ObjCCategories[Offset++] = 0; // Don't try to deserialize again for (unsigned I = 0; I != N; ++I) add(cast_or_null( Reader.GetLocalDecl(M, M.ObjCCategories[Offset++]))); return true; } }; } // namespace void ASTReader::loadObjCCategories(serialization::GlobalDeclID ID, ObjCInterfaceDecl *D, unsigned PreviousGeneration) { ObjCCategoriesVisitor Visitor(*this, D, CategoriesDeserialized, ID, PreviousGeneration); ModuleMgr.visit(Visitor); } template static void forAllLaterRedecls(DeclT *D, Fn F) { F(D); // Check whether we've already merged D into its redeclaration chain. // MostRecent may or may not be nullptr if D has not been merged. If // not, walk the merged redecl chain and see if it's there. auto *MostRecent = D->getMostRecentDecl(); bool Found = false; for (auto *Redecl = MostRecent; Redecl && !Found; Redecl = Redecl->getPreviousDecl()) Found = (Redecl == D); // If this declaration is merged, apply the functor to all later decls. if (Found) { for (auto *Redecl = MostRecent; Redecl != D; Redecl = Redecl->getPreviousDecl()) F(Redecl); } } void ASTDeclReader::UpdateDecl(Decl *D, llvm::SmallVectorImpl &PendingLazySpecializationIDs) { while (Record.getIdx() < Record.size()) { switch ((DeclUpdateKind)Record.readInt()) { case UPD_CXX_ADDED_IMPLICIT_MEMBER: { auto *RD = cast(D); Decl *MD = Record.readDecl(); assert(MD && "couldn't read decl from update record"); Reader.PendingAddedClassMembers.push_back({RD, MD}); break; } case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION: // It will be added to the template's lazy specialization set. 
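      // The ID is only recorded here; as in loadDeclUpdateRecords earlier in
      // this file, the collected IDs are later handed to
      // ASTDeclReader::AddLazySpecializations on the owning class/variable/
      // function template, so the specializations themselves stay unread
      // until they are actually needed.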
PendingLazySpecializationIDs.push_back(readDeclID()); break; case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE: { auto *Anon = readDeclAs(); // Each module has its own anonymous namespace, which is disjoint from // any other module's anonymous namespaces, so don't attach the anonymous // namespace at all. if (!Record.isModule()) { if (auto *TU = dyn_cast(D)) TU->setAnonymousNamespace(Anon); else cast(D)->setAnonymousNamespace(Anon); } break; } case UPD_CXX_ADDED_VAR_DEFINITION: { auto *VD = cast(D); VD->NonParmVarDeclBits.IsInline = Record.readInt(); VD->NonParmVarDeclBits.IsInlineSpecified = Record.readInt(); ReadVarDeclInit(VD); break; } case UPD_CXX_POINT_OF_INSTANTIATION: { SourceLocation POI = Record.readSourceLocation(); if (auto *VTSD = dyn_cast(D)) { VTSD->setPointOfInstantiation(POI); } else if (auto *VD = dyn_cast(D)) { MemberSpecializationInfo *MSInfo = VD->getMemberSpecializationInfo(); assert(MSInfo && "No member specialization information"); MSInfo->setPointOfInstantiation(POI); } else { auto *FD = cast(D); if (auto *FTSInfo = FD->TemplateOrSpecialization .dyn_cast()) FTSInfo->setPointOfInstantiation(POI); else FD->TemplateOrSpecialization.get() ->setPointOfInstantiation(POI); } break; } case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: { auto *Param = cast(D); // We have to read the default argument regardless of whether we use it // so that hypothetical further update records aren't messed up. // TODO: Add a function to skip over the next expr record. auto *DefaultArg = Record.readExpr(); // Only apply the update if the parameter still has an uninstantiated // default argument. if (Param->hasUninstantiatedDefaultArg()) Param->setDefaultArg(DefaultArg); break; } case UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER: { auto *FD = cast(D); auto *DefaultInit = Record.readExpr(); // Only apply the update if the field still has an uninstantiated // default member initializer. if (FD->hasInClassInitializer() && !FD->hasNonNullInClassInitializer()) { if (DefaultInit) FD->setInClassInitializer(DefaultInit); else // Instantiation failed. We can get here if we serialized an AST for // an invalid program. FD->removeInClassInitializer(); } break; } case UPD_CXX_ADDED_FUNCTION_DEFINITION: { auto *FD = cast(D); if (Reader.PendingBodies[FD]) { // FIXME: Maybe check for ODR violations. // It's safe to stop now because this update record is always last. return; } if (Record.readInt()) { // Maintain AST consistency: any later redeclarations of this function // are inline if this one is. (We might have merged another declaration // into this one.) forAllLaterRedecls(FD, [](FunctionDecl *FD) { FD->setImplicitlyInline(); }); } FD->setInnerLocStart(readSourceLocation()); ReadFunctionDefinition(FD); assert(Record.getIdx() == Record.size() && "lazy body must be last"); break; } case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: { auto *RD = cast(D); auto *OldDD = RD->getCanonicalDecl()->DefinitionData; bool HadRealDefinition = OldDD && (OldDD->Definition != RD || !Reader.PendingFakeDefinitionData.count(OldDD)); RD->setParamDestroyedInCallee(Record.readInt()); RD->setArgPassingRestrictions( static_cast(Record.readInt())); ReadCXXRecordDefinition(RD, /*Update*/true); // Visible update is handled separately. 
uint64_t LexicalOffset = ReadLocalOffset(); if (!HadRealDefinition && LexicalOffset) { Record.readLexicalDeclContextStorage(LexicalOffset, RD); Reader.PendingFakeDefinitionData.erase(OldDD); } auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); if (MemberSpecializationInfo *MSInfo = RD->getMemberSpecializationInfo()) { MSInfo->setTemplateSpecializationKind(TSK); MSInfo->setPointOfInstantiation(POI); } else { auto *Spec = cast(RD); Spec->setTemplateSpecializationKind(TSK); Spec->setPointOfInstantiation(POI); if (Record.readInt()) { auto *PartialSpec = readDeclAs(); SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs); auto *TemplArgList = TemplateArgumentList::CreateCopy( Reader.getContext(), TemplArgs); // FIXME: If we already have a partial specialization set, // check that it matches. if (!Spec->getSpecializedTemplateOrPartial() .is()) Spec->setInstantiationOf(PartialSpec, TemplArgList); } } RD->setTagKind(static_cast(Record.readInt())); RD->setLocation(readSourceLocation()); RD->setLocStart(readSourceLocation()); RD->setBraceRange(readSourceRange()); if (Record.readInt()) { AttrVec Attrs; Record.readAttributes(Attrs); // If the declaration already has attributes, we assume that some other // AST file already loaded them. if (!D->hasAttrs()) D->setAttrsImpl(Attrs, Reader.getContext()); } break; } case UPD_CXX_RESOLVED_DTOR_DELETE: { // Set the 'operator delete' directly to avoid emitting another update // record. auto *Del = readDeclAs(); auto *First = cast(D->getCanonicalDecl()); auto *ThisArg = Record.readExpr(); // FIXME: Check consistency if we have an old and new operator delete. if (!First->OperatorDelete) { First->OperatorDelete = Del; First->OperatorDeleteThisArg = ThisArg; } break; } case UPD_CXX_RESOLVED_EXCEPTION_SPEC: { SmallVector ExceptionStorage; auto ESI = Record.readExceptionSpecInfo(ExceptionStorage); // Update this declaration's exception specification, if needed. auto *FD = cast(D); auto *FPT = FD->getType()->castAs(); // FIXME: If the exception specification is already present, check that it // matches. if (isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) { FD->setType(Reader.getContext().getFunctionType( FPT->getReturnType(), FPT->getParamTypes(), FPT->getExtProtoInfo().withExceptionSpec(ESI))); // When we get to the end of deserializing, see if there are other decls // that we need to propagate this exception specification onto. Reader.PendingExceptionSpecUpdates.insert( std::make_pair(FD->getCanonicalDecl(), FD)); } break; } case UPD_CXX_DEDUCED_RETURN_TYPE: { auto *FD = cast(D); QualType DeducedResultType = Record.readType(); Reader.PendingDeducedTypeUpdates.insert( {FD->getCanonicalDecl(), DeducedResultType}); break; } case UPD_DECL_MARKED_USED: // Maintain AST consistency: any later redeclarations are used too. 
D->markUsed(Reader.getContext()); break; case UPD_MANGLING_NUMBER: Reader.getContext().setManglingNumber(cast(D), Record.readInt()); break; case UPD_STATIC_LOCAL_NUMBER: Reader.getContext().setStaticLocalNumber(cast(D), Record.readInt()); break; case UPD_DECL_MARKED_OPENMP_THREADPRIVATE: D->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit(Reader.getContext(), readSourceRange())); break; case UPD_DECL_MARKED_OPENMP_ALLOCATE: { auto AllocatorKind = static_cast(Record.readInt()); Expr *Allocator = Record.readExpr(); Expr *Alignment = Record.readExpr(); SourceRange SR = readSourceRange(); D->addAttr(OMPAllocateDeclAttr::CreateImplicit( Reader.getContext(), AllocatorKind, Allocator, Alignment, SR)); break; } case UPD_DECL_EXPORTED: { unsigned SubmoduleID = readSubmoduleID(); auto *Exported = cast(D); Module *Owner = SubmoduleID ? Reader.getSubmodule(SubmoduleID) : nullptr; Reader.getContext().mergeDefinitionIntoModule(Exported, Owner); Reader.PendingMergedDefinitionsToDeduplicate.insert(Exported); break; } case UPD_DECL_MARKED_OPENMP_DECLARETARGET: { auto MapType = Record.readEnum(); auto DevType = Record.readEnum(); Expr *IndirectE = Record.readExpr(); bool Indirect = Record.readBool(); unsigned Level = Record.readInt(); D->addAttr(OMPDeclareTargetDeclAttr::CreateImplicit( Reader.getContext(), MapType, DevType, IndirectE, Indirect, Level, readSourceRange())); break; } case UPD_ADDED_ATTR_TO_RECORD: AttrVec Attrs; Record.readAttributes(Attrs); assert(Attrs.size() == 1); D->addAttr(Attrs[0]); break; } } } diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTWriter.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTWriter.cpp index 3b79a9238d1a..73018c1170d8 100644 --- a/contrib/llvm-project/clang/lib/Serialization/ASTWriter.cpp +++ b/contrib/llvm-project/clang/lib/Serialization/ASTWriter.cpp @@ -1,7325 +1,7327 @@ //===- ASTWriter.cpp - AST File Writer ------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the ASTWriter class, which writes AST files. 
// //===----------------------------------------------------------------------===// #include "ASTCommon.h" #include "ASTReaderInternals.h" #include "MultiOnDiskHashTable.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTUnresolvedSet.h" #include "clang/AST/AbstractTypeWriter.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclContextInternals.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/LambdaCapture.h" #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/RawCommentList.h" #include "clang/AST/TemplateName.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLocVisitor.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/FileSystemOptions.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/Lambda.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/OpenCLOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/SourceManagerInternals.h" #include "clang/Basic/Specifiers.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "clang/Basic/Version.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PreprocessingRecord.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/Lex/Token.h" #include "clang/Sema/IdentifierResolver.h" #include "clang/Sema/ObjCMethodList.h" #include "clang/Sema/Sema.h" #include "clang/Sema/Weak.h" #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTRecordWriter.h" #include "clang/Serialization/InMemoryModuleCache.h" #include "clang/Serialization/ModuleFile.h" #include "clang/Serialization/ModuleFileExtension.h" #include "clang/Serialization/SerializationDiagnostic.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitstream/BitCodes.h" #include "llvm/Bitstream/BitstreamWriter.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" #include "llvm/Support/DJB.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/OnDiskHashTable.h" #include "llvm/Support/Path.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/VersionTuple.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include #include #include #include #include #include #include using 
namespace clang; using namespace clang::serialization; template static StringRef bytes(const std::vector &v) { if (v.empty()) return StringRef(); return StringRef(reinterpret_cast(&v[0]), sizeof(T) * v.size()); } template static StringRef bytes(const SmallVectorImpl &v) { return StringRef(reinterpret_cast(v.data()), sizeof(T) * v.size()); } static std::string bytes(const std::vector &V) { std::string Str; Str.reserve(V.size() / 8); for (unsigned I = 0, E = V.size(); I < E;) { char Byte = 0; for (unsigned Bit = 0; Bit < 8 && I < E; ++Bit, ++I) Byte |= V[I] << Bit; Str += Byte; } return Str; } //===----------------------------------------------------------------------===// // Type serialization //===----------------------------------------------------------------------===// static TypeCode getTypeCodeForTypeClass(Type::TypeClass id) { switch (id) { #define TYPE_BIT_CODE(CLASS_ID, CODE_ID, CODE_VALUE) \ case Type::CLASS_ID: return TYPE_##CODE_ID; #include "clang/Serialization/TypeBitCodes.def" case Type::Builtin: llvm_unreachable("shouldn't be serializing a builtin type this way"); } llvm_unreachable("bad type kind"); } namespace { std::set GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { SmallVector ModulesToProcess{RootModule}; const HeaderSearch &HS = PP.getHeaderSearchInfo(); SmallVector FilesByUID; HS.getFileMgr().GetUniqueIDMapping(FilesByUID); if (FilesByUID.size() > HS.header_file_size()) FilesByUID.resize(HS.header_file_size()); for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) { OptionalFileEntryRef File = FilesByUID[UID]; if (!File) continue; const HeaderFileInfo *HFI = HS.getExistingFileInfo(*File, /*WantExternal*/ false); if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader)) continue; for (const auto &KH : HS.findResolvedModulesForHeader(*File)) { if (!KH.getModule()) continue; ModulesToProcess.push_back(KH.getModule()); } } const ModuleMap &MM = HS.getModuleMap(); SourceManager &SourceMgr = PP.getSourceManager(); std::set ModuleMaps{}; auto CollectIncludingModuleMaps = [&](FileEntryRef F) { if (!ModuleMaps.insert(F).second) return; FileID FID = SourceMgr.translateFile(F); SourceLocation Loc = SourceMgr.getIncludeLoc(FID); // The include location of inferred module maps can point into the header // file that triggered the inferring. Cut off the walk if that's the case. while (Loc.isValid() && isModuleMap(SourceMgr.getFileCharacteristic(Loc))) { FID = SourceMgr.getFileID(Loc); if (!ModuleMaps.insert(*SourceMgr.getFileEntryRefForID(FID)).second) break; Loc = SourceMgr.getIncludeLoc(FID); } }; std::set ProcessedModules; auto CollectIncludingMapsFromAncestors = [&](const Module *M) { for (const Module *Mod = M; Mod; Mod = Mod->Parent) { if (!ProcessedModules.insert(Mod).second) break; // The containing module map is affecting, because it's being pointed // into by Module::DefinitionLoc. if (auto ModuleMapFile = MM.getContainingModuleMapFile(Mod)) CollectIncludingModuleMaps(*ModuleMapFile); // For inferred modules, the module map that allowed inferring is not in // the include chain of the virtual containing module map file. It did // affect the compilation, though. 
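      // An inferred module is typically one synthesized from an
      // inferred-submodule declaration (e.g. "module * { export * }") in some
      // parent module map; getModuleMapFileForUniquing() is expected to
      // return that file, so it is recorded as affecting the compilation as
      // well.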
if (auto ModuleMapFile = MM.getModuleMapFileForUniquing(Mod)) CollectIncludingModuleMaps(*ModuleMapFile); } }; for (const Module *CurrentModule : ModulesToProcess) { CollectIncludingMapsFromAncestors(CurrentModule); for (const Module *ImportedModule : CurrentModule->Imports) CollectIncludingMapsFromAncestors(ImportedModule); for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses) CollectIncludingMapsFromAncestors(UndeclaredModule); } return ModuleMaps; } class ASTTypeWriter { ASTWriter &Writer; ASTWriter::RecordData Record; ASTRecordWriter BasicWriter; public: ASTTypeWriter(ASTWriter &Writer) : Writer(Writer), BasicWriter(Writer, Record) {} uint64_t write(QualType T) { if (T.hasLocalNonFastQualifiers()) { Qualifiers Qs = T.getLocalQualifiers(); BasicWriter.writeQualType(T.getLocalUnqualifiedType()); BasicWriter.writeQualifiers(Qs); return BasicWriter.Emit(TYPE_EXT_QUAL, Writer.getTypeExtQualAbbrev()); } const Type *typePtr = T.getTypePtr(); serialization::AbstractTypeWriter atw(BasicWriter); atw.write(typePtr); return BasicWriter.Emit(getTypeCodeForTypeClass(typePtr->getTypeClass()), /*abbrev*/ 0); } }; class TypeLocWriter : public TypeLocVisitor { using LocSeq = SourceLocationSequence; ASTRecordWriter &Record; LocSeq *Seq; void addSourceLocation(SourceLocation Loc) { Record.AddSourceLocation(Loc, Seq); } void addSourceRange(SourceRange Range) { Record.AddSourceRange(Range, Seq); } public: TypeLocWriter(ASTRecordWriter &Record, LocSeq *Seq) : Record(Record), Seq(Seq) {} #define ABSTRACT_TYPELOC(CLASS, PARENT) #define TYPELOC(CLASS, PARENT) \ void Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc); #include "clang/AST/TypeLocNodes.def" void VisitArrayTypeLoc(ArrayTypeLoc TyLoc); void VisitFunctionTypeLoc(FunctionTypeLoc TyLoc); }; } // namespace void TypeLocWriter::VisitQualifiedTypeLoc(QualifiedTypeLoc TL) { // nothing to do } void TypeLocWriter::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) { addSourceLocation(TL.getBuiltinLoc()); if (TL.needsExtraLocalData()) { Record.push_back(TL.getWrittenTypeSpec()); Record.push_back(static_cast(TL.getWrittenSignSpec())); Record.push_back(static_cast(TL.getWrittenWidthSpec())); Record.push_back(TL.hasModeAttr()); } } void TypeLocWriter::VisitComplexTypeLoc(ComplexTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitPointerTypeLoc(PointerTypeLoc TL) { addSourceLocation(TL.getStarLoc()); } void TypeLocWriter::VisitDecayedTypeLoc(DecayedTypeLoc TL) { // nothing to do } void TypeLocWriter::VisitAdjustedTypeLoc(AdjustedTypeLoc TL) { // nothing to do } void TypeLocWriter::VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) { addSourceLocation(TL.getCaretLoc()); } void TypeLocWriter::VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) { addSourceLocation(TL.getAmpLoc()); } void TypeLocWriter::VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) { addSourceLocation(TL.getAmpAmpLoc()); } void TypeLocWriter::VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) { addSourceLocation(TL.getStarLoc()); Record.AddTypeSourceInfo(TL.getClassTInfo()); } void TypeLocWriter::VisitArrayTypeLoc(ArrayTypeLoc TL) { addSourceLocation(TL.getLBracketLoc()); addSourceLocation(TL.getRBracketLoc()); Record.push_back(TL.getSizeExpr() ? 
1 : 0); if (TL.getSizeExpr()) Record.AddStmt(TL.getSizeExpr()); } void TypeLocWriter::VisitConstantArrayTypeLoc(ConstantArrayTypeLoc TL) { VisitArrayTypeLoc(TL); } void TypeLocWriter::VisitIncompleteArrayTypeLoc(IncompleteArrayTypeLoc TL) { VisitArrayTypeLoc(TL); } void TypeLocWriter::VisitVariableArrayTypeLoc(VariableArrayTypeLoc TL) { VisitArrayTypeLoc(TL); } void TypeLocWriter::VisitDependentSizedArrayTypeLoc( DependentSizedArrayTypeLoc TL) { VisitArrayTypeLoc(TL); } void TypeLocWriter::VisitDependentAddressSpaceTypeLoc( DependentAddressSpaceTypeLoc TL) { addSourceLocation(TL.getAttrNameLoc()); SourceRange range = TL.getAttrOperandParensRange(); addSourceLocation(range.getBegin()); addSourceLocation(range.getEnd()); Record.AddStmt(TL.getAttrExprOperand()); } void TypeLocWriter::VisitDependentSizedExtVectorTypeLoc( DependentSizedExtVectorTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitVectorTypeLoc(VectorTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitDependentVectorTypeLoc( DependentVectorTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitConstantMatrixTypeLoc(ConstantMatrixTypeLoc TL) { addSourceLocation(TL.getAttrNameLoc()); SourceRange range = TL.getAttrOperandParensRange(); addSourceLocation(range.getBegin()); addSourceLocation(range.getEnd()); Record.AddStmt(TL.getAttrRowOperand()); Record.AddStmt(TL.getAttrColumnOperand()); } void TypeLocWriter::VisitDependentSizedMatrixTypeLoc( DependentSizedMatrixTypeLoc TL) { addSourceLocation(TL.getAttrNameLoc()); SourceRange range = TL.getAttrOperandParensRange(); addSourceLocation(range.getBegin()); addSourceLocation(range.getEnd()); Record.AddStmt(TL.getAttrRowOperand()); Record.AddStmt(TL.getAttrColumnOperand()); } void TypeLocWriter::VisitFunctionTypeLoc(FunctionTypeLoc TL) { addSourceLocation(TL.getLocalRangeBegin()); addSourceLocation(TL.getLParenLoc()); addSourceLocation(TL.getRParenLoc()); addSourceRange(TL.getExceptionSpecRange()); addSourceLocation(TL.getLocalRangeEnd()); for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i) Record.AddDeclRef(TL.getParam(i)); } void TypeLocWriter::VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc TL) { VisitFunctionTypeLoc(TL); } void TypeLocWriter::VisitFunctionNoProtoTypeLoc(FunctionNoProtoTypeLoc TL) { VisitFunctionTypeLoc(TL); } void TypeLocWriter::VisitUnresolvedUsingTypeLoc(UnresolvedUsingTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitUsingTypeLoc(UsingTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitTypedefTypeLoc(TypedefTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitObjCTypeParamTypeLoc(ObjCTypeParamTypeLoc TL) { if (TL.getNumProtocols()) { addSourceLocation(TL.getProtocolLAngleLoc()); addSourceLocation(TL.getProtocolRAngleLoc()); } for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i) addSourceLocation(TL.getProtocolLoc(i)); } void TypeLocWriter::VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) { addSourceLocation(TL.getTypeofLoc()); addSourceLocation(TL.getLParenLoc()); addSourceLocation(TL.getRParenLoc()); } void TypeLocWriter::VisitTypeOfTypeLoc(TypeOfTypeLoc TL) { addSourceLocation(TL.getTypeofLoc()); addSourceLocation(TL.getLParenLoc()); addSourceLocation(TL.getRParenLoc()); Record.AddTypeSourceInfo(TL.getUnmodifiedTInfo()); } void TypeLocWriter::VisitDecltypeTypeLoc(DecltypeTypeLoc TL) { 
addSourceLocation(TL.getDecltypeLoc()); addSourceLocation(TL.getRParenLoc()); } void TypeLocWriter::VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) { addSourceLocation(TL.getKWLoc()); addSourceLocation(TL.getLParenLoc()); addSourceLocation(TL.getRParenLoc()); Record.AddTypeSourceInfo(TL.getUnderlyingTInfo()); } void ASTRecordWriter::AddConceptReference(const ConceptReference *CR) { assert(CR); AddNestedNameSpecifierLoc(CR->getNestedNameSpecifierLoc()); AddSourceLocation(CR->getTemplateKWLoc()); AddDeclarationNameInfo(CR->getConceptNameInfo()); AddDeclRef(CR->getFoundDecl()); AddDeclRef(CR->getNamedConcept()); push_back(CR->getTemplateArgsAsWritten() != nullptr); if (CR->getTemplateArgsAsWritten()) AddASTTemplateArgumentListInfo(CR->getTemplateArgsAsWritten()); } void TypeLocWriter::VisitAutoTypeLoc(AutoTypeLoc TL) { addSourceLocation(TL.getNameLoc()); auto *CR = TL.getConceptReference(); Record.push_back(TL.isConstrained() && CR); if (TL.isConstrained() && CR) Record.AddConceptReference(CR); Record.push_back(TL.isDecltypeAuto()); if (TL.isDecltypeAuto()) addSourceLocation(TL.getRParenLoc()); } void TypeLocWriter::VisitDeducedTemplateSpecializationTypeLoc( DeducedTemplateSpecializationTypeLoc TL) { addSourceLocation(TL.getTemplateNameLoc()); } void TypeLocWriter::VisitRecordTypeLoc(RecordTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitEnumTypeLoc(EnumTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitAttributedTypeLoc(AttributedTypeLoc TL) { Record.AddAttr(TL.getAttr()); } void TypeLocWriter::VisitBTFTagAttributedTypeLoc(BTFTagAttributedTypeLoc TL) { // Nothing to do. } void TypeLocWriter::VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitSubstTemplateTypeParmTypeLoc( SubstTemplateTypeParmTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitSubstTemplateTypeParmPackTypeLoc( SubstTemplateTypeParmPackTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitTemplateSpecializationTypeLoc( TemplateSpecializationTypeLoc TL) { addSourceLocation(TL.getTemplateKeywordLoc()); addSourceLocation(TL.getTemplateNameLoc()); addSourceLocation(TL.getLAngleLoc()); addSourceLocation(TL.getRAngleLoc()); for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i) Record.AddTemplateArgumentLocInfo(TL.getArgLoc(i).getArgument().getKind(), TL.getArgLoc(i).getLocInfo()); } void TypeLocWriter::VisitParenTypeLoc(ParenTypeLoc TL) { addSourceLocation(TL.getLParenLoc()); addSourceLocation(TL.getRParenLoc()); } void TypeLocWriter::VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) { addSourceLocation(TL.getExpansionLoc()); } void TypeLocWriter::VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) { addSourceLocation(TL.getElaboratedKeywordLoc()); Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc()); } void TypeLocWriter::VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) { addSourceLocation(TL.getElaboratedKeywordLoc()); Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc()); addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitDependentTemplateSpecializationTypeLoc( DependentTemplateSpecializationTypeLoc TL) { addSourceLocation(TL.getElaboratedKeywordLoc()); Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc()); addSourceLocation(TL.getTemplateKeywordLoc()); addSourceLocation(TL.getTemplateNameLoc()); 
addSourceLocation(TL.getLAngleLoc()); addSourceLocation(TL.getRAngleLoc()); for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I) Record.AddTemplateArgumentLocInfo(TL.getArgLoc(I).getArgument().getKind(), TL.getArgLoc(I).getLocInfo()); } void TypeLocWriter::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) { addSourceLocation(TL.getEllipsisLoc()); } void TypeLocWriter::VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) { addSourceLocation(TL.getNameLoc()); addSourceLocation(TL.getNameEndLoc()); } void TypeLocWriter::VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) { Record.push_back(TL.hasBaseTypeAsWritten()); addSourceLocation(TL.getTypeArgsLAngleLoc()); addSourceLocation(TL.getTypeArgsRAngleLoc()); for (unsigned i = 0, e = TL.getNumTypeArgs(); i != e; ++i) Record.AddTypeSourceInfo(TL.getTypeArgTInfo(i)); addSourceLocation(TL.getProtocolLAngleLoc()); addSourceLocation(TL.getProtocolRAngleLoc()); for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i) addSourceLocation(TL.getProtocolLoc(i)); } void TypeLocWriter::VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) { addSourceLocation(TL.getStarLoc()); } void TypeLocWriter::VisitAtomicTypeLoc(AtomicTypeLoc TL) { addSourceLocation(TL.getKWLoc()); addSourceLocation(TL.getLParenLoc()); addSourceLocation(TL.getRParenLoc()); } void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) { addSourceLocation(TL.getKWLoc()); } void TypeLocWriter::VisitBitIntTypeLoc(clang::BitIntTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitDependentBitIntTypeLoc( clang::DependentBitIntTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } void ASTWriter::WriteTypeAbbrevs() { using namespace llvm; std::shared_ptr Abv; // Abbreviation for TYPE_EXT_QUAL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::TYPE_EXT_QUAL)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 3)); // Quals TypeExtQualAbbrev = Stream.EmitAbbrev(std::move(Abv)); } //===----------------------------------------------------------------------===// // ASTWriter Implementation //===----------------------------------------------------------------------===// static void EmitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, ASTWriter::RecordDataImpl &Record) { Record.clear(); Record.push_back(ID); Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record); // Emit the block name if present. 
  if (!Name || Name[0] == 0)
    return;
  Record.clear();
  while (*Name)
    Record.push_back(*Name++);
  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
}

static void EmitRecordID(unsigned ID, const char *Name,
                         llvm::BitstreamWriter &Stream,
                         ASTWriter::RecordDataImpl &Record) {
  Record.clear();
  Record.push_back(ID);
  while (*Name)
    Record.push_back(*Name++);
  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
}

static void AddStmtsExprs(llvm::BitstreamWriter &Stream,
                          ASTWriter::RecordDataImpl &Record) {
#define RECORD(X) EmitRecordID(X, #X, Stream, Record)
  RECORD(STMT_STOP); RECORD(STMT_NULL_PTR); RECORD(STMT_REF_PTR);
  RECORD(STMT_NULL); RECORD(STMT_COMPOUND); RECORD(STMT_CASE);
  RECORD(STMT_DEFAULT); RECORD(STMT_LABEL); RECORD(STMT_ATTRIBUTED);
  RECORD(STMT_IF); RECORD(STMT_SWITCH); RECORD(STMT_WHILE); RECORD(STMT_DO);
  RECORD(STMT_FOR); RECORD(STMT_GOTO); RECORD(STMT_INDIRECT_GOTO);
  RECORD(STMT_CONTINUE); RECORD(STMT_BREAK); RECORD(STMT_RETURN);
  RECORD(STMT_DECL); RECORD(STMT_GCCASM); RECORD(STMT_MSASM);
  RECORD(EXPR_PREDEFINED); RECORD(EXPR_DECL_REF); RECORD(EXPR_INTEGER_LITERAL);
  RECORD(EXPR_FIXEDPOINT_LITERAL); RECORD(EXPR_FLOATING_LITERAL);
  RECORD(EXPR_IMAGINARY_LITERAL); RECORD(EXPR_STRING_LITERAL);
  RECORD(EXPR_CHARACTER_LITERAL); RECORD(EXPR_PAREN); RECORD(EXPR_PAREN_LIST);
  RECORD(EXPR_UNARY_OPERATOR); RECORD(EXPR_SIZEOF_ALIGN_OF);
  RECORD(EXPR_ARRAY_SUBSCRIPT); RECORD(EXPR_CALL); RECORD(EXPR_MEMBER);
  RECORD(EXPR_BINARY_OPERATOR); RECORD(EXPR_COMPOUND_ASSIGN_OPERATOR);
  RECORD(EXPR_CONDITIONAL_OPERATOR); RECORD(EXPR_IMPLICIT_CAST);
  RECORD(EXPR_CSTYLE_CAST); RECORD(EXPR_COMPOUND_LITERAL);
  RECORD(EXPR_EXT_VECTOR_ELEMENT); RECORD(EXPR_INIT_LIST);
  RECORD(EXPR_DESIGNATED_INIT); RECORD(EXPR_DESIGNATED_INIT_UPDATE);
  RECORD(EXPR_IMPLICIT_VALUE_INIT); RECORD(EXPR_NO_INIT); RECORD(EXPR_VA_ARG);
  RECORD(EXPR_ADDR_LABEL); RECORD(EXPR_STMT); RECORD(EXPR_CHOOSE);
  RECORD(EXPR_GNU_NULL); RECORD(EXPR_SHUFFLE_VECTOR); RECORD(EXPR_BLOCK);
  RECORD(EXPR_GENERIC_SELECTION); RECORD(EXPR_OBJC_STRING_LITERAL);
  RECORD(EXPR_OBJC_BOXED_EXPRESSION); RECORD(EXPR_OBJC_ARRAY_LITERAL);
  RECORD(EXPR_OBJC_DICTIONARY_LITERAL); RECORD(EXPR_OBJC_ENCODE);
  RECORD(EXPR_OBJC_SELECTOR_EXPR); RECORD(EXPR_OBJC_PROTOCOL_EXPR);
  RECORD(EXPR_OBJC_IVAR_REF_EXPR); RECORD(EXPR_OBJC_PROPERTY_REF_EXPR);
  RECORD(EXPR_OBJC_KVC_REF_EXPR); RECORD(EXPR_OBJC_MESSAGE_EXPR);
  RECORD(STMT_OBJC_FOR_COLLECTION); RECORD(STMT_OBJC_CATCH);
  RECORD(STMT_OBJC_FINALLY); RECORD(STMT_OBJC_AT_TRY);
  RECORD(STMT_OBJC_AT_SYNCHRONIZED); RECORD(STMT_OBJC_AT_THROW);
  RECORD(EXPR_OBJC_BOOL_LITERAL); RECORD(STMT_CXX_CATCH); RECORD(STMT_CXX_TRY);
  RECORD(STMT_CXX_FOR_RANGE); RECORD(EXPR_CXX_OPERATOR_CALL);
  RECORD(EXPR_CXX_MEMBER_CALL); RECORD(EXPR_CXX_REWRITTEN_BINARY_OPERATOR);
  RECORD(EXPR_CXX_CONSTRUCT); RECORD(EXPR_CXX_TEMPORARY_OBJECT);
  RECORD(EXPR_CXX_STATIC_CAST); RECORD(EXPR_CXX_DYNAMIC_CAST);
  RECORD(EXPR_CXX_REINTERPRET_CAST); RECORD(EXPR_CXX_CONST_CAST);
  RECORD(EXPR_CXX_ADDRSPACE_CAST); RECORD(EXPR_CXX_FUNCTIONAL_CAST);
  RECORD(EXPR_USER_DEFINED_LITERAL); RECORD(EXPR_CXX_STD_INITIALIZER_LIST);
  RECORD(EXPR_CXX_BOOL_LITERAL); RECORD(EXPR_CXX_PAREN_LIST_INIT);
  RECORD(EXPR_CXX_NULL_PTR_LITERAL); RECORD(EXPR_CXX_TYPEID_EXPR);
  RECORD(EXPR_CXX_TYPEID_TYPE); RECORD(EXPR_CXX_THIS); RECORD(EXPR_CXX_THROW);
  RECORD(EXPR_CXX_DEFAULT_ARG); RECORD(EXPR_CXX_DEFAULT_INIT);
  RECORD(EXPR_CXX_BIND_TEMPORARY); RECORD(EXPR_CXX_SCALAR_VALUE_INIT);
  RECORD(EXPR_CXX_NEW); RECORD(EXPR_CXX_DELETE);
  RECORD(EXPR_CXX_PSEUDO_DESTRUCTOR); RECORD(EXPR_EXPR_WITH_CLEANUPS);
RECORD(EXPR_CXX_DEPENDENT_SCOPE_MEMBER); RECORD(EXPR_CXX_DEPENDENT_SCOPE_DECL_REF); RECORD(EXPR_CXX_UNRESOLVED_CONSTRUCT); RECORD(EXPR_CXX_UNRESOLVED_MEMBER); RECORD(EXPR_CXX_UNRESOLVED_LOOKUP); RECORD(EXPR_CXX_EXPRESSION_TRAIT); RECORD(EXPR_CXX_NOEXCEPT); RECORD(EXPR_OPAQUE_VALUE); RECORD(EXPR_BINARY_CONDITIONAL_OPERATOR); RECORD(EXPR_TYPE_TRAIT); RECORD(EXPR_ARRAY_TYPE_TRAIT); RECORD(EXPR_PACK_EXPANSION); RECORD(EXPR_SIZEOF_PACK); RECORD(EXPR_SUBST_NON_TYPE_TEMPLATE_PARM); RECORD(EXPR_SUBST_NON_TYPE_TEMPLATE_PARM_PACK); RECORD(EXPR_FUNCTION_PARM_PACK); RECORD(EXPR_MATERIALIZE_TEMPORARY); RECORD(EXPR_CUDA_KERNEL_CALL); RECORD(EXPR_CXX_UUIDOF_EXPR); RECORD(EXPR_CXX_UUIDOF_TYPE); RECORD(EXPR_LAMBDA); #undef RECORD } void ASTWriter::WriteBlockInfoBlock() { RecordData Record; Stream.EnterBlockInfoBlock(); #define BLOCK(X) EmitBlockID(X ## _ID, #X, Stream, Record) #define RECORD(X) EmitRecordID(X, #X, Stream, Record) // Control Block. BLOCK(CONTROL_BLOCK); RECORD(METADATA); RECORD(MODULE_NAME); RECORD(MODULE_DIRECTORY); RECORD(MODULE_MAP_FILE); RECORD(IMPORTS); RECORD(ORIGINAL_FILE); RECORD(ORIGINAL_FILE_ID); RECORD(INPUT_FILE_OFFSETS); BLOCK(OPTIONS_BLOCK); RECORD(LANGUAGE_OPTIONS); RECORD(TARGET_OPTIONS); RECORD(FILE_SYSTEM_OPTIONS); RECORD(HEADER_SEARCH_OPTIONS); RECORD(PREPROCESSOR_OPTIONS); BLOCK(INPUT_FILES_BLOCK); RECORD(INPUT_FILE); RECORD(INPUT_FILE_HASH); // AST Top-Level Block. BLOCK(AST_BLOCK); RECORD(TYPE_OFFSET); RECORD(DECL_OFFSET); RECORD(IDENTIFIER_OFFSET); RECORD(IDENTIFIER_TABLE); RECORD(EAGERLY_DESERIALIZED_DECLS); RECORD(MODULAR_CODEGEN_DECLS); RECORD(SPECIAL_TYPES); RECORD(STATISTICS); RECORD(TENTATIVE_DEFINITIONS); RECORD(SELECTOR_OFFSETS); RECORD(METHOD_POOL); RECORD(PP_COUNTER_VALUE); RECORD(SOURCE_LOCATION_OFFSETS); RECORD(EXT_VECTOR_DECLS); RECORD(UNUSED_FILESCOPED_DECLS); RECORD(PPD_ENTITIES_OFFSETS); RECORD(VTABLE_USES); RECORD(PPD_SKIPPED_RANGES); RECORD(REFERENCED_SELECTOR_POOL); RECORD(TU_UPDATE_LEXICAL); RECORD(SEMA_DECL_REFS); RECORD(WEAK_UNDECLARED_IDENTIFIERS); RECORD(PENDING_IMPLICIT_INSTANTIATIONS); RECORD(UPDATE_VISIBLE); RECORD(DECL_UPDATE_OFFSETS); RECORD(DECL_UPDATES); RECORD(CUDA_SPECIAL_DECL_REFS); RECORD(HEADER_SEARCH_TABLE); RECORD(FP_PRAGMA_OPTIONS); RECORD(OPENCL_EXTENSIONS); RECORD(OPENCL_EXTENSION_TYPES); RECORD(OPENCL_EXTENSION_DECLS); RECORD(DELEGATING_CTORS); RECORD(KNOWN_NAMESPACES); RECORD(MODULE_OFFSET_MAP); RECORD(SOURCE_MANAGER_LINE_TABLE); RECORD(OBJC_CATEGORIES_MAP); RECORD(FILE_SORTED_DECLS); RECORD(IMPORTED_MODULES); RECORD(OBJC_CATEGORIES); RECORD(MACRO_OFFSET); RECORD(INTERESTING_IDENTIFIERS); RECORD(UNDEFINED_BUT_USED); RECORD(LATE_PARSED_TEMPLATE); RECORD(OPTIMIZE_PRAGMA_OPTIONS); RECORD(MSSTRUCT_PRAGMA_OPTIONS); RECORD(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS); RECORD(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES); RECORD(DELETE_EXPRS_TO_ANALYZE); RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH); RECORD(PP_CONDITIONAL_STACK); RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS); RECORD(PP_ASSUME_NONNULL_LOC); // SourceManager Block. BLOCK(SOURCE_MANAGER_BLOCK); RECORD(SM_SLOC_FILE_ENTRY); RECORD(SM_SLOC_BUFFER_ENTRY); RECORD(SM_SLOC_BUFFER_BLOB); RECORD(SM_SLOC_BUFFER_BLOB_COMPRESSED); RECORD(SM_SLOC_EXPANSION_ENTRY); // Preprocessor Block. BLOCK(PREPROCESSOR_BLOCK); RECORD(PP_MACRO_DIRECTIVE_HISTORY); RECORD(PP_MACRO_FUNCTION_LIKE); RECORD(PP_MACRO_OBJECT_LIKE); RECORD(PP_MODULE_MACRO); RECORD(PP_TOKEN); // Submodule Block. 
BLOCK(SUBMODULE_BLOCK); RECORD(SUBMODULE_METADATA); RECORD(SUBMODULE_DEFINITION); RECORD(SUBMODULE_UMBRELLA_HEADER); RECORD(SUBMODULE_HEADER); RECORD(SUBMODULE_TOPHEADER); RECORD(SUBMODULE_UMBRELLA_DIR); RECORD(SUBMODULE_IMPORTS); RECORD(SUBMODULE_AFFECTING_MODULES); RECORD(SUBMODULE_EXPORTS); RECORD(SUBMODULE_REQUIRES); RECORD(SUBMODULE_EXCLUDED_HEADER); RECORD(SUBMODULE_LINK_LIBRARY); RECORD(SUBMODULE_CONFIG_MACRO); RECORD(SUBMODULE_CONFLICT); RECORD(SUBMODULE_PRIVATE_HEADER); RECORD(SUBMODULE_TEXTUAL_HEADER); RECORD(SUBMODULE_PRIVATE_TEXTUAL_HEADER); RECORD(SUBMODULE_INITIALIZERS); RECORD(SUBMODULE_EXPORT_AS); // Comments Block. BLOCK(COMMENTS_BLOCK); RECORD(COMMENTS_RAW_COMMENT); // Decls and Types block. BLOCK(DECLTYPES_BLOCK); RECORD(TYPE_EXT_QUAL); RECORD(TYPE_COMPLEX); RECORD(TYPE_POINTER); RECORD(TYPE_BLOCK_POINTER); RECORD(TYPE_LVALUE_REFERENCE); RECORD(TYPE_RVALUE_REFERENCE); RECORD(TYPE_MEMBER_POINTER); RECORD(TYPE_CONSTANT_ARRAY); RECORD(TYPE_INCOMPLETE_ARRAY); RECORD(TYPE_VARIABLE_ARRAY); RECORD(TYPE_VECTOR); RECORD(TYPE_EXT_VECTOR); RECORD(TYPE_FUNCTION_NO_PROTO); RECORD(TYPE_FUNCTION_PROTO); RECORD(TYPE_TYPEDEF); RECORD(TYPE_TYPEOF_EXPR); RECORD(TYPE_TYPEOF); RECORD(TYPE_RECORD); RECORD(TYPE_ENUM); RECORD(TYPE_OBJC_INTERFACE); RECORD(TYPE_OBJC_OBJECT_POINTER); RECORD(TYPE_DECLTYPE); RECORD(TYPE_ELABORATED); RECORD(TYPE_SUBST_TEMPLATE_TYPE_PARM); RECORD(TYPE_UNRESOLVED_USING); RECORD(TYPE_INJECTED_CLASS_NAME); RECORD(TYPE_OBJC_OBJECT); RECORD(TYPE_TEMPLATE_TYPE_PARM); RECORD(TYPE_TEMPLATE_SPECIALIZATION); RECORD(TYPE_DEPENDENT_NAME); RECORD(TYPE_DEPENDENT_TEMPLATE_SPECIALIZATION); RECORD(TYPE_DEPENDENT_SIZED_ARRAY); RECORD(TYPE_PAREN); RECORD(TYPE_MACRO_QUALIFIED); RECORD(TYPE_PACK_EXPANSION); RECORD(TYPE_ATTRIBUTED); RECORD(TYPE_SUBST_TEMPLATE_TYPE_PARM_PACK); RECORD(TYPE_AUTO); RECORD(TYPE_UNARY_TRANSFORM); RECORD(TYPE_ATOMIC); RECORD(TYPE_DECAYED); RECORD(TYPE_ADJUSTED); RECORD(TYPE_OBJC_TYPE_PARAM); RECORD(LOCAL_REDECLARATIONS); RECORD(DECL_TYPEDEF); RECORD(DECL_TYPEALIAS); RECORD(DECL_ENUM); RECORD(DECL_RECORD); RECORD(DECL_ENUM_CONSTANT); RECORD(DECL_FUNCTION); RECORD(DECL_OBJC_METHOD); RECORD(DECL_OBJC_INTERFACE); RECORD(DECL_OBJC_PROTOCOL); RECORD(DECL_OBJC_IVAR); RECORD(DECL_OBJC_AT_DEFS_FIELD); RECORD(DECL_OBJC_CATEGORY); RECORD(DECL_OBJC_CATEGORY_IMPL); RECORD(DECL_OBJC_IMPLEMENTATION); RECORD(DECL_OBJC_COMPATIBLE_ALIAS); RECORD(DECL_OBJC_PROPERTY); RECORD(DECL_OBJC_PROPERTY_IMPL); RECORD(DECL_FIELD); RECORD(DECL_MS_PROPERTY); RECORD(DECL_VAR); RECORD(DECL_IMPLICIT_PARAM); RECORD(DECL_PARM_VAR); RECORD(DECL_FILE_SCOPE_ASM); RECORD(DECL_BLOCK); RECORD(DECL_CONTEXT_LEXICAL); RECORD(DECL_CONTEXT_VISIBLE); RECORD(DECL_NAMESPACE); RECORD(DECL_NAMESPACE_ALIAS); RECORD(DECL_USING); RECORD(DECL_USING_SHADOW); RECORD(DECL_USING_DIRECTIVE); RECORD(DECL_UNRESOLVED_USING_VALUE); RECORD(DECL_UNRESOLVED_USING_TYPENAME); RECORD(DECL_LINKAGE_SPEC); RECORD(DECL_CXX_RECORD); RECORD(DECL_CXX_METHOD); RECORD(DECL_CXX_CONSTRUCTOR); RECORD(DECL_CXX_DESTRUCTOR); RECORD(DECL_CXX_CONVERSION); RECORD(DECL_ACCESS_SPEC); RECORD(DECL_FRIEND); RECORD(DECL_FRIEND_TEMPLATE); RECORD(DECL_CLASS_TEMPLATE); RECORD(DECL_CLASS_TEMPLATE_SPECIALIZATION); RECORD(DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION); RECORD(DECL_VAR_TEMPLATE); RECORD(DECL_VAR_TEMPLATE_SPECIALIZATION); RECORD(DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION); RECORD(DECL_FUNCTION_TEMPLATE); RECORD(DECL_TEMPLATE_TYPE_PARM); RECORD(DECL_NON_TYPE_TEMPLATE_PARM); RECORD(DECL_TEMPLATE_TEMPLATE_PARM); RECORD(DECL_CONCEPT); 
  RECORD(DECL_REQUIRES_EXPR_BODY);
  RECORD(DECL_TYPE_ALIAS_TEMPLATE);
  RECORD(DECL_STATIC_ASSERT);
  RECORD(DECL_CXX_BASE_SPECIFIERS);
  RECORD(DECL_CXX_CTOR_INITIALIZERS);
  RECORD(DECL_INDIRECTFIELD);
  RECORD(DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK);
  RECORD(DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK);
  RECORD(DECL_IMPORT);
  RECORD(DECL_OMP_THREADPRIVATE);
  RECORD(DECL_EMPTY);
  RECORD(DECL_OBJC_TYPE_PARAM);
  RECORD(DECL_OMP_CAPTUREDEXPR);
  RECORD(DECL_PRAGMA_COMMENT);
  RECORD(DECL_PRAGMA_DETECT_MISMATCH);
  RECORD(DECL_OMP_DECLARE_REDUCTION);
  RECORD(DECL_OMP_ALLOCATE);
  RECORD(DECL_HLSL_BUFFER);

  // Statements and Exprs can occur in the Decls and Types block.
  AddStmtsExprs(Stream, Record);

  BLOCK(PREPROCESSOR_DETAIL_BLOCK);
  RECORD(PPD_MACRO_EXPANSION);
  RECORD(PPD_MACRO_DEFINITION);
  RECORD(PPD_INCLUSION_DIRECTIVE);

  // Decls and Types block.
  BLOCK(EXTENSION_BLOCK);
  RECORD(EXTENSION_METADATA);

  BLOCK(UNHASHED_CONTROL_BLOCK);
  RECORD(SIGNATURE);
  RECORD(AST_BLOCK_HASH);
  RECORD(DIAGNOSTIC_OPTIONS);
  RECORD(HEADER_SEARCH_PATHS);
  RECORD(DIAG_PRAGMA_MAPPINGS);

#undef RECORD
#undef BLOCK
  Stream.ExitBlock();
}

/// Prepares a path for being written to an AST file by converting it
/// to an absolute path and removing nested './'s.
///
/// \return \c true if the path was changed.
static bool cleanPathForOutput(FileManager &FileMgr,
                               SmallVectorImpl<char> &Path) {
  bool Changed = FileMgr.makeAbsolutePath(Path);
  return Changed | llvm::sys::path::remove_dots(Path);
}

/// Adjusts the given filename to only write out the portion of the
/// filename that is not part of the system root directory.
///
/// \param Filename the file name to adjust.
///
/// \param BaseDir When non-NULL, the PCH file is a relocatable AST file and
/// the returned filename will be adjusted by this root directory.
///
/// \returns either the original filename (if it needs no adjustment) or the
/// adjusted filename (which points into the @p Filename parameter).
static const char *
adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) {
  assert(Filename && "No file name to adjust?");

  if (BaseDir.empty())
    return Filename;

  // Verify that the filename and the system root have the same prefix.
  unsigned Pos = 0;
  for (; Filename[Pos] && Pos < BaseDir.size(); ++Pos)
    if (Filename[Pos] != BaseDir[Pos])
      return Filename; // Prefixes don't match.

  // We hit the end of the filename before we hit the end of the system root.
  if (!Filename[Pos])
    return Filename;

  // If there's not a path separator at the end of the base directory nor
  // immediately after it, then this isn't within the base directory.
  if (!llvm::sys::path::is_separator(Filename[Pos])) {
    if (!llvm::sys::path::is_separator(BaseDir.back()))
      return Filename;
  } else {
    // If the file name has a '/' at the current position, skip over the '/'.
    // We distinguish relative paths from absolute paths by the
    // absence of '/' at the beginning of relative paths.
    //
    // FIXME: This is wrong. We distinguish them by asking if the path is
    // absolute, which isn't the same thing. And there might be multiple '/'s
    // in a row. Use a better mechanism to indicate whether we have emitted an
    // absolute or relative path.
    ++Pos;
  }

  return Filename + Pos;
}

std::pair<ASTFileSignature, ASTFileSignature>
ASTWriter::createSignature() const {
  StringRef AllBytes(Buffer.data(), Buffer.size());

  llvm::SHA1 Hasher;
  Hasher.update(AllBytes.slice(ASTBlockRange.first, ASTBlockRange.second));
  ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hasher.result());

  // Add the remaining bytes:
  //  1. Before the unhashed control block.
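  //     (Note: the region covered by UnhashedControlBlockRange is deliberately
  //     excluded from the overall signature; judging from
  //     writeUnhashedControlBlock() below, it holds diagnostic options, header
  //     search paths and the signature records themselves, which must not feed
  //     back into the hash.)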
  Hasher.update(AllBytes.slice(0, UnhashedControlBlockRange.first));
  //  2. Between the unhashed control block and the AST block.
  Hasher.update(
      AllBytes.slice(UnhashedControlBlockRange.second, ASTBlockRange.first));
  //  3. After the AST block.
  Hasher.update(AllBytes.slice(ASTBlockRange.second, StringRef::npos));
  ASTFileSignature Signature = ASTFileSignature::create(Hasher.result());

  return std::make_pair(ASTBlockHash, Signature);
}

ASTFileSignature ASTWriter::backpatchSignature() {
  if (!WritingModule ||
      !PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent)
    return {};

  // For implicit modules, write the hash of the PCM as its signature.

  auto BackpatchSignatureAt = [&](const ASTFileSignature &S, uint64_t BitNo) {
    for (uint8_t Byte : S) {
      Stream.BackpatchByte(BitNo, Byte);
      BitNo += 8;
    }
  };

  ASTFileSignature ASTBlockHash;
  ASTFileSignature Signature;
  std::tie(ASTBlockHash, Signature) = createSignature();

  BackpatchSignatureAt(ASTBlockHash, ASTBlockHashOffset);
  BackpatchSignatureAt(Signature, SignatureOffset);

  return Signature;
}

void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
                                          ASTContext &Context) {
  using namespace llvm;

  // Flush first to prepare the PCM hash (signature).
  Stream.FlushToWord();
  UnhashedControlBlockRange.first = Stream.GetCurrentBitNo() >> 3;

  // Enter the block and prepare to write records.
  RecordData Record;
  Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);

  // For implicit modules, write the hash of the PCM as its signature.
  if (WritingModule &&
      PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
    // At this point, we don't know the actual signature of the file or the AST
    // block - we're only able to compute those at the end of the serialization
    // process. Let's store dummy signatures for now, and replace them with the
    // real ones later on.
    // The bitstream VBR-encodes record elements, which makes backpatching them
    // really difficult. Let's store the signatures as blobs instead - they are
    // guaranteed to be word-aligned, and we control their format/encoding.
    auto Dummy = ASTFileSignature::createDummy();
    SmallString<128> Blob{Dummy.begin(), Dummy.end()};

    auto Abbrev = std::make_shared<BitCodeAbbrev>();
    Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH));
    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
    unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

    Abbrev = std::make_shared<BitCodeAbbrev>();
    Abbrev->Add(BitCodeAbbrevOp(SIGNATURE));
    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
    unsigned SignatureAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

    Record.push_back(AST_BLOCK_HASH);
    Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob);
    ASTBlockHashOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
    Record.clear();

    Record.push_back(SIGNATURE);
    Stream.EmitRecordWithBlob(SignatureAbbrev, Record, Blob);
    SignatureOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
    Record.clear();
  }

  const auto &HSOpts = PP.getHeaderSearchInfo().getHeaderSearchOpts();

  // Diagnostic options.
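  // (Everything from here to the end of this function lands in the unhashed
  // control block, i.e. outside the range hashed by createSignature(), so
  // changing these options does not perturb the module signature.)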
const auto &Diags = Context.getDiagnostics(); const DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions(); if (!HSOpts.ModulesSkipDiagnosticOptions) { #define DIAGOPT(Name, Bits, Default) Record.push_back(DiagOpts.Name); #define ENUM_DIAGOPT(Name, Type, Bits, Default) \ Record.push_back(static_cast(DiagOpts.get##Name())); #include "clang/Basic/DiagnosticOptions.def" Record.push_back(DiagOpts.Warnings.size()); for (unsigned I = 0, N = DiagOpts.Warnings.size(); I != N; ++I) AddString(DiagOpts.Warnings[I], Record); Record.push_back(DiagOpts.Remarks.size()); for (unsigned I = 0, N = DiagOpts.Remarks.size(); I != N; ++I) AddString(DiagOpts.Remarks[I], Record); // Note: we don't serialize the log or serialization file names, because // they are generally transient files and will almost always be overridden. Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record); Record.clear(); } // Header search paths. if (!HSOpts.ModulesSkipHeaderSearchPaths) { // Include entries. Record.push_back(HSOpts.UserEntries.size()); for (unsigned I = 0, N = HSOpts.UserEntries.size(); I != N; ++I) { const HeaderSearchOptions::Entry &Entry = HSOpts.UserEntries[I]; AddString(Entry.Path, Record); Record.push_back(static_cast(Entry.Group)); Record.push_back(Entry.IsFramework); Record.push_back(Entry.IgnoreSysRoot); } // System header prefixes. Record.push_back(HSOpts.SystemHeaderPrefixes.size()); for (unsigned I = 0, N = HSOpts.SystemHeaderPrefixes.size(); I != N; ++I) { AddString(HSOpts.SystemHeaderPrefixes[I].Prefix, Record); Record.push_back(HSOpts.SystemHeaderPrefixes[I].IsSystemHeader); } // VFS overlay files. Record.push_back(HSOpts.VFSOverlayFiles.size()); for (StringRef VFSOverlayFile : HSOpts.VFSOverlayFiles) AddString(VFSOverlayFile, Record); Stream.EmitRecord(HEADER_SEARCH_PATHS, Record); } if (!HSOpts.ModulesSkipPragmaDiagnosticMappings) WritePragmaDiagnosticMappings(Diags, /* isModule = */ WritingModule); // Header search entry usage. auto HSEntryUsage = PP.getHeaderSearchInfo().computeUserEntryUsage(); auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_ENTRY_USAGE)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of bits. Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Bit vector. unsigned HSUsageAbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {HEADER_SEARCH_ENTRY_USAGE, HSEntryUsage.size()}; Stream.EmitRecordWithBlob(HSUsageAbbrevCode, Record, bytes(HSEntryUsage)); } // Leave the options block. Stream.ExitBlock(); UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3; } /// Write the control block. void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context, StringRef isysroot) { using namespace llvm; Stream.EnterSubblock(CONTROL_BLOCK_ID, 5); RecordData Record; // Metadata auto MetadataAbbrev = std::make_shared(); MetadataAbbrev->Add(BitCodeAbbrevOp(METADATA)); MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Major MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Minor MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Clang maj. MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Clang min. 
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Relocatable // Standard C++ module MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Timestamps MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Errors MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // SVN branch/tag unsigned MetadataAbbrevCode = Stream.EmitAbbrev(std::move(MetadataAbbrev)); assert((!WritingModule || isysroot.empty()) && "writing module as a relocatable PCH?"); { RecordData::value_type Record[] = {METADATA, VERSION_MAJOR, VERSION_MINOR, CLANG_VERSION_MAJOR, CLANG_VERSION_MINOR, !isysroot.empty(), isWritingStdCXXNamedModules(), IncludeTimestamps, ASTHasCompilerErrors}; Stream.EmitRecordWithBlob(MetadataAbbrevCode, Record, getClangFullRepositoryVersion()); } if (WritingModule) { // Module name auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MODULE_NAME)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); RecordData::value_type Record[] = {MODULE_NAME}; Stream.EmitRecordWithBlob(AbbrevCode, Record, WritingModule->Name); } if (WritingModule && WritingModule->Directory) { SmallString<128> BaseDir; if (PP.getHeaderSearchInfo().getHeaderSearchOpts().ModuleFileHomeIsCwd) { // Use the current working directory as the base path for all inputs. auto CWD = Context.getSourceManager().getFileManager().getOptionalDirectoryRef( "."); BaseDir.assign(CWD->getName()); } else { BaseDir.assign(WritingModule->Directory->getName()); } cleanPathForOutput(Context.getSourceManager().getFileManager(), BaseDir); // If the home of the module is the current working directory, then we // want to pick up the cwd of the build process loading the module, not // our cwd, when we load this module. if (!PP.getHeaderSearchInfo().getHeaderSearchOpts().ModuleFileHomeIsCwd && (!PP.getHeaderSearchInfo() .getHeaderSearchOpts() .ModuleMapFileHomeIsCwd || WritingModule->Directory->getName() != StringRef("."))) { // Module directory. auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MODULE_DIRECTORY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Directory unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); RecordData::value_type Record[] = {MODULE_DIRECTORY}; Stream.EmitRecordWithBlob(AbbrevCode, Record, BaseDir); } // Write out all other paths relative to the base directory if possible. BaseDirectory.assign(BaseDir.begin(), BaseDir.end()); } else if (!isysroot.empty()) { // Write out paths relative to the sysroot if possible. BaseDirectory = std::string(isysroot); } // Module map file if (WritingModule && WritingModule->Kind == Module::ModuleMapModule) { Record.clear(); auto &Map = PP.getHeaderSearchInfo().getModuleMap(); AddPath(WritingModule->PresumedModuleMapFile.empty() ? Map.getModuleMapFileForUniquing(WritingModule) ->getNameAsRequested() : StringRef(WritingModule->PresumedModuleMapFile), Record); // Additional module map files. 
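    // (A count is written first, then one path per additional module map
    // file; the list is sorted by name so the serialized order is
    // deterministic across runs.)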
if (auto *AdditionalModMaps = Map.getAdditionalModuleMapFiles(WritingModule)) { Record.push_back(AdditionalModMaps->size()); SmallVector ModMaps(AdditionalModMaps->begin(), AdditionalModMaps->end()); llvm::sort(ModMaps, [](FileEntryRef A, FileEntryRef B) { return A.getName() < B.getName(); }); for (FileEntryRef F : ModMaps) AddPath(F.getName(), Record); } else { Record.push_back(0); } Stream.EmitRecord(MODULE_MAP_FILE, Record); } // Imports if (Chain) { serialization::ModuleManager &Mgr = Chain->getModuleManager(); Record.clear(); for (ModuleFile &M : Mgr) { // Skip modules that weren't directly imported. if (!M.isDirectlyImported()) continue; Record.push_back((unsigned)M.Kind); // FIXME: Stable encoding Record.push_back(M.StandardCXXModule); AddSourceLocation(M.ImportLoc, Record); // We don't want to hard code the information about imported modules // in the C++20 named modules. if (!M.StandardCXXModule) { // If we have calculated signature, there is no need to store // the size or timestamp. Record.push_back(M.Signature ? 0 : M.File.getSize()); Record.push_back(M.Signature ? 0 : getTimestampForOutput(M.File)); llvm::append_range(Record, M.Signature); } AddString(M.ModuleName, Record); if (!M.StandardCXXModule) AddPath(M.FileName, Record); } Stream.EmitRecord(IMPORTS, Record); } // Write the options block. Stream.EnterSubblock(OPTIONS_BLOCK_ID, 4); // Language options. Record.clear(); const LangOptions &LangOpts = Context.getLangOpts(); #define LANGOPT(Name, Bits, Default, Description) \ Record.push_back(LangOpts.Name); #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \ Record.push_back(static_cast(LangOpts.get##Name())); #include "clang/Basic/LangOptions.def" #define SANITIZER(NAME, ID) \ Record.push_back(LangOpts.Sanitize.has(SanitizerKind::ID)); #include "clang/Basic/Sanitizers.def" Record.push_back(LangOpts.ModuleFeatures.size()); for (StringRef Feature : LangOpts.ModuleFeatures) AddString(Feature, Record); Record.push_back((unsigned) LangOpts.ObjCRuntime.getKind()); AddVersionTuple(LangOpts.ObjCRuntime.getVersion(), Record); AddString(LangOpts.CurrentModule, Record); // Comment options. Record.push_back(LangOpts.CommentOpts.BlockCommandNames.size()); for (const auto &I : LangOpts.CommentOpts.BlockCommandNames) { AddString(I, Record); } Record.push_back(LangOpts.CommentOpts.ParseAllComments); // OpenMP offloading options. Record.push_back(LangOpts.OMPTargetTriples.size()); for (auto &T : LangOpts.OMPTargetTriples) AddString(T.getTriple(), Record); AddString(LangOpts.OMPHostIRFile, Record); Stream.EmitRecord(LANGUAGE_OPTIONS, Record); // Target options. Record.clear(); const TargetInfo &Target = Context.getTargetInfo(); const TargetOptions &TargetOpts = Target.getTargetOpts(); AddString(TargetOpts.Triple, Record); AddString(TargetOpts.CPU, Record); AddString(TargetOpts.TuneCPU, Record); AddString(TargetOpts.ABI, Record); Record.push_back(TargetOpts.FeaturesAsWritten.size()); for (unsigned I = 0, N = TargetOpts.FeaturesAsWritten.size(); I != N; ++I) { AddString(TargetOpts.FeaturesAsWritten[I], Record); } Record.push_back(TargetOpts.Features.size()); for (unsigned I = 0, N = TargetOpts.Features.size(); I != N; ++I) { AddString(TargetOpts.Features[I], Record); } Stream.EmitRecord(TARGET_OPTIONS, Record); // File system options. Record.clear(); const FileSystemOptions &FSOpts = Context.getSourceManager().getFileManager().getFileSystemOpts(); AddString(FSOpts.WorkingDir, Record); Stream.EmitRecord(FILE_SYSTEM_OPTIONS, Record); // Header search options. 
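  // (Like the language and target options above, these are recorded so that a
  // consumer of this AST file can compare them against its own settings when
  // the file is loaded; only a subset of HeaderSearchOptions is serialized
  // here.)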
Record.clear(); const HeaderSearchOptions &HSOpts = PP.getHeaderSearchInfo().getHeaderSearchOpts(); AddString(HSOpts.Sysroot, Record); AddString(HSOpts.ResourceDir, Record); AddString(HSOpts.ModuleCachePath, Record); AddString(HSOpts.ModuleUserBuildPath, Record); Record.push_back(HSOpts.DisableModuleHash); Record.push_back(HSOpts.ImplicitModuleMaps); Record.push_back(HSOpts.ModuleMapFileHomeIsCwd); Record.push_back(HSOpts.EnablePrebuiltImplicitModules); Record.push_back(HSOpts.UseBuiltinIncludes); Record.push_back(HSOpts.UseStandardSystemIncludes); Record.push_back(HSOpts.UseStandardCXXIncludes); Record.push_back(HSOpts.UseLibcxx); // Write out the specific module cache path that contains the module files. AddString(PP.getHeaderSearchInfo().getModuleCachePath(), Record); Stream.EmitRecord(HEADER_SEARCH_OPTIONS, Record); // Preprocessor options. Record.clear(); const PreprocessorOptions &PPOpts = PP.getPreprocessorOpts(); // If we're building an implicit module with a context hash, the importer is // guaranteed to have the same macros defined on the command line. Skip // writing them. bool SkipMacros = BuildingImplicitModule && !HSOpts.DisableModuleHash; bool WriteMacros = !SkipMacros; Record.push_back(WriteMacros); if (WriteMacros) { // Macro definitions. Record.push_back(PPOpts.Macros.size()); for (unsigned I = 0, N = PPOpts.Macros.size(); I != N; ++I) { AddString(PPOpts.Macros[I].first, Record); Record.push_back(PPOpts.Macros[I].second); } } // Includes Record.push_back(PPOpts.Includes.size()); for (unsigned I = 0, N = PPOpts.Includes.size(); I != N; ++I) AddString(PPOpts.Includes[I], Record); // Macro includes Record.push_back(PPOpts.MacroIncludes.size()); for (unsigned I = 0, N = PPOpts.MacroIncludes.size(); I != N; ++I) AddString(PPOpts.MacroIncludes[I], Record); Record.push_back(PPOpts.UsePredefines); // Detailed record is important since it is used for the module cache hash. Record.push_back(PPOpts.DetailedRecord); AddString(PPOpts.ImplicitPCHInclude, Record); Record.push_back(static_cast(PPOpts.ObjCXXARCStandardLibrary)); Stream.EmitRecord(PREPROCESSOR_OPTIONS, Record); // Leave the options block. Stream.ExitBlock(); // Original file name and file ID SourceManager &SM = Context.getSourceManager(); if (auto MainFile = SM.getFileEntryRefForID(SM.getMainFileID())) { auto FileAbbrev = std::make_shared(); FileAbbrev->Add(BitCodeAbbrevOp(ORIGINAL_FILE)); FileAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File ID FileAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name unsigned FileAbbrevCode = Stream.EmitAbbrev(std::move(FileAbbrev)); Record.clear(); Record.push_back(ORIGINAL_FILE); AddFileID(SM.getMainFileID(), Record); EmitRecordWithPath(FileAbbrevCode, Record, MainFile->getName()); } Record.clear(); AddFileID(SM.getMainFileID(), Record); Stream.EmitRecord(ORIGINAL_FILE_ID, Record); WriteInputFiles(Context.SourceMgr, PP.getHeaderSearchInfo().getHeaderSearchOpts()); Stream.ExitBlock(); } namespace { /// An input file. struct InputFileEntry { FileEntryRef File; bool IsSystemFile; bool IsTransient; bool BufferOverridden; bool IsTopLevel; bool IsModuleMap; uint32_t ContentHash[2]; InputFileEntry(FileEntryRef File) : File(File) {} }; } // namespace void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts) { using namespace llvm; Stream.EnterSubblock(INPUT_FILES_BLOCK_ID, 4); // Create input-file abbreviation. 
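  // Each input file is emitted as two records: INPUT_FILE (ID, size, mtime,
  // flags, and the file-name blob) using the abbreviation built below, and
  // INPUT_FILE_HASH carrying the 64-bit content hash as two 32-bit words.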
auto IFAbbrev = std::make_shared(); IFAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE)); IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 12)); // Size IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // Modification time IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Name as req. len IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name as req. + name unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev)); // Create input file hash abbreviation. auto IFHAbbrev = std::make_shared(); IFHAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE_HASH)); IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); unsigned IFHAbbrevCode = Stream.EmitAbbrev(std::move(IFHAbbrev)); uint64_t InputFilesOffsetBase = Stream.GetCurrentBitNo(); // Get all ContentCache objects for files. std::vector UserFiles; std::vector SystemFiles; for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); I != N; ++I) { // Get this source location entry. const SrcMgr::SLocEntry *SLoc = &SourceMgr.getLocalSLocEntry(I); assert(&SourceMgr.getSLocEntry(FileID::get(I)) == SLoc); // We only care about file entries that were not overridden. if (!SLoc->isFile()) continue; const SrcMgr::FileInfo &File = SLoc->getFile(); const SrcMgr::ContentCache *Cache = &File.getContentCache(); if (!Cache->OrigEntry) continue; // Do not emit input files that do not affect current module. if (!IsSLocAffecting[I]) continue; InputFileEntry Entry(*Cache->OrigEntry); Entry.IsSystemFile = isSystem(File.getFileCharacteristic()); Entry.IsTransient = Cache->IsTransient; Entry.BufferOverridden = Cache->BufferOverridden; Entry.IsTopLevel = File.getIncludeLoc().isInvalid(); Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic()); auto ContentHash = hash_code(-1); if (PP->getHeaderSearchInfo() .getHeaderSearchOpts() .ValidateASTInputFilesContent) { auto MemBuff = Cache->getBufferIfLoaded(); if (MemBuff) ContentHash = hash_value(MemBuff->getBuffer()); else PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content) << Entry.File.getName(); } auto CH = llvm::APInt(64, ContentHash); Entry.ContentHash[0] = static_cast(CH.getLoBits(32).getZExtValue()); Entry.ContentHash[1] = static_cast(CH.getHiBits(32).getZExtValue()); if (Entry.IsSystemFile) SystemFiles.push_back(Entry); else UserFiles.push_back(Entry); } // User files go at the front, system files at the back. auto SortedFiles = llvm::concat(std::move(UserFiles), std::move(SystemFiles)); unsigned UserFilesNum = 0; // Write out all of the input files. std::vector InputFileOffsets; for (const auto &Entry : SortedFiles) { uint32_t &InputFileID = InputFileIDs[Entry.File]; if (InputFileID != 0) continue; // already recorded this file. // Record this entry's offset. InputFileOffsets.push_back(Stream.GetCurrentBitNo() - InputFilesOffsetBase); InputFileID = InputFileOffsets.size(); if (!Entry.IsSystemFile) ++UserFilesNum; // Emit size/modification time for this file. // And whether this file was overridden. 
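    // The record stores only the length of the name "as requested"; the blob
    // holds that name followed by the on-disk name, and the second part is
    // dropped below when the two names are identical.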
{ SmallString<128> NameAsRequested = Entry.File.getNameAsRequested(); SmallString<128> Name = Entry.File.getName(); PreparePathForOutput(NameAsRequested); PreparePathForOutput(Name); if (Name == NameAsRequested) Name.clear(); RecordData::value_type Record[] = { INPUT_FILE, InputFileOffsets.size(), (uint64_t)Entry.File.getSize(), (uint64_t)getTimestampForOutput(Entry.File), Entry.BufferOverridden, Entry.IsTransient, Entry.IsTopLevel, Entry.IsModuleMap, NameAsRequested.size()}; Stream.EmitRecordWithBlob(IFAbbrevCode, Record, (NameAsRequested + Name).str()); } // Emit content hash for this file. { RecordData::value_type Record[] = {INPUT_FILE_HASH, Entry.ContentHash[0], Entry.ContentHash[1]}; Stream.EmitRecordWithAbbrev(IFHAbbrevCode, Record); } } Stream.ExitBlock(); // Create input file offsets abbreviation. auto OffsetsAbbrev = std::make_shared(); OffsetsAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE_OFFSETS)); OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # input files OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # non-system // input files OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Array unsigned OffsetsAbbrevCode = Stream.EmitAbbrev(std::move(OffsetsAbbrev)); // Write input file offsets. RecordData::value_type Record[] = {INPUT_FILE_OFFSETS, InputFileOffsets.size(), UserFilesNum}; Stream.EmitRecordWithBlob(OffsetsAbbrevCode, Record, bytes(InputFileOffsets)); } //===----------------------------------------------------------------------===// // Source Manager Serialization //===----------------------------------------------------------------------===// /// Create an abbreviation for the SLocEntry that refers to a /// file. static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) { using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_FILE_ENTRY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Characteristic Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Line directives // FileEntry fields. Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Input File ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumCreatedFIDs Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 24)); // FirstDeclIndex Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumDecls return Stream.EmitAbbrev(std::move(Abbrev)); } /// Create an abbreviation for the SLocEntry that refers to a /// buffer. static unsigned CreateSLocBufferAbbrev(llvm::BitstreamWriter &Stream) { using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_ENTRY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Characteristic Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Line directives Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Buffer name blob return Stream.EmitAbbrev(std::move(Abbrev)); } /// Create an abbreviation for the SLocEntry that refers to a /// buffer's blob. static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream, bool Compressed) { using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(Compressed ? 
SM_SLOC_BUFFER_BLOB_COMPRESSED : SM_SLOC_BUFFER_BLOB)); if (Compressed) Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Uncompressed size Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Blob return Stream.EmitAbbrev(std::move(Abbrev)); } /// Create an abbreviation for the SLocEntry that refers to a macro /// expansion. static unsigned CreateSLocExpansionAbbrev(llvm::BitstreamWriter &Stream) { using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_EXPANSION_ENTRY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Spelling location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Start location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // End location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Is token range Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Token length return Stream.EmitAbbrev(std::move(Abbrev)); } /// Emit key length and data length as ULEB-encoded data, and return them as a /// pair. static std::pair emitULEBKeyDataLength(unsigned KeyLen, unsigned DataLen, raw_ostream &Out) { llvm::encodeULEB128(KeyLen, Out); llvm::encodeULEB128(DataLen, Out); return std::make_pair(KeyLen, DataLen); } namespace { // Trait used for the on-disk hash table of header search information. class HeaderFileInfoTrait { ASTWriter &Writer; // Keep track of the framework names we've used during serialization. SmallString<128> FrameworkStringData; llvm::StringMap FrameworkNameOffset; public: HeaderFileInfoTrait(ASTWriter &Writer) : Writer(Writer) {} struct key_type { StringRef Filename; off_t Size; time_t ModTime; }; using key_type_ref = const key_type &; using UnresolvedModule = llvm::PointerIntPair; struct data_type { const HeaderFileInfo &HFI; bool AlreadyIncluded; ArrayRef KnownHeaders; UnresolvedModule Unresolved; }; using data_type_ref = const data_type &; using hash_value_type = unsigned; using offset_type = unsigned; hash_value_type ComputeHash(key_type_ref key) { // The hash is based only on size/time of the file, so that the reader can // match even when symlinking or excess path elements ("foo/../", "../") // change the form of the name. However, complete path is still the key. return llvm::hash_combine(key.Size, key.ModTime); } std::pair EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) { unsigned KeyLen = key.Filename.size() + 1 + 8 + 8; unsigned DataLen = 1 + 4 + 4; for (auto ModInfo : Data.KnownHeaders) if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule())) DataLen += 4; if (Data.Unresolved.getPointer()) DataLen += 4; return emitULEBKeyDataLength(KeyLen, DataLen, Out); } void EmitKey(raw_ostream& Out, key_type_ref key, unsigned KeyLen) { using namespace llvm::support; endian::Writer LE(Out, llvm::endianness::little); LE.write(key.Size); KeyLen -= 8; LE.write(key.ModTime); KeyLen -= 8; Out.write(key.Filename.data(), KeyLen); } void EmitData(raw_ostream &Out, key_type_ref key, data_type_ref Data, unsigned DataLen) { using namespace llvm::support; endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); (void)Start; unsigned char Flags = (Data.AlreadyIncluded << 6) | (Data.HFI.isImport << 5) | (Writer.isWritingStdCXXNamedModules() ? 
0 : Data.HFI.isPragmaOnce << 4) | (Data.HFI.DirInfo << 1) | Data.HFI.IndexHeaderMapHeader; LE.write(Flags); if (!Data.HFI.ControllingMacro) LE.write(Data.HFI.ControllingMacroID); else LE.write(Writer.getIdentifierRef(Data.HFI.ControllingMacro)); unsigned Offset = 0; if (!Data.HFI.Framework.empty()) { // If this header refers into a framework, save the framework name. llvm::StringMap::iterator Pos = FrameworkNameOffset.find(Data.HFI.Framework); if (Pos == FrameworkNameOffset.end()) { Offset = FrameworkStringData.size() + 1; FrameworkStringData.append(Data.HFI.Framework); FrameworkStringData.push_back(0); FrameworkNameOffset[Data.HFI.Framework] = Offset; } else Offset = Pos->second; } LE.write(Offset); auto EmitModule = [&](Module *M, ModuleMap::ModuleHeaderRole Role) { if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) { uint32_t Value = (ModID << 3) | (unsigned)Role; assert((Value >> 3) == ModID && "overflow in header module info"); LE.write(Value); } }; for (auto ModInfo : Data.KnownHeaders) EmitModule(ModInfo.getModule(), ModInfo.getRole()); if (Data.Unresolved.getPointer()) EmitModule(Data.Unresolved.getPointer(), Data.Unresolved.getInt()); assert(Out.tell() - Start == DataLen && "Wrong data length"); } const char *strings_begin() const { return FrameworkStringData.begin(); } const char *strings_end() const { return FrameworkStringData.end(); } }; } // namespace /// Write the header search block for the list of files that /// /// \param HS The header search structure to save. void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { HeaderFileInfoTrait GeneratorTrait(*this); llvm::OnDiskChainedHashTableGenerator Generator; SmallVector SavedStrings; unsigned NumHeaderSearchEntries = 0; // Find all unresolved headers for the current module. We generally will // have resolved them before we get here, but not necessarily: we might be // compiling a preprocessed module, where there is no requirement for the // original files to exist any more. const HeaderFileInfo Empty; // So we can take a reference. if (WritingModule) { llvm::SmallVector Worklist(1, WritingModule); while (!Worklist.empty()) { Module *M = Worklist.pop_back_val(); // We don't care about headers in unimportable submodules. if (M->isUnimportable()) continue; // Map to disk files where possible, to pick up any missing stat // information. This also means we don't need to check the unresolved // headers list when emitting resolved headers in the first loop below. // FIXME: It'd be preferable to avoid doing this if we were given // sufficient stat information in the module map. HS.getModuleMap().resolveHeaderDirectives(M, /*File=*/std::nullopt); // If the file didn't exist, we can still create a module if we were given // enough information in the module map. for (const auto &U : M->MissingHeaders) { // Check that we were given enough information to build a module // without this file existing on disk. if (!U.Size || (!U.ModTime && IncludeTimestamps)) { PP->Diag(U.FileNameLoc, diag::err_module_no_size_mtime_for_header) << WritingModule->getFullModuleName() << U.Size.has_value() << U.FileName; continue; } // Form the effective relative pathname for the file. SmallString<128> Filename(M->Directory->getName()); llvm::sys::path::append(Filename, U.FileName); PreparePathForOutput(Filename); StringRef FilenameDup = strdup(Filename.c_str()); SavedStrings.push_back(FilenameDup.data()); HeaderFileInfoTrait::key_type Key = { FilenameDup, *U.Size, IncludeTimestamps ? 
*U.ModTime : 0}; HeaderFileInfoTrait::data_type Data = { Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}}; // FIXME: Deal with cases where there are multiple unresolved header // directives in different submodules for the same header. Generator.insert(Key, Data, GeneratorTrait); ++NumHeaderSearchEntries; } auto SubmodulesRange = M->submodules(); Worklist.append(SubmodulesRange.begin(), SubmodulesRange.end()); } } SmallVector FilesByUID; HS.getFileMgr().GetUniqueIDMapping(FilesByUID); if (FilesByUID.size() > HS.header_file_size()) FilesByUID.resize(HS.header_file_size()); for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) { OptionalFileEntryRef File = FilesByUID[UID]; if (!File) continue; // Get the file info. This will load info from the external source if // necessary. Skip emitting this file if we have no information on it // as a header file (in which case HFI will be null) or if it hasn't // changed since it was loaded. Also skip it if it's for a modular header // from a different module; in that case, we rely on the module(s) // containing the header to provide this information. const HeaderFileInfo *HFI = HS.getExistingFileInfo(*File, /*WantExternal*/!Chain); if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader)) continue; // Massage the file path into an appropriate form. StringRef Filename = File->getName(); SmallString<128> FilenameTmp(Filename); if (PreparePathForOutput(FilenameTmp)) { // If we performed any translation on the file name at all, we need to // save this string, since the generator will refer to it later. Filename = StringRef(strdup(FilenameTmp.c_str())); SavedStrings.push_back(Filename.data()); } bool Included = PP->alreadyIncluded(*File); HeaderFileInfoTrait::key_type Key = { Filename, File->getSize(), getTimestampForOutput(*File) }; HeaderFileInfoTrait::data_type Data = { *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(*File), {} }; Generator.insert(Key, Data, GeneratorTrait); ++NumHeaderSearchEntries; } // Create the on-disk hash table in a buffer. SmallString<4096> TableData; uint32_t BucketOffset; { using namespace llvm::support; llvm::raw_svector_ostream Out(TableData); // Make sure that no bucket is at offset 0 endian::write(Out, 0, llvm::endianness::little); BucketOffset = Generator.Emit(Out, GeneratorTrait); } // Create a blob abbreviation using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_TABLE)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned TableAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); // Write the header search table RecordData::value_type Record[] = {HEADER_SEARCH_TABLE, BucketOffset, NumHeaderSearchEntries, TableData.size()}; TableData.append(GeneratorTrait.strings_begin(),GeneratorTrait.strings_end()); Stream.EmitRecordWithBlob(TableAbbrev, Record, TableData); // Free all of the strings we had to duplicate. for (unsigned I = 0, N = SavedStrings.size(); I != N; ++I) free(const_cast(SavedStrings[I])); } static void emitBlob(llvm::BitstreamWriter &Stream, StringRef Blob, unsigned SLocBufferBlobCompressedAbbrv, unsigned SLocBufferBlobAbbrv) { using RecordDataType = ASTWriter::RecordData::value_type; // Compress the buffer if possible. We expect that almost all PCM // consumers will not want its contents. 
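  // Record layout (as emitted below): when zstd or zlib is available, the
  // record is SM_SLOC_BUFFER_BLOB_COMPRESSED with the uncompressed size as an
  // operand and the compressed bytes as the blob; otherwise SM_SLOC_BUFFER_BLOB
  // with the raw buffer (including its trailing NUL) as the blob.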
SmallVector CompressedBuffer; if (llvm::compression::zstd::isAvailable()) { llvm::compression::zstd::compress( llvm::arrayRefFromStringRef(Blob.drop_back(1)), CompressedBuffer, 9); RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED, Blob.size() - 1}; Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record, llvm::toStringRef(CompressedBuffer)); return; } if (llvm::compression::zlib::isAvailable()) { llvm::compression::zlib::compress( llvm::arrayRefFromStringRef(Blob.drop_back(1)), CompressedBuffer); RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED, Blob.size() - 1}; Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record, llvm::toStringRef(CompressedBuffer)); return; } RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB}; Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, Blob); } /// Writes the block containing the serialized form of the /// source manager. /// /// TODO: We should probably use an on-disk hash table (stored in a /// blob), indexed based on the file name, so that we only create /// entries for files that we actually need. In the common case (no /// errors), we probably won't have to create file entries for any of /// the files in the AST. void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, const Preprocessor &PP) { RecordData Record; // Enter the source manager block. Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 4); const uint64_t SourceManagerBlockOffset = Stream.GetCurrentBitNo(); // Abbreviations for the various kinds of source-location entries. unsigned SLocFileAbbrv = CreateSLocFileAbbrev(Stream); unsigned SLocBufferAbbrv = CreateSLocBufferAbbrev(Stream); unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream, false); unsigned SLocBufferBlobCompressedAbbrv = CreateSLocBufferBlobAbbrev(Stream, true); unsigned SLocExpansionAbbrv = CreateSLocExpansionAbbrev(Stream); // Write out the source location entry table. We skip the first // entry, which is always the same dummy entry. std::vector SLocEntryOffsets; uint64_t SLocEntryOffsetsBase = Stream.GetCurrentBitNo(); SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1); for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); I != N; ++I) { // Get this source location entry. const SrcMgr::SLocEntry *SLoc = &SourceMgr.getLocalSLocEntry(I); FileID FID = FileID::get(I); assert(&SourceMgr.getSLocEntry(FID) == SLoc); // Record the offset of this source-location entry. uint64_t Offset = Stream.GetCurrentBitNo() - SLocEntryOffsetsBase; assert((Offset >> 32) == 0 && "SLocEntry offset too large"); // Figure out which record code to use. unsigned Code; if (SLoc->isFile()) { const SrcMgr::ContentCache *Cache = &SLoc->getFile().getContentCache(); if (Cache->OrigEntry) { Code = SM_SLOC_FILE_ENTRY; } else Code = SM_SLOC_BUFFER_ENTRY; } else Code = SM_SLOC_EXPANSION_ENTRY; Record.clear(); Record.push_back(Code); if (SLoc->isFile()) { const SrcMgr::FileInfo &File = SLoc->getFile(); const SrcMgr::ContentCache *Content = &File.getContentCache(); // Do not emit files that were not listed as inputs. if (!IsSLocAffecting[I]) continue; SLocEntryOffsets.push_back(Offset); // Starting offset of this entry within this module, so skip the dummy. 
Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2); AddSourceLocation(File.getIncludeLoc(), Record); Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding Record.push_back(File.hasLineDirectives()); bool EmitBlob = false; if (Content->OrigEntry) { assert(Content->OrigEntry == Content->ContentsEntry && "Writing to AST an overridden file is not supported"); // The source location entry is a file. Emit input file ID. assert(InputFileIDs[*Content->OrigEntry] != 0 && "Missed file entry"); Record.push_back(InputFileIDs[*Content->OrigEntry]); Record.push_back(getAdjustedNumCreatedFIDs(FID)); FileDeclIDsTy::iterator FDI = FileDeclIDs.find(FID); if (FDI != FileDeclIDs.end()) { Record.push_back(FDI->second->FirstDeclIndex); Record.push_back(FDI->second->DeclIDs.size()); } else { Record.push_back(0); Record.push_back(0); } Stream.EmitRecordWithAbbrev(SLocFileAbbrv, Record); if (Content->BufferOverridden || Content->IsTransient) EmitBlob = true; } else { // The source location entry is a buffer. The blob associated // with this entry contains the contents of the buffer. // We add one to the size so that we capture the trailing NULL // that is required by llvm::MemoryBuffer::getMemBuffer (on // the reader side). std::optional Buffer = Content->getBufferOrNone(PP.getDiagnostics(), PP.getFileManager()); StringRef Name = Buffer ? Buffer->getBufferIdentifier() : ""; Stream.EmitRecordWithBlob(SLocBufferAbbrv, Record, StringRef(Name.data(), Name.size() + 1)); EmitBlob = true; } if (EmitBlob) { // Include the implicit terminating null character in the on-disk buffer // if we're writing it uncompressed. std::optional Buffer = Content->getBufferOrNone(PP.getDiagnostics(), PP.getFileManager()); if (!Buffer) Buffer = llvm::MemoryBufferRef("<<>>", ""); StringRef Blob(Buffer->getBufferStart(), Buffer->getBufferSize() + 1); emitBlob(Stream, Blob, SLocBufferBlobCompressedAbbrv, SLocBufferBlobAbbrv); } } else { // The source location entry is a macro expansion. const SrcMgr::ExpansionInfo &Expansion = SLoc->getExpansion(); SLocEntryOffsets.push_back(Offset); // Starting offset of this entry within this module, so skip the dummy. Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2); LocSeq::State Seq; AddSourceLocation(Expansion.getSpellingLoc(), Record, Seq); AddSourceLocation(Expansion.getExpansionLocStart(), Record, Seq); AddSourceLocation(Expansion.isMacroArgExpansion() ? SourceLocation() : Expansion.getExpansionLocEnd(), Record, Seq); Record.push_back(Expansion.isExpansionTokenRange()); // Compute the token length for this macro expansion. SourceLocation::UIntTy NextOffset = SourceMgr.getNextLocalOffset(); if (I + 1 != N) NextOffset = SourceMgr.getLocalSLocEntry(I + 1).getOffset(); Record.push_back(getAdjustedOffset(NextOffset - SLoc->getOffset()) - 1); Stream.EmitRecordWithAbbrev(SLocExpansionAbbrv, Record); } } Stream.ExitBlock(); if (SLocEntryOffsets.empty()) return; // Write the source-location offsets table into the AST block. This // table is used for lazily loading source-location information. 
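  // (Each offset stored above is relative to SLocEntryOffsetsBase and is
  // asserted to fit in 32 bits; the base itself is emitted below as a delta
  // from the start of the source-manager block.)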
using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SOURCE_LOCATION_OFFSETS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // # of slocs Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // total size Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // offsets unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), getAdjustedOffset(SourceMgr.getNextLocalOffset()) - 1 /* skip dummy */, SLocEntryOffsetsBase - SourceManagerBlockOffset}; Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } // Write the line table. It depends on remapping working, so it must come // after the source location offsets. if (SourceMgr.hasLineTable()) { LineTableInfo &LineTable = SourceMgr.getLineTable(); Record.clear(); // Emit the needed file names. llvm::DenseMap FilenameMap; FilenameMap[-1] = -1; // For unspecified filenames. for (const auto &L : LineTable) { if (L.first.ID < 0) continue; for (auto &LE : L.second) { if (FilenameMap.insert(std::make_pair(LE.FilenameID, FilenameMap.size() - 1)).second) AddPath(LineTable.getFilename(LE.FilenameID), Record); } } Record.push_back(0); // Emit the line entries for (const auto &L : LineTable) { // Only emit entries for local files. if (L.first.ID < 0) continue; AddFileID(L.first, Record); // Emit the line entries Record.push_back(L.second.size()); for (const auto &LE : L.second) { Record.push_back(LE.FileOffset); Record.push_back(LE.LineNo); Record.push_back(FilenameMap[LE.FilenameID]); Record.push_back((unsigned)LE.FileKind); Record.push_back(LE.IncludeOffset); } } Stream.EmitRecord(SOURCE_MANAGER_LINE_TABLE, Record); } } //===----------------------------------------------------------------------===// // Preprocessor Serialization //===----------------------------------------------------------------------===// static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule, const Preprocessor &PP) { if (MacroInfo *MI = MD->getMacroInfo()) if (MI->isBuiltinMacro()) return true; if (IsModule) { SourceLocation Loc = MD->getLocation(); if (Loc.isInvalid()) return true; if (PP.getSourceManager().getFileID(Loc) == PP.getPredefinesFileID()) return true; } return false; } /// Writes the block containing the serialized form of the /// preprocessor. void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { uint64_t MacroOffsetsBase = Stream.GetCurrentBitNo(); PreprocessingRecord *PPRec = PP.getPreprocessingRecord(); if (PPRec) WritePreprocessorDetail(*PPRec, MacroOffsetsBase); RecordData Record; RecordData ModuleMacroRecord; // If the preprocessor __COUNTER__ value has been bumped, remember it. if (PP.getCounterValue() != 0) { RecordData::value_type Record[] = {PP.getCounterValue()}; Stream.EmitRecord(PP_COUNTER_VALUE, Record); } // If we have a recorded #pragma assume_nonnull, remember it so it can be // replayed when the preamble terminates into the main file. 
SourceLocation AssumeNonNullLoc = PP.getPreambleRecordedPragmaAssumeNonNullLoc(); if (AssumeNonNullLoc.isValid()) { assert(PP.isRecordingPreamble()); AddSourceLocation(AssumeNonNullLoc, Record); Stream.EmitRecord(PP_ASSUME_NONNULL_LOC, Record); Record.clear(); } if (PP.isRecordingPreamble() && PP.hasRecordedPreamble()) { assert(!IsModule); auto SkipInfo = PP.getPreambleSkipInfo(); if (SkipInfo) { Record.push_back(true); AddSourceLocation(SkipInfo->HashTokenLoc, Record); AddSourceLocation(SkipInfo->IfTokenLoc, Record); Record.push_back(SkipInfo->FoundNonSkipPortion); Record.push_back(SkipInfo->FoundElse); AddSourceLocation(SkipInfo->ElseLoc, Record); } else { Record.push_back(false); } for (const auto &Cond : PP.getPreambleConditionalStack()) { AddSourceLocation(Cond.IfLoc, Record); Record.push_back(Cond.WasSkipping); Record.push_back(Cond.FoundNonSkip); Record.push_back(Cond.FoundElse); } Stream.EmitRecord(PP_CONDITIONAL_STACK, Record); Record.clear(); } // Enter the preprocessor block. Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3); // If the AST file contains __DATE__ or __TIME__ emit a warning about this. // FIXME: Include a location for the use, and say which one was used. if (PP.SawDateOrTime()) PP.Diag(SourceLocation(), diag::warn_module_uses_date_time) << IsModule; // Loop over all the macro directives that are live at the end of the file, // emitting each to the PP section. // Construct the list of identifiers with macro directives that need to be // serialized. SmallVector MacroIdentifiers; // It is meaningless to emit macros for named modules. It only wastes times // and spaces. if (!isWritingStdCXXNamedModules()) for (auto &Id : PP.getIdentifierTable()) if (Id.second->hadMacroDefinition() && (!Id.second->isFromAST() || Id.second->hasChangedSinceDeserialization())) MacroIdentifiers.push_back(Id.second); // Sort the set of macro definitions that need to be serialized by the // name of the macro, to provide a stable ordering. llvm::sort(MacroIdentifiers, llvm::deref>()); // Emit the macro directives as a list and associate the offset with the // identifier they belong to. for (const IdentifierInfo *Name : MacroIdentifiers) { MacroDirective *MD = PP.getLocalMacroDirectiveHistory(Name); uint64_t StartOffset = Stream.GetCurrentBitNo() - MacroOffsetsBase; assert((StartOffset >> 32) == 0 && "Macro identifiers offset too large"); // Write out any exported module macros. bool EmittedModuleMacros = false; // C+=20 Header Units are compiled module interfaces, but they preserve // macros that are live (i.e. have a defined value) at the end of the // compilation. So when writing a header unit, we preserve only the final // value of each macro (and discard any that are undefined). Header units // do not have sub-modules (although they might import other header units). // PCH files, conversely, retain the history of each macro's define/undef // and of leaf macros in sub modules. if (IsModule && WritingModule->isHeaderUnit()) { // This is for the main TU when it is a C++20 header unit. // We preserve the final state of defined macros, and we do not emit ones // that are undefined. 
if (!MD || shouldIgnoreMacro(MD, IsModule, PP) || MD->getKind() == MacroDirective::MD_Undefine) continue; AddSourceLocation(MD->getLocation(), Record); Record.push_back(MD->getKind()); if (auto *DefMD = dyn_cast(MD)) { Record.push_back(getMacroRef(DefMD->getInfo(), Name)); } else if (auto *VisMD = dyn_cast(MD)) { Record.push_back(VisMD->isPublic()); } ModuleMacroRecord.push_back(getSubmoduleID(WritingModule)); ModuleMacroRecord.push_back(getMacroRef(MD->getMacroInfo(), Name)); Stream.EmitRecord(PP_MODULE_MACRO, ModuleMacroRecord); ModuleMacroRecord.clear(); EmittedModuleMacros = true; } else { // Emit the macro directives in reverse source order. for (; MD; MD = MD->getPrevious()) { // Once we hit an ignored macro, we're done: the rest of the chain // will all be ignored macros. if (shouldIgnoreMacro(MD, IsModule, PP)) break; AddSourceLocation(MD->getLocation(), Record); Record.push_back(MD->getKind()); if (auto *DefMD = dyn_cast(MD)) { Record.push_back(getMacroRef(DefMD->getInfo(), Name)); } else if (auto *VisMD = dyn_cast(MD)) { Record.push_back(VisMD->isPublic()); } } // We write out exported module macros for PCH as well. auto Leafs = PP.getLeafModuleMacros(Name); SmallVector Worklist(Leafs.begin(), Leafs.end()); llvm::DenseMap Visits; while (!Worklist.empty()) { auto *Macro = Worklist.pop_back_val(); // Emit a record indicating this submodule exports this macro. ModuleMacroRecord.push_back(getSubmoduleID(Macro->getOwningModule())); ModuleMacroRecord.push_back(getMacroRef(Macro->getMacroInfo(), Name)); for (auto *M : Macro->overrides()) ModuleMacroRecord.push_back(getSubmoduleID(M->getOwningModule())); Stream.EmitRecord(PP_MODULE_MACRO, ModuleMacroRecord); ModuleMacroRecord.clear(); // Enqueue overridden macros once we've visited all their ancestors. for (auto *M : Macro->overrides()) if (++Visits[M] == M->getNumOverridingMacros()) Worklist.push_back(M); EmittedModuleMacros = true; } } if (Record.empty() && !EmittedModuleMacros) continue; IdentMacroDirectivesOffsetMap[Name] = StartOffset; Stream.EmitRecord(PP_MACRO_DIRECTIVE_HISTORY, Record); Record.clear(); } /// Offsets of each of the macros into the bitstream, indexed by /// the local macro ID /// /// For each identifier that is associated with a macro, this map /// provides the offset into the bitstream where that macro is /// defined. std::vector MacroOffsets; for (unsigned I = 0, N = MacroInfosToEmit.size(); I != N; ++I) { const IdentifierInfo *Name = MacroInfosToEmit[I].Name; MacroInfo *MI = MacroInfosToEmit[I].MI; MacroID ID = MacroInfosToEmit[I].ID; if (ID < FirstMacroID) { assert(0 && "Loaded MacroInfo entered MacroInfosToEmit ?"); continue; } // Record the local offset of this macro. 
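    // Offsets in this table are bit offsets relative to MacroOffsetsBase and
    // are indexed by the macro's local ID (ID - FirstMacroID); the assert
    // below guarantees that each entry fits in 32 bits.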
unsigned Index = ID - FirstMacroID; if (Index >= MacroOffsets.size()) MacroOffsets.resize(Index + 1); uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; assert((Offset >> 32) == 0 && "Macro offset too large"); MacroOffsets[Index] = Offset; AddIdentifierRef(Name, Record); AddSourceLocation(MI->getDefinitionLoc(), Record); AddSourceLocation(MI->getDefinitionEndLoc(), Record); Record.push_back(MI->isUsed()); Record.push_back(MI->isUsedForHeaderGuard()); Record.push_back(MI->getNumTokens()); unsigned Code; if (MI->isObjectLike()) { Code = PP_MACRO_OBJECT_LIKE; } else { Code = PP_MACRO_FUNCTION_LIKE; Record.push_back(MI->isC99Varargs()); Record.push_back(MI->isGNUVarargs()); Record.push_back(MI->hasCommaPasting()); Record.push_back(MI->getNumParams()); for (const IdentifierInfo *Param : MI->params()) AddIdentifierRef(Param, Record); } // If we have a detailed preprocessing record, record the macro definition // ID that corresponds to this macro. if (PPRec) Record.push_back(MacroDefinitions[PPRec->findMacroDefinition(MI)]); Stream.EmitRecord(Code, Record); Record.clear(); // Emit the tokens array. for (unsigned TokNo = 0, e = MI->getNumTokens(); TokNo != e; ++TokNo) { // Note that we know that the preprocessor does not have any annotation // tokens in it because they are created by the parser, and thus can't // be in a macro definition. const Token &Tok = MI->getReplacementToken(TokNo); AddToken(Tok, Record); Stream.EmitRecord(PP_TOKEN, Record); Record.clear(); } ++NumMacros; } Stream.ExitBlock(); // Write the offsets table for macro IDs. using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), FirstMacroID - NUM_PREDEF_MACRO_IDS, MacroOffsetsBase - ASTBlockStartOffset}; Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } } void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, uint64_t MacroOffsetsBase) { if (PPRec.local_begin() == PPRec.local_end()) return; SmallVector PreprocessedEntityOffsets; // Enter the preprocessor block. Stream.EnterSubblock(PREPROCESSOR_DETAIL_BLOCK_ID, 3); // If the preprocessor has a preprocessing record, emit it. unsigned NumPreprocessingRecords = 0; using namespace llvm; // Set up the abbreviation for unsigned InclusionAbbrev = 0; { auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(PPD_INCLUSION_DIRECTIVE)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // filename length Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // in quotes Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // kind Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // imported module Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); InclusionAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); } unsigned FirstPreprocessorEntityID = (Chain ? 
PPRec.getNumLoadedPreprocessedEntities() : 0) + NUM_PREDEF_PP_ENTITY_IDS; unsigned NextPreprocessorEntityID = FirstPreprocessorEntityID; RecordData Record; for (PreprocessingRecord::iterator E = PPRec.local_begin(), EEnd = PPRec.local_end(); E != EEnd; (void)++E, ++NumPreprocessingRecords, ++NextPreprocessorEntityID) { Record.clear(); uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; assert((Offset >> 32) == 0 && "Preprocessed entity offset too large"); PreprocessedEntityOffsets.push_back( PPEntityOffset(getAdjustedRange((*E)->getSourceRange()), Offset)); if (auto *MD = dyn_cast(*E)) { // Record this macro definition's ID. MacroDefinitions[MD] = NextPreprocessorEntityID; AddIdentifierRef(MD->getName(), Record); Stream.EmitRecord(PPD_MACRO_DEFINITION, Record); continue; } if (auto *ME = dyn_cast(*E)) { Record.push_back(ME->isBuiltinMacro()); if (ME->isBuiltinMacro()) AddIdentifierRef(ME->getName(), Record); else Record.push_back(MacroDefinitions[ME->getDefinition()]); Stream.EmitRecord(PPD_MACRO_EXPANSION, Record); continue; } if (auto *ID = dyn_cast(*E)) { Record.push_back(PPD_INCLUSION_DIRECTIVE); Record.push_back(ID->getFileName().size()); Record.push_back(ID->wasInQuotes()); Record.push_back(static_cast(ID->getKind())); Record.push_back(ID->importedModule()); SmallString<64> Buffer; Buffer += ID->getFileName(); // Check that the FileEntry is not null because it was not resolved and // we create a PCH even with compiler errors. if (ID->getFile()) Buffer += ID->getFile()->getName(); Stream.EmitRecordWithBlob(InclusionAbbrev, Record, Buffer); continue; } llvm_unreachable("Unhandled PreprocessedEntity in ASTWriter"); } Stream.ExitBlock(); // Write the offsets table for the preprocessing record. if (NumPreprocessingRecords > 0) { assert(PreprocessedEntityOffsets.size() == NumPreprocessingRecords); // Write the offsets table for identifier IDs. using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(PPD_ENTITIES_OFFSETS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first pp entity Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned PPEOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); RecordData::value_type Record[] = {PPD_ENTITIES_OFFSETS, FirstPreprocessorEntityID - NUM_PREDEF_PP_ENTITY_IDS}; Stream.EmitRecordWithBlob(PPEOffsetAbbrev, Record, bytes(PreprocessedEntityOffsets)); } // Write the skipped region table for the preprocessing record. 
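  // Regions skipped by the preprocessor (e.g. failed #if blocks) are flattened
  // into an array of serialized source ranges and emitted below as a single
  // blob record.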
ArrayRef SkippedRanges = PPRec.getSkippedRanges(); if (SkippedRanges.size() > 0) { std::vector SerializedSkippedRanges; SerializedSkippedRanges.reserve(SkippedRanges.size()); for (auto const& Range : SkippedRanges) SerializedSkippedRanges.emplace_back(Range); using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(PPD_SKIPPED_RANGES)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned PPESkippedRangeAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Record.clear(); Record.push_back(PPD_SKIPPED_RANGES); Stream.EmitRecordWithBlob(PPESkippedRangeAbbrev, Record, bytes(SerializedSkippedRanges)); } } unsigned ASTWriter::getLocalOrImportedSubmoduleID(const Module *Mod) { if (!Mod) return 0; auto Known = SubmoduleIDs.find(Mod); if (Known != SubmoduleIDs.end()) return Known->second; auto *Top = Mod->getTopLevelModule(); if (Top != WritingModule && (getLangOpts().CompilingPCH || !Top->fullModuleNameIs(StringRef(getLangOpts().CurrentModule)))) return 0; return SubmoduleIDs[Mod] = NextSubmoduleID++; } unsigned ASTWriter::getSubmoduleID(Module *Mod) { unsigned ID = getLocalOrImportedSubmoduleID(Mod); // FIXME: This can easily happen, if we have a reference to a submodule that // did not result in us loading a module file for that submodule. For // instance, a cross-top-level-module 'conflict' declaration will hit this. // assert((ID || !Mod) && // "asked for module ID for non-local, non-imported module"); return ID; } /// Compute the number of modules within the given tree (including the /// given module). static unsigned getNumberOfModules(Module *Mod) { unsigned ChildModules = 0; for (auto *Submodule : Mod->submodules()) ChildModules += getNumberOfModules(Submodule); return ChildModules + 1; } void ASTWriter::WriteSubmodules(Module *WritingModule) { // Enter the submodule description block. Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5); // Write the abbreviations needed for the submodules block. using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_DEFINITION)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Parent Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // Kind Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Definition location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExplicit Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsSystem Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExternC Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferSubmodules... Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferExplicit... Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferExportWild... Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ConfigMacrosExh... Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ModuleMapIsPriv... Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // NamedModuleHasN... 
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned DefinitionAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned UmbrellaAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned HeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_TOPHEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned TopHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA_DIR)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned UmbrellaDirAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_REQUIRES)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // State Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Feature unsigned RequiresAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_EXCLUDED_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned ExcludedHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_TEXTUAL_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned TextualHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_PRIVATE_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned PrivateHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_PRIVATE_TEXTUAL_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned PrivateTextualHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_LINK_LIBRARY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name unsigned LinkLibraryAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CONFIG_MACRO)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Macro name unsigned ConfigMacroAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CONFLICT)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Other module Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Message unsigned ConflictAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_EXPORT_AS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Macro name unsigned ExportAsAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); // Write the submodule metadata block. RecordData::value_type Record[] = { getNumberOfModules(WritingModule), FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS}; Stream.EmitRecord(SUBMODULE_METADATA, Record); // Write all of the submodules. 
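  // The walk below is breadth-first starting at the module being written, so
  // every parent module is dequeued and assigned its submodule ID before any
  // of its children; the ParentID lookup relies on that ordering.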
std::queue Q; Q.push(WritingModule); while (!Q.empty()) { Module *Mod = Q.front(); Q.pop(); unsigned ID = getSubmoduleID(Mod); uint64_t ParentID = 0; if (Mod->Parent) { assert(SubmoduleIDs[Mod->Parent] && "Submodule parent not written?"); ParentID = SubmoduleIDs[Mod->Parent]; } uint64_t DefinitionLoc = SourceLocationEncoding::encode(getAdjustedLocation(Mod->DefinitionLoc)); // Emit the definition of the block. { RecordData::value_type Record[] = {SUBMODULE_DEFINITION, ID, ParentID, (RecordData::value_type)Mod->Kind, DefinitionLoc, Mod->IsFramework, Mod->IsExplicit, Mod->IsSystem, Mod->IsExternC, Mod->InferSubmodules, Mod->InferExplicitSubmodules, Mod->InferExportWildcard, Mod->ConfigMacrosExhaustive, Mod->ModuleMapIsPrivate, Mod->NamedModuleHasInit}; Stream.EmitRecordWithBlob(DefinitionAbbrev, Record, Mod->Name); } // Emit the requirements. for (const auto &R : Mod->Requirements) { RecordData::value_type Record[] = {SUBMODULE_REQUIRES, R.second}; Stream.EmitRecordWithBlob(RequiresAbbrev, Record, R.first); } // Emit the umbrella header, if there is one. if (std::optional UmbrellaHeader = Mod->getUmbrellaHeaderAsWritten()) { RecordData::value_type Record[] = {SUBMODULE_UMBRELLA_HEADER}; Stream.EmitRecordWithBlob(UmbrellaAbbrev, Record, UmbrellaHeader->NameAsWritten); } else if (std::optional UmbrellaDir = Mod->getUmbrellaDirAsWritten()) { RecordData::value_type Record[] = {SUBMODULE_UMBRELLA_DIR}; Stream.EmitRecordWithBlob(UmbrellaDirAbbrev, Record, UmbrellaDir->NameAsWritten); } // Emit the headers. struct { unsigned RecordKind; unsigned Abbrev; Module::HeaderKind HeaderKind; } HeaderLists[] = { {SUBMODULE_HEADER, HeaderAbbrev, Module::HK_Normal}, {SUBMODULE_TEXTUAL_HEADER, TextualHeaderAbbrev, Module::HK_Textual}, {SUBMODULE_PRIVATE_HEADER, PrivateHeaderAbbrev, Module::HK_Private}, {SUBMODULE_PRIVATE_TEXTUAL_HEADER, PrivateTextualHeaderAbbrev, Module::HK_PrivateTextual}, {SUBMODULE_EXCLUDED_HEADER, ExcludedHeaderAbbrev, Module::HK_Excluded} }; for (auto &HL : HeaderLists) { RecordData::value_type Record[] = {HL.RecordKind}; for (auto &H : Mod->Headers[HL.HeaderKind]) Stream.EmitRecordWithBlob(HL.Abbrev, Record, H.NameAsWritten); } // Emit the top headers. { RecordData::value_type Record[] = {SUBMODULE_TOPHEADER}; for (FileEntryRef H : Mod->getTopHeaders(PP->getFileManager())) { SmallString<128> HeaderName(H.getName()); PreparePathForOutput(HeaderName); Stream.EmitRecordWithBlob(TopHeaderAbbrev, Record, HeaderName); } } // Emit the imports. if (!Mod->Imports.empty()) { RecordData Record; for (auto *I : Mod->Imports) Record.push_back(getSubmoduleID(I)); Stream.EmitRecord(SUBMODULE_IMPORTS, Record); } // Emit the modules affecting compilation that were not imported. if (!Mod->AffectingClangModules.empty()) { RecordData Record; for (auto *I : Mod->AffectingClangModules) Record.push_back(getSubmoduleID(I)); Stream.EmitRecord(SUBMODULE_AFFECTING_MODULES, Record); } // Emit the exports. if (!Mod->Exports.empty()) { RecordData Record; for (const auto &E : Mod->Exports) { // FIXME: This may fail; we don't require that all exported modules // are local or imported. Record.push_back(getSubmoduleID(E.getPointer())); Record.push_back(E.getInt()); } Stream.EmitRecord(SUBMODULE_EXPORTS, Record); } //FIXME: How do we emit the 'use'd modules? They may not be submodules. // Might be unnecessary as use declarations are only used to build the // module itself. // TODO: Consider serializing undeclared uses of modules. // Emit the link libraries. 
for (const auto &LL : Mod->LinkLibraries) { RecordData::value_type Record[] = {SUBMODULE_LINK_LIBRARY, LL.IsFramework}; Stream.EmitRecordWithBlob(LinkLibraryAbbrev, Record, LL.Library); } // Emit the conflicts. for (const auto &C : Mod->Conflicts) { // FIXME: This may fail; we don't require that all conflicting modules // are local or imported. RecordData::value_type Record[] = {SUBMODULE_CONFLICT, getSubmoduleID(C.Other)}; Stream.EmitRecordWithBlob(ConflictAbbrev, Record, C.Message); } // Emit the configuration macros. for (const auto &CM : Mod->ConfigMacros) { RecordData::value_type Record[] = {SUBMODULE_CONFIG_MACRO}; Stream.EmitRecordWithBlob(ConfigMacroAbbrev, Record, CM); } // Emit the initializers, if any. RecordData Inits; for (Decl *D : Context->getModuleInitializers(Mod)) Inits.push_back(GetDeclRef(D)); if (!Inits.empty()) Stream.EmitRecord(SUBMODULE_INITIALIZERS, Inits); // Emit the name of the re-exported module, if any. if (!Mod->ExportAsModule.empty()) { RecordData::value_type Record[] = {SUBMODULE_EXPORT_AS}; Stream.EmitRecordWithBlob(ExportAsAbbrev, Record, Mod->ExportAsModule); } // Queue up the submodules of this module. for (auto *M : Mod->submodules()) Q.push(M); } Stream.ExitBlock(); assert((NextSubmoduleID - FirstSubmoduleID == getNumberOfModules(WritingModule)) && "Wrong # of submodules; found a reference to a non-local, " "non-imported submodule?"); } void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, bool isModule) { llvm::SmallDenseMap DiagStateIDMap; unsigned CurrID = 0; RecordData Record; auto EncodeDiagStateFlags = [](const DiagnosticsEngine::DiagState *DS) -> unsigned { unsigned Result = (unsigned)DS->ExtBehavior; for (unsigned Val : {(unsigned)DS->IgnoreAllWarnings, (unsigned)DS->EnableAllWarnings, (unsigned)DS->WarningsAsErrors, (unsigned)DS->ErrorsAsFatal, (unsigned)DS->SuppressSystemWarnings}) Result = (Result << 1) | Val; return Result; }; unsigned Flags = EncodeDiagStateFlags(Diag.DiagStatesByLoc.FirstDiagState); Record.push_back(Flags); auto AddDiagState = [&](const DiagnosticsEngine::DiagState *State, bool IncludeNonPragmaStates) { // Ensure that the diagnostic state wasn't modified since it was created. // We will not correctly round-trip this information otherwise. assert(Flags == EncodeDiagStateFlags(State) && "diag state flags vary in single AST file"); // If we ever serialize non-pragma mappings outside the initial state, the // code below will need to consider more than getDefaultMapping. assert(!IncludeNonPragmaStates || State == Diag.DiagStatesByLoc.FirstDiagState); unsigned &DiagStateID = DiagStateIDMap[State]; Record.push_back(DiagStateID); if (DiagStateID == 0) { DiagStateID = ++CurrID; SmallVector> Mappings; // Add a placeholder for the number of mappings. auto SizeIdx = Record.size(); Record.emplace_back(); for (const auto &I : *State) { // Maybe skip non-pragmas. if (!I.second.isPragma() && !IncludeNonPragmaStates) continue; // Skip default mappings. We have a mapping for every diagnostic ever // emitted, regardless of whether it was customized. if (!I.second.isPragma() && I.second == DiagnosticIDs::getDefaultMapping(I.first)) continue; Mappings.push_back(I); } // Sort by diag::kind for deterministic output. llvm::sort(Mappings, [](const auto &LHS, const auto &RHS) { return LHS.first < RHS.first; }); for (const auto &I : Mappings) { Record.push_back(I.first); Record.push_back(I.second.serialize()); } // Update the placeholder. 
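      // Each retained mapping appended two values (the diag::kind and its
      // serialized state), so the span starting at the placeholder holds
      // 1 + 2*N values and the integer division below recovers N.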
Record[SizeIdx] = (Record.size() - SizeIdx) / 2; } }; AddDiagState(Diag.DiagStatesByLoc.FirstDiagState, isModule); // Reserve a spot for the number of locations with state transitions. auto NumLocationsIdx = Record.size(); Record.emplace_back(); // Emit the state transitions. unsigned NumLocations = 0; for (auto &FileIDAndFile : Diag.DiagStatesByLoc.Files) { if (!FileIDAndFile.first.isValid() || !FileIDAndFile.second.HasLocalTransitions) continue; ++NumLocations; SourceLocation Loc = Diag.SourceMgr->getComposedLoc(FileIDAndFile.first, 0); assert(!Loc.isInvalid() && "start loc for valid FileID is invalid"); AddSourceLocation(Loc, Record); Record.push_back(FileIDAndFile.second.StateTransitions.size()); for (auto &StatePoint : FileIDAndFile.second.StateTransitions) { Record.push_back(getAdjustedOffset(StatePoint.Offset)); AddDiagState(StatePoint.State, false); } } // Backpatch the number of locations. Record[NumLocationsIdx] = NumLocations; // Emit CurDiagStateLoc. Do it last in order to match source order. // // This also protects against a hypothetical corner case with simulating // -Werror settings for implicit modules in the ASTReader, where reading // CurDiagState out of context could change whether warning pragmas are // treated as errors. AddSourceLocation(Diag.DiagStatesByLoc.CurDiagStateLoc, Record); AddDiagState(Diag.DiagStatesByLoc.CurDiagState, false); Stream.EmitRecord(DIAG_PRAGMA_MAPPINGS, Record); } //===----------------------------------------------------------------------===// // Type Serialization //===----------------------------------------------------------------------===// /// Write the representation of a type to the AST stream. void ASTWriter::WriteType(QualType T) { TypeIdx &IdxRef = TypeIdxs[T]; if (IdxRef.getIndex() == 0) // we haven't seen this type before. IdxRef = TypeIdx(NextTypeID++); TypeIdx Idx = IdxRef; assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST"); // Emit the type's representation. uint64_t Offset = ASTTypeWriter(*this).write(T) - DeclTypesBlockStartOffset; // Record the offset for this type. unsigned Index = Idx.getIndex() - FirstTypeID; if (TypeOffsets.size() == Index) TypeOffsets.emplace_back(Offset); else if (TypeOffsets.size() < Index) { TypeOffsets.resize(Index + 1); TypeOffsets[Index].setBitOffset(Offset); } else { llvm_unreachable("Types emitted in wrong order"); } } //===----------------------------------------------------------------------===// // Declaration Serialization //===----------------------------------------------------------------------===// /// Write the block containing all of the declaration IDs /// lexically declared within the given DeclContext. /// /// \returns the offset of the DECL_CONTEXT_LEXICAL block within the /// bitstream, or 0 if no block was written. 
uint64_t ASTWriter::WriteDeclContextLexicalBlock(ASTContext &Context, DeclContext *DC) { if (DC->decls_empty()) return 0; uint64_t Offset = Stream.GetCurrentBitNo(); SmallVector KindDeclPairs; for (const auto *D : DC->decls()) { KindDeclPairs.push_back(D->getKind()); KindDeclPairs.push_back(GetDeclRef(D)); } ++NumLexicalDeclContexts; RecordData::value_type Record[] = {DECL_CONTEXT_LEXICAL}; Stream.EmitRecordWithBlob(DeclContextLexicalAbbrev, Record, bytes(KindDeclPairs)); return Offset; } void ASTWriter::WriteTypeDeclOffsets() { using namespace llvm; // Write the type offsets array auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(TYPE_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of types Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base type index Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // types block unsigned TypeOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {TYPE_OFFSET, TypeOffsets.size(), FirstTypeID - NUM_PREDEF_TYPE_IDS}; Stream.EmitRecordWithBlob(TypeOffsetAbbrev, Record, bytes(TypeOffsets)); } // Write the declaration offsets array Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(DECL_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of declarations Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base decl ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // declarations block unsigned DeclOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {DECL_OFFSET, DeclOffsets.size(), FirstDeclID - NUM_PREDEF_DECL_IDS}; Stream.EmitRecordWithBlob(DeclOffsetAbbrev, Record, bytes(DeclOffsets)); } } void ASTWriter::WriteFileDeclIDsMap() { using namespace llvm; SmallVector, 64> SortedFileDeclIDs; SortedFileDeclIDs.reserve(FileDeclIDs.size()); for (const auto &P : FileDeclIDs) SortedFileDeclIDs.push_back(std::make_pair(P.first, P.second.get())); llvm::sort(SortedFileDeclIDs, llvm::less_first()); // Join the vectors of DeclIDs from all files. SmallVector FileGroupedDeclIDs; for (auto &FileDeclEntry : SortedFileDeclIDs) { DeclIDInFileInfo &Info = *FileDeclEntry.second; Info.FirstDeclIndex = FileGroupedDeclIDs.size(); llvm::stable_sort(Info.DeclIDs); for (auto &LocDeclEntry : Info.DeclIDs) FileGroupedDeclIDs.push_back(LocDeclEntry.second); } auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(FILE_SORTED_DECLS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); RecordData::value_type Record[] = {FILE_SORTED_DECLS, FileGroupedDeclIDs.size()}; Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs)); } void ASTWriter::WriteComments() { Stream.EnterSubblock(COMMENTS_BLOCK_ID, 3); auto _ = llvm::make_scope_exit([this] { Stream.ExitBlock(); }); if (!PP->getPreprocessorOpts().WriteCommentListToPCH) return; // Don't write comments to BMI to reduce the size of BMI. // If language services (e.g., clangd) want such abilities, // we can offer a special option then. 
if (isWritingStdCXXNamedModules()) return; RecordData Record; for (const auto &FO : Context->Comments.OrderedComments) { for (const auto &OC : FO.second) { const RawComment *I = OC.second; Record.clear(); AddSourceRange(I->getSourceRange(), Record); Record.push_back(I->getKind()); Record.push_back(I->isTrailingComment()); Record.push_back(I->isAlmostTrailingComment()); Stream.EmitRecord(COMMENTS_RAW_COMMENT, Record); } } } //===----------------------------------------------------------------------===// // Global Method Pool and Selector Serialization //===----------------------------------------------------------------------===// namespace { // Trait used for the on-disk hash table used in the method pool. class ASTMethodPoolTrait { ASTWriter &Writer; public: using key_type = Selector; using key_type_ref = key_type; struct data_type { SelectorID ID; ObjCMethodList Instance, Factory; }; using data_type_ref = const data_type &; using hash_value_type = unsigned; using offset_type = unsigned; explicit ASTMethodPoolTrait(ASTWriter &Writer) : Writer(Writer) {} static hash_value_type ComputeHash(Selector Sel) { return serialization::ComputeHash(Sel); } std::pair EmitKeyDataLength(raw_ostream& Out, Selector Sel, data_type_ref Methods) { unsigned KeyLen = 2 + (Sel.getNumArgs()? Sel.getNumArgs() * 4 : 4); unsigned DataLen = 4 + 2 + 2; // 2 bytes for each of the method counts for (const ObjCMethodList *Method = &Methods.Instance; Method; Method = Method->getNext()) if (ShouldWriteMethodListNode(Method)) DataLen += 4; for (const ObjCMethodList *Method = &Methods.Factory; Method; Method = Method->getNext()) if (ShouldWriteMethodListNode(Method)) DataLen += 4; return emitULEBKeyDataLength(KeyLen, DataLen, Out); } void EmitKey(raw_ostream& Out, Selector Sel, unsigned) { using namespace llvm::support; endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); assert((Start >> 32) == 0 && "Selector key offset too large"); Writer.SetSelectorOffset(Sel, Start); unsigned N = Sel.getNumArgs(); LE.write(N); if (N == 0) N = 1; for (unsigned I = 0; I != N; ++I) LE.write( Writer.getIdentifierRef(Sel.getIdentifierInfoForSlot(I))); } void EmitData(raw_ostream& Out, key_type_ref, data_type_ref Methods, unsigned DataLen) { using namespace llvm::support; endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); (void)Start; LE.write(Methods.ID); unsigned NumInstanceMethods = 0; for (const ObjCMethodList *Method = &Methods.Instance; Method; Method = Method->getNext()) if (ShouldWriteMethodListNode(Method)) ++NumInstanceMethods; unsigned NumFactoryMethods = 0; for (const ObjCMethodList *Method = &Methods.Factory; Method; Method = Method->getNext()) if (ShouldWriteMethodListNode(Method)) ++NumFactoryMethods; unsigned InstanceBits = Methods.Instance.getBits(); assert(InstanceBits < 4); unsigned InstanceHasMoreThanOneDeclBit = Methods.Instance.hasMoreThanOneDecl(); unsigned FullInstanceBits = (NumInstanceMethods << 3) | (InstanceHasMoreThanOneDeclBit << 2) | InstanceBits; unsigned FactoryBits = Methods.Factory.getBits(); assert(FactoryBits < 4); unsigned FactoryHasMoreThanOneDeclBit = Methods.Factory.hasMoreThanOneDecl(); unsigned FullFactoryBits = (NumFactoryMethods << 3) | (FactoryHasMoreThanOneDeclBit << 2) | FactoryBits; LE.write(FullInstanceBits); LE.write(FullFactoryBits); for (const ObjCMethodList *Method = &Methods.Instance; Method; Method = Method->getNext()) if (ShouldWriteMethodListNode(Method)) LE.write(Writer.getDeclID(Method->getMethod())); for (const ObjCMethodList 
*Method = &Methods.Factory; Method; Method = Method->getNext()) if (ShouldWriteMethodListNode(Method)) LE.write(Writer.getDeclID(Method->getMethod())); assert(Out.tell() - Start == DataLen && "Data length is wrong"); } private: static bool ShouldWriteMethodListNode(const ObjCMethodList *Node) { return (Node->getMethod() && !Node->getMethod()->isFromASTFile()); } }; } // namespace /// Write ObjC data: selectors and the method pool. /// /// The method pool contains both instance and factory methods, stored /// in an on-disk hash table indexed by the selector. The hash table also /// contains an empty entry for every other selector known to Sema. void ASTWriter::WriteSelectors(Sema &SemaRef) { using namespace llvm; // Do we have to do anything at all? if (SemaRef.MethodPool.empty() && SelectorIDs.empty()) return; unsigned NumTableEntries = 0; // Create and write out the blob that contains selectors and the method pool. { llvm::OnDiskChainedHashTableGenerator Generator; ASTMethodPoolTrait Trait(*this); // Create the on-disk hash table representation. We walk through every // selector we've seen and look it up in the method pool. SelectorOffsets.resize(NextSelectorID - FirstSelectorID); for (auto &SelectorAndID : SelectorIDs) { Selector S = SelectorAndID.first; SelectorID ID = SelectorAndID.second; Sema::GlobalMethodPool::iterator F = SemaRef.MethodPool.find(S); ASTMethodPoolTrait::data_type Data = { ID, ObjCMethodList(), ObjCMethodList() }; if (F != SemaRef.MethodPool.end()) { Data.Instance = F->second.first; Data.Factory = F->second.second; } // Only write this selector if it's not in an existing AST or something // changed. if (Chain && ID < FirstSelectorID) { // Selector already exists. Did it change? bool changed = false; for (ObjCMethodList *M = &Data.Instance; M && M->getMethod(); M = M->getNext()) { if (!M->getMethod()->isFromASTFile()) { changed = true; Data.Instance = *M; break; } } for (ObjCMethodList *M = &Data.Factory; M && M->getMethod(); M = M->getNext()) { if (!M->getMethod()->isFromASTFile()) { changed = true; Data.Factory = *M; break; } } if (!changed) continue; } else if (Data.Instance.getMethod() || Data.Factory.getMethod()) { // A new method pool entry. ++NumTableEntries; } Generator.insert(S, Data, Trait); } // Create the on-disk hash table in a buffer. SmallString<4096> MethodPool; uint32_t BucketOffset; { using namespace llvm::support; ASTMethodPoolTrait Trait(*this); llvm::raw_svector_ostream Out(MethodPool); // Make sure that no bucket is at offset 0 endian::write(Out, 0, llvm::endianness::little); BucketOffset = Generator.Emit(Out, Trait); } // Create a blob abbreviation auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(METHOD_POOL)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned MethodPoolAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); // Write the method pool { RecordData::value_type Record[] = {METHOD_POOL, BucketOffset, NumTableEntries}; Stream.EmitRecordWithBlob(MethodPoolAbbrev, Record, MethodPool); } // Create a blob abbreviation for the selector table offsets. 
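    // Like the other offset tables, this record carries the entry count and
    // the first local selector ID (biased by NUM_PREDEF_SELECTOR_IDS); the
    // blob is the SelectorOffsets array, indexed by (ID - FirstSelectorID).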
Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SELECTOR_OFFSETS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // size Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned SelectorOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); // Write the selector offsets table. { RecordData::value_type Record[] = { SELECTOR_OFFSETS, SelectorOffsets.size(), FirstSelectorID - NUM_PREDEF_SELECTOR_IDS}; Stream.EmitRecordWithBlob(SelectorOffsetAbbrev, Record, bytes(SelectorOffsets)); } } } /// Write the selectors referenced in @selector expression into AST file. void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) { using namespace llvm; if (SemaRef.ReferencedSelectors.empty()) return; RecordData Record; ASTRecordWriter Writer(*this, Record); // Note: this writes out all references even for a dependent AST. But it is // very tricky to fix, and given that @selector shouldn't really appear in // headers, probably not worth it. It's not a correctness issue. for (auto &SelectorAndLocation : SemaRef.ReferencedSelectors) { Selector Sel = SelectorAndLocation.first; SourceLocation Loc = SelectorAndLocation.second; Writer.AddSelectorRef(Sel); Writer.AddSourceLocation(Loc); } Writer.Emit(REFERENCED_SELECTOR_POOL); } //===----------------------------------------------------------------------===// // Identifier Table Serialization //===----------------------------------------------------------------------===// /// Determine the declaration that should be put into the name lookup table to /// represent the given declaration in this module. This is usually D itself, /// but if D was imported and merged into a local declaration, we want the most /// recent local declaration instead. The chosen declaration will be the most /// recent declaration in any module that imports this one. static NamedDecl *getDeclForLocalLookup(const LangOptions &LangOpts, NamedDecl *D) { if (!LangOpts.Modules || !D->isFromASTFile()) return D; if (Decl *Redecl = D->getPreviousDecl()) { // For Redeclarable decls, a prior declaration might be local. for (; Redecl; Redecl = Redecl->getPreviousDecl()) { // If we find a local decl, we're done. if (!Redecl->isFromASTFile()) { // Exception: in very rare cases (for injected-class-names), not all // redeclarations are in the same semantic context. Skip ones in a // different context. They don't go in this lookup table at all. if (!Redecl->getDeclContext()->getRedeclContext()->Equals( D->getDeclContext()->getRedeclContext())) continue; return cast(Redecl); } // If we find a decl from a (chained-)PCH stop since we won't find a // local one. if (Redecl->getOwningModuleID() == 0) break; } } else if (Decl *First = D->getCanonicalDecl()) { // For Mergeable decls, the first decl might be local. if (!First->isFromASTFile()) return cast(First); } // All declarations are imported. Our most recent declaration will also be // the most recent one in anyone who imports us. return D; } namespace { class ASTIdentifierTableTrait { ASTWriter &Writer; Preprocessor &PP; IdentifierResolver &IdResolver; bool IsModule; bool NeedDecls; ASTWriter::RecordData *InterestingIdentifierOffsets; /// Determines whether this is an "interesting" identifier that needs a /// full IdentifierInfo structure written into the hash table. Notably, this /// doesn't check whether the name has macros defined; use PublicMacroIterator /// to check that. 
bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) { if (MacroOffset || II->isPoisoned() || (!IsModule && II->getObjCOrBuiltinID()) || II->hasRevertedTokenIDToIdentifier() || (NeedDecls && II->getFETokenInfo())) return true; return false; } public: using key_type = IdentifierInfo *; using key_type_ref = key_type; using data_type = IdentID; using data_type_ref = data_type; using hash_value_type = unsigned; using offset_type = unsigned; ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP, IdentifierResolver &IdResolver, bool IsModule, ASTWriter::RecordData *InterestingIdentifierOffsets) : Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule), NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus), InterestingIdentifierOffsets(InterestingIdentifierOffsets) {} bool needDecls() const { return NeedDecls; } static hash_value_type ComputeHash(const IdentifierInfo* II) { return llvm::djbHash(II->getName()); } bool isInterestingIdentifier(const IdentifierInfo *II) { auto MacroOffset = Writer.getMacroDirectivesOffset(II); return isInterestingIdentifier(II, MacroOffset); } bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) { return isInterestingIdentifier(II, 0); } std::pair EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) { // Record the location of the identifier data. This is used when generating // the mapping from persistent IDs to strings. Writer.SetIdentifierOffset(II, Out.tell()); auto MacroOffset = Writer.getMacroDirectivesOffset(II); // Emit the offset of the key/data length information to the interesting // identifiers table if necessary. if (InterestingIdentifierOffsets && isInterestingIdentifier(II, MacroOffset)) InterestingIdentifierOffsets->push_back(Out.tell()); unsigned KeyLen = II->getLength() + 1; unsigned DataLen = 4; // 4 bytes for the persistent ID << 1 if (isInterestingIdentifier(II, MacroOffset)) { DataLen += 2; // 2 bytes for builtin ID DataLen += 2; // 2 bytes for flags if (MacroOffset) DataLen += 4; // MacroDirectives offset. if (NeedDecls) DataLen += std::distance(IdResolver.begin(II), IdResolver.end()) * 4; } return emitULEBKeyDataLength(KeyLen, DataLen, Out); } void EmitKey(raw_ostream& Out, const IdentifierInfo* II, unsigned KeyLen) { Out.write(II->getNameStart(), KeyLen); } void EmitData(raw_ostream& Out, IdentifierInfo* II, IdentID ID, unsigned) { using namespace llvm::support; endian::Writer LE(Out, llvm::endianness::little); auto MacroOffset = Writer.getMacroDirectivesOffset(II); if (!isInterestingIdentifier(II, MacroOffset)) { LE.write(ID << 1); return; } LE.write((ID << 1) | 0x01); uint32_t Bits = (uint32_t)II->getObjCOrBuiltinID(); assert((Bits & 0xffff) == Bits && "ObjCOrBuiltinID too big for ASTReader."); LE.write(Bits); Bits = 0; bool HadMacroDefinition = MacroOffset != 0; Bits = (Bits << 1) | unsigned(HadMacroDefinition); Bits = (Bits << 1) | unsigned(II->isExtensionToken()); Bits = (Bits << 1) | unsigned(II->isPoisoned()); Bits = (Bits << 1) | unsigned(II->hasRevertedTokenIDToIdentifier()); Bits = (Bits << 1) | unsigned(II->isCPlusPlusOperatorKeyword()); LE.write(Bits); if (HadMacroDefinition) LE.write(MacroOffset); if (NeedDecls) { // Emit the declaration IDs in reverse order, because the // IdentifierResolver provides the declarations as they would be // visible (e.g., the function "stat" would come before the struct // "stat"), but the ASTReader adds declarations to the end of the list // (so we need to see the struct "stat" before the function "stat"). 
// Only emit declarations that aren't from a chained PCH, though. SmallVector Decls(IdResolver.decls(II)); for (NamedDecl *D : llvm::reverse(Decls)) LE.write( Writer.getDeclID(getDeclForLocalLookup(PP.getLangOpts(), D))); } } }; } // namespace /// Write the identifier table into the AST file. /// /// The identifier table consists of a blob containing string data /// (the actual identifiers themselves) and a separate "offsets" index /// that maps identifier IDs to locations within the blob. void ASTWriter::WriteIdentifierTable(Preprocessor &PP, IdentifierResolver &IdResolver, bool IsModule) { using namespace llvm; RecordData InterestingIdents; // Create and write out the blob that contains the identifier // strings. { llvm::OnDiskChainedHashTableGenerator Generator; ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule, IsModule ? &InterestingIdents : nullptr); // Look for any identifiers that were named while processing the // headers, but are otherwise not needed. We add these to the hash // table to enable checking of the predefines buffer in the case // where the user adds new macro definitions when building the AST // file. SmallVector IIs; for (const auto &ID : PP.getIdentifierTable()) if (Trait.isInterestingNonMacroIdentifier(ID.second)) IIs.push_back(ID.second); // Sort the identifiers lexicographically before getting the references so // that their order is stable. llvm::sort(IIs, llvm::deref>()); for (const IdentifierInfo *II : IIs) getIdentifierRef(II); // Create the on-disk hash table representation. We only store offsets // for identifiers that appear here for the first time. IdentifierOffsets.resize(NextIdentID - FirstIdentID); for (auto IdentIDPair : IdentifierIDs) { auto *II = const_cast(IdentIDPair.first); IdentID ID = IdentIDPair.second; assert(II && "NULL identifier in identifier table"); // Write out identifiers if either the ID is local or the identifier has // changed since it was loaded. if (ID >= FirstIdentID || !Chain || !II->isFromAST() || II->hasChangedSinceDeserialization() || (Trait.needDecls() && II->hasFETokenInfoChangedSinceDeserialization())) Generator.insert(II, ID, Trait); } // Create the on-disk hash table in a buffer. SmallString<4096> IdentifierTable; uint32_t BucketOffset; { using namespace llvm::support; llvm::raw_svector_ostream Out(IdentifierTable); // Make sure that no bucket is at offset 0 endian::write(Out, 0, llvm::endianness::little); BucketOffset = Generator.Emit(Out, Trait); } // Create a blob abbreviation auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_TABLE)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); // Write the identifier table RecordData::value_type Record[] = {IDENTIFIER_TABLE, BucketOffset}; Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable); } // Write the offsets table for identifier IDs. 
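  // Shape of the record emitted below (field widths follow the abbreviation):
  //   [IDENTIFIER_OFFSET, #identifiers, FirstIdentID - NUM_PREDEF_IDENT_IDS,
  //    <blob of per-identifier offsets>]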
auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of identifiers Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); #ifndef NDEBUG for (unsigned I = 0, N = IdentifierOffsets.size(); I != N; ++I) assert(IdentifierOffsets[I] && "Missing identifier offset?"); #endif RecordData::value_type Record[] = {IDENTIFIER_OFFSET, IdentifierOffsets.size(), FirstIdentID - NUM_PREDEF_IDENT_IDS}; Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record, bytes(IdentifierOffsets)); // In C++, write the list of interesting identifiers (those that are // defined as macros, poisoned, or similar unusual things). if (!InterestingIdents.empty()) Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents); } //===----------------------------------------------------------------------===// // DeclContext's Name Lookup Table Serialization //===----------------------------------------------------------------------===// namespace { // Trait used for the on-disk hash table used in the method pool. class ASTDeclContextNameLookupTrait { ASTWriter &Writer; llvm::SmallVector DeclIDs; public: using key_type = DeclarationNameKey; using key_type_ref = key_type; /// A start and end index into DeclIDs, representing a sequence of decls. using data_type = std::pair; using data_type_ref = const data_type &; using hash_value_type = unsigned; using offset_type = unsigned; explicit ASTDeclContextNameLookupTrait(ASTWriter &Writer) : Writer(Writer) {} template data_type getData(const Coll &Decls) { unsigned Start = DeclIDs.size(); for (NamedDecl *D : Decls) { DeclIDs.push_back( Writer.GetDeclRef(getDeclForLocalLookup(Writer.getLangOpts(), D))); } return std::make_pair(Start, DeclIDs.size()); } data_type ImportData(const reader::ASTDeclContextNameLookupTrait::data_type &FromReader) { unsigned Start = DeclIDs.size(); llvm::append_range(DeclIDs, FromReader); return std::make_pair(Start, DeclIDs.size()); } static bool EqualKey(key_type_ref a, key_type_ref b) { return a == b; } hash_value_type ComputeHash(DeclarationNameKey Name) { return Name.getHash(); } void EmitFileRef(raw_ostream &Out, ModuleFile *F) const { assert(Writer.hasChain() && "have reference to loaded module file but no chain?"); using namespace llvm::support; endian::write(Out, Writer.getChain()->getModuleFileID(F), llvm::endianness::little); } std::pair EmitKeyDataLength(raw_ostream &Out, DeclarationNameKey Name, data_type_ref Lookup) { unsigned KeyLen = 1; switch (Name.getKind()) { case DeclarationName::Identifier: case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: case DeclarationName::ObjCMultiArgSelector: case DeclarationName::CXXLiteralOperatorName: case DeclarationName::CXXDeductionGuideName: KeyLen += 4; break; case DeclarationName::CXXOperatorName: KeyLen += 1; break; case DeclarationName::CXXConstructorName: case DeclarationName::CXXDestructorName: case DeclarationName::CXXConversionFunctionName: case DeclarationName::CXXUsingDirective: break; } // 4 bytes for each DeclID. 
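    // The data payload for a name is just the contiguous run
    // DeclIDs[Lookup.first, Lookup.second) collected by getData() or
    // ImportData(), written out as raw 32-bit IDs by EmitData().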
unsigned DataLen = 4 * (Lookup.second - Lookup.first); return emitULEBKeyDataLength(KeyLen, DataLen, Out); } void EmitKey(raw_ostream &Out, DeclarationNameKey Name, unsigned) { using namespace llvm::support; endian::Writer LE(Out, llvm::endianness::little); LE.write(Name.getKind()); switch (Name.getKind()) { case DeclarationName::Identifier: case DeclarationName::CXXLiteralOperatorName: case DeclarationName::CXXDeductionGuideName: LE.write(Writer.getIdentifierRef(Name.getIdentifier())); return; case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: case DeclarationName::ObjCMultiArgSelector: LE.write(Writer.getSelectorRef(Name.getSelector())); return; case DeclarationName::CXXOperatorName: assert(Name.getOperatorKind() < NUM_OVERLOADED_OPERATORS && "Invalid operator?"); LE.write(Name.getOperatorKind()); return; case DeclarationName::CXXConstructorName: case DeclarationName::CXXDestructorName: case DeclarationName::CXXConversionFunctionName: case DeclarationName::CXXUsingDirective: return; } llvm_unreachable("Invalid name kind?"); } void EmitData(raw_ostream &Out, key_type_ref, data_type Lookup, unsigned DataLen) { using namespace llvm::support; endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); (void)Start; for (unsigned I = Lookup.first, N = Lookup.second; I != N; ++I) LE.write(DeclIDs[I]); assert(Out.tell() - Start == DataLen && "Data length is wrong"); } }; } // namespace bool ASTWriter::isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC) { return Result.hasExternalDecls() && DC->hasNeedToReconcileExternalVisibleStorage(); } bool ASTWriter::isLookupResultEntirelyExternal(StoredDeclsList &Result, DeclContext *DC) { for (auto *D : Result.getLookupResult()) if (!getDeclForLocalLookup(getLangOpts(), D)->isFromASTFile()) return false; return true; } void ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC, llvm::SmallVectorImpl &LookupTable) { assert(!ConstDC->hasLazyLocalLexicalLookups() && !ConstDC->hasLazyExternalLexicalLookups() && "must call buildLookups first"); // FIXME: We need to build the lookups table, which is logically const. auto *DC = const_cast(ConstDC); assert(DC == DC->getPrimaryContext() && "only primary DC has lookup table"); // Create the on-disk hash table representation. MultiOnDiskHashTableGenerator Generator; ASTDeclContextNameLookupTrait Trait(*this); // The first step is to collect the declaration names which we need to // serialize into the name lookup table, and to collect them in a stable // order. SmallVector Names; // We also build up small sets of the constructor and conversion function // names which are visible. llvm::SmallPtrSet ConstructorNameSet, ConversionNameSet; for (auto &Lookup : *DC->buildLookup()) { auto &Name = Lookup.first; auto &Result = Lookup.second; // If there are no local declarations in our lookup result, we // don't need to write an entry for the name at all. If we can't // write out a lookup set without performing more deserialization, // just skip this entry. if (isLookupResultExternal(Result, DC) && isLookupResultEntirelyExternal(Result, DC)) continue; // We also skip empty results. If any of the results could be external and // the currently available results are empty, then all of the results are // external and we skip it above. So the only way we get here with an empty // results is when no results could have been external *and* we have // external results. 
    //
    // FIXME: While we might want to start emitting on-disk entries for
    // negative lookups into a decl context as an optimization, today we *have*
    // to skip them because there are names with empty lookup results in decl
    // contexts which we can't emit in any stable ordering: we lookup
    // constructors and conversion functions in the enclosing namespace scope
    // creating empty results for them. This is almost certainly a bug in
    // Clang's name lookup, but that is likely to be hard or impossible to fix
    // and so we tolerate it here by omitting lookups with empty results.
    if (Lookup.second.getLookupResult().empty())
      continue;

    switch (Lookup.first.getNameKind()) {
    default:
      Names.push_back(Lookup.first);
      break;

    case DeclarationName::CXXConstructorName:
      assert(isa<CXXRecordDecl>(DC) &&
             "Cannot have a constructor name outside of a class!");
      ConstructorNameSet.insert(Name);
      break;

    case DeclarationName::CXXConversionFunctionName:
      assert(isa<CXXRecordDecl>(DC) &&
             "Cannot have a conversion function name outside of a class!");
      ConversionNameSet.insert(Name);
      break;
    }
  }

  // Sort the names into a stable order.
  llvm::sort(Names);

  if (auto *D = dyn_cast<CXXRecordDecl>(DC)) {
    // We need to establish an ordering of constructor and conversion function
    // names, and they don't have an intrinsic ordering.

    // First we try the easy case by forming the current context's constructor
    // name and adding that name first. This is a very useful optimization to
    // avoid walking the lexical declarations in many cases, and it also
    // handles the only case where a constructor name can come from some other
    // lexical context -- when that name is an implicit constructor merged from
    // another declaration in the redecl chain. Any non-implicit constructor or
    // conversion function which doesn't occur in all the lexical contexts
    // would be an ODR violation.
    auto ImplicitCtorName = Context->DeclarationNames.getCXXConstructorName(
        Context->getCanonicalType(Context->getRecordType(D)));
    if (ConstructorNameSet.erase(ImplicitCtorName))
      Names.push_back(ImplicitCtorName);

    // If we still have constructors or conversion functions, we walk all the
    // names in the decl and add the constructors and conversion functions
    // which are visible in the order they lexically occur within the context.
    if (!ConstructorNameSet.empty() || !ConversionNameSet.empty())
      for (Decl *ChildD : cast<CXXRecordDecl>(DC)->decls())
        if (auto *ChildND = dyn_cast<NamedDecl>(ChildD)) {
          auto Name = ChildND->getDeclName();
          switch (Name.getNameKind()) {
          default:
            continue;

          case DeclarationName::CXXConstructorName:
            if (ConstructorNameSet.erase(Name))
              Names.push_back(Name);
            break;

          case DeclarationName::CXXConversionFunctionName:
            if (ConversionNameSet.erase(Name))
              Names.push_back(Name);
            break;
          }

          if (ConstructorNameSet.empty() && ConversionNameSet.empty())
            break;
        }

    assert(ConstructorNameSet.empty() &&
           "Failed to find all of the visible constructors by walking all "
           "the lexical members of the context.");
    assert(ConversionNameSet.empty() &&
           "Failed to find all of the visible conversion functions by "
           "walking all the lexical members of the context.");
  }

  // Next we need to do a lookup with each name into this decl context to
  // fully populate any results from external sources. We don't actually use
  // the results of these lookups because we only want to use the results
  // after all results have been loaded and the pointers into them will be
  // stable.
  for (auto &Name : Names)
    DC->lookup(Name);

  // Now we need to insert the results for each name into the hash table.
For // constructor names and conversion function names, we actually need to merge // all of the results for them into one list of results each and insert // those. SmallVector ConstructorDecls; SmallVector ConversionDecls; // Now loop over the names, either inserting them or appending for the two // special cases. for (auto &Name : Names) { DeclContext::lookup_result Result = DC->noload_lookup(Name); switch (Name.getNameKind()) { default: Generator.insert(Name, Trait.getData(Result), Trait); break; case DeclarationName::CXXConstructorName: ConstructorDecls.append(Result.begin(), Result.end()); break; case DeclarationName::CXXConversionFunctionName: ConversionDecls.append(Result.begin(), Result.end()); break; } } // Handle our two special cases if we ended up having any. We arbitrarily use // the first declaration's name here because the name itself isn't part of // the key, only the kind of name is used. if (!ConstructorDecls.empty()) Generator.insert(ConstructorDecls.front()->getDeclName(), Trait.getData(ConstructorDecls), Trait); if (!ConversionDecls.empty()) Generator.insert(ConversionDecls.front()->getDeclName(), Trait.getData(ConversionDecls), Trait); // Create the on-disk hash table. Also emit the existing imported and // merged table if there is one. auto *Lookups = Chain ? Chain->getLoadedLookupTables(DC) : nullptr; Generator.emit(LookupTable, Trait, Lookups ? &Lookups->Table : nullptr); } /// Write the block containing all of the declaration IDs /// visible from the given DeclContext. /// /// \returns the offset of the DECL_CONTEXT_VISIBLE block within the /// bitstream, or 0 if no block was written. uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC) { // If we imported a key declaration of this namespace, write the visible // lookup results as an update record for it rather than including them // on this declaration. We will only look at key declarations on reload. if (isa(DC) && Chain && Chain->getKeyDeclaration(cast(DC))->isFromASTFile()) { // Only do this once, for the first local declaration of the namespace. for (auto *Prev = cast(DC)->getPreviousDecl(); Prev; Prev = Prev->getPreviousDecl()) if (!Prev->isFromASTFile()) return 0; // Note that we need to emit an update record for the primary context. UpdatedDeclContexts.insert(DC->getPrimaryContext()); // Make sure all visible decls are written. They will be recorded later. We // do this using a side data structure so we can sort the names into // a deterministic order. StoredDeclsMap *Map = DC->getPrimaryContext()->buildLookup(); SmallVector, 16> LookupResults; if (Map) { LookupResults.reserve(Map->size()); for (auto &Entry : *Map) LookupResults.push_back( std::make_pair(Entry.first, Entry.second.getLookupResult())); } llvm::sort(LookupResults, llvm::less_first()); for (auto &NameAndResult : LookupResults) { DeclarationName Name = NameAndResult.first; DeclContext::lookup_result Result = NameAndResult.second; if (Name.getNameKind() == DeclarationName::CXXConstructorName || Name.getNameKind() == DeclarationName::CXXConversionFunctionName) { // We have to work around a name lookup bug here where negative lookup // results for these names get cached in namespace lookup tables (these // names should never be looked up in a namespace). 
assert(Result.empty() && "Cannot have a constructor or conversion " "function name in a namespace!"); continue; } for (NamedDecl *ND : Result) if (!ND->isFromASTFile()) GetDeclRef(ND); } return 0; } if (DC->getPrimaryContext() != DC) return 0; // Skip contexts which don't support name lookup. if (!DC->isLookupContext()) return 0; // If not in C++, we perform name lookup for the translation unit via the // IdentifierInfo chains, don't bother to build a visible-declarations table. if (DC->isTranslationUnit() && !Context.getLangOpts().CPlusPlus) return 0; // Serialize the contents of the mapping used for lookup. Note that, // although we have two very different code paths, the serialized // representation is the same for both cases: a declaration name, // followed by a size, followed by references to the visible // declarations that have that name. uint64_t Offset = Stream.GetCurrentBitNo(); StoredDeclsMap *Map = DC->buildLookup(); if (!Map || Map->empty()) return 0; // Create the on-disk hash table in a buffer. SmallString<4096> LookupTable; GenerateNameLookupTable(DC, LookupTable); // Write the lookup table RecordData::value_type Record[] = {DECL_CONTEXT_VISIBLE}; Stream.EmitRecordWithBlob(DeclContextVisibleLookupAbbrev, Record, LookupTable); ++NumVisibleDeclContexts; return Offset; } /// Write an UPDATE_VISIBLE block for the given context. /// /// UPDATE_VISIBLE blocks contain the declarations that are added to an existing /// DeclContext in a dependent AST file. As such, they only exist for the TU /// (in C++), for namespaces, and for classes with forward-declared unscoped /// enumeration members (in C++11). void ASTWriter::WriteDeclContextVisibleUpdate(const DeclContext *DC) { StoredDeclsMap *Map = DC->getLookupPtr(); if (!Map || Map->empty()) return; // Create the on-disk hash table in a buffer. SmallString<4096> LookupTable; GenerateNameLookupTable(DC, LookupTable); // If we're updating a namespace, select a key declaration as the key for the // update record; those are the only ones that will be checked on reload. if (isa(DC)) DC = cast(Chain->getKeyDeclaration(cast(DC))); // Write the lookup table RecordData::value_type Record[] = {UPDATE_VISIBLE, getDeclID(cast(DC))}; Stream.EmitRecordWithBlob(UpdateVisibleAbbrev, Record, LookupTable); } /// Write an FP_PRAGMA_OPTIONS block for the given FPOptions. void ASTWriter::WriteFPPragmaOptions(const FPOptionsOverride &Opts) { RecordData::value_type Record[] = {Opts.getAsOpaqueInt()}; Stream.EmitRecord(FP_PRAGMA_OPTIONS, Record); } /// Write an OPENCL_EXTENSIONS block for the given OpenCLOptions. void ASTWriter::WriteOpenCLExtensions(Sema &SemaRef) { if (!SemaRef.Context.getLangOpts().OpenCL) return; const OpenCLOptions &Opts = SemaRef.getOpenCLOptions(); RecordData Record; for (const auto &I:Opts.OptMap) { AddString(I.getKey(), Record); auto V = I.getValue(); Record.push_back(V.Supported ? 1 : 0); Record.push_back(V.Enabled ? 1 : 0); Record.push_back(V.WithPragma ? 
1 : 0); Record.push_back(V.Avail); Record.push_back(V.Core); Record.push_back(V.Opt); } Stream.EmitRecord(OPENCL_EXTENSIONS, Record); } void ASTWriter::WriteCUDAPragmas(Sema &SemaRef) { if (SemaRef.ForceCUDAHostDeviceDepth > 0) { RecordData::value_type Record[] = {SemaRef.ForceCUDAHostDeviceDepth}; Stream.EmitRecord(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH, Record); } } void ASTWriter::WriteObjCCategories() { SmallVector CategoriesMap; RecordData Categories; for (unsigned I = 0, N = ObjCClassesWithCategories.size(); I != N; ++I) { unsigned Size = 0; unsigned StartIndex = Categories.size(); ObjCInterfaceDecl *Class = ObjCClassesWithCategories[I]; // Allocate space for the size. Categories.push_back(0); // Add the categories. for (ObjCInterfaceDecl::known_categories_iterator Cat = Class->known_categories_begin(), CatEnd = Class->known_categories_end(); Cat != CatEnd; ++Cat, ++Size) { assert(getDeclID(*Cat) != 0 && "Bogus category"); AddDeclRef(*Cat, Categories); } // Update the size. Categories[StartIndex] = Size; // Record this interface -> category map. ObjCCategoriesInfo CatInfo = { getDeclID(Class), StartIndex }; CategoriesMap.push_back(CatInfo); } // Sort the categories map by the definition ID, since the reader will be // performing binary searches on this information. llvm::array_pod_sort(CategoriesMap.begin(), CategoriesMap.end()); // Emit the categories map. using namespace llvm; auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(OBJC_CATEGORIES_MAP)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of entries Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned AbbrevID = Stream.EmitAbbrev(std::move(Abbrev)); RecordData::value_type Record[] = {OBJC_CATEGORIES_MAP, CategoriesMap.size()}; Stream.EmitRecordWithBlob(AbbrevID, Record, reinterpret_cast(CategoriesMap.data()), CategoriesMap.size() * sizeof(ObjCCategoriesInfo)); // Emit the category lists. Stream.EmitRecord(OBJC_CATEGORIES, Categories); } void ASTWriter::WriteLateParsedTemplates(Sema &SemaRef) { Sema::LateParsedTemplateMapT &LPTMap = SemaRef.LateParsedTemplateMap; if (LPTMap.empty()) return; RecordData Record; for (auto &LPTMapEntry : LPTMap) { const FunctionDecl *FD = LPTMapEntry.first; LateParsedTemplate &LPT = *LPTMapEntry.second; AddDeclRef(FD, Record); AddDeclRef(LPT.D, Record); Record.push_back(LPT.FPO.getAsOpaqueInt()); Record.push_back(LPT.Toks.size()); for (const auto &Tok : LPT.Toks) { AddToken(Tok, Record); } } Stream.EmitRecord(LATE_PARSED_TEMPLATE, Record); } /// Write the state of 'pragma clang optimize' at the end of the module. void ASTWriter::WriteOptimizePragmaOptions(Sema &SemaRef) { RecordData Record; SourceLocation PragmaLoc = SemaRef.getOptimizeOffPragmaLocation(); AddSourceLocation(PragmaLoc, Record); Stream.EmitRecord(OPTIMIZE_PRAGMA_OPTIONS, Record); } /// Write the state of 'pragma ms_struct' at the end of the module. void ASTWriter::WriteMSStructPragmaOptions(Sema &SemaRef) { RecordData Record; Record.push_back(SemaRef.MSStructPragmaOn ? PMSST_ON : PMSST_OFF); Stream.EmitRecord(MSSTRUCT_PRAGMA_OPTIONS, Record); } /// Write the state of 'pragma pointers_to_members' at the end of the //module. void ASTWriter::WriteMSPointersToMembersPragmaOptions(Sema &SemaRef) { RecordData Record; Record.push_back(SemaRef.MSPointerToMemberRepresentationMethod); AddSourceLocation(SemaRef.ImplicitMSInheritanceAttrLoc, Record); Stream.EmitRecord(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS, Record); } /// Write the state of 'pragma align/pack' at the end of the module. 
void ASTWriter::WritePackPragmaOptions(Sema &SemaRef) { // Don't serialize pragma align/pack state for modules, since it should only // take effect on a per-submodule basis. if (WritingModule) return; RecordData Record; AddAlignPackInfo(SemaRef.AlignPackStack.CurrentValue, Record); AddSourceLocation(SemaRef.AlignPackStack.CurrentPragmaLocation, Record); Record.push_back(SemaRef.AlignPackStack.Stack.size()); for (const auto &StackEntry : SemaRef.AlignPackStack.Stack) { AddAlignPackInfo(StackEntry.Value, Record); AddSourceLocation(StackEntry.PragmaLocation, Record); AddSourceLocation(StackEntry.PragmaPushLocation, Record); AddString(StackEntry.StackSlotLabel, Record); } Stream.EmitRecord(ALIGN_PACK_PRAGMA_OPTIONS, Record); } /// Write the state of 'pragma float_control' at the end of the module. void ASTWriter::WriteFloatControlPragmaOptions(Sema &SemaRef) { // Don't serialize pragma float_control state for modules, // since it should only take effect on a per-submodule basis. if (WritingModule) return; RecordData Record; Record.push_back(SemaRef.FpPragmaStack.CurrentValue.getAsOpaqueInt()); AddSourceLocation(SemaRef.FpPragmaStack.CurrentPragmaLocation, Record); Record.push_back(SemaRef.FpPragmaStack.Stack.size()); for (const auto &StackEntry : SemaRef.FpPragmaStack.Stack) { Record.push_back(StackEntry.Value.getAsOpaqueInt()); AddSourceLocation(StackEntry.PragmaLocation, Record); AddSourceLocation(StackEntry.PragmaPushLocation, Record); AddString(StackEntry.StackSlotLabel, Record); } Stream.EmitRecord(FLOAT_CONTROL_PRAGMA_OPTIONS, Record); } void ASTWriter::WriteModuleFileExtension(Sema &SemaRef, ModuleFileExtensionWriter &Writer) { // Enter the extension block. Stream.EnterSubblock(EXTENSION_BLOCK_ID, 4); // Emit the metadata record abbreviation. auto Abv = std::make_shared(); Abv->Add(llvm::BitCodeAbbrevOp(EXTENSION_METADATA)); Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv)); // Emit the metadata record. RecordData Record; auto Metadata = Writer.getExtension()->getExtensionMetadata(); Record.push_back(EXTENSION_METADATA); Record.push_back(Metadata.MajorVersion); Record.push_back(Metadata.MinorVersion); Record.push_back(Metadata.BlockName.size()); Record.push_back(Metadata.UserInfo.size()); SmallString<64> Buffer; Buffer += Metadata.BlockName; Buffer += Metadata.UserInfo; Stream.EmitRecordWithBlob(Abbrev, Record, Buffer); // Emit the contents of the extension block. Writer.writeExtensionContents(SemaRef, Stream); // Exit the extension block. Stream.ExitBlock(); } //===----------------------------------------------------------------------===// // General Serialization Routines //===----------------------------------------------------------------------===// void ASTRecordWriter::AddAttr(const Attr *A) { auto &Record = *this; // FIXME: Clang can't handle the serialization/deserialization of // preferred_name properly now. See // https://github.com/llvm/llvm-project/issues/56490 for example. 
if (!A || (isa(A) && Writer->isWritingStdCXXNamedModules())) return Record.push_back(0); Record.push_back(A->getKind() + 1); // FIXME: stable encoding, target attrs Record.AddIdentifierRef(A->getAttrName()); Record.AddIdentifierRef(A->getScopeName()); Record.AddSourceRange(A->getRange()); Record.AddSourceLocation(A->getScopeLoc()); Record.push_back(A->getParsedKind()); Record.push_back(A->getSyntax()); Record.push_back(A->getAttributeSpellingListIndexRaw()); Record.push_back(A->isRegularKeywordAttribute()); #include "clang/Serialization/AttrPCHWrite.inc" } /// Emit the list of attributes to the specified record. void ASTRecordWriter::AddAttributes(ArrayRef Attrs) { push_back(Attrs.size()); for (const auto *A : Attrs) AddAttr(A); } void ASTWriter::AddToken(const Token &Tok, RecordDataImpl &Record) { AddSourceLocation(Tok.getLocation(), Record); // FIXME: Should translate token kind to a stable encoding. Record.push_back(Tok.getKind()); // FIXME: Should translate token flags to a stable encoding. Record.push_back(Tok.getFlags()); if (Tok.isAnnotation()) { AddSourceLocation(Tok.getAnnotationEndLoc(), Record); switch (Tok.getKind()) { case tok::annot_pragma_loop_hint: { auto *Info = static_cast(Tok.getAnnotationValue()); AddToken(Info->PragmaName, Record); AddToken(Info->Option, Record); Record.push_back(Info->Toks.size()); for (const auto &T : Info->Toks) AddToken(T, Record); break; } case tok::annot_pragma_pack: { auto *Info = static_cast(Tok.getAnnotationValue()); Record.push_back(static_cast(Info->Action)); AddString(Info->SlotLabel, Record); AddToken(Info->Alignment, Record); break; } // Some annotation tokens do not use the PtrData field. case tok::annot_pragma_openmp: case tok::annot_pragma_openmp_end: case tok::annot_pragma_unused: case tok::annot_pragma_openacc: case tok::annot_pragma_openacc_end: break; default: llvm_unreachable("missing serialization code for annotation token"); } } else { Record.push_back(Tok.getLength()); // FIXME: When reading literal tokens, reconstruct the literal pointer if it // is needed. AddIdentifierRef(Tok.getIdentifierInfo(), Record); } } void ASTWriter::AddString(StringRef Str, RecordDataImpl &Record) { Record.push_back(Str.size()); Record.insert(Record.end(), Str.begin(), Str.end()); } bool ASTWriter::PreparePathForOutput(SmallVectorImpl &Path) { assert(Context && "should have context when outputting path"); // Leave special file names as they are. StringRef PathStr(Path.data(), Path.size()); if (PathStr == "" || PathStr == "") return false; bool Changed = cleanPathForOutput(Context->getSourceManager().getFileManager(), Path); // Remove a prefix to make the path relative, if relevant. 
const char *PathBegin = Path.data(); const char *PathPtr = adjustFilenameForRelocatableAST(PathBegin, BaseDirectory); if (PathPtr != PathBegin) { Path.erase(Path.begin(), Path.begin() + (PathPtr - PathBegin)); Changed = true; } return Changed; } void ASTWriter::AddPath(StringRef Path, RecordDataImpl &Record) { SmallString<128> FilePath(Path); PreparePathForOutput(FilePath); AddString(FilePath, Record); } void ASTWriter::EmitRecordWithPath(unsigned Abbrev, RecordDataRef Record, StringRef Path) { SmallString<128> FilePath(Path); PreparePathForOutput(FilePath); Stream.EmitRecordWithBlob(Abbrev, Record, FilePath); } void ASTWriter::AddVersionTuple(const VersionTuple &Version, RecordDataImpl &Record) { Record.push_back(Version.getMajor()); if (std::optional Minor = Version.getMinor()) Record.push_back(*Minor + 1); else Record.push_back(0); if (std::optional Subminor = Version.getSubminor()) Record.push_back(*Subminor + 1); else Record.push_back(0); } /// Note that the identifier II occurs at the given offset /// within the identifier table. void ASTWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) { IdentID ID = IdentifierIDs[II]; // Only store offsets new to this AST file. Other identifier names are looked // up earlier in the chain and thus don't need an offset. if (ID >= FirstIdentID) IdentifierOffsets[ID - FirstIdentID] = Offset; } /// Note that the selector Sel occurs at the given offset /// within the method pool/selector table. void ASTWriter::SetSelectorOffset(Selector Sel, uint32_t Offset) { unsigned ID = SelectorIDs[Sel]; assert(ID && "Unknown selector"); // Don't record offsets for selectors that are also available in a different // file. if (ID < FirstSelectorID) return; SelectorOffsets[ID - FirstSelectorID] = Offset; } ASTWriter::ASTWriter(llvm::BitstreamWriter &Stream, SmallVectorImpl &Buffer, InMemoryModuleCache &ModuleCache, ArrayRef> Extensions, bool IncludeTimestamps, bool BuildingImplicitModule) : Stream(Stream), Buffer(Buffer), ModuleCache(ModuleCache), IncludeTimestamps(IncludeTimestamps), BuildingImplicitModule(BuildingImplicitModule) { for (const auto &Ext : Extensions) { if (auto Writer = Ext->createExtensionWriter(*this)) ModuleFileExtensionWriters.push_back(std::move(Writer)); } } ASTWriter::~ASTWriter() = default; const LangOptions &ASTWriter::getLangOpts() const { assert(WritingAST && "can't determine lang opts when not writing AST"); return Context->getLangOpts(); } time_t ASTWriter::getTimestampForOutput(const FileEntry *E) const { return IncludeTimestamps ? E->getModificationTime() : 0; } ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile, Module *WritingModule, StringRef isysroot, bool ShouldCacheASTInMemory) { llvm::TimeTraceScope scope("WriteAST", OutputFile); WritingAST = true; ASTHasCompilerErrors = SemaRef.PP.getDiagnostics().hasUncompilableErrorOccurred(); // Emit the file header. Stream.Emit((unsigned)'C', 8); Stream.Emit((unsigned)'P', 8); Stream.Emit((unsigned)'C', 8); Stream.Emit((unsigned)'H', 8); WriteBlockInfoBlock(); Context = &SemaRef.Context; PP = &SemaRef.PP; this->WritingModule = WritingModule; ASTFileSignature Signature = WriteASTCore(SemaRef, isysroot, WritingModule); Context = nullptr; PP = nullptr; this->WritingModule = nullptr; this->BaseDirectory.clear(); WritingAST = false; if (ShouldCacheASTInMemory) { // Construct MemoryBuffer and update buffer manager. 
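// ---------------------------------------------------------------------------
// Illustrative sketch, not from the Clang sources: a self-contained example of
// the optional-component encoding used by AddVersionTuple above, where an
// absent minor/subminor component is written as 0 and a present value is
// written biased by one. SimpleVersion, encodeVersion and decodeVersion are
// invented names for illustration only.
// ---------------------------------------------------------------------------
#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

struct SimpleVersion {
  unsigned Major = 0;
  std::optional<unsigned> Minor;
  std::optional<unsigned> Subminor;
};

static void encodeVersion(const SimpleVersion &V,
                          std::vector<uint64_t> &Record) {
  Record.push_back(V.Major);
  Record.push_back(V.Minor ? *V.Minor + 1 : 0);       // 0 means "no minor"
  Record.push_back(V.Subminor ? *V.Subminor + 1 : 0); // 0 means "no subminor"
}

static SimpleVersion decodeVersion(const std::vector<uint64_t> &Record,
                                   size_t &Idx) {
  SimpleVersion V;
  V.Major = static_cast<unsigned>(Record[Idx++]);
  if (uint64_t M = Record[Idx++])
    V.Minor = static_cast<unsigned>(M - 1);
  if (uint64_t S = Record[Idx++])
    V.Subminor = static_cast<unsigned>(S - 1);
  return V;
}

int main() {
  std::vector<uint64_t> Record;
  encodeVersion({10, 4, std::nullopt}, Record); // "10.4", no subminor
  size_t Idx = 0;
  SimpleVersion V = decodeVersion(Record, Idx);
  assert(V.Major == 10 && V.Minor && *V.Minor == 4 && !V.Subminor);
  return 0;
}
// ---------------------------------------------------------------------------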
ModuleCache.addBuiltPCM(OutputFile, llvm::MemoryBuffer::getMemBufferCopy( StringRef(Buffer.begin(), Buffer.size()))); } return Signature; } template static void AddLazyVectorDecls(ASTWriter &Writer, Vector &Vec, ASTWriter::RecordData &Record) { for (typename Vector::iterator I = Vec.begin(nullptr, true), E = Vec.end(); I != E; ++I) { Writer.AddDeclRef(*I, Record); } } void ASTWriter::collectNonAffectingInputFiles() { SourceManager &SrcMgr = PP->getSourceManager(); unsigned N = SrcMgr.local_sloc_entry_size(); IsSLocAffecting.resize(N, true); if (!WritingModule) return; auto AffectingModuleMaps = GetAffectingModuleMaps(*PP, WritingModule); unsigned FileIDAdjustment = 0; unsigned OffsetAdjustment = 0; NonAffectingFileIDAdjustments.reserve(N); NonAffectingOffsetAdjustments.reserve(N); NonAffectingFileIDAdjustments.push_back(FileIDAdjustment); NonAffectingOffsetAdjustments.push_back(OffsetAdjustment); for (unsigned I = 1; I != N; ++I) { const SrcMgr::SLocEntry *SLoc = &SrcMgr.getLocalSLocEntry(I); FileID FID = FileID::get(I); assert(&SrcMgr.getSLocEntry(FID) == SLoc); if (!SLoc->isFile()) continue; const SrcMgr::FileInfo &File = SLoc->getFile(); const SrcMgr::ContentCache *Cache = &File.getContentCache(); if (!Cache->OrigEntry) continue; if (!isModuleMap(File.getFileCharacteristic()) || AffectingModuleMaps.empty() || llvm::is_contained(AffectingModuleMaps, *Cache->OrigEntry)) continue; IsSLocAffecting[I] = false; FileIDAdjustment += 1; // Even empty files take up one element in the offset table. OffsetAdjustment += SrcMgr.getFileIDSize(FID) + 1; // If the previous file was non-affecting as well, just extend its entry // with our information. if (!NonAffectingFileIDs.empty() && NonAffectingFileIDs.back().ID == FID.ID - 1) { NonAffectingFileIDs.back() = FID; NonAffectingRanges.back().setEnd(SrcMgr.getLocForEndOfFile(FID)); NonAffectingFileIDAdjustments.back() = FileIDAdjustment; NonAffectingOffsetAdjustments.back() = OffsetAdjustment; continue; } NonAffectingFileIDs.push_back(FID); NonAffectingRanges.emplace_back(SrcMgr.getLocForStartOfFile(FID), SrcMgr.getLocForEndOfFile(FID)); NonAffectingFileIDAdjustments.push_back(FileIDAdjustment); NonAffectingOffsetAdjustments.push_back(OffsetAdjustment); } } ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, Module *WritingModule) { using namespace llvm; bool isModule = WritingModule != nullptr; // Make sure that the AST reader knows to finalize itself. if (Chain) Chain->finalizeForWriting(); ASTContext &Context = SemaRef.Context; Preprocessor &PP = SemaRef.PP; // This needs to be done very early, since everything that writes // SourceLocations or FileIDs depends on it. collectNonAffectingInputFiles(); writeUnhashedControlBlock(PP, Context); // Set up predefined declaration IDs. 
auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) { if (D) { assert(D->isCanonicalDecl() && "predefined decl is not canonical"); DeclIDs[D] = ID; } }; RegisterPredefDecl(Context.getTranslationUnitDecl(), PREDEF_DECL_TRANSLATION_UNIT_ID); RegisterPredefDecl(Context.ObjCIdDecl, PREDEF_DECL_OBJC_ID_ID); RegisterPredefDecl(Context.ObjCSelDecl, PREDEF_DECL_OBJC_SEL_ID); RegisterPredefDecl(Context.ObjCClassDecl, PREDEF_DECL_OBJC_CLASS_ID); RegisterPredefDecl(Context.ObjCProtocolClassDecl, PREDEF_DECL_OBJC_PROTOCOL_ID); RegisterPredefDecl(Context.Int128Decl, PREDEF_DECL_INT_128_ID); RegisterPredefDecl(Context.UInt128Decl, PREDEF_DECL_UNSIGNED_INT_128_ID); RegisterPredefDecl(Context.ObjCInstanceTypeDecl, PREDEF_DECL_OBJC_INSTANCETYPE_ID); RegisterPredefDecl(Context.BuiltinVaListDecl, PREDEF_DECL_BUILTIN_VA_LIST_ID); RegisterPredefDecl(Context.VaListTagDecl, PREDEF_DECL_VA_LIST_TAG); RegisterPredefDecl(Context.BuiltinMSVaListDecl, PREDEF_DECL_BUILTIN_MS_VA_LIST_ID); RegisterPredefDecl(Context.MSGuidTagDecl, PREDEF_DECL_BUILTIN_MS_GUID_ID); RegisterPredefDecl(Context.ExternCContext, PREDEF_DECL_EXTERN_C_CONTEXT_ID); RegisterPredefDecl(Context.MakeIntegerSeqDecl, PREDEF_DECL_MAKE_INTEGER_SEQ_ID); RegisterPredefDecl(Context.CFConstantStringTypeDecl, PREDEF_DECL_CF_CONSTANT_STRING_ID); RegisterPredefDecl(Context.CFConstantStringTagDecl, PREDEF_DECL_CF_CONSTANT_STRING_TAG_ID); RegisterPredefDecl(Context.TypePackElementDecl, PREDEF_DECL_TYPE_PACK_ELEMENT_ID); // Build a record containing all of the tentative definitions in this file, in // TentativeDefinitions order. Generally, this record will be empty for // headers. RecordData TentativeDefinitions; AddLazyVectorDecls(*this, SemaRef.TentativeDefinitions, TentativeDefinitions); // Build a record containing all of the file scoped decls in this file. RecordData UnusedFileScopedDecls; if (!isModule) AddLazyVectorDecls(*this, SemaRef.UnusedFileScopedDecls, UnusedFileScopedDecls); // Build a record containing all of the delegating constructors we still need // to resolve. RecordData DelegatingCtorDecls; if (!isModule) AddLazyVectorDecls(*this, SemaRef.DelegatingCtorDecls, DelegatingCtorDecls); // Write the set of weak, undeclared identifiers. We always write the // entire table, since later PCH files in a PCH chain are only interested in // the results at the end of the chain. RecordData WeakUndeclaredIdentifiers; for (const auto &WeakUndeclaredIdentifierList : SemaRef.WeakUndeclaredIdentifiers) { const IdentifierInfo *const II = WeakUndeclaredIdentifierList.first; for (const auto &WI : WeakUndeclaredIdentifierList.second) { AddIdentifierRef(II, WeakUndeclaredIdentifiers); AddIdentifierRef(WI.getAlias(), WeakUndeclaredIdentifiers); AddSourceLocation(WI.getLocation(), WeakUndeclaredIdentifiers); } } // Build a record containing all of the ext_vector declarations. RecordData ExtVectorDecls; AddLazyVectorDecls(*this, SemaRef.ExtVectorDecls, ExtVectorDecls); // Build a record containing all of the VTable uses information. RecordData VTableUses; if (!SemaRef.VTableUses.empty()) { for (unsigned I = 0, N = SemaRef.VTableUses.size(); I != N; ++I) { AddDeclRef(SemaRef.VTableUses[I].first, VTableUses); AddSourceLocation(SemaRef.VTableUses[I].second, VTableUses); VTableUses.push_back(SemaRef.VTablesUsed[SemaRef.VTableUses[I].first]); } } // Build a record containing all of the UnusedLocalTypedefNameCandidates. 
RecordData UnusedLocalTypedefNameCandidates; for (const TypedefNameDecl *TD : SemaRef.UnusedLocalTypedefNameCandidates) AddDeclRef(TD, UnusedLocalTypedefNameCandidates); // Build a record containing all of pending implicit instantiations. RecordData PendingInstantiations; for (const auto &I : SemaRef.PendingInstantiations) { AddDeclRef(I.first, PendingInstantiations); AddSourceLocation(I.second, PendingInstantiations); } assert(SemaRef.PendingLocalImplicitInstantiations.empty() && "There are local ones at end of translation unit!"); // Build a record containing some declaration references. RecordData SemaDeclRefs; if (SemaRef.StdNamespace || SemaRef.StdBadAlloc || SemaRef.StdAlignValT) { AddDeclRef(SemaRef.getStdNamespace(), SemaDeclRefs); AddDeclRef(SemaRef.getStdBadAlloc(), SemaDeclRefs); AddDeclRef(SemaRef.getStdAlignValT(), SemaDeclRefs); } RecordData CUDASpecialDeclRefs; if (Context.getcudaConfigureCallDecl()) { AddDeclRef(Context.getcudaConfigureCallDecl(), CUDASpecialDeclRefs); } // Build a record containing all of the known namespaces. RecordData KnownNamespaces; for (const auto &I : SemaRef.KnownNamespaces) { if (!I.second) AddDeclRef(I.first, KnownNamespaces); } // Build a record of all used, undefined objects that require definitions. RecordData UndefinedButUsed; SmallVector, 16> Undefined; SemaRef.getUndefinedButUsed(Undefined); for (const auto &I : Undefined) { AddDeclRef(I.first, UndefinedButUsed); AddSourceLocation(I.second, UndefinedButUsed); } // Build a record containing all delete-expressions that we would like to // analyze later in AST. RecordData DeleteExprsToAnalyze; if (!isModule) { for (const auto &DeleteExprsInfo : SemaRef.getMismatchingDeleteExpressions()) { AddDeclRef(DeleteExprsInfo.first, DeleteExprsToAnalyze); DeleteExprsToAnalyze.push_back(DeleteExprsInfo.second.size()); for (const auto &DeleteLoc : DeleteExprsInfo.second) { AddSourceLocation(DeleteLoc.first, DeleteExprsToAnalyze); DeleteExprsToAnalyze.push_back(DeleteLoc.second); } } } // Write the control block WriteControlBlock(PP, Context, isysroot); // Write the remaining AST contents. Stream.FlushToWord(); ASTBlockRange.first = Stream.GetCurrentBitNo() >> 3; Stream.EnterSubblock(AST_BLOCK_ID, 5); ASTBlockStartOffset = Stream.GetCurrentBitNo(); // This is so that older clang versions, before the introduction // of the control block, can read and reject the newer PCH format. { RecordData Record = {VERSION_MAJOR}; Stream.EmitRecord(METADATA_OLD_FORMAT, Record); } // Create a lexical update block containing all of the declarations in the // translation unit that do not come from other AST files. const TranslationUnitDecl *TU = Context.getTranslationUnitDecl(); SmallVector NewGlobalKindDeclPairs; for (const auto *D : TU->noload_decls()) { if (!D->isFromASTFile()) { NewGlobalKindDeclPairs.push_back(D->getKind()); NewGlobalKindDeclPairs.push_back(GetDeclRef(D)); } } auto Abv = std::make_shared(); Abv->Add(llvm::BitCodeAbbrevOp(TU_UPDATE_LEXICAL)); Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv)); { RecordData::value_type Record[] = {TU_UPDATE_LEXICAL}; Stream.EmitRecordWithBlob(TuUpdateLexicalAbbrev, Record, bytes(NewGlobalKindDeclPairs)); } // And a visible updates block for the translation unit. 
Abv = std::make_shared(); Abv->Add(llvm::BitCodeAbbrevOp(UPDATE_VISIBLE)); Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv)); WriteDeclContextVisibleUpdate(TU); // If we have any extern "C" names, write out a visible update for them. if (Context.ExternCContext) WriteDeclContextVisibleUpdate(Context.ExternCContext); // If the translation unit has an anonymous namespace, and we don't already // have an update block for it, write it as an update block. // FIXME: Why do we not do this if there's already an update block? if (NamespaceDecl *NS = TU->getAnonymousNamespace()) { ASTWriter::UpdateRecord &Record = DeclUpdates[TU]; if (Record.empty()) Record.push_back(DeclUpdate(UPD_CXX_ADDED_ANONYMOUS_NAMESPACE, NS)); } // Add update records for all mangling numbers and static local numbers. // These aren't really update records, but this is a convenient way of // tagging this rare extra data onto the declarations. for (const auto &Number : Context.MangleNumbers) if (!Number.first->isFromASTFile()) DeclUpdates[Number.first].push_back(DeclUpdate(UPD_MANGLING_NUMBER, Number.second)); for (const auto &Number : Context.StaticLocalNumbers) if (!Number.first->isFromASTFile()) DeclUpdates[Number.first].push_back(DeclUpdate(UPD_STATIC_LOCAL_NUMBER, Number.second)); // Make sure visible decls, added to DeclContexts previously loaded from // an AST file, are registered for serialization. Likewise for template // specializations added to imported templates. for (const auto *I : DeclsToEmitEvenIfUnreferenced) { GetDeclRef(I); } // Make sure all decls associated with an identifier are registered for // serialization, if we're storing decls with identifiers. if (!WritingModule || !getLangOpts().CPlusPlus) { llvm::SmallVector IIs; for (const auto &ID : PP.getIdentifierTable()) { const IdentifierInfo *II = ID.second; if (!Chain || !II->isFromAST() || II->hasChangedSinceDeserialization()) IIs.push_back(II); } // Sort the identifiers to visit based on their name. llvm::sort(IIs, llvm::deref>()); for (const IdentifierInfo *II : IIs) for (const Decl *D : SemaRef.IdResolver.decls(II)) GetDeclRef(D); } // For method pool in the module, if it contains an entry for a selector, // the entry should be complete, containing everything introduced by that // module and all modules it imports. It's possible that the entry is out of // date, so we need to pull in the new content here. // It's possible that updateOutOfDateSelector can update SelectorIDs. To be // safe, we copy all selectors out. llvm::SmallVector AllSelectors; for (auto &SelectorAndID : SelectorIDs) AllSelectors.push_back(SelectorAndID.first); for (auto &Selector : AllSelectors) SemaRef.updateOutOfDateSelector(Selector); // Form the record of special types. 
RecordData SpecialTypes; AddTypeRef(Context.getRawCFConstantStringType(), SpecialTypes); AddTypeRef(Context.getFILEType(), SpecialTypes); AddTypeRef(Context.getjmp_bufType(), SpecialTypes); AddTypeRef(Context.getsigjmp_bufType(), SpecialTypes); AddTypeRef(Context.ObjCIdRedefinitionType, SpecialTypes); AddTypeRef(Context.ObjCClassRedefinitionType, SpecialTypes); AddTypeRef(Context.ObjCSelRedefinitionType, SpecialTypes); AddTypeRef(Context.getucontext_tType(), SpecialTypes); if (Chain) { // Write the mapping information describing our module dependencies and how // each of those modules were mapped into our own offset/ID space, so that // the reader can build the appropriate mapping to its own offset/ID space. // The map consists solely of a blob with the following format: // *(module-kind:i8 // module-name-len:i16 module-name:len*i8 // source-location-offset:i32 // identifier-id:i32 // preprocessed-entity-id:i32 // macro-definition-id:i32 // submodule-id:i32 // selector-id:i32 // declaration-id:i32 // c++-base-specifiers-id:i32 // type-id:i32) // // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule, // MK_ExplicitModule or MK_ImplicitModule, then the module-name is the // module name. Otherwise, it is the module file name. auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned ModuleOffsetMapAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); SmallString<2048> Buffer; { llvm::raw_svector_ostream Out(Buffer); for (ModuleFile &M : Chain->ModuleMgr) { using namespace llvm::support; endian::Writer LE(Out, llvm::endianness::little); LE.write(static_cast(M.Kind)); StringRef Name = M.isModule() ? M.ModuleName : M.FileName; LE.write(Name.size()); Out.write(Name.data(), Name.size()); // Note: if a base ID was uint max, it would not be possible to load // another module after it or have more than one entity inside it. uint32_t None = std::numeric_limits::max(); auto writeBaseIDOrNone = [&](auto BaseID, bool ShouldWrite) { assert(BaseID < std::numeric_limits::max() && "base id too high"); if (ShouldWrite) LE.write(BaseID); else LE.write(None); }; // These values should be unique within a chain, since they will be read // as keys into ContinuousRangeMaps. writeBaseIDOrNone(M.SLocEntryBaseOffset, M.LocalNumSLocEntries); writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers); writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros); writeBaseIDOrNone(M.BasePreprocessedEntityID, M.NumPreprocessedEntities); writeBaseIDOrNone(M.BaseSubmoduleID, M.LocalNumSubmodules); writeBaseIDOrNone(M.BaseSelectorID, M.LocalNumSelectors); writeBaseIDOrNone(M.BaseDeclID, M.LocalNumDecls); writeBaseIDOrNone(M.BaseTypeIndex, M.LocalNumTypes); } } RecordData::value_type Record[] = {MODULE_OFFSET_MAP}; Stream.EmitRecordWithBlob(ModuleOffsetMapAbbrev, Record, Buffer.data(), Buffer.size()); } // Build a record containing all of the DeclsToCheckForDeferredDiags. SmallVector DeclsToCheckForDeferredDiags; for (auto *D : SemaRef.DeclsToCheckForDeferredDiags) DeclsToCheckForDeferredDiags.push_back(GetDeclRef(D)); RecordData DeclUpdatesOffsetsRecord; // Keep writing types, declarations, and declaration update records // until we've emitted all of them. 
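// ---------------------------------------------------------------------------
// Illustrative sketch, not from the Clang sources: building one entry of the
// little-endian blob described in the MODULE_OFFSET_MAP comment above -- a
// module-kind byte, a 16-bit name length, the name bytes, then a run of 32-bit
// base IDs where the all-ones value stands for "no entities of that kind" (the
// writeBaseIDOrNone convention). Plain byte vectors are used here instead of
// llvm::support::endian::Writer; writeOffsetMapEntry and the helpers are
// invented names.
// ---------------------------------------------------------------------------
#include <cstdint>
#include <limits>
#include <string>
#include <utility>
#include <vector>

static void writeLE16(std::vector<uint8_t> &Blob, uint16_t V) {
  Blob.push_back(static_cast<uint8_t>(V & 0xff));
  Blob.push_back(static_cast<uint8_t>(V >> 8));
}

static void writeLE32(std::vector<uint8_t> &Blob, uint32_t V) {
  for (int I = 0; I < 4; ++I)
    Blob.push_back(static_cast<uint8_t>((V >> (8 * I)) & 0xff));
}

static void writeOffsetMapEntry(
    std::vector<uint8_t> &Blob, uint8_t Kind, const std::string &Name,
    const std::vector<std::pair<uint32_t, bool>> &BaseIDs) {
  Blob.push_back(Kind);
  writeLE16(Blob, static_cast<uint16_t>(Name.size()));
  Blob.insert(Blob.end(), Name.begin(), Name.end());
  const uint32_t None = std::numeric_limits<uint32_t>::max();
  for (const auto &Base : BaseIDs)
    writeLE32(Blob, Base.second ? Base.first : None); // {BaseID, HasEntities}
}

int main() {
  std::vector<uint8_t> Blob;
  // A module "A" whose identifiers start at ID 100 and which has no local
  // macros, so the second base ID is written as the "none" marker.
  writeOffsetMapEntry(Blob, /*Kind=*/1, "A", {{100, true}, {0, false}});
  return Blob.size() == 1 + 2 + 1 + 2 * 4 ? 0 : 1;
}
// ---------------------------------------------------------------------------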
Stream.EnterSubblock(DECLTYPES_BLOCK_ID, /*bits for abbreviations*/5); DeclTypesBlockStartOffset = Stream.GetCurrentBitNo(); WriteTypeAbbrevs(); WriteDeclAbbrevs(); do { WriteDeclUpdatesBlocks(DeclUpdatesOffsetsRecord); while (!DeclTypesToEmit.empty()) { DeclOrType DOT = DeclTypesToEmit.front(); DeclTypesToEmit.pop(); if (DOT.isType()) WriteType(DOT.getType()); else WriteDecl(Context, DOT.getDecl()); } } while (!DeclUpdates.empty()); Stream.ExitBlock(); DoneWritingDeclsAndTypes = true; // These things can only be done once we've written out decls and types. WriteTypeDeclOffsets(); if (!DeclUpdatesOffsetsRecord.empty()) Stream.EmitRecord(DECL_UPDATE_OFFSETS, DeclUpdatesOffsetsRecord); WriteFileDeclIDsMap(); WriteSourceManagerBlock(Context.getSourceManager(), PP); WriteComments(); WritePreprocessor(PP, isModule); WriteHeaderSearch(PP.getHeaderSearchInfo()); WriteSelectors(SemaRef); WriteReferencedSelectorsPool(SemaRef); WriteLateParsedTemplates(SemaRef); WriteIdentifierTable(PP, SemaRef.IdResolver, isModule); WriteFPPragmaOptions(SemaRef.CurFPFeatureOverrides()); WriteOpenCLExtensions(SemaRef); WriteCUDAPragmas(SemaRef); // If we're emitting a module, write out the submodule information. if (WritingModule) WriteSubmodules(WritingModule); Stream.EmitRecord(SPECIAL_TYPES, SpecialTypes); // Write the record containing external, unnamed definitions. if (!EagerlyDeserializedDecls.empty()) Stream.EmitRecord(EAGERLY_DESERIALIZED_DECLS, EagerlyDeserializedDecls); if (!ModularCodegenDecls.empty()) Stream.EmitRecord(MODULAR_CODEGEN_DECLS, ModularCodegenDecls); // Write the record containing tentative definitions. if (!TentativeDefinitions.empty()) Stream.EmitRecord(TENTATIVE_DEFINITIONS, TentativeDefinitions); // Write the record containing unused file scoped decls. if (!UnusedFileScopedDecls.empty()) Stream.EmitRecord(UNUSED_FILESCOPED_DECLS, UnusedFileScopedDecls); // Write the record containing weak undeclared identifiers. if (!WeakUndeclaredIdentifiers.empty()) Stream.EmitRecord(WEAK_UNDECLARED_IDENTIFIERS, WeakUndeclaredIdentifiers); // Write the record containing ext_vector type names. if (!ExtVectorDecls.empty()) Stream.EmitRecord(EXT_VECTOR_DECLS, ExtVectorDecls); // Write the record containing VTable uses information. if (!VTableUses.empty()) Stream.EmitRecord(VTABLE_USES, VTableUses); // Write the record containing potentially unused local typedefs. if (!UnusedLocalTypedefNameCandidates.empty()) Stream.EmitRecord(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES, UnusedLocalTypedefNameCandidates); // Write the record containing pending implicit instantiations. if (!PendingInstantiations.empty()) Stream.EmitRecord(PENDING_IMPLICIT_INSTANTIATIONS, PendingInstantiations); // Write the record containing declaration references of Sema. if (!SemaDeclRefs.empty()) Stream.EmitRecord(SEMA_DECL_REFS, SemaDeclRefs); // Write the record containing decls to be checked for deferred diags. if (!DeclsToCheckForDeferredDiags.empty()) Stream.EmitRecord(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS, DeclsToCheckForDeferredDiags); // Write the record containing CUDA-specific declaration references. if (!CUDASpecialDeclRefs.empty()) Stream.EmitRecord(CUDA_SPECIAL_DECL_REFS, CUDASpecialDeclRefs); // Write the delegating constructors. if (!DelegatingCtorDecls.empty()) Stream.EmitRecord(DELEGATING_CTORS, DelegatingCtorDecls); // Write the known namespaces. if (!KnownNamespaces.empty()) Stream.EmitRecord(KNOWN_NAMESPACES, KnownNamespaces); // Write the undefined internal functions and variables, and inline functions. 
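// ---------------------------------------------------------------------------
// Illustrative sketch, not from the Clang sources: the shape of the drain loop
// above. Emitting one declaration or type can make GetDeclRef /
// GetOrCreateTypeID enqueue further entries on DeclTypesToEmit (and add new
// DeclUpdates), so the writer keeps looping until no pending work remains.
// The queue and item type here are invented stand-ins.
// ---------------------------------------------------------------------------
#include <cassert>
#include <queue>
#include <vector>

int main() {
  std::queue<int> ToEmit; // stands in for DeclTypesToEmit
  std::vector<int> Emitted;
  ToEmit.push(1);
  while (!ToEmit.empty()) {
    int Item = ToEmit.front();
    ToEmit.pop();
    Emitted.push_back(Item);
    // "Emitting" an item may reference -- and therefore enqueue -- more items.
    if (Item < 4)
      ToEmit.push(Item + 1);
  }
  assert(Emitted.size() == 4); // all transitively referenced items got written
  return 0;
}
// ---------------------------------------------------------------------------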
if (!UndefinedButUsed.empty()) Stream.EmitRecord(UNDEFINED_BUT_USED, UndefinedButUsed); if (!DeleteExprsToAnalyze.empty()) Stream.EmitRecord(DELETE_EXPRS_TO_ANALYZE, DeleteExprsToAnalyze); // Write the visible updates to DeclContexts. for (auto *DC : UpdatedDeclContexts) WriteDeclContextVisibleUpdate(DC); if (!WritingModule) { // Write the submodules that were imported, if any. struct ModuleInfo { uint64_t ID; Module *M; ModuleInfo(uint64_t ID, Module *M) : ID(ID), M(M) {} }; llvm::SmallVector Imports; for (const auto *I : Context.local_imports()) { assert(SubmoduleIDs.contains(I->getImportedModule())); Imports.push_back(ModuleInfo(SubmoduleIDs[I->getImportedModule()], I->getImportedModule())); } if (!Imports.empty()) { auto Cmp = [](const ModuleInfo &A, const ModuleInfo &B) { return A.ID < B.ID; }; auto Eq = [](const ModuleInfo &A, const ModuleInfo &B) { return A.ID == B.ID; }; // Sort and deduplicate module IDs. llvm::sort(Imports, Cmp); Imports.erase(std::unique(Imports.begin(), Imports.end(), Eq), Imports.end()); RecordData ImportedModules; for (const auto &Import : Imports) { ImportedModules.push_back(Import.ID); // FIXME: If the module has macros imported then later has declarations // imported, this location won't be the right one as a location for the // declaration imports. AddSourceLocation(PP.getModuleImportLoc(Import.M), ImportedModules); } Stream.EmitRecord(IMPORTED_MODULES, ImportedModules); } } WriteObjCCategories(); if(!WritingModule) { WriteOptimizePragmaOptions(SemaRef); WriteMSStructPragmaOptions(SemaRef); WriteMSPointersToMembersPragmaOptions(SemaRef); } WritePackPragmaOptions(SemaRef); WriteFloatControlPragmaOptions(SemaRef); // Some simple statistics RecordData::value_type Record[] = { NumStatements, NumMacros, NumLexicalDeclContexts, NumVisibleDeclContexts}; Stream.EmitRecord(STATISTICS, Record); Stream.ExitBlock(); Stream.FlushToWord(); ASTBlockRange.second = Stream.GetCurrentBitNo() >> 3; // Write the module file extension blocks. for (const auto &ExtWriter : ModuleFileExtensionWriters) WriteModuleFileExtension(SemaRef, *ExtWriter); return backpatchSignature(); } void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { if (DeclUpdates.empty()) return; DeclUpdateMap LocalUpdates; LocalUpdates.swap(DeclUpdates); for (auto &DeclUpdate : LocalUpdates) { const Decl *D = DeclUpdate.first; bool HasUpdatedBody = false; bool HasAddedVarDefinition = false; RecordData RecordData; ASTRecordWriter Record(*this, RecordData); for (auto &Update : DeclUpdate.second) { DeclUpdateKind Kind = (DeclUpdateKind)Update.getKind(); // An updated body is emitted last, so that the reader doesn't need // to skip over the lazy body to reach statements for other records. if (Kind == UPD_CXX_ADDED_FUNCTION_DEFINITION) HasUpdatedBody = true; else if (Kind == UPD_CXX_ADDED_VAR_DEFINITION) HasAddedVarDefinition = true; else Record.push_back(Kind); switch (Kind) { case UPD_CXX_ADDED_IMPLICIT_MEMBER: case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION: case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE: assert(Update.getDecl() && "no decl to add?"); Record.push_back(GetDeclRef(Update.getDecl())); break; case UPD_CXX_ADDED_FUNCTION_DEFINITION: case UPD_CXX_ADDED_VAR_DEFINITION: break; case UPD_CXX_POINT_OF_INSTANTIATION: // FIXME: Do we need to also save the template specialization kind here? 
Record.AddSourceLocation(Update.getLoc()); break; case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: Record.AddStmt(const_cast( cast(Update.getDecl())->getDefaultArg())); break; case UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER: Record.AddStmt( cast(Update.getDecl())->getInClassInitializer()); break; case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: { auto *RD = cast(D); UpdatedDeclContexts.insert(RD->getPrimaryContext()); Record.push_back(RD->isParamDestroyedInCallee()); Record.push_back(llvm::to_underlying(RD->getArgPassingRestrictions())); Record.AddCXXDefinitionData(RD); Record.AddOffset(WriteDeclContextLexicalBlock( *Context, const_cast(RD))); // This state is sometimes updated by template instantiation, when we // switch from the specialization referring to the template declaration // to it referring to the template definition. if (auto *MSInfo = RD->getMemberSpecializationInfo()) { Record.push_back(MSInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(MSInfo->getPointOfInstantiation()); } else { auto *Spec = cast(RD); Record.push_back(Spec->getTemplateSpecializationKind()); Record.AddSourceLocation(Spec->getPointOfInstantiation()); // The instantiation might have been resolved to a partial // specialization. If so, record which one. auto From = Spec->getInstantiatedFrom(); if (auto PartialSpec = From.dyn_cast()) { Record.push_back(true); Record.AddDeclRef(PartialSpec); Record.AddTemplateArgumentList( &Spec->getTemplateInstantiationArgs()); } else { Record.push_back(false); } } Record.push_back(llvm::to_underlying(RD->getTagKind())); Record.AddSourceLocation(RD->getLocation()); Record.AddSourceLocation(RD->getBeginLoc()); Record.AddSourceRange(RD->getBraceRange()); // Instantiation may change attributes; write them all out afresh. Record.push_back(D->hasAttrs()); if (D->hasAttrs()) Record.AddAttributes(D->getAttrs()); // FIXME: Ensure we don't get here for explicit instantiations. break; } case UPD_CXX_RESOLVED_DTOR_DELETE: Record.AddDeclRef(Update.getDecl()); Record.AddStmt(cast(D)->getOperatorDeleteThisArg()); break; case UPD_CXX_RESOLVED_EXCEPTION_SPEC: { auto prototype = cast(D)->getType()->castAs(); Record.writeExceptionSpecInfo(prototype->getExceptionSpecInfo()); break; } case UPD_CXX_DEDUCED_RETURN_TYPE: Record.push_back(GetOrCreateTypeID(Update.getType())); break; case UPD_DECL_MARKED_USED: break; case UPD_MANGLING_NUMBER: case UPD_STATIC_LOCAL_NUMBER: Record.push_back(Update.getNumber()); break; case UPD_DECL_MARKED_OPENMP_THREADPRIVATE: Record.AddSourceRange( D->getAttr()->getRange()); break; case UPD_DECL_MARKED_OPENMP_ALLOCATE: { auto *A = D->getAttr(); Record.push_back(A->getAllocatorType()); Record.AddStmt(A->getAllocator()); Record.AddStmt(A->getAlignment()); Record.AddSourceRange(A->getRange()); break; } case UPD_DECL_MARKED_OPENMP_DECLARETARGET: Record.push_back(D->getAttr()->getMapType()); Record.AddSourceRange( D->getAttr()->getRange()); break; case UPD_DECL_EXPORTED: Record.push_back(getSubmoduleID(Update.getModule())); break; case UPD_ADDED_ATTR_TO_RECORD: Record.AddAttributes(llvm::ArrayRef(Update.getAttr())); break; } } // Add a trailing update record, if any. These must go last because we // lazily load their attached statement. 
if (HasUpdatedBody) { const auto *Def = cast(D); Record.push_back(UPD_CXX_ADDED_FUNCTION_DEFINITION); Record.push_back(Def->isInlined()); Record.AddSourceLocation(Def->getInnerLocStart()); Record.AddFunctionDefinition(Def); } else if (HasAddedVarDefinition) { const auto *VD = cast(D); Record.push_back(UPD_CXX_ADDED_VAR_DEFINITION); Record.push_back(VD->isInline()); Record.push_back(VD->isInlineSpecified()); Record.AddVarDeclInit(VD); } OffsetsRecord.push_back(GetDeclRef(D)); OffsetsRecord.push_back(Record.Emit(DECL_UPDATES)); } } void ASTWriter::AddAlignPackInfo(const Sema::AlignPackInfo &Info, RecordDataImpl &Record) { uint32_t Raw = Sema::AlignPackInfo::getRawEncoding(Info); Record.push_back(Raw); } FileID ASTWriter::getAdjustedFileID(FileID FID) const { if (FID.isInvalid() || PP->getSourceManager().isLoadedFileID(FID) || NonAffectingFileIDs.empty()) return FID; auto It = llvm::lower_bound(NonAffectingFileIDs, FID); unsigned Idx = std::distance(NonAffectingFileIDs.begin(), It); unsigned Offset = NonAffectingFileIDAdjustments[Idx]; return FileID::get(FID.getOpaqueValue() - Offset); } unsigned ASTWriter::getAdjustedNumCreatedFIDs(FileID FID) const { unsigned NumCreatedFIDs = PP->getSourceManager() .getLocalSLocEntry(FID.ID) .getFile() .NumCreatedFIDs; unsigned AdjustedNumCreatedFIDs = 0; for (unsigned I = FID.ID, N = I + NumCreatedFIDs; I != N; ++I) if (IsSLocAffecting[I]) ++AdjustedNumCreatedFIDs; return AdjustedNumCreatedFIDs; } SourceLocation ASTWriter::getAdjustedLocation(SourceLocation Loc) const { if (Loc.isInvalid()) return Loc; return Loc.getLocWithOffset(-getAdjustment(Loc.getOffset())); } SourceRange ASTWriter::getAdjustedRange(SourceRange Range) const { return SourceRange(getAdjustedLocation(Range.getBegin()), getAdjustedLocation(Range.getEnd())); } SourceLocation::UIntTy ASTWriter::getAdjustedOffset(SourceLocation::UIntTy Offset) const { return Offset - getAdjustment(Offset); } SourceLocation::UIntTy ASTWriter::getAdjustment(SourceLocation::UIntTy Offset) const { if (NonAffectingRanges.empty()) return 0; if (PP->getSourceManager().isLoadedOffset(Offset)) return 0; if (Offset > NonAffectingRanges.back().getEnd().getOffset()) return NonAffectingOffsetAdjustments.back(); if (Offset < NonAffectingRanges.front().getBegin().getOffset()) return 0; auto Contains = [](const SourceRange &Range, SourceLocation::UIntTy Offset) { return Range.getEnd().getOffset() < Offset; }; auto It = llvm::lower_bound(NonAffectingRanges, Offset, Contains); unsigned Idx = std::distance(NonAffectingRanges.begin(), It); return NonAffectingOffsetAdjustments[Idx]; } void ASTWriter::AddFileID(FileID FID, RecordDataImpl &Record) { Record.push_back(getAdjustedFileID(FID).getOpaqueValue()); } void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, SourceLocationSequence *Seq) { Loc = getAdjustedLocation(Loc); Record.push_back(SourceLocationEncoding::encode(Loc, Seq)); } void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record, SourceLocationSequence *Seq) { AddSourceLocation(Range.getBegin(), Record, Seq); AddSourceLocation(Range.getEnd(), Record, Seq); } void ASTRecordWriter::AddAPFloat(const llvm::APFloat &Value) { AddAPInt(Value.bitcastToAPInt()); } void ASTWriter::AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record) { Record.push_back(getIdentifierRef(II)); } IdentID ASTWriter::getIdentifierRef(const IdentifierInfo *II) { if (!II) return 0; IdentID &ID = IdentifierIDs[II]; if (ID == 0) ID = NextIdentID++; return ID; } MacroID ASTWriter::getMacroRef(MacroInfo 
*MI, const IdentifierInfo *Name) { // Don't emit builtin macros like __LINE__ to the AST file unless they // have been redefined by the header (in which case they are not // isBuiltinMacro). if (!MI || MI->isBuiltinMacro()) return 0; MacroID &ID = MacroIDs[MI]; if (ID == 0) { ID = NextMacroID++; MacroInfoToEmitData Info = { Name, MI, ID }; MacroInfosToEmit.push_back(Info); } return ID; } MacroID ASTWriter::getMacroID(MacroInfo *MI) { if (!MI || MI->isBuiltinMacro()) return 0; assert(MacroIDs.contains(MI) && "Macro not emitted!"); return MacroIDs[MI]; } uint32_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { return IdentMacroDirectivesOffsetMap.lookup(Name); } void ASTRecordWriter::AddSelectorRef(const Selector SelRef) { Record->push_back(Writer->getSelectorRef(SelRef)); } SelectorID ASTWriter::getSelectorRef(Selector Sel) { if (Sel.getAsOpaquePtr() == nullptr) { return 0; } SelectorID SID = SelectorIDs[Sel]; if (SID == 0 && Chain) { // This might trigger a ReadSelector callback, which will set the ID for // this selector. Chain->LoadSelector(Sel); SID = SelectorIDs[Sel]; } if (SID == 0) { SID = NextSelectorID++; SelectorIDs[Sel] = SID; } return SID; } void ASTRecordWriter::AddCXXTemporary(const CXXTemporary *Temp) { AddDeclRef(Temp->getDestructor()); } void ASTRecordWriter::AddTemplateArgumentLocInfo( TemplateArgument::ArgKind Kind, const TemplateArgumentLocInfo &Arg) { switch (Kind) { case TemplateArgument::Expression: AddStmt(Arg.getAsExpr()); break; case TemplateArgument::Type: AddTypeSourceInfo(Arg.getAsTypeSourceInfo()); break; case TemplateArgument::Template: AddNestedNameSpecifierLoc(Arg.getTemplateQualifierLoc()); AddSourceLocation(Arg.getTemplateNameLoc()); break; case TemplateArgument::TemplateExpansion: AddNestedNameSpecifierLoc(Arg.getTemplateQualifierLoc()); AddSourceLocation(Arg.getTemplateNameLoc()); AddSourceLocation(Arg.getTemplateEllipsisLoc()); break; case TemplateArgument::Null: case TemplateArgument::Integral: case TemplateArgument::Declaration: case TemplateArgument::NullPtr: case TemplateArgument::StructuralValue: case TemplateArgument::Pack: // FIXME: Is this right? break; } } void ASTRecordWriter::AddTemplateArgumentLoc(const TemplateArgumentLoc &Arg) { AddTemplateArgument(Arg.getArgument()); if (Arg.getArgument().getKind() == TemplateArgument::Expression) { bool InfoHasSameExpr = Arg.getArgument().getAsExpr() == Arg.getLocInfo().getAsExpr(); Record->push_back(InfoHasSameExpr); if (InfoHasSameExpr) return; // Avoid storing the same expr twice. } AddTemplateArgumentLocInfo(Arg.getArgument().getKind(), Arg.getLocInfo()); } void ASTRecordWriter::AddTypeSourceInfo(TypeSourceInfo *TInfo) { if (!TInfo) { AddTypeRef(QualType()); return; } AddTypeRef(TInfo->getType()); AddTypeLoc(TInfo->getTypeLoc()); } void ASTRecordWriter::AddTypeLoc(TypeLoc TL, LocSeq *OuterSeq) { LocSeq::State Seq(OuterSeq); TypeLocWriter TLW(*this, Seq); for (; !TL.isNull(); TL = TL.getNextTypeLoc()) TLW.Visit(TL); } void ASTWriter::AddTypeRef(QualType T, RecordDataImpl &Record) { Record.push_back(GetOrCreateTypeID(T)); } TypeID ASTWriter::GetOrCreateTypeID(QualType T) { assert(Context); return MakeTypeID(*Context, T, [&](QualType T) -> TypeIdx { if (T.isNull()) return TypeIdx(); assert(!T.getLocalFastQualifiers()); TypeIdx &Idx = TypeIdxs[T]; if (Idx.getIndex() == 0) { if (DoneWritingDeclsAndTypes) { assert(0 && "New type seen after serializing all the types to emit!"); return TypeIdx(); } // We haven't seen this type before. 
Assign it a new ID and put it // into the queue of types to emit. Idx = TypeIdx(NextTypeID++); DeclTypesToEmit.push(T); } return Idx; }); } TypeID ASTWriter::getTypeID(QualType T) const { assert(Context); return MakeTypeID(*Context, T, [&](QualType T) -> TypeIdx { if (T.isNull()) return TypeIdx(); assert(!T.getLocalFastQualifiers()); TypeIdxMap::const_iterator I = TypeIdxs.find(T); assert(I != TypeIdxs.end() && "Type not emitted!"); return I->second; }); } void ASTWriter::AddDeclRef(const Decl *D, RecordDataImpl &Record) { Record.push_back(GetDeclRef(D)); } DeclID ASTWriter::GetDeclRef(const Decl *D) { assert(WritingAST && "Cannot request a declaration ID before AST writing"); if (!D) { return 0; } // If D comes from an AST file, its declaration ID is already known and // fixed. if (D->isFromASTFile()) return D->getGlobalID(); assert(!(reinterpret_cast(D) & 0x01) && "Invalid decl pointer"); DeclID &ID = DeclIDs[D]; if (ID == 0) { if (DoneWritingDeclsAndTypes) { assert(0 && "New decl seen after serializing all the decls to emit!"); return 0; } // We haven't seen this declaration before. Give it a new ID and // enqueue it in the list of declarations to emit. ID = NextDeclID++; DeclTypesToEmit.push(const_cast(D)); } return ID; } DeclID ASTWriter::getDeclID(const Decl *D) { if (!D) return 0; // If D comes from an AST file, its declaration ID is already known and // fixed. if (D->isFromASTFile()) return D->getGlobalID(); assert(DeclIDs.contains(D) && "Declaration not emitted!"); return DeclIDs[D]; } void ASTWriter::associateDeclWithFile(const Decl *D, DeclID ID) { assert(ID); assert(D); SourceLocation Loc = D->getLocation(); if (Loc.isInvalid()) return; // We only keep track of the file-level declarations of each file. if (!D->getLexicalDeclContext()->isFileContext()) return; // FIXME: ParmVarDecls that are part of a function type of a parameter of // a function/objc method, should not have TU as lexical context. // TemplateTemplateParmDecls that are part of an alias template, should not // have TU as lexical context. if (isa(D)) return; SourceManager &SM = Context->getSourceManager(); SourceLocation FileLoc = SM.getFileLoc(Loc); assert(SM.isLocalSourceLocation(FileLoc)); FileID FID; unsigned Offset; std::tie(FID, Offset) = SM.getDecomposedLoc(FileLoc); if (FID.isInvalid()) return; assert(SM.getSLocEntry(FID).isFile()); assert(IsSLocAffecting[FID.ID]); std::unique_ptr &Info = FileDeclIDs[FID]; if (!Info) Info = std::make_unique(); std::pair LocDecl(Offset, ID); LocDeclIDsTy &Decls = Info->DeclIDs; Decls.push_back(LocDecl); } unsigned ASTWriter::getAnonymousDeclarationNumber(const NamedDecl *D) { assert(needsAnonymousDeclarationNumber(D) && "expected an anonymous declaration"); // Number the anonymous declarations within this context, if we've not // already done so. 
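// ---------------------------------------------------------------------------
// Illustrative sketch, not from the Clang sources: the lazy ID-assignment
// pattern used by GetDeclRef and GetOrCreateTypeID above -- the first
// reference assigns the next free ID and queues the entity for emission,
// later references just return the stored ID, and 0 is reserved for "null".
// IdAssigner and getRef are invented names; strings stand in for the real
// Decl pointers and types.
// ---------------------------------------------------------------------------
#include <cassert>
#include <queue>
#include <string>
#include <unordered_map>

class IdAssigner {
  std::unordered_map<std::string, unsigned> IDs;
  unsigned NextID = 1; // 0 is reserved for the null entity

public:
  std::queue<std::string> ToEmit; // entities still to be serialized

  unsigned getRef(const std::string &Entity) {
    if (Entity.empty())
      return 0;
    unsigned &ID = IDs[Entity];
    if (ID == 0) {         // first time we see this entity
      ID = NextID++;
      ToEmit.push(Entity); // emit it later, in first-reference order
    }
    return ID;
  }
};

int main() {
  IdAssigner A;
  assert(A.getRef("foo") == 1);
  assert(A.getRef("foo") == 1); // same entity, same ID
  assert(A.getRef("bar") == 2);
  assert(A.ToEmit.size() == 2); // each entity queued exactly once
  return 0;
}
// ---------------------------------------------------------------------------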
auto It = AnonymousDeclarationNumbers.find(D); if (It == AnonymousDeclarationNumbers.end()) { auto *DC = D->getLexicalDeclContext(); numberAnonymousDeclsWithin(DC, [&](const NamedDecl *ND, unsigned Number) { AnonymousDeclarationNumbers[ND] = Number; }); It = AnonymousDeclarationNumbers.find(D); assert(It != AnonymousDeclarationNumbers.end() && "declaration not found within its lexical context"); } return It->second; } void ASTRecordWriter::AddDeclarationNameLoc(const DeclarationNameLoc &DNLoc, DeclarationName Name) { switch (Name.getNameKind()) { case DeclarationName::CXXConstructorName: case DeclarationName::CXXDestructorName: case DeclarationName::CXXConversionFunctionName: AddTypeSourceInfo(DNLoc.getNamedTypeInfo()); break; case DeclarationName::CXXOperatorName: AddSourceRange(DNLoc.getCXXOperatorNameRange()); break; case DeclarationName::CXXLiteralOperatorName: AddSourceLocation(DNLoc.getCXXLiteralOperatorNameLoc()); break; case DeclarationName::Identifier: case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: case DeclarationName::ObjCMultiArgSelector: case DeclarationName::CXXUsingDirective: case DeclarationName::CXXDeductionGuideName: break; } } void ASTRecordWriter::AddDeclarationNameInfo( const DeclarationNameInfo &NameInfo) { AddDeclarationName(NameInfo.getName()); AddSourceLocation(NameInfo.getLoc()); AddDeclarationNameLoc(NameInfo.getInfo(), NameInfo.getName()); } void ASTRecordWriter::AddQualifierInfo(const QualifierInfo &Info) { AddNestedNameSpecifierLoc(Info.QualifierLoc); Record->push_back(Info.NumTemplParamLists); for (unsigned i = 0, e = Info.NumTemplParamLists; i != e; ++i) AddTemplateParameterList(Info.TemplParamLists[i]); } void ASTRecordWriter::AddNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) { // Nested name specifiers usually aren't too long. I think that 8 would // typically accommodate the vast majority. SmallVector NestedNames; // Push each of the nested-name-specifiers's onto a stack for // serialization in reverse order. 
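// ---------------------------------------------------------------------------
// Illustrative sketch, not from the Clang sources: the stack-based reversal in
// AddNestedNameSpecifierLoc. A qualifier such as A::B::C is reachable
// innermost-first through its prefix links, so the writer first pushes the
// chain onto a stack and then pops it to record the specifiers
// outermost-first, preceded by their count. The Qualifier type here is an
// invented stand-in.
// ---------------------------------------------------------------------------
#include <iostream>
#include <string>
#include <vector>

struct Qualifier {
  std::string Name;
  const Qualifier *Prefix = nullptr; // next-outer qualifier, or null
};

static void emitQualifier(const Qualifier *Q) {
  std::vector<const Qualifier *> Stack;
  for (; Q; Q = Q->Prefix)
    Stack.push_back(Q);      // pushed innermost first, outermost last
  std::cout << Stack.size(); // the count is written first
  while (!Stack.empty()) {
    std::cout << ' ' << Stack.back()->Name; // emitted outermost first
    Stack.pop_back();
  }
  std::cout << '\n';
}

int main() {
  Qualifier A{"A"};
  Qualifier B{"B", &A};
  Qualifier C{"C", &B};
  emitQualifier(&C); // prints: 3 A B C
  return 0;
}
// ---------------------------------------------------------------------------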
while (NNS) { NestedNames.push_back(NNS); NNS = NNS.getPrefix(); } Record->push_back(NestedNames.size()); while(!NestedNames.empty()) { NNS = NestedNames.pop_back_val(); NestedNameSpecifier::SpecifierKind Kind = NNS.getNestedNameSpecifier()->getKind(); Record->push_back(Kind); switch (Kind) { case NestedNameSpecifier::Identifier: AddIdentifierRef(NNS.getNestedNameSpecifier()->getAsIdentifier()); AddSourceRange(NNS.getLocalSourceRange()); break; case NestedNameSpecifier::Namespace: AddDeclRef(NNS.getNestedNameSpecifier()->getAsNamespace()); AddSourceRange(NNS.getLocalSourceRange()); break; case NestedNameSpecifier::NamespaceAlias: AddDeclRef(NNS.getNestedNameSpecifier()->getAsNamespaceAlias()); AddSourceRange(NNS.getLocalSourceRange()); break; case NestedNameSpecifier::TypeSpec: case NestedNameSpecifier::TypeSpecWithTemplate: Record->push_back(Kind == NestedNameSpecifier::TypeSpecWithTemplate); AddTypeRef(NNS.getTypeLoc().getType()); AddTypeLoc(NNS.getTypeLoc()); AddSourceLocation(NNS.getLocalSourceRange().getEnd()); break; case NestedNameSpecifier::Global: AddSourceLocation(NNS.getLocalSourceRange().getEnd()); break; case NestedNameSpecifier::Super: AddDeclRef(NNS.getNestedNameSpecifier()->getAsRecordDecl()); AddSourceRange(NNS.getLocalSourceRange()); break; } } } void ASTRecordWriter::AddTemplateParameterList( const TemplateParameterList *TemplateParams) { assert(TemplateParams && "No TemplateParams!"); AddSourceLocation(TemplateParams->getTemplateLoc()); AddSourceLocation(TemplateParams->getLAngleLoc()); AddSourceLocation(TemplateParams->getRAngleLoc()); Record->push_back(TemplateParams->size()); for (const auto &P : *TemplateParams) AddDeclRef(P); if (const Expr *RequiresClause = TemplateParams->getRequiresClause()) { Record->push_back(true); AddStmt(const_cast(RequiresClause)); } else { Record->push_back(false); } } /// Emit a template argument list. void ASTRecordWriter::AddTemplateArgumentList( const TemplateArgumentList *TemplateArgs) { assert(TemplateArgs && "No TemplateArgs!"); Record->push_back(TemplateArgs->size()); for (int i = 0, e = TemplateArgs->size(); i != e; ++i) AddTemplateArgument(TemplateArgs->get(i)); } void ASTRecordWriter::AddASTTemplateArgumentListInfo( const ASTTemplateArgumentListInfo *ASTTemplArgList) { assert(ASTTemplArgList && "No ASTTemplArgList!"); AddSourceLocation(ASTTemplArgList->LAngleLoc); AddSourceLocation(ASTTemplArgList->RAngleLoc); Record->push_back(ASTTemplArgList->NumTemplateArgs); const TemplateArgumentLoc *TemplArgs = ASTTemplArgList->getTemplateArgs(); for (int i = 0, e = ASTTemplArgList->NumTemplateArgs; i != e; ++i) AddTemplateArgumentLoc(TemplArgs[i]); } void ASTRecordWriter::AddUnresolvedSet(const ASTUnresolvedSet &Set) { Record->push_back(Set.size()); for (ASTUnresolvedSet::const_iterator I = Set.begin(), E = Set.end(); I != E; ++I) { AddDeclRef(I.getDecl()); Record->push_back(I.getAccess()); } } // FIXME: Move this out of the main ASTRecordWriter interface. void ASTRecordWriter::AddCXXBaseSpecifier(const CXXBaseSpecifier &Base) { Record->push_back(Base.isVirtual()); Record->push_back(Base.isBaseOfClass()); Record->push_back(Base.getAccessSpecifierAsWritten()); Record->push_back(Base.getInheritConstructors()); AddTypeSourceInfo(Base.getTypeSourceInfo()); AddSourceRange(Base.getSourceRange()); AddSourceLocation(Base.isPackExpansion()? 
Base.getEllipsisLoc() : SourceLocation()); } static uint64_t EmitCXXBaseSpecifiers(ASTWriter &W, ArrayRef Bases) { ASTWriter::RecordData Record; ASTRecordWriter Writer(W, Record); Writer.push_back(Bases.size()); for (auto &Base : Bases) Writer.AddCXXBaseSpecifier(Base); return Writer.Emit(serialization::DECL_CXX_BASE_SPECIFIERS); } // FIXME: Move this out of the main ASTRecordWriter interface. void ASTRecordWriter::AddCXXBaseSpecifiers(ArrayRef Bases) { AddOffset(EmitCXXBaseSpecifiers(*Writer, Bases)); } static uint64_t EmitCXXCtorInitializers(ASTWriter &W, ArrayRef CtorInits) { ASTWriter::RecordData Record; ASTRecordWriter Writer(W, Record); Writer.push_back(CtorInits.size()); for (auto *Init : CtorInits) { if (Init->isBaseInitializer()) { Writer.push_back(CTOR_INITIALIZER_BASE); Writer.AddTypeSourceInfo(Init->getTypeSourceInfo()); Writer.push_back(Init->isBaseVirtual()); } else if (Init->isDelegatingInitializer()) { Writer.push_back(CTOR_INITIALIZER_DELEGATING); Writer.AddTypeSourceInfo(Init->getTypeSourceInfo()); } else if (Init->isMemberInitializer()){ Writer.push_back(CTOR_INITIALIZER_MEMBER); Writer.AddDeclRef(Init->getMember()); } else { Writer.push_back(CTOR_INITIALIZER_INDIRECT_MEMBER); Writer.AddDeclRef(Init->getIndirectMember()); } Writer.AddSourceLocation(Init->getMemberLocation()); Writer.AddStmt(Init->getInit()); Writer.AddSourceLocation(Init->getLParenLoc()); Writer.AddSourceLocation(Init->getRParenLoc()); Writer.push_back(Init->isWritten()); if (Init->isWritten()) Writer.push_back(Init->getSourceOrder()); } return Writer.Emit(serialization::DECL_CXX_CTOR_INITIALIZERS); } // FIXME: Move this out of the main ASTRecordWriter interface. void ASTRecordWriter::AddCXXCtorInitializers( ArrayRef CtorInits) { AddOffset(EmitCXXCtorInitializers(*Writer, CtorInits)); } void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { auto &Data = D->data(); Record->push_back(Data.IsLambda); BitsPacker DefinitionBits; + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + DefinitionBits.addBit(ShouldSkipCheckingODR); + #define FIELD(Name, Width, Merge) \ if (!DefinitionBits.canWriteNextNBits(Width)) { \ Record->push_back(DefinitionBits); \ DefinitionBits.reset(0); \ } \ DefinitionBits.addBits(Data.Name, Width); #include "clang/AST/CXXRecordDeclDefinitionBits.def" #undef FIELD Record->push_back(DefinitionBits); // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) { + if (!ShouldSkipCheckingODR) // getODRHash will compute the ODRHash if it has not been previously // computed. Record->push_back(D->getODRHash()); - } bool ModulesDebugInfo = Writer->Context->getLangOpts().ModulesDebugInfo && !D->isDependentType(); Record->push_back(ModulesDebugInfo); if (ModulesDebugInfo) Writer->ModularCodegenDecls.push_back(Writer->GetDeclRef(D)); // IsLambda bit is already saved. AddUnresolvedSet(Data.Conversions.get(*Writer->Context)); Record->push_back(Data.ComputedVisibleConversions); if (Data.ComputedVisibleConversions) AddUnresolvedSet(Data.VisibleConversions.get(*Writer->Context)); // Data.Definition is the owning decl, no need to write it. if (!Data.IsLambda) { Record->push_back(Data.NumBases); if (Data.NumBases > 0) AddCXXBaseSpecifiers(Data.bases()); // FIXME: Make VBases lazily computed when needed to avoid storing them. 
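// ---------------------------------------------------------------------------
// Illustrative sketch, not from the Clang sources: a simplified stand-in for
// the bit-packing helper used by AddCXXDefinitionData above (DefinitionBits
// and the FIELD macro). Small fields are packed LSB-first into a 32-bit word,
// and a field that would overflow the current word forces a flush first,
// which is what the canWriteNextNBits check is doing. MiniBitsPacker and
// take() are invented names; Clang's BitsPacker has a different interface.
// ---------------------------------------------------------------------------
#include <cassert>
#include <cstdint>
#include <vector>

class MiniBitsPacker {
  uint32_t Word = 0;
  unsigned Used = 0;

public:
  bool canWriteNextNBits(unsigned N) const { return Used + N <= 32; }
  void addBit(bool B) { addBits(B ? 1 : 0, 1); }
  void addBits(uint32_t Value, unsigned Width) {
    assert(Width <= 32 && canWriteNextNBits(Width) && "field does not fit");
    Word |= (static_cast<uint64_t>(Value) & ((1ull << Width) - 1)) << Used;
    Used += Width;
  }
  uint32_t take() { // flush the current word and start a fresh one
    uint32_t Out = Word;
    Word = 0;
    Used = 0;
    return Out;
  }
};

int main() {
  std::vector<uint64_t> Record;
  MiniBitsPacker Bits;
  Bits.addBit(true);  // e.g. an IsLambda-style flag
  Bits.addBits(5, 3); // a 3-bit enum field
  if (!Bits.canWriteNextNBits(30)) // a wide field would overflow: flush first
    Record.push_back(Bits.take());
  Bits.addBits(0x3fffffff, 30);
  Record.push_back(Bits.take());
  assert(Record.size() == 2 && Record[0] == ((5u << 1) | 1u));
  return 0;
}
// ---------------------------------------------------------------------------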
Record->push_back(Data.NumVBases); if (Data.NumVBases > 0) AddCXXBaseSpecifiers(Data.vbases()); AddDeclRef(D->getFirstFriend()); } else { auto &Lambda = D->getLambdaData(); BitsPacker LambdaBits; LambdaBits.addBits(Lambda.DependencyKind, /*Width=*/2); LambdaBits.addBit(Lambda.IsGenericLambda); LambdaBits.addBits(Lambda.CaptureDefault, /*Width=*/2); LambdaBits.addBits(Lambda.NumCaptures, /*Width=*/15); LambdaBits.addBit(Lambda.HasKnownInternalLinkage); Record->push_back(LambdaBits); Record->push_back(Lambda.NumExplicitCaptures); Record->push_back(Lambda.ManglingNumber); Record->push_back(D->getDeviceLambdaManglingNumber()); // The lambda context declaration and index within the context are provided // separately, so that they can be used for merging. AddTypeSourceInfo(Lambda.MethodTyInfo); for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) { const LambdaCapture &Capture = Lambda.Captures.front()[I]; AddSourceLocation(Capture.getLocation()); BitsPacker CaptureBits; CaptureBits.addBit(Capture.isImplicit()); CaptureBits.addBits(Capture.getCaptureKind(), /*Width=*/3); Record->push_back(CaptureBits); switch (Capture.getCaptureKind()) { case LCK_StarThis: case LCK_This: case LCK_VLAType: break; case LCK_ByCopy: case LCK_ByRef: ValueDecl *Var = Capture.capturesVariable() ? Capture.getCapturedVar() : nullptr; AddDeclRef(Var); AddSourceLocation(Capture.isPackExpansion() ? Capture.getEllipsisLoc() : SourceLocation()); break; } } } } void ASTRecordWriter::AddVarDeclInit(const VarDecl *VD) { const Expr *Init = VD->getInit(); if (!Init) { push_back(0); return; } uint64_t Val = 1; if (EvaluatedStmt *ES = VD->getEvaluatedStmt()) { Val |= (ES->HasConstantInitialization ? 2 : 0); Val |= (ES->HasConstantDestruction ? 4 : 0); APValue *Evaluated = VD->getEvaluatedValue(); // If the evaluated result is constant, emit it. if (Evaluated && (Evaluated->isInt() || Evaluated->isFloat())) Val |= 8; } push_back(Val); if (Val & 8) { AddAPValue(*VD->getEvaluatedValue()); } writeStmtRef(Init); } void ASTWriter::ReaderInitialized(ASTReader *Reader) { assert(Reader && "Cannot remove chain"); assert((!Chain || Chain == Reader) && "Cannot replace chain"); assert(FirstDeclID == NextDeclID && FirstTypeID == NextTypeID && FirstIdentID == NextIdentID && FirstMacroID == NextMacroID && FirstSubmoduleID == NextSubmoduleID && FirstSelectorID == NextSelectorID && "Setting chain after writing has started."); Chain = Reader; // Note, this will get called multiple times, once one the reader starts up // and again each time it's done reading a PCH or module. FirstDeclID = NUM_PREDEF_DECL_IDS + Chain->getTotalNumDecls(); FirstTypeID = NUM_PREDEF_TYPE_IDS + Chain->getTotalNumTypes(); FirstIdentID = NUM_PREDEF_IDENT_IDS + Chain->getTotalNumIdentifiers(); FirstMacroID = NUM_PREDEF_MACRO_IDS + Chain->getTotalNumMacros(); FirstSubmoduleID = NUM_PREDEF_SUBMODULE_IDS + Chain->getTotalNumSubmodules(); FirstSelectorID = NUM_PREDEF_SELECTOR_IDS + Chain->getTotalNumSelectors(); NextDeclID = FirstDeclID; NextTypeID = FirstTypeID; NextIdentID = FirstIdentID; NextMacroID = FirstMacroID; NextSelectorID = FirstSelectorID; NextSubmoduleID = FirstSubmoduleID; } void ASTWriter::IdentifierRead(IdentID ID, IdentifierInfo *II) { // Always keep the highest ID. See \p TypeRead() for more information. IdentID &StoredID = IdentifierIDs[II]; if (ID > StoredID) StoredID = ID; } void ASTWriter::MacroRead(serialization::MacroID ID, MacroInfo *MI) { // Always keep the highest ID. See \p TypeRead() for more information. 
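  // (Illustrative example, assuming a chained build: if an earlier AST file
  // assigned this macro ID 25 and a later one re-reads it as ID 612, keeping
  // 612 means the chained PCH being written refers to the entry the reader
  // will actually end up with in its tables.)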
MacroID &StoredID = MacroIDs[MI]; if (ID > StoredID) StoredID = ID; } void ASTWriter::TypeRead(TypeIdx Idx, QualType T) { // Always take the highest-numbered type index. This copes with an interesting // case for chained AST writing where we schedule writing the type and then, // later, deserialize the type from another AST. In this case, we want to // keep the higher-numbered entry so that we can properly write it out to // the AST file. TypeIdx &StoredIdx = TypeIdxs[T]; if (Idx.getIndex() >= StoredIdx.getIndex()) StoredIdx = Idx; } void ASTWriter::SelectorRead(SelectorID ID, Selector S) { // Always keep the highest ID. See \p TypeRead() for more information. SelectorID &StoredID = SelectorIDs[S]; if (ID > StoredID) StoredID = ID; } void ASTWriter::MacroDefinitionRead(serialization::PreprocessedEntityID ID, MacroDefinitionRecord *MD) { assert(!MacroDefinitions.contains(MD)); MacroDefinitions[MD] = ID; } void ASTWriter::ModuleRead(serialization::SubmoduleID ID, Module *Mod) { assert(!SubmoduleIDs.contains(Mod)); SubmoduleIDs[Mod] = ID; } void ASTWriter::CompletedTagDefinition(const TagDecl *D) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(D->isCompleteDefinition()); assert(!WritingAST && "Already writing the AST!"); if (auto *RD = dyn_cast(D)) { // We are interested when a PCH decl is modified. if (RD->isFromASTFile()) { // A forward reference was mutated into a definition. Rewrite it. // FIXME: This happens during template instantiation, should we // have created a new definition decl instead ? assert(isTemplateInstantiation(RD->getTemplateSpecializationKind()) && "completed a tag from another module but not by instantiation?"); DeclUpdates[RD].push_back( DeclUpdate(UPD_CXX_INSTANTIATED_CLASS_DEFINITION)); } } } static bool isImportedDeclContext(ASTReader *Chain, const Decl *D) { if (D->isFromASTFile()) return true; // The predefined __va_list_tag struct is imported if we imported any decls. // FIXME: This is a gross hack. return D == D->getASTContext().getVaListTagDecl(); } void ASTWriter::AddedVisibleDecl(const DeclContext *DC, const Decl *D) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(DC->isLookupContext() && "Should not add lookup results to non-lookup contexts!"); // TU is handled elsewhere. if (isa(DC)) return; // Namespaces are handled elsewhere, except for template instantiations of // FunctionTemplateDecls in namespaces. We are interested in cases where the // local instantiations are added to an imported context. Only happens when // adding ADL lookup candidates, for example templated friends. if (isa(DC) && D->getFriendObjectKind() == Decl::FOK_None && !isa(D)) return; // We're only interested in cases where a local declaration is added to an // imported context. if (D->isFromASTFile() || !isImportedDeclContext(Chain, cast(DC))) return; assert(DC == DC->getPrimaryContext() && "added to non-primary context"); assert(!getDefinitiveDeclContext(DC) && "DeclContext not definitive!"); assert(!WritingAST && "Already writing the AST!"); if (UpdatedDeclContexts.insert(DC) && !cast(DC)->isFromASTFile()) { // We're adding a visible declaration to a predefined decl context. Ensure // that we write out all of its lookup results so we don't get a nasty // surprise when we try to emit its lookup table. 
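      // (Illustrative: given isImportedDeclContext() above, the only
      // predefined context that is "imported" yet not from an AST file is the
      // __va_list_tag record, so this eager queueing of its existing decls is
      // narrowly targeted.)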
llvm::append_range(DeclsToEmitEvenIfUnreferenced, DC->decls()); } DeclsToEmitEvenIfUnreferenced.push_back(D); } void ASTWriter::AddedCXXImplicitMember(const CXXRecordDecl *RD, const Decl *D) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(D->isImplicit()); // We're only interested in cases where a local declaration is added to an // imported context. if (D->isFromASTFile() || !isImportedDeclContext(Chain, RD)) return; if (!isa(D)) return; // A decl coming from PCH was modified. assert(RD->isCompleteDefinition()); assert(!WritingAST && "Already writing the AST!"); DeclUpdates[RD].push_back(DeclUpdate(UPD_CXX_ADDED_IMPLICIT_MEMBER, D)); } void ASTWriter::ResolvedExceptionSpec(const FunctionDecl *FD) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!DoneWritingDeclsAndTypes && "Already done writing updates!"); if (!Chain) return; Chain->forEachImportedKeyDecl(FD, [&](const Decl *D) { // If we don't already know the exception specification for this redecl // chain, add an update record for it. if (isUnresolvedExceptionSpec(cast(D) ->getType() ->castAs() ->getExceptionSpecType())) DeclUpdates[D].push_back(UPD_CXX_RESOLVED_EXCEPTION_SPEC); }); } void ASTWriter::DeducedReturnType(const FunctionDecl *FD, QualType ReturnType) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!Chain) return; Chain->forEachImportedKeyDecl(FD, [&](const Decl *D) { DeclUpdates[D].push_back( DeclUpdate(UPD_CXX_DEDUCED_RETURN_TYPE, ReturnType)); }); } void ASTWriter::ResolvedOperatorDelete(const CXXDestructorDecl *DD, const FunctionDecl *Delete, Expr *ThisArg) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); assert(Delete && "Not given an operator delete"); if (!Chain) return; Chain->forEachImportedKeyDecl(DD, [&](const Decl *D) { DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_RESOLVED_DTOR_DELETE, Delete)); }); } void ASTWriter::CompletedImplicitDefinition(const FunctionDecl *D) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!D->isFromASTFile()) return; // Declaration not imported from PCH. // Implicit function decl from a PCH was defined. DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_ADDED_FUNCTION_DEFINITION)); } void ASTWriter::VariableDefinitionInstantiated(const VarDecl *D) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!D->isFromASTFile()) return; DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_ADDED_VAR_DEFINITION)); } void ASTWriter::FunctionDefinitionInstantiated(const FunctionDecl *D) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!D->isFromASTFile()) return; DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_ADDED_FUNCTION_DEFINITION)); } void ASTWriter::InstantiationRequested(const ValueDecl *D) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!D->isFromASTFile()) return; // Since the actual instantiation is delayed, this really means that we need // to update the instantiation location. 
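  // (Illustrative: for an explicit instantiation request such as
  // `template int foo<int>(int);` against an imported declaration, only the
  // point of instantiation is recorded here as an UPD_CXX_POINT_OF_INSTANTIATION
  // update; the instantiated body itself is written later, when and if it is
  // actually produced.)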
SourceLocation POI; if (auto *VD = dyn_cast(D)) POI = VD->getPointOfInstantiation(); else POI = cast(D)->getPointOfInstantiation(); DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_POINT_OF_INSTANTIATION, POI)); } void ASTWriter::DefaultArgumentInstantiated(const ParmVarDecl *D) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!D->isFromASTFile()) return; DeclUpdates[D].push_back( DeclUpdate(UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT, D)); } void ASTWriter::DefaultMemberInitializerInstantiated(const FieldDecl *D) { assert(!WritingAST && "Already writing the AST!"); if (!D->isFromASTFile()) return; DeclUpdates[D].push_back( DeclUpdate(UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER, D)); } void ASTWriter::AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!IFD->isFromASTFile()) return; // Declaration not imported from PCH. assert(IFD->getDefinition() && "Category on a class without a definition?"); ObjCClassesWithCategories.insert( const_cast(IFD->getDefinition())); } void ASTWriter::DeclarationMarkedUsed(const Decl *D) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); // If there is *any* declaration of the entity that's not from an AST file, // we can skip writing the update record. We make sure that isUsed() triggers // completion of the redeclaration chain of the entity. for (auto Prev = D->getMostRecentDecl(); Prev; Prev = Prev->getPreviousDecl()) if (IsLocalDecl(Prev)) return; DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_USED)); } void ASTWriter::DeclarationMarkedOpenMPThreadPrivate(const Decl *D) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!D->isFromASTFile()) return; DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_OPENMP_THREADPRIVATE)); } void ASTWriter::DeclarationMarkedOpenMPAllocate(const Decl *D, const Attr *A) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!D->isFromASTFile()) return; DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_OPENMP_ALLOCATE, A)); } void ASTWriter::DeclarationMarkedOpenMPDeclareTarget(const Decl *D, const Attr *Attr) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!D->isFromASTFile()) return; DeclUpdates[D].push_back( DeclUpdate(UPD_DECL_MARKED_OPENMP_DECLARETARGET, Attr)); } void ASTWriter::RedefinedHiddenDefinition(const NamedDecl *D, Module *M) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); assert(!D->isUnconditionallyVisible() && "expected a hidden declaration"); DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_EXPORTED, M)); } void ASTWriter::AddedAttributeToRecord(const Attr *Attr, const RecordDecl *Record) { if (Chain && Chain->isProcessingUpdateRecords()) return; assert(!WritingAST && "Already writing the AST!"); if (!Record->isFromASTFile()) return; DeclUpdates[Record].push_back(DeclUpdate(UPD_ADDED_ATTR_TO_RECORD, Attr)); } void ASTWriter::AddedCXXTemplateSpecialization( const ClassTemplateDecl *TD, const ClassTemplateSpecializationDecl *D) { assert(!WritingAST && "Already writing the AST!"); if (!TD->getFirstDecl()->isFromASTFile()) return; if (Chain && Chain->isProcessingUpdateRecords()) return; 
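  // (Illustrative: the specialization hangs off a template that lives in an
  // imported AST file, so it is forced onto DeclsToEmitEvenIfUnreferenced;
  // that way a chained reader can still find it through the template's
  // specialization set even if nothing in this TU references it directly.)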
DeclsToEmitEvenIfUnreferenced.push_back(D); } void ASTWriter::AddedCXXTemplateSpecialization( const VarTemplateDecl *TD, const VarTemplateSpecializationDecl *D) { assert(!WritingAST && "Already writing the AST!"); if (!TD->getFirstDecl()->isFromASTFile()) return; if (Chain && Chain->isProcessingUpdateRecords()) return; DeclsToEmitEvenIfUnreferenced.push_back(D); } void ASTWriter::AddedCXXTemplateSpecialization(const FunctionTemplateDecl *TD, const FunctionDecl *D) { assert(!WritingAST && "Already writing the AST!"); if (!TD->getFirstDecl()->isFromASTFile()) return; if (Chain && Chain->isProcessingUpdateRecords()) return; DeclsToEmitEvenIfUnreferenced.push_back(D); } //===----------------------------------------------------------------------===// //// OMPClause Serialization ////===----------------------------------------------------------------------===// namespace { class OMPClauseWriter : public OMPClauseVisitor { ASTRecordWriter &Record; public: OMPClauseWriter(ASTRecordWriter &Record) : Record(Record) {} #define GEN_CLANG_CLAUSE_CLASS #define CLAUSE_CLASS(Enum, Str, Class) void Visit##Class(Class *S); #include "llvm/Frontend/OpenMP/OMP.inc" void writeClause(OMPClause *C); void VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C); void VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C); }; } void ASTRecordWriter::writeOMPClause(OMPClause *C) { OMPClauseWriter(*this).writeClause(C); } void OMPClauseWriter::writeClause(OMPClause *C) { Record.push_back(unsigned(C->getClauseKind())); Visit(C); Record.AddSourceLocation(C->getBeginLoc()); Record.AddSourceLocation(C->getEndLoc()); } void OMPClauseWriter::VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C) { Record.push_back(uint64_t(C->getCaptureRegion())); Record.AddStmt(C->getPreInitStmt()); } void OMPClauseWriter::VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C) { VisitOMPClauseWithPreInit(C); Record.AddStmt(C->getPostUpdateExpr()); } void OMPClauseWriter::VisitOMPIfClause(OMPIfClause *C) { VisitOMPClauseWithPreInit(C); Record.push_back(uint64_t(C->getNameModifier())); Record.AddSourceLocation(C->getNameModifierLoc()); Record.AddSourceLocation(C->getColonLoc()); Record.AddStmt(C->getCondition()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPFinalClause(OMPFinalClause *C) { VisitOMPClauseWithPreInit(C); Record.AddStmt(C->getCondition()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) { VisitOMPClauseWithPreInit(C); Record.AddStmt(C->getNumThreads()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPSafelenClause(OMPSafelenClause *C) { Record.AddStmt(C->getSafelen()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPSimdlenClause(OMPSimdlenClause *C) { Record.AddStmt(C->getSimdlen()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPSizesClause(OMPSizesClause *C) { Record.push_back(C->getNumSizes()); for (Expr *Size : C->getSizesRefs()) Record.AddStmt(Size); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPFullClause(OMPFullClause *C) {} void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) { Record.AddStmt(C->getFactor()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) { Record.AddStmt(C->getAllocator()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPCollapseClause(OMPCollapseClause *C) { 
  Record.AddStmt(C->getNumForLoops());
  Record.AddSourceLocation(C->getLParenLoc());
}

void OMPClauseWriter::VisitOMPDetachClause(OMPDetachClause *C) {
  Record.AddStmt(C->getEventHandler());
  Record.AddSourceLocation(C->getLParenLoc());
}

void OMPClauseWriter::VisitOMPDefaultClause(OMPDefaultClause *C) {
  Record.push_back(unsigned(C->getDefaultKind()));
  Record.AddSourceLocation(C->getLParenLoc());
  Record.AddSourceLocation(C->getDefaultKindKwLoc());
}

void OMPClauseWriter::VisitOMPProcBindClause(OMPProcBindClause *C) {
  Record.push_back(unsigned(C->getProcBindKind()));
  Record.AddSourceLocation(C->getLParenLoc());
  Record.AddSourceLocation(C->getProcBindKindKwLoc());
}

void OMPClauseWriter::VisitOMPScheduleClause(OMPScheduleClause *C) {
  VisitOMPClauseWithPreInit(C);
  Record.push_back(C->getScheduleKind());
  Record.push_back(C->getFirstScheduleModifier());
  Record.push_back(C->getSecondScheduleModifier());
  Record.AddStmt(C->getChunkSize());
  Record.AddSourceLocation(C->getLParenLoc());
  Record.AddSourceLocation(C->getFirstScheduleModifierLoc());
  Record.AddSourceLocation(C->getSecondScheduleModifierLoc());
  Record.AddSourceLocation(C->getScheduleKindLoc());
  Record.AddSourceLocation(C->getCommaLoc());
}

void OMPClauseWriter::VisitOMPOrderedClause(OMPOrderedClause *C) {
  Record.push_back(C->getLoopNumIterations().size());
  Record.AddStmt(C->getNumForLoops());
  for (Expr *NumIter : C->getLoopNumIterations())
    Record.AddStmt(NumIter);
  for (unsigned I = 0, E = C->getLoopNumIterations().size(); I < E; ++I)
    Record.AddStmt(C->getLoopCounter(I));
  Record.AddSourceLocation(C->getLParenLoc());
}

void OMPClauseWriter::VisitOMPNowaitClause(OMPNowaitClause *) {}

void OMPClauseWriter::VisitOMPUntiedClause(OMPUntiedClause *) {}

void OMPClauseWriter::VisitOMPMergeableClause(OMPMergeableClause *) {}

void OMPClauseWriter::VisitOMPReadClause(OMPReadClause *) {}

void OMPClauseWriter::VisitOMPWriteClause(OMPWriteClause *) {}

void OMPClauseWriter::VisitOMPUpdateClause(OMPUpdateClause *C) {
  Record.push_back(C->isExtended() ? 1 : 0);
  if (C->isExtended()) {
    Record.AddSourceLocation(C->getLParenLoc());
    Record.AddSourceLocation(C->getArgumentLoc());
    Record.writeEnum(C->getDependencyKind());
  }
}

void OMPClauseWriter::VisitOMPCaptureClause(OMPCaptureClause *) {}

void OMPClauseWriter::VisitOMPCompareClause(OMPCompareClause *) {}

// Save the parameter of fail clause.
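// (Illustrative, assuming `#pragma omp atomic compare fail(seq_cst)`: the
// resulting record is roughly
//   OMPC_fail, '(' loc, fail-parameter loc, OMPC_seq_cst, begin loc, end loc,
// since writeClause() writes the clause kind first and appends the begin/end
// locations after the Visit method below runs.)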
void OMPClauseWriter::VisitOMPFailClause(OMPFailClause *C) { Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getFailParameterLoc()); Record.writeEnum(C->getFailParameter()); } void OMPClauseWriter::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseWriter::VisitOMPAcqRelClause(OMPAcqRelClause *) {} void OMPClauseWriter::VisitOMPAcquireClause(OMPAcquireClause *) {} void OMPClauseWriter::VisitOMPReleaseClause(OMPReleaseClause *) {} void OMPClauseWriter::VisitOMPRelaxedClause(OMPRelaxedClause *) {} void OMPClauseWriter::VisitOMPThreadsClause(OMPThreadsClause *) {} void OMPClauseWriter::VisitOMPSIMDClause(OMPSIMDClause *) {} void OMPClauseWriter::VisitOMPNogroupClause(OMPNogroupClause *) {} void OMPClauseWriter::VisitOMPInitClause(OMPInitClause *C) { Record.push_back(C->varlist_size()); for (Expr *VE : C->varlists()) Record.AddStmt(VE); Record.writeBool(C->getIsTarget()); Record.writeBool(C->getIsTargetSync()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getVarLoc()); } void OMPClauseWriter::VisitOMPUseClause(OMPUseClause *C) { Record.AddStmt(C->getInteropVar()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getVarLoc()); } void OMPClauseWriter::VisitOMPDestroyClause(OMPDestroyClause *C) { Record.AddStmt(C->getInteropVar()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getVarLoc()); } void OMPClauseWriter::VisitOMPNovariantsClause(OMPNovariantsClause *C) { VisitOMPClauseWithPreInit(C); Record.AddStmt(C->getCondition()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPNocontextClause(OMPNocontextClause *C) { VisitOMPClauseWithPreInit(C); Record.AddStmt(C->getCondition()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPFilterClause(OMPFilterClause *C) { VisitOMPClauseWithPreInit(C); Record.AddStmt(C->getThreadID()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPAlignClause(OMPAlignClause *C) { Record.AddStmt(C->getAlignment()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPPrivateClause(OMPPrivateClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *VE : C->varlists()) { Record.AddStmt(VE); } for (auto *VE : C->private_copies()) { Record.AddStmt(VE); } } void OMPClauseWriter::VisitOMPFirstprivateClause(OMPFirstprivateClause *C) { Record.push_back(C->varlist_size()); VisitOMPClauseWithPreInit(C); Record.AddSourceLocation(C->getLParenLoc()); for (auto *VE : C->varlists()) { Record.AddStmt(VE); } for (auto *VE : C->private_copies()) { Record.AddStmt(VE); } for (auto *VE : C->inits()) { Record.AddStmt(VE); } } void OMPClauseWriter::VisitOMPLastprivateClause(OMPLastprivateClause *C) { Record.push_back(C->varlist_size()); VisitOMPClauseWithPostUpdate(C); Record.AddSourceLocation(C->getLParenLoc()); Record.writeEnum(C->getKind()); Record.AddSourceLocation(C->getKindLoc()); Record.AddSourceLocation(C->getColonLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); for (auto *E : C->private_copies()) Record.AddStmt(E); for (auto *E : C->source_exprs()) Record.AddStmt(E); for (auto *E : C->destination_exprs()) Record.AddStmt(E); for (auto *E : C->assignment_ops()) Record.AddStmt(E); } void OMPClauseWriter::VisitOMPSharedClause(OMPSharedClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); } void 
OMPClauseWriter::VisitOMPReductionClause(OMPReductionClause *C) { Record.push_back(C->varlist_size()); Record.writeEnum(C->getModifier()); VisitOMPClauseWithPostUpdate(C); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getModifierLoc()); Record.AddSourceLocation(C->getColonLoc()); Record.AddNestedNameSpecifierLoc(C->getQualifierLoc()); Record.AddDeclarationNameInfo(C->getNameInfo()); for (auto *VE : C->varlists()) Record.AddStmt(VE); for (auto *VE : C->privates()) Record.AddStmt(VE); for (auto *E : C->lhs_exprs()) Record.AddStmt(E); for (auto *E : C->rhs_exprs()) Record.AddStmt(E); for (auto *E : C->reduction_ops()) Record.AddStmt(E); if (C->getModifier() == clang::OMPC_REDUCTION_inscan) { for (auto *E : C->copy_ops()) Record.AddStmt(E); for (auto *E : C->copy_array_temps()) Record.AddStmt(E); for (auto *E : C->copy_array_elems()) Record.AddStmt(E); } } void OMPClauseWriter::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) { Record.push_back(C->varlist_size()); VisitOMPClauseWithPostUpdate(C); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getColonLoc()); Record.AddNestedNameSpecifierLoc(C->getQualifierLoc()); Record.AddDeclarationNameInfo(C->getNameInfo()); for (auto *VE : C->varlists()) Record.AddStmt(VE); for (auto *VE : C->privates()) Record.AddStmt(VE); for (auto *E : C->lhs_exprs()) Record.AddStmt(E); for (auto *E : C->rhs_exprs()) Record.AddStmt(E); for (auto *E : C->reduction_ops()) Record.AddStmt(E); } void OMPClauseWriter::VisitOMPInReductionClause(OMPInReductionClause *C) { Record.push_back(C->varlist_size()); VisitOMPClauseWithPostUpdate(C); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getColonLoc()); Record.AddNestedNameSpecifierLoc(C->getQualifierLoc()); Record.AddDeclarationNameInfo(C->getNameInfo()); for (auto *VE : C->varlists()) Record.AddStmt(VE); for (auto *VE : C->privates()) Record.AddStmt(VE); for (auto *E : C->lhs_exprs()) Record.AddStmt(E); for (auto *E : C->rhs_exprs()) Record.AddStmt(E); for (auto *E : C->reduction_ops()) Record.AddStmt(E); for (auto *E : C->taskgroup_descriptors()) Record.AddStmt(E); } void OMPClauseWriter::VisitOMPLinearClause(OMPLinearClause *C) { Record.push_back(C->varlist_size()); VisitOMPClauseWithPostUpdate(C); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getColonLoc()); Record.push_back(C->getModifier()); Record.AddSourceLocation(C->getModifierLoc()); for (auto *VE : C->varlists()) { Record.AddStmt(VE); } for (auto *VE : C->privates()) { Record.AddStmt(VE); } for (auto *VE : C->inits()) { Record.AddStmt(VE); } for (auto *VE : C->updates()) { Record.AddStmt(VE); } for (auto *VE : C->finals()) { Record.AddStmt(VE); } Record.AddStmt(C->getStep()); Record.AddStmt(C->getCalcStep()); for (auto *VE : C->used_expressions()) Record.AddStmt(VE); } void OMPClauseWriter::VisitOMPAlignedClause(OMPAlignedClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getColonLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); Record.AddStmt(C->getAlignment()); } void OMPClauseWriter::VisitOMPCopyinClause(OMPCopyinClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); for (auto *E : C->source_exprs()) Record.AddStmt(E); for (auto *E : C->destination_exprs()) Record.AddStmt(E); for (auto *E : C->assignment_ops()) Record.AddStmt(E); } void 
OMPClauseWriter::VisitOMPCopyprivateClause(OMPCopyprivateClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); for (auto *E : C->source_exprs()) Record.AddStmt(E); for (auto *E : C->destination_exprs()) Record.AddStmt(E); for (auto *E : C->assignment_ops()) Record.AddStmt(E); } void OMPClauseWriter::VisitOMPFlushClause(OMPFlushClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); } void OMPClauseWriter::VisitOMPDepobjClause(OMPDepobjClause *C) { Record.AddStmt(C->getDepobj()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPDependClause(OMPDependClause *C) { Record.push_back(C->varlist_size()); Record.push_back(C->getNumLoops()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddStmt(C->getModifier()); Record.push_back(C->getDependencyKind()); Record.AddSourceLocation(C->getDependencyLoc()); Record.AddSourceLocation(C->getColonLoc()); Record.AddSourceLocation(C->getOmpAllMemoryLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) Record.AddStmt(C->getLoopData(I)); } void OMPClauseWriter::VisitOMPDeviceClause(OMPDeviceClause *C) { VisitOMPClauseWithPreInit(C); Record.writeEnum(C->getModifier()); Record.AddStmt(C->getDevice()); Record.AddSourceLocation(C->getModifierLoc()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPMapClause(OMPMapClause *C) { Record.push_back(C->varlist_size()); Record.push_back(C->getUniqueDeclarationsNum()); Record.push_back(C->getTotalComponentListNum()); Record.push_back(C->getTotalComponentsNum()); Record.AddSourceLocation(C->getLParenLoc()); bool HasIteratorModifier = false; for (unsigned I = 0; I < NumberOfOMPMapClauseModifiers; ++I) { Record.push_back(C->getMapTypeModifier(I)); Record.AddSourceLocation(C->getMapTypeModifierLoc(I)); if (C->getMapTypeModifier(I) == OMPC_MAP_MODIFIER_iterator) HasIteratorModifier = true; } Record.AddNestedNameSpecifierLoc(C->getMapperQualifierLoc()); Record.AddDeclarationNameInfo(C->getMapperIdInfo()); Record.push_back(C->getMapType()); Record.AddSourceLocation(C->getMapLoc()); Record.AddSourceLocation(C->getColonLoc()); for (auto *E : C->varlists()) Record.AddStmt(E); for (auto *E : C->mapperlists()) Record.AddStmt(E); if (HasIteratorModifier) Record.AddStmt(C->getIteratorModifier()); for (auto *D : C->all_decls()) Record.AddDeclRef(D); for (auto N : C->all_num_lists()) Record.push_back(N); for (auto N : C->all_lists_sizes()) Record.push_back(N); for (auto &M : C->all_components()) { Record.AddStmt(M.getAssociatedExpression()); Record.AddDeclRef(M.getAssociatedDeclaration()); } } void OMPClauseWriter::VisitOMPAllocateClause(OMPAllocateClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getColonLoc()); Record.AddStmt(C->getAllocator()); for (auto *VE : C->varlists()) Record.AddStmt(VE); } void OMPClauseWriter::VisitOMPNumTeamsClause(OMPNumTeamsClause *C) { VisitOMPClauseWithPreInit(C); Record.AddStmt(C->getNumTeams()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPThreadLimitClause(OMPThreadLimitClause *C) { VisitOMPClauseWithPreInit(C); Record.AddStmt(C->getThreadLimit()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPPriorityClause(OMPPriorityClause *C) { VisitOMPClauseWithPreInit(C); 
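  // (Illustrative: VisitOMPClauseWithPreInit() has just written the capture
  // region kind and the pre-init statement, so only the priority expression
  // and the '(' location remain to be added below.)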
Record.AddStmt(C->getPriority()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPGrainsizeClause(OMPGrainsizeClause *C) { VisitOMPClauseWithPreInit(C); Record.writeEnum(C->getModifier()); Record.AddStmt(C->getGrainsize()); Record.AddSourceLocation(C->getModifierLoc()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPNumTasksClause(OMPNumTasksClause *C) { VisitOMPClauseWithPreInit(C); Record.writeEnum(C->getModifier()); Record.AddStmt(C->getNumTasks()); Record.AddSourceLocation(C->getModifierLoc()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPHintClause(OMPHintClause *C) { Record.AddStmt(C->getHint()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPDistScheduleClause(OMPDistScheduleClause *C) { VisitOMPClauseWithPreInit(C); Record.push_back(C->getDistScheduleKind()); Record.AddStmt(C->getChunkSize()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getDistScheduleKindLoc()); Record.AddSourceLocation(C->getCommaLoc()); } void OMPClauseWriter::VisitOMPDefaultmapClause(OMPDefaultmapClause *C) { Record.push_back(C->getDefaultmapKind()); Record.push_back(C->getDefaultmapModifier()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getDefaultmapModifierLoc()); Record.AddSourceLocation(C->getDefaultmapKindLoc()); } void OMPClauseWriter::VisitOMPToClause(OMPToClause *C) { Record.push_back(C->varlist_size()); Record.push_back(C->getUniqueDeclarationsNum()); Record.push_back(C->getTotalComponentListNum()); Record.push_back(C->getTotalComponentsNum()); Record.AddSourceLocation(C->getLParenLoc()); for (unsigned I = 0; I < NumberOfOMPMotionModifiers; ++I) { Record.push_back(C->getMotionModifier(I)); Record.AddSourceLocation(C->getMotionModifierLoc(I)); } Record.AddNestedNameSpecifierLoc(C->getMapperQualifierLoc()); Record.AddDeclarationNameInfo(C->getMapperIdInfo()); Record.AddSourceLocation(C->getColonLoc()); for (auto *E : C->varlists()) Record.AddStmt(E); for (auto *E : C->mapperlists()) Record.AddStmt(E); for (auto *D : C->all_decls()) Record.AddDeclRef(D); for (auto N : C->all_num_lists()) Record.push_back(N); for (auto N : C->all_lists_sizes()) Record.push_back(N); for (auto &M : C->all_components()) { Record.AddStmt(M.getAssociatedExpression()); Record.writeBool(M.isNonContiguous()); Record.AddDeclRef(M.getAssociatedDeclaration()); } } void OMPClauseWriter::VisitOMPFromClause(OMPFromClause *C) { Record.push_back(C->varlist_size()); Record.push_back(C->getUniqueDeclarationsNum()); Record.push_back(C->getTotalComponentListNum()); Record.push_back(C->getTotalComponentsNum()); Record.AddSourceLocation(C->getLParenLoc()); for (unsigned I = 0; I < NumberOfOMPMotionModifiers; ++I) { Record.push_back(C->getMotionModifier(I)); Record.AddSourceLocation(C->getMotionModifierLoc(I)); } Record.AddNestedNameSpecifierLoc(C->getMapperQualifierLoc()); Record.AddDeclarationNameInfo(C->getMapperIdInfo()); Record.AddSourceLocation(C->getColonLoc()); for (auto *E : C->varlists()) Record.AddStmt(E); for (auto *E : C->mapperlists()) Record.AddStmt(E); for (auto *D : C->all_decls()) Record.AddDeclRef(D); for (auto N : C->all_num_lists()) Record.push_back(N); for (auto N : C->all_lists_sizes()) Record.push_back(N); for (auto &M : C->all_components()) { Record.AddStmt(M.getAssociatedExpression()); Record.writeBool(M.isNonContiguous()); Record.AddDeclRef(M.getAssociatedDeclaration()); } } void 
OMPClauseWriter::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) { Record.push_back(C->varlist_size()); Record.push_back(C->getUniqueDeclarationsNum()); Record.push_back(C->getTotalComponentListNum()); Record.push_back(C->getTotalComponentsNum()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *E : C->varlists()) Record.AddStmt(E); for (auto *VE : C->private_copies()) Record.AddStmt(VE); for (auto *VE : C->inits()) Record.AddStmt(VE); for (auto *D : C->all_decls()) Record.AddDeclRef(D); for (auto N : C->all_num_lists()) Record.push_back(N); for (auto N : C->all_lists_sizes()) Record.push_back(N); for (auto &M : C->all_components()) { Record.AddStmt(M.getAssociatedExpression()); Record.AddDeclRef(M.getAssociatedDeclaration()); } } void OMPClauseWriter::VisitOMPUseDeviceAddrClause(OMPUseDeviceAddrClause *C) { Record.push_back(C->varlist_size()); Record.push_back(C->getUniqueDeclarationsNum()); Record.push_back(C->getTotalComponentListNum()); Record.push_back(C->getTotalComponentsNum()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *E : C->varlists()) Record.AddStmt(E); for (auto *D : C->all_decls()) Record.AddDeclRef(D); for (auto N : C->all_num_lists()) Record.push_back(N); for (auto N : C->all_lists_sizes()) Record.push_back(N); for (auto &M : C->all_components()) { Record.AddStmt(M.getAssociatedExpression()); Record.AddDeclRef(M.getAssociatedDeclaration()); } } void OMPClauseWriter::VisitOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) { Record.push_back(C->varlist_size()); Record.push_back(C->getUniqueDeclarationsNum()); Record.push_back(C->getTotalComponentListNum()); Record.push_back(C->getTotalComponentsNum()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *E : C->varlists()) Record.AddStmt(E); for (auto *D : C->all_decls()) Record.AddDeclRef(D); for (auto N : C->all_num_lists()) Record.push_back(N); for (auto N : C->all_lists_sizes()) Record.push_back(N); for (auto &M : C->all_components()) { Record.AddStmt(M.getAssociatedExpression()); Record.AddDeclRef(M.getAssociatedDeclaration()); } } void OMPClauseWriter::VisitOMPHasDeviceAddrClause(OMPHasDeviceAddrClause *C) { Record.push_back(C->varlist_size()); Record.push_back(C->getUniqueDeclarationsNum()); Record.push_back(C->getTotalComponentListNum()); Record.push_back(C->getTotalComponentsNum()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *E : C->varlists()) Record.AddStmt(E); for (auto *D : C->all_decls()) Record.AddDeclRef(D); for (auto N : C->all_num_lists()) Record.push_back(N); for (auto N : C->all_lists_sizes()) Record.push_back(N); for (auto &M : C->all_components()) { Record.AddStmt(M.getAssociatedExpression()); Record.AddDeclRef(M.getAssociatedDeclaration()); } } void OMPClauseWriter::VisitOMPUnifiedAddressClause(OMPUnifiedAddressClause *) {} void OMPClauseWriter::VisitOMPUnifiedSharedMemoryClause( OMPUnifiedSharedMemoryClause *) {} void OMPClauseWriter::VisitOMPReverseOffloadClause(OMPReverseOffloadClause *) {} void OMPClauseWriter::VisitOMPDynamicAllocatorsClause(OMPDynamicAllocatorsClause *) { } void OMPClauseWriter::VisitOMPAtomicDefaultMemOrderClause( OMPAtomicDefaultMemOrderClause *C) { Record.push_back(C->getAtomicDefaultMemOrderKind()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getAtomicDefaultMemOrderKindKwLoc()); } void OMPClauseWriter::VisitOMPAtClause(OMPAtClause *C) { Record.push_back(C->getAtKind()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getAtKindKwLoc()); } void 
OMPClauseWriter::VisitOMPSeverityClause(OMPSeverityClause *C) { Record.push_back(C->getSeverityKind()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getSeverityKindKwLoc()); } void OMPClauseWriter::VisitOMPMessageClause(OMPMessageClause *C) { Record.AddStmt(C->getMessageString()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPNontemporalClause(OMPNontemporalClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); for (auto *E : C->private_refs()) Record.AddStmt(E); } void OMPClauseWriter::VisitOMPInclusiveClause(OMPInclusiveClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); } void OMPClauseWriter::VisitOMPExclusiveClause(OMPExclusiveClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); } void OMPClauseWriter::VisitOMPOrderClause(OMPOrderClause *C) { Record.writeEnum(C->getKind()); Record.writeEnum(C->getModifier()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getKindKwLoc()); Record.AddSourceLocation(C->getModifierKwLoc()); } void OMPClauseWriter::VisitOMPUsesAllocatorsClause(OMPUsesAllocatorsClause *C) { Record.push_back(C->getNumberOfAllocators()); Record.AddSourceLocation(C->getLParenLoc()); for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { OMPUsesAllocatorsClause::Data Data = C->getAllocatorData(I); Record.AddStmt(Data.Allocator); Record.AddStmt(Data.AllocatorTraits); Record.AddSourceLocation(Data.LParenLoc); Record.AddSourceLocation(Data.RParenLoc); } } void OMPClauseWriter::VisitOMPAffinityClause(OMPAffinityClause *C) { Record.push_back(C->varlist_size()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddStmt(C->getModifier()); Record.AddSourceLocation(C->getColonLoc()); for (Expr *E : C->varlists()) Record.AddStmt(E); } void OMPClauseWriter::VisitOMPBindClause(OMPBindClause *C) { Record.writeEnum(C->getBindKind()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getBindKindLoc()); } void OMPClauseWriter::VisitOMPXDynCGroupMemClause(OMPXDynCGroupMemClause *C) { VisitOMPClauseWithPreInit(C); Record.AddStmt(C->getSize()); Record.AddSourceLocation(C->getLParenLoc()); } void OMPClauseWriter::VisitOMPDoacrossClause(OMPDoacrossClause *C) { Record.push_back(C->varlist_size()); Record.push_back(C->getNumLoops()); Record.AddSourceLocation(C->getLParenLoc()); Record.push_back(C->getDependenceType()); Record.AddSourceLocation(C->getDependenceLoc()); Record.AddSourceLocation(C->getColonLoc()); for (auto *VE : C->varlists()) Record.AddStmt(VE); for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) Record.AddStmt(C->getLoopData(I)); } void OMPClauseWriter::VisitOMPXAttributeClause(OMPXAttributeClause *C) { Record.AddAttributes(C->getAttrs()); Record.AddSourceLocation(C->getBeginLoc()); Record.AddSourceLocation(C->getLParenLoc()); Record.AddSourceLocation(C->getEndLoc()); } void OMPClauseWriter::VisitOMPXBareClause(OMPXBareClause *C) {} void ASTRecordWriter::writeOMPTraitInfo(const OMPTraitInfo *TI) { writeUInt32(TI->Sets.size()); for (const auto &Set : TI->Sets) { writeEnum(Set.Kind); writeUInt32(Set.Selectors.size()); for (const auto &Selector : Set.Selectors) { writeEnum(Selector.Kind); writeBool(Selector.ScoreOrCondition); if (Selector.ScoreOrCondition) 
writeExprRef(Selector.ScoreOrCondition); writeUInt32(Selector.Properties.size()); for (const auto &Property : Selector.Properties) writeEnum(Property.Kind); } } } void ASTRecordWriter::writeOMPChildren(OMPChildren *Data) { if (!Data) return; writeUInt32(Data->getNumClauses()); writeUInt32(Data->getNumChildren()); writeBool(Data->hasAssociatedStmt()); for (unsigned I = 0, E = Data->getNumClauses(); I < E; ++I) writeOMPClause(Data->getClauses()[I]); if (Data->hasAssociatedStmt()) AddStmt(Data->getAssociatedStmt()); for (unsigned I = 0, E = Data->getNumChildren(); I < E; ++I) AddStmt(Data->getChildren()[I]); } diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp index f224075643e9..e73800100e3c 100644 --- a/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp @@ -1,2800 +1,2805 @@ //===--- ASTWriterDecl.cpp - Declaration Serialization --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements serialization for Declarations. // //===----------------------------------------------------------------------===// #include "ASTCommon.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclVisitor.h" #include "clang/AST/Expr.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/PrettyDeclStackTrace.h" #include "clang/Basic/SourceManager.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTRecordWriter.h" #include "llvm/Bitstream/BitstreamWriter.h" #include "llvm/Support/ErrorHandling.h" #include using namespace clang; using namespace serialization; //===----------------------------------------------------------------------===// // Declaration serialization //===----------------------------------------------------------------------===// namespace clang { class ASTDeclWriter : public DeclVisitor { ASTWriter &Writer; ASTContext &Context; ASTRecordWriter Record; serialization::DeclCode Code; unsigned AbbrevToUse; public: ASTDeclWriter(ASTWriter &Writer, ASTContext &Context, ASTWriter::RecordDataImpl &Record) : Writer(Writer), Context(Context), Record(Writer, Record), Code((serialization::DeclCode)0), AbbrevToUse(0) {} uint64_t Emit(Decl *D) { if (!Code) llvm::report_fatal_error(StringRef("unexpected declaration kind '") + D->getDeclKindName() + "'"); return Record.Emit(Code, AbbrevToUse); } void Visit(Decl *D); void VisitDecl(Decl *D); void VisitPragmaCommentDecl(PragmaCommentDecl *D); void VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D); void VisitTranslationUnitDecl(TranslationUnitDecl *D); void VisitNamedDecl(NamedDecl *D); void VisitLabelDecl(LabelDecl *LD); void VisitNamespaceDecl(NamespaceDecl *D); void VisitUsingDirectiveDecl(UsingDirectiveDecl *D); void VisitNamespaceAliasDecl(NamespaceAliasDecl *D); void VisitTypeDecl(TypeDecl *D); void VisitTypedefNameDecl(TypedefNameDecl *D); void VisitTypedefDecl(TypedefDecl *D); void VisitTypeAliasDecl(TypeAliasDecl *D); void VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D); void VisitUnresolvedUsingIfExistsDecl(UnresolvedUsingIfExistsDecl *D); void VisitTagDecl(TagDecl *D); void 
VisitEnumDecl(EnumDecl *D); void VisitRecordDecl(RecordDecl *D); void VisitCXXRecordDecl(CXXRecordDecl *D); void VisitClassTemplateSpecializationDecl( ClassTemplateSpecializationDecl *D); void VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D); void VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D); void VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D); void VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D); void VisitValueDecl(ValueDecl *D); void VisitEnumConstantDecl(EnumConstantDecl *D); void VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D); void VisitDeclaratorDecl(DeclaratorDecl *D); void VisitFunctionDecl(FunctionDecl *D); void VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D); void VisitCXXMethodDecl(CXXMethodDecl *D); void VisitCXXConstructorDecl(CXXConstructorDecl *D); void VisitCXXDestructorDecl(CXXDestructorDecl *D); void VisitCXXConversionDecl(CXXConversionDecl *D); void VisitFieldDecl(FieldDecl *D); void VisitMSPropertyDecl(MSPropertyDecl *D); void VisitMSGuidDecl(MSGuidDecl *D); void VisitUnnamedGlobalConstantDecl(UnnamedGlobalConstantDecl *D); void VisitTemplateParamObjectDecl(TemplateParamObjectDecl *D); void VisitIndirectFieldDecl(IndirectFieldDecl *D); void VisitVarDecl(VarDecl *D); void VisitImplicitParamDecl(ImplicitParamDecl *D); void VisitParmVarDecl(ParmVarDecl *D); void VisitDecompositionDecl(DecompositionDecl *D); void VisitBindingDecl(BindingDecl *D); void VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D); void VisitTemplateDecl(TemplateDecl *D); void VisitConceptDecl(ConceptDecl *D); void VisitImplicitConceptSpecializationDecl( ImplicitConceptSpecializationDecl *D); void VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D); void VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D); void VisitClassTemplateDecl(ClassTemplateDecl *D); void VisitVarTemplateDecl(VarTemplateDecl *D); void VisitFunctionTemplateDecl(FunctionTemplateDecl *D); void VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D); void VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D); void VisitUsingDecl(UsingDecl *D); void VisitUsingEnumDecl(UsingEnumDecl *D); void VisitUsingPackDecl(UsingPackDecl *D); void VisitUsingShadowDecl(UsingShadowDecl *D); void VisitConstructorUsingShadowDecl(ConstructorUsingShadowDecl *D); void VisitLinkageSpecDecl(LinkageSpecDecl *D); void VisitExportDecl(ExportDecl *D); void VisitFileScopeAsmDecl(FileScopeAsmDecl *D); void VisitTopLevelStmtDecl(TopLevelStmtDecl *D); void VisitImportDecl(ImportDecl *D); void VisitAccessSpecDecl(AccessSpecDecl *D); void VisitFriendDecl(FriendDecl *D); void VisitFriendTemplateDecl(FriendTemplateDecl *D); void VisitStaticAssertDecl(StaticAssertDecl *D); void VisitBlockDecl(BlockDecl *D); void VisitCapturedDecl(CapturedDecl *D); void VisitEmptyDecl(EmptyDecl *D); void VisitLifetimeExtendedTemporaryDecl(LifetimeExtendedTemporaryDecl *D); void VisitDeclContext(DeclContext *DC); template void VisitRedeclarable(Redeclarable *D); void VisitHLSLBufferDecl(HLSLBufferDecl *D); // FIXME: Put in the same order is DeclNodes.td? 
void VisitObjCMethodDecl(ObjCMethodDecl *D); void VisitObjCTypeParamDecl(ObjCTypeParamDecl *D); void VisitObjCContainerDecl(ObjCContainerDecl *D); void VisitObjCInterfaceDecl(ObjCInterfaceDecl *D); void VisitObjCIvarDecl(ObjCIvarDecl *D); void VisitObjCProtocolDecl(ObjCProtocolDecl *D); void VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D); void VisitObjCCategoryDecl(ObjCCategoryDecl *D); void VisitObjCImplDecl(ObjCImplDecl *D); void VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D); void VisitObjCImplementationDecl(ObjCImplementationDecl *D); void VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D); void VisitObjCPropertyDecl(ObjCPropertyDecl *D); void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D); void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D); void VisitOMPAllocateDecl(OMPAllocateDecl *D); void VisitOMPRequiresDecl(OMPRequiresDecl *D); void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D); void VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D); void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D); /// Add an Objective-C type parameter list to the given record. void AddObjCTypeParamList(ObjCTypeParamList *typeParams) { // Empty type parameter list. if (!typeParams) { Record.push_back(0); return; } Record.push_back(typeParams->size()); for (auto *typeParam : *typeParams) { Record.AddDeclRef(typeParam); } Record.AddSourceLocation(typeParams->getLAngleLoc()); Record.AddSourceLocation(typeParams->getRAngleLoc()); } /// Add to the record the first declaration from each module file that /// provides a declaration of D. The intent is to provide a sufficient /// set such that reloading this set will load all current redeclarations. void AddFirstDeclFromEachModule(const Decl *D, bool IncludeLocal) { llvm::MapVector Firsts; // FIXME: We can skip entries that we know are implied by others. for (const Decl *R = D->getMostRecentDecl(); R; R = R->getPreviousDecl()) { if (R->isFromASTFile()) Firsts[Writer.Chain->getOwningModuleFile(R)] = R; else if (IncludeLocal) Firsts[nullptr] = R; } for (const auto &F : Firsts) Record.AddDeclRef(F.second); } /// Get the specialization decl from an entry in the specialization list. template typename RedeclarableTemplateDecl::SpecEntryTraits::DeclType * getSpecializationDecl(EntryType &T) { return RedeclarableTemplateDecl::SpecEntryTraits::getDecl(&T); } /// Get the list of partial specializations from a template's common ptr. template decltype(T::PartialSpecializations) &getPartialSpecializations(T *Common) { return Common->PartialSpecializations; } ArrayRef getPartialSpecializations(FunctionTemplateDecl::Common *) { return std::nullopt; } template void AddTemplateSpecializations(DeclTy *D) { auto *Common = D->getCommonPtr(); // If we have any lazy specializations, and the external AST source is // our chained AST reader, we can just write out the DeclIDs. Otherwise, // we need to resolve them to actual declarations. if (Writer.Chain != Writer.Context->getExternalSource() && Common->LazySpecializations) { D->LoadLazySpecializations(); assert(!Common->LazySpecializations); } ArrayRef LazySpecializations; if (auto *LS = Common->LazySpecializations) LazySpecializations = llvm::ArrayRef(LS + 1, LS[0]); // Add a slot to the record for the number of specializations. unsigned I = Record.size(); Record.push_back(0); // AddFirstDeclFromEachModule might trigger deserialization, invalidating // *Specializations iterators. 
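    // (Illustrative: this is why the specializations are copied into a local
    // vector before being walked. The placeholder count pushed a few lines up
    // is patched once all IDs are appended: if it sits at index 4 and five
    // entries follow, `Record[I] = Record.size() - I - 1` stores 5.)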
llvm::SmallVector Specs; for (auto &Entry : Common->Specializations) Specs.push_back(getSpecializationDecl(Entry)); for (auto &Entry : getPartialSpecializations(Common)) Specs.push_back(getSpecializationDecl(Entry)); for (auto *D : Specs) { assert(D->isCanonicalDecl() && "non-canonical decl in set"); AddFirstDeclFromEachModule(D, /*IncludeLocal*/true); } Record.append(LazySpecializations.begin(), LazySpecializations.end()); // Update the size entry we added earlier. Record[I] = Record.size() - I - 1; } /// Ensure that this template specialization is associated with the specified /// template on reload. void RegisterTemplateSpecialization(const Decl *Template, const Decl *Specialization) { Template = Template->getCanonicalDecl(); // If the canonical template is local, we'll write out this specialization // when we emit it. // FIXME: We can do the same thing if there is any local declaration of // the template, to avoid emitting an update record. if (!Template->isFromASTFile()) return; // We only need to associate the first local declaration of the // specialization. The other declarations will get pulled in by it. if (Writer.getFirstLocalDecl(Specialization) != Specialization) return; Writer.DeclUpdates[Template].push_back(ASTWriter::DeclUpdate( UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION, Specialization)); } }; } void ASTDeclWriter::Visit(Decl *D) { DeclVisitor::Visit(D); // Source locations require array (variable-length) abbreviations. The // abbreviation infrastructure requires that arrays are encoded last, so // we handle it here in the case of those classes derived from DeclaratorDecl if (auto *DD = dyn_cast(D)) { if (auto *TInfo = DD->getTypeSourceInfo()) Record.AddTypeLoc(TInfo->getTypeLoc()); } // Handle FunctionDecl's body here and write it after all other Stmts/Exprs // have been written. We want it last because we will not read it back when // retrieving it from the AST, we'll just lazily set the offset. if (auto *FD = dyn_cast(D)) { Record.push_back(FD->doesThisDeclarationHaveABody()); if (FD->doesThisDeclarationHaveABody()) Record.AddFunctionDefinition(FD); } // Similar to FunctionDecls, handle VarDecl's initializer here and write it // after all other Stmts/Exprs. We will not read the initializer until after // we have finished recursive deserialization, because it can recursively // refer back to the variable. if (auto *VD = dyn_cast(D)) { Record.AddVarDeclInit(VD); } // And similarly for FieldDecls. We already serialized whether there is a // default member initializer. if (auto *FD = dyn_cast(D)) { if (FD->hasInClassInitializer()) { if (Expr *Init = FD->getInClassInitializer()) { Record.push_back(1); Record.AddStmt(Init); } else { Record.push_back(0); // Initializer has not been instantiated yet. } } } // If this declaration is also a DeclContext, write blocks for the // declarations that lexically stored inside its context and those // declarations that are visible from its context. if (auto *DC = dyn_cast(D)) VisitDeclContext(DC); } void ASTDeclWriter::VisitDecl(Decl *D) { BitsPacker DeclBits; // The order matters here. It will be better to put the bit with higher // probability to be 0 in the end of the bits. // // Since we're using VBR6 format to store it. // It will be pretty effient if all the higher bits are 0. // For example, if we need to pack 8 bits into a value and the stored value // is 0xf0, the actual stored value will be 0b000111'110000, which takes 12 // bits actually. 
However, if we changed the order to be 0x0f, then we can // store it as 0b001111, which takes 6 bits only now. DeclBits.addBits((uint64_t)D->getModuleOwnershipKind(), /*BitWidth=*/3); DeclBits.addBit(D->isReferenced()); DeclBits.addBit(D->isUsed(false)); DeclBits.addBits(D->getAccess(), /*BitWidth=*/2); DeclBits.addBit(D->isImplicit()); DeclBits.addBit(D->getDeclContext() != D->getLexicalDeclContext()); DeclBits.addBit(D->hasAttrs()); DeclBits.addBit(D->isTopLevelDeclInObjCContainer()); DeclBits.addBit(D->isInvalidDecl()); Record.push_back(DeclBits); Record.AddDeclRef(cast_or_null(D->getDeclContext())); if (D->getDeclContext() != D->getLexicalDeclContext()) Record.AddDeclRef(cast_or_null(D->getLexicalDeclContext())); if (D->hasAttrs()) Record.AddAttributes(D->getAttrs()); Record.push_back(Writer.getSubmoduleID(D->getOwningModule())); // If this declaration injected a name into a context different from its // lexical context, and that context is an imported namespace, we need to // update its visible declarations to include this name. // // This happens when we instantiate a class with a friend declaration or a // function with a local extern declaration, for instance. // // FIXME: Can we handle this in AddedVisibleDecl instead? if (D->isOutOfLine()) { auto *DC = D->getDeclContext(); while (auto *NS = dyn_cast(DC->getRedeclContext())) { if (!NS->isFromASTFile()) break; Writer.UpdatedDeclContexts.insert(NS->getPrimaryContext()); if (!NS->isInlineNamespace()) break; DC = NS->getParent(); } } } void ASTDeclWriter::VisitPragmaCommentDecl(PragmaCommentDecl *D) { StringRef Arg = D->getArg(); Record.push_back(Arg.size()); VisitDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Record.push_back(D->getCommentKind()); Record.AddString(Arg); Code = serialization::DECL_PRAGMA_COMMENT; } void ASTDeclWriter::VisitPragmaDetectMismatchDecl( PragmaDetectMismatchDecl *D) { StringRef Name = D->getName(); StringRef Value = D->getValue(); Record.push_back(Name.size() + 1 + Value.size()); VisitDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Record.AddString(Name); Record.AddString(Value); Code = serialization::DECL_PRAGMA_DETECT_MISMATCH; } void ASTDeclWriter::VisitTranslationUnitDecl(TranslationUnitDecl *D) { llvm_unreachable("Translation units aren't directly serialized"); } void ASTDeclWriter::VisitNamedDecl(NamedDecl *D) { VisitDecl(D); Record.AddDeclarationName(D->getDeclName()); Record.push_back(needsAnonymousDeclarationNumber(D) ? 
Writer.getAnonymousDeclarationNumber(D) : 0); } void ASTDeclWriter::VisitTypeDecl(TypeDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Record.AddTypeRef(QualType(D->getTypeForDecl(), 0)); } void ASTDeclWriter::VisitTypedefNameDecl(TypedefNameDecl *D) { VisitRedeclarable(D); VisitTypeDecl(D); Record.AddTypeSourceInfo(D->getTypeSourceInfo()); Record.push_back(D->isModed()); if (D->isModed()) Record.AddTypeRef(D->getUnderlyingType()); Record.AddDeclRef(D->getAnonDeclWithTypedefName(false)); } void ASTDeclWriter::VisitTypedefDecl(TypedefDecl *D) { VisitTypedefNameDecl(D); if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isImplicit() && D->getFirstDecl() == D->getMostRecentDecl() && !D->isInvalidDecl() && !D->isTopLevelDeclInObjCContainer() && !D->isModulePrivate() && !needsAnonymousDeclarationNumber(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclTypedefAbbrev(); Code = serialization::DECL_TYPEDEF; } void ASTDeclWriter::VisitTypeAliasDecl(TypeAliasDecl *D) { VisitTypedefNameDecl(D); Record.AddDeclRef(D->getDescribedAliasTemplate()); Code = serialization::DECL_TYPEALIAS; } void ASTDeclWriter::VisitTagDecl(TagDecl *D) { static_assert(DeclContext::NumTagDeclBits == 23, "You need to update the serializer after you change the " "TagDeclBits"); VisitRedeclarable(D); VisitTypeDecl(D); Record.push_back(D->getIdentifierNamespace()); BitsPacker TagDeclBits; TagDeclBits.addBits(llvm::to_underlying(D->getTagKind()), /*BitWidth=*/3); TagDeclBits.addBit(!isa(D) ? D->isCompleteDefinition() : 0); TagDeclBits.addBit(D->isEmbeddedInDeclarator()); TagDeclBits.addBit(D->isFreeStanding()); TagDeclBits.addBit(D->isCompleteDefinitionRequired()); TagDeclBits.addBits( D->hasExtInfo() ? 1 : (D->getTypedefNameForAnonDecl() ? 2 : 0), /*BitWidth=*/2); Record.push_back(TagDeclBits); Record.AddSourceRange(D->getBraceRange()); if (D->hasExtInfo()) { Record.AddQualifierInfo(*D->getExtInfo()); } else if (auto *TD = D->getTypedefNameForAnonDecl()) { Record.AddDeclRef(TD); Record.AddIdentifierRef(TD->getDeclName().getAsIdentifierInfo()); } } void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) { static_assert(DeclContext::NumEnumDeclBits == 43, "You need to update the serializer after you change the " "EnumDeclBits"); VisitTagDecl(D); Record.AddTypeSourceInfo(D->getIntegerTypeSourceInfo()); if (!D->getIntegerTypeSourceInfo()) Record.AddTypeRef(D->getIntegerType()); Record.AddTypeRef(D->getPromotionType()); BitsPacker EnumDeclBits; EnumDeclBits.addBits(D->getNumPositiveBits(), /*BitWidth=*/8); EnumDeclBits.addBits(D->getNumNegativeBits(), /*BitWidth=*/8); + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + EnumDeclBits.addBit(ShouldSkipCheckingODR); EnumDeclBits.addBit(D->isScoped()); EnumDeclBits.addBit(D->isScopedUsingClassTag()); EnumDeclBits.addBit(D->isFixed()); Record.push_back(EnumDeclBits); // We only perform ODR checks for decls not in GMF. 
- if (!shouldSkipCheckingODR(D)) + if (!ShouldSkipCheckingODR) Record.push_back(D->getODRHash()); if (MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo()) { Record.AddDeclRef(MemberInfo->getInstantiatedFrom()); Record.push_back(MemberInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(MemberInfo->getPointOfInstantiation()); } else { Record.AddDeclRef(nullptr); } if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isInvalidDecl() && !D->isImplicit() && !D->hasExtInfo() && !D->getTypedefNameForAnonDecl() && D->getFirstDecl() == D->getMostRecentDecl() && !D->isTopLevelDeclInObjCContainer() && !CXXRecordDecl::classofKind(D->getKind()) && !D->getIntegerTypeSourceInfo() && !D->getMemberSpecializationInfo() && !needsAnonymousDeclarationNumber(D) && !shouldSkipCheckingODR(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclEnumAbbrev(); Code = serialization::DECL_ENUM; } void ASTDeclWriter::VisitRecordDecl(RecordDecl *D) { static_assert(DeclContext::NumRecordDeclBits == 64, "You need to update the serializer after you change the " "RecordDeclBits"); VisitTagDecl(D); BitsPacker RecordDeclBits; RecordDeclBits.addBit(D->hasFlexibleArrayMember()); RecordDeclBits.addBit(D->isAnonymousStructOrUnion()); RecordDeclBits.addBit(D->hasObjectMember()); RecordDeclBits.addBit(D->hasVolatileMember()); RecordDeclBits.addBit(D->isNonTrivialToPrimitiveDefaultInitialize()); RecordDeclBits.addBit(D->isNonTrivialToPrimitiveCopy()); RecordDeclBits.addBit(D->isNonTrivialToPrimitiveDestroy()); RecordDeclBits.addBit(D->hasNonTrivialToPrimitiveDefaultInitializeCUnion()); RecordDeclBits.addBit(D->hasNonTrivialToPrimitiveDestructCUnion()); RecordDeclBits.addBit(D->hasNonTrivialToPrimitiveCopyCUnion()); RecordDeclBits.addBit(D->isParamDestroyedInCallee()); RecordDeclBits.addBits(llvm::to_underlying(D->getArgPassingRestrictions()), 2); Record.push_back(RecordDeclBits); // Only compute this for C/Objective-C, in C++ this is computed as part // of CXXRecordDecl. if (!isa(D)) Record.push_back(D->getODRHash()); if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isImplicit() && !D->isInvalidDecl() && !D->hasExtInfo() && !D->getTypedefNameForAnonDecl() && D->getFirstDecl() == D->getMostRecentDecl() && !D->isTopLevelDeclInObjCContainer() && !CXXRecordDecl::classofKind(D->getKind()) && !needsAnonymousDeclarationNumber(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclRecordAbbrev(); Code = serialization::DECL_RECORD; } void ASTDeclWriter::VisitValueDecl(ValueDecl *D) { VisitNamedDecl(D); Record.AddTypeRef(D->getType()); } void ASTDeclWriter::VisitEnumConstantDecl(EnumConstantDecl *D) { VisitValueDecl(D); Record.push_back(D->getInitExpr()? 1 : 0); if (D->getInitExpr()) Record.AddStmt(D->getInitExpr()); Record.AddAPSInt(D->getInitVal()); Code = serialization::DECL_ENUM_CONSTANT; } void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { VisitValueDecl(D); Record.AddSourceLocation(D->getInnerLocStart()); Record.push_back(D->hasExtInfo()); if (D->hasExtInfo()) { DeclaratorDecl::ExtInfo *Info = D->getExtInfo(); Record.AddQualifierInfo(*Info); Record.AddStmt(Info->TrailingRequiresClause); } // The location information is deferred until the end of the record. Record.AddTypeRef(D->getTypeSourceInfo() ? 
D->getTypeSourceInfo()->getType() : QualType()); } void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { static_assert(DeclContext::NumFunctionDeclBits == 44, "You need to update the serializer after you change the " "FunctionDeclBits"); VisitRedeclarable(D); Record.push_back(D->getTemplatedKind()); switch (D->getTemplatedKind()) { case FunctionDecl::TK_NonTemplate: break; case FunctionDecl::TK_DependentNonTemplate: Record.AddDeclRef(D->getInstantiatedFromDecl()); break; case FunctionDecl::TK_FunctionTemplate: Record.AddDeclRef(D->getDescribedFunctionTemplate()); break; case FunctionDecl::TK_MemberSpecialization: { MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo(); Record.AddDeclRef(MemberInfo->getInstantiatedFrom()); Record.push_back(MemberInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(MemberInfo->getPointOfInstantiation()); break; } case FunctionDecl::TK_FunctionTemplateSpecialization: { FunctionTemplateSpecializationInfo * FTSInfo = D->getTemplateSpecializationInfo(); RegisterTemplateSpecialization(FTSInfo->getTemplate(), D); Record.AddDeclRef(FTSInfo->getTemplate()); Record.push_back(FTSInfo->getTemplateSpecializationKind()); // Template arguments. Record.AddTemplateArgumentList(FTSInfo->TemplateArguments); // Template args as written. Record.push_back(FTSInfo->TemplateArgumentsAsWritten != nullptr); if (FTSInfo->TemplateArgumentsAsWritten) Record.AddASTTemplateArgumentListInfo( FTSInfo->TemplateArgumentsAsWritten); Record.AddSourceLocation(FTSInfo->getPointOfInstantiation()); if (MemberSpecializationInfo *MemberInfo = FTSInfo->getMemberSpecializationInfo()) { Record.push_back(1); Record.AddDeclRef(MemberInfo->getInstantiatedFrom()); Record.push_back(MemberInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(MemberInfo->getPointOfInstantiation()); } else { Record.push_back(0); } if (D->isCanonicalDecl()) { // Write the template that contains the specializations set. We will // add a FunctionTemplateSpecializationInfo to it when reading. Record.AddDeclRef(FTSInfo->getTemplate()->getCanonicalDecl()); } break; } case FunctionDecl::TK_DependentFunctionTemplateSpecialization: { DependentFunctionTemplateSpecializationInfo * DFTSInfo = D->getDependentSpecializationInfo(); // Candidates. Record.push_back(DFTSInfo->getCandidates().size()); for (FunctionTemplateDecl *FTD : DFTSInfo->getCandidates()) Record.AddDeclRef(FTD); // Templates args. Record.push_back(DFTSInfo->TemplateArgumentsAsWritten != nullptr); if (DFTSInfo->TemplateArgumentsAsWritten) Record.AddASTTemplateArgumentListInfo( DFTSInfo->TemplateArgumentsAsWritten); break; } } VisitDeclaratorDecl(D); Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName()); Record.push_back(D->getIdentifierNamespace()); // The order matters here. It will be better to put the bit with higher // probability to be 0 in the end of the bits. See the comments in VisitDecl // for details. 
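// Illustrative sketch only, not part of this patch: a minimal stand-in for
// the BitsPacker helper used throughout this file, showing why bits that are
// usually zero are best appended last.  Later bits land in the higher
// positions of the packed word, so when they are zero the word stays
// numerically small and the VBR-encoded operand that carries it needs fewer
// chunks.  All names in this sketch are hypothetical.
#include <cassert>
#include <cstdint>

class ToyBitsPacker {
  uint64_t Value = 0;
  unsigned CurrentOffset = 0;

public:
  void addBits(uint64_t Bits, unsigned Width) {
    assert(CurrentOffset + Width <= 64 && "packed word overflowed");
    assert((Width == 64 || Bits < (uint64_t(1) << Width)) &&
           "value does not fit in Width bits");
    Value |= Bits << CurrentOffset;
    CurrentOffset += Width;
  }
  void addBit(bool Bit) { addBits(Bit, /*Width=*/1); }
  operator uint64_t() const { return Value; }
};

// Usage: a rarely-set flag appended last keeps the packed word small.
inline uint64_t packToyDeclBits(unsigned Access /*2 bits*/, bool RarelySet) {
  ToyBitsPacker Bits;
  Bits.addBits(Access, /*Width=*/2); // common field in the low bits
  Bits.addBit(RarelySet);            // usually 0, so it occupies the top bit
  return Bits;                       // small value whenever RarelySet is false
}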
BitsPacker FunctionDeclBits; // FIXME: stable encoding FunctionDeclBits.addBits(llvm::to_underlying(D->getLinkageInternal()), 3); FunctionDeclBits.addBits((uint32_t)D->getStorageClass(), /*BitWidth=*/3); + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + FunctionDeclBits.addBit(ShouldSkipCheckingODR); FunctionDeclBits.addBit(D->isInlineSpecified()); FunctionDeclBits.addBit(D->isInlined()); FunctionDeclBits.addBit(D->hasSkippedBody()); FunctionDeclBits.addBit(D->isVirtualAsWritten()); FunctionDeclBits.addBit(D->isPureVirtual()); FunctionDeclBits.addBit(D->hasInheritedPrototype()); FunctionDeclBits.addBit(D->hasWrittenPrototype()); FunctionDeclBits.addBit(D->isDeletedBit()); FunctionDeclBits.addBit(D->isTrivial()); FunctionDeclBits.addBit(D->isTrivialForCall()); FunctionDeclBits.addBit(D->isDefaulted()); FunctionDeclBits.addBit(D->isExplicitlyDefaulted()); FunctionDeclBits.addBit(D->isIneligibleOrNotSelected()); FunctionDeclBits.addBits((uint64_t)(D->getConstexprKind()), /*BitWidth=*/2); FunctionDeclBits.addBit(D->hasImplicitReturnZero()); FunctionDeclBits.addBit(D->isMultiVersion()); FunctionDeclBits.addBit(D->isLateTemplateParsed()); FunctionDeclBits.addBit(D->FriendConstraintRefersToEnclosingTemplate()); FunctionDeclBits.addBit(D->usesSEHTry()); Record.push_back(FunctionDeclBits); Record.AddSourceLocation(D->getEndLoc()); if (D->isExplicitlyDefaulted()) Record.AddSourceLocation(D->getDefaultLoc()); // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) + if (!ShouldSkipCheckingODR) Record.push_back(D->getODRHash()); if (D->isDefaulted()) { if (auto *FDI = D->getDefaultedFunctionInfo()) { Record.push_back(FDI->getUnqualifiedLookups().size()); for (DeclAccessPair P : FDI->getUnqualifiedLookups()) { Record.AddDeclRef(P.getDecl()); Record.push_back(P.getAccess()); } } else { Record.push_back(0); } } Record.push_back(D->param_size()); for (auto *P : D->parameters()) Record.AddDeclRef(P); Code = serialization::DECL_FUNCTION; } static void addExplicitSpecifier(ExplicitSpecifier ES, ASTRecordWriter &Record) { uint64_t Kind = static_cast(ES.getKind()); Kind = Kind << 1 | static_cast(ES.getExpr()); Record.push_back(Kind); if (ES.getExpr()) { Record.AddStmt(ES.getExpr()); } } void ASTDeclWriter::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) { addExplicitSpecifier(D->getExplicitSpecifier(), Record); Record.AddDeclRef(D->Ctor); VisitFunctionDecl(D); Record.push_back(static_cast(D->getDeductionCandidateKind())); Code = serialization::DECL_CXX_DEDUCTION_GUIDE; } void ASTDeclWriter::VisitObjCMethodDecl(ObjCMethodDecl *D) { static_assert(DeclContext::NumObjCMethodDeclBits == 37, "You need to update the serializer after you change the " "ObjCMethodDeclBits"); VisitNamedDecl(D); // FIXME: convert to LazyStmtPtr? // Unlike C/C++, method bodies will never be in header files. 
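// Illustrative sketch only: the new ShouldSkipCheckingODR bit added to
// FunctionDeclBits above only works because ASTDeclReader unpacks the word in
// exactly the order it was packed here; adding a bit on the writer side
// without the matching read leaves the stream well-formed but silently shifts
// every later field.  The toy writer/reader pair below shows that required
// symmetry; the field selection is hypothetical.
#include <cstdint>

inline uint64_t packToyFunctionBits(unsigned StorageClass /*3 bits*/,
                                    bool ShouldSkipCheckingODR, bool IsInline) {
  uint64_t Bits = 0;
  unsigned Offset = 0;
  Bits |= uint64_t(StorageClass) << Offset;          Offset += 3;
  Bits |= uint64_t(ShouldSkipCheckingODR) << Offset; Offset += 1; // new bit
  Bits |= uint64_t(IsInline) << Offset;              Offset += 1;
  return Bits;
}

inline void unpackToyFunctionBits(uint64_t Bits, unsigned &StorageClass,
                                  bool &ShouldSkipCheckingODR, bool &IsInline) {
  unsigned Offset = 0;
  StorageClass = (Bits >> Offset) & 0x7;          Offset += 3;
  ShouldSkipCheckingODR = (Bits >> Offset) & 0x1; Offset += 1; // mirrors writer
  IsInline = (Bits >> Offset) & 0x1;              Offset += 1;
}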
bool HasBodyStuff = D->getBody() != nullptr; Record.push_back(HasBodyStuff); if (HasBodyStuff) { Record.AddStmt(D->getBody()); } Record.AddDeclRef(D->getSelfDecl()); Record.AddDeclRef(D->getCmdDecl()); Record.push_back(D->isInstanceMethod()); Record.push_back(D->isVariadic()); Record.push_back(D->isPropertyAccessor()); Record.push_back(D->isSynthesizedAccessorStub()); Record.push_back(D->isDefined()); Record.push_back(D->isOverriding()); Record.push_back(D->hasSkippedBody()); Record.push_back(D->isRedeclaration()); Record.push_back(D->hasRedeclaration()); if (D->hasRedeclaration()) { assert(Context.getObjCMethodRedeclaration(D)); Record.AddDeclRef(Context.getObjCMethodRedeclaration(D)); } // FIXME: stable encoding for @required/@optional Record.push_back(llvm::to_underlying(D->getImplementationControl())); // FIXME: stable encoding for in/out/inout/bycopy/byref/oneway/nullability Record.push_back(D->getObjCDeclQualifier()); Record.push_back(D->hasRelatedResultType()); Record.AddTypeRef(D->getReturnType()); Record.AddTypeSourceInfo(D->getReturnTypeSourceInfo()); Record.AddSourceLocation(D->getEndLoc()); Record.push_back(D->param_size()); for (const auto *P : D->parameters()) Record.AddDeclRef(P); Record.push_back(D->getSelLocsKind()); unsigned NumStoredSelLocs = D->getNumStoredSelLocs(); SourceLocation *SelLocs = D->getStoredSelLocs(); Record.push_back(NumStoredSelLocs); for (unsigned i = 0; i != NumStoredSelLocs; ++i) Record.AddSourceLocation(SelLocs[i]); Code = serialization::DECL_OBJC_METHOD; } void ASTDeclWriter::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) { VisitTypedefNameDecl(D); Record.push_back(D->Variance); Record.push_back(D->Index); Record.AddSourceLocation(D->VarianceLoc); Record.AddSourceLocation(D->ColonLoc); Code = serialization::DECL_OBJC_TYPE_PARAM; } void ASTDeclWriter::VisitObjCContainerDecl(ObjCContainerDecl *D) { static_assert(DeclContext::NumObjCContainerDeclBits == 64, "You need to update the serializer after you change the " "ObjCContainerDeclBits"); VisitNamedDecl(D); Record.AddSourceLocation(D->getAtStartLoc()); Record.AddSourceRange(D->getAtEndRange()); // Abstract class (no need to define a stable serialization::DECL code). } void ASTDeclWriter::VisitObjCInterfaceDecl(ObjCInterfaceDecl *D) { VisitRedeclarable(D); VisitObjCContainerDecl(D); Record.AddTypeRef(QualType(D->getTypeForDecl(), 0)); AddObjCTypeParamList(D->TypeParamList); Record.push_back(D->isThisDeclarationADefinition()); if (D->isThisDeclarationADefinition()) { // Write the DefinitionData ObjCInterfaceDecl::DefinitionData &Data = D->data(); Record.AddTypeSourceInfo(D->getSuperClassTInfo()); Record.AddSourceLocation(D->getEndOfDefinitionLoc()); Record.push_back(Data.HasDesignatedInitializers); Record.push_back(D->getODRHash()); // Write out the protocols that are directly referenced by the @interface. Record.push_back(Data.ReferencedProtocols.size()); for (const auto *P : D->protocols()) Record.AddDeclRef(P); for (const auto &PL : D->protocol_locs()) Record.AddSourceLocation(PL); // Write out the protocols that are transitively referenced. Record.push_back(Data.AllReferencedProtocols.size()); for (ObjCList::iterator P = Data.AllReferencedProtocols.begin(), PEnd = Data.AllReferencedProtocols.end(); P != PEnd; ++P) Record.AddDeclRef(*P); if (ObjCCategoryDecl *Cat = D->getCategoryListRaw()) { // Ensure that we write out the set of categories for this class. Writer.ObjCClassesWithCategories.insert(D); // Make sure that the categories get serialized. 
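// Illustrative sketch only: the (void)Writer.GetDeclRef(Cat) calls in the loop
// below do not emit the category on the spot; requesting a declaration's ID is
// what schedules that declaration for emission later, so touching every
// category here is enough to guarantee it reaches the AST file.  The toy
// writer below is a hypothetical reduction of that "reference implies
// enqueue" scheme.
#include <cstdint>
#include <deque>
#include <map>

struct ToyDecl;

struct ToyWriter {
  std::map<const ToyDecl *, uint64_t> IDs;
  std::deque<const ToyDecl *> PendingEmission;

  uint64_t GetDeclRef(const ToyDecl *D) {
    auto [It, Inserted] = IDs.try_emplace(D, IDs.size() + 1);
    if (Inserted)
      PendingEmission.push_back(D); // first reference queues the decl
    return It->second;
  }
};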
for (; Cat; Cat = Cat->getNextClassCategoryRaw()) (void)Writer.GetDeclRef(Cat); } } Code = serialization::DECL_OBJC_INTERFACE; } void ASTDeclWriter::VisitObjCIvarDecl(ObjCIvarDecl *D) { VisitFieldDecl(D); // FIXME: stable encoding for @public/@private/@protected/@package Record.push_back(D->getAccessControl()); Record.push_back(D->getSynthesize()); if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isImplicit() && !D->isUsed(false) && !D->isInvalidDecl() && !D->isReferenced() && !D->isModulePrivate() && !D->getBitWidth() && !D->hasExtInfo() && D->getDeclName()) AbbrevToUse = Writer.getDeclObjCIvarAbbrev(); Code = serialization::DECL_OBJC_IVAR; } void ASTDeclWriter::VisitObjCProtocolDecl(ObjCProtocolDecl *D) { VisitRedeclarable(D); VisitObjCContainerDecl(D); Record.push_back(D->isThisDeclarationADefinition()); if (D->isThisDeclarationADefinition()) { Record.push_back(D->protocol_size()); for (const auto *I : D->protocols()) Record.AddDeclRef(I); for (const auto &PL : D->protocol_locs()) Record.AddSourceLocation(PL); Record.push_back(D->getODRHash()); } Code = serialization::DECL_OBJC_PROTOCOL; } void ASTDeclWriter::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D) { VisitFieldDecl(D); Code = serialization::DECL_OBJC_AT_DEFS_FIELD; } void ASTDeclWriter::VisitObjCCategoryDecl(ObjCCategoryDecl *D) { VisitObjCContainerDecl(D); Record.AddSourceLocation(D->getCategoryNameLoc()); Record.AddSourceLocation(D->getIvarLBraceLoc()); Record.AddSourceLocation(D->getIvarRBraceLoc()); Record.AddDeclRef(D->getClassInterface()); AddObjCTypeParamList(D->TypeParamList); Record.push_back(D->protocol_size()); for (const auto *I : D->protocols()) Record.AddDeclRef(I); for (const auto &PL : D->protocol_locs()) Record.AddSourceLocation(PL); Code = serialization::DECL_OBJC_CATEGORY; } void ASTDeclWriter::VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D) { VisitNamedDecl(D); Record.AddDeclRef(D->getClassInterface()); Code = serialization::DECL_OBJC_COMPATIBLE_ALIAS; } void ASTDeclWriter::VisitObjCPropertyDecl(ObjCPropertyDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getAtLoc()); Record.AddSourceLocation(D->getLParenLoc()); Record.AddTypeRef(D->getType()); Record.AddTypeSourceInfo(D->getTypeSourceInfo()); // FIXME: stable encoding Record.push_back((unsigned)D->getPropertyAttributes()); Record.push_back((unsigned)D->getPropertyAttributesAsWritten()); // FIXME: stable encoding Record.push_back((unsigned)D->getPropertyImplementation()); Record.AddDeclarationName(D->getGetterName()); Record.AddSourceLocation(D->getGetterNameLoc()); Record.AddDeclarationName(D->getSetterName()); Record.AddSourceLocation(D->getSetterNameLoc()); Record.AddDeclRef(D->getGetterMethodDecl()); Record.AddDeclRef(D->getSetterMethodDecl()); Record.AddDeclRef(D->getPropertyIvarDecl()); Code = serialization::DECL_OBJC_PROPERTY; } void ASTDeclWriter::VisitObjCImplDecl(ObjCImplDecl *D) { VisitObjCContainerDecl(D); Record.AddDeclRef(D->getClassInterface()); // Abstract class (no need to define a stable serialization::DECL code). 
} void ASTDeclWriter::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) { VisitObjCImplDecl(D); Record.AddSourceLocation(D->getCategoryNameLoc()); Code = serialization::DECL_OBJC_CATEGORY_IMPL; } void ASTDeclWriter::VisitObjCImplementationDecl(ObjCImplementationDecl *D) { VisitObjCImplDecl(D); Record.AddDeclRef(D->getSuperClass()); Record.AddSourceLocation(D->getSuperClassLoc()); Record.AddSourceLocation(D->getIvarLBraceLoc()); Record.AddSourceLocation(D->getIvarRBraceLoc()); Record.push_back(D->hasNonZeroConstructors()); Record.push_back(D->hasDestructors()); Record.push_back(D->NumIvarInitializers); if (D->NumIvarInitializers) Record.AddCXXCtorInitializers( llvm::ArrayRef(D->init_begin(), D->init_end())); Code = serialization::DECL_OBJC_IMPLEMENTATION; } void ASTDeclWriter::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) { VisitDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Record.AddDeclRef(D->getPropertyDecl()); Record.AddDeclRef(D->getPropertyIvarDecl()); Record.AddSourceLocation(D->getPropertyIvarDeclLoc()); Record.AddDeclRef(D->getGetterMethodDecl()); Record.AddDeclRef(D->getSetterMethodDecl()); Record.AddStmt(D->getGetterCXXConstructor()); Record.AddStmt(D->getSetterCXXAssignment()); Code = serialization::DECL_OBJC_PROPERTY_IMPL; } void ASTDeclWriter::VisitFieldDecl(FieldDecl *D) { VisitDeclaratorDecl(D); Record.push_back(D->isMutable()); Record.push_back((D->StorageKind << 1) | D->BitField); if (D->StorageKind == FieldDecl::ISK_CapturedVLAType) Record.AddTypeRef(QualType(D->getCapturedVLAType(), 0)); else if (D->BitField) Record.AddStmt(D->getBitWidth()); if (!D->getDeclName()) Record.AddDeclRef(Context.getInstantiatedFromUnnamedFieldDecl(D)); if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isImplicit() && !D->isUsed(false) && !D->isInvalidDecl() && !D->isReferenced() && !D->isTopLevelDeclInObjCContainer() && !D->isModulePrivate() && !D->getBitWidth() && !D->hasInClassInitializer() && !D->hasCapturedVLAType() && !D->hasExtInfo() && !ObjCIvarDecl::classofKind(D->getKind()) && !ObjCAtDefsFieldDecl::classofKind(D->getKind()) && D->getDeclName()) AbbrevToUse = Writer.getDeclFieldAbbrev(); Code = serialization::DECL_FIELD; } void ASTDeclWriter::VisitMSPropertyDecl(MSPropertyDecl *D) { VisitDeclaratorDecl(D); Record.AddIdentifierRef(D->getGetterId()); Record.AddIdentifierRef(D->getSetterId()); Code = serialization::DECL_MS_PROPERTY; } void ASTDeclWriter::VisitMSGuidDecl(MSGuidDecl *D) { VisitValueDecl(D); MSGuidDecl::Parts Parts = D->getParts(); Record.push_back(Parts.Part1); Record.push_back(Parts.Part2); Record.push_back(Parts.Part3); Record.append(std::begin(Parts.Part4And5), std::end(Parts.Part4And5)); Code = serialization::DECL_MS_GUID; } void ASTDeclWriter::VisitUnnamedGlobalConstantDecl( UnnamedGlobalConstantDecl *D) { VisitValueDecl(D); Record.AddAPValue(D->getValue()); Code = serialization::DECL_UNNAMED_GLOBAL_CONSTANT; } void ASTDeclWriter::VisitTemplateParamObjectDecl(TemplateParamObjectDecl *D) { VisitValueDecl(D); Record.AddAPValue(D->getValue()); Code = serialization::DECL_TEMPLATE_PARAM_OBJECT; } void ASTDeclWriter::VisitIndirectFieldDecl(IndirectFieldDecl *D) { VisitValueDecl(D); Record.push_back(D->getChainingSize()); for (const auto *P : D->chain()) Record.AddDeclRef(P); Code = serialization::DECL_INDIRECTFIELD; } void ASTDeclWriter::VisitVarDecl(VarDecl *D) { VisitRedeclarable(D); VisitDeclaratorDecl(D); // The order matters here. It will be better to put the bit with higher // probability to be 0 in the end of the bits. 
See the comments in VisitDecl // for details. BitsPacker VarDeclBits; VarDeclBits.addBits(llvm::to_underlying(D->getLinkageInternal()), /*BitWidth=*/3); bool ModulesCodegen = false; if (Writer.WritingModule && D->getStorageDuration() == SD_Static && !D->getDescribedVarTemplate()) { // When building a C++20 module interface unit or a partition unit, a // strong definition in the module interface is provided by the // compilation of that unit, not by its users. (Inline variables are still // emitted in module users.) ModulesCodegen = (Writer.WritingModule->isInterfaceOrPartition() || (D->hasAttr() && Writer.Context->getLangOpts().BuildingPCHWithObjectFile)) && Writer.Context->GetGVALinkageForVariable(D) >= GVA_StrongExternal; } VarDeclBits.addBit(ModulesCodegen); VarDeclBits.addBits(D->getStorageClass(), /*BitWidth=*/3); VarDeclBits.addBits(D->getTSCSpec(), /*BitWidth=*/2); VarDeclBits.addBits(D->getInitStyle(), /*BitWidth=*/2); VarDeclBits.addBit(D->isARCPseudoStrong()); bool HasDeducedType = false; if (!isa(D)) { VarDeclBits.addBit(D->isThisDeclarationADemotedDefinition()); VarDeclBits.addBit(D->isExceptionVariable()); VarDeclBits.addBit(D->isNRVOVariable()); VarDeclBits.addBit(D->isCXXForRangeDecl()); VarDeclBits.addBit(D->isInline()); VarDeclBits.addBit(D->isInlineSpecified()); VarDeclBits.addBit(D->isConstexpr()); VarDeclBits.addBit(D->isInitCapture()); VarDeclBits.addBit(D->isPreviousDeclInSameBlockScope()); VarDeclBits.addBit(D->isEscapingByref()); HasDeducedType = D->getType()->getContainedDeducedType(); VarDeclBits.addBit(HasDeducedType); if (const auto *IPD = dyn_cast(D)) VarDeclBits.addBits(llvm::to_underlying(IPD->getParameterKind()), /*Width=*/3); else VarDeclBits.addBits(0, /*Width=*/3); VarDeclBits.addBit(D->isObjCForDecl()); } Record.push_back(VarDeclBits); if (ModulesCodegen) Writer.ModularCodegenDecls.push_back(Writer.GetDeclRef(D)); if (D->hasAttr()) { BlockVarCopyInit Init = Writer.Context->getBlockVarCopyInit(D); Record.AddStmt(Init.getCopyExpr()); if (Init.getCopyExpr()) Record.push_back(Init.canThrow()); } enum { VarNotTemplate = 0, VarTemplate, StaticDataMemberSpecialization }; if (VarTemplateDecl *TemplD = D->getDescribedVarTemplate()) { Record.push_back(VarTemplate); Record.AddDeclRef(TemplD); } else if (MemberSpecializationInfo *SpecInfo = D->getMemberSpecializationInfo()) { Record.push_back(StaticDataMemberSpecialization); Record.AddDeclRef(SpecInfo->getInstantiatedFrom()); Record.push_back(SpecInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(SpecInfo->getPointOfInstantiation()); } else { Record.push_back(VarNotTemplate); } if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isTopLevelDeclInObjCContainer() && !needsAnonymousDeclarationNumber(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier && !D->hasExtInfo() && D->getFirstDecl() == D->getMostRecentDecl() && D->getKind() == Decl::Var && !D->isInline() && !D->isConstexpr() && !D->isInitCapture() && !D->isPreviousDeclInSameBlockScope() && !D->isEscapingByref() && !HasDeducedType && D->getStorageDuration() != SD_Static && !D->getDescribedVarTemplate() && !D->getMemberSpecializationInfo() && !D->isObjCForDecl() && !isa(D) && !D->isEscapingByref()) AbbrevToUse = Writer.getDeclVarAbbrev(); Code = serialization::DECL_VAR; } void ASTDeclWriter::VisitImplicitParamDecl(ImplicitParamDecl *D) { VisitVarDecl(D); Code = serialization::DECL_IMPLICIT_PARAM; } void ASTDeclWriter::VisitParmVarDecl(ParmVarDecl *D) { VisitVarDecl(D); // See the implementation of 
`ParmVarDecl::getParameterIndex()`, which may // exceed the size of the normal bitfield. So it may be better to not pack // these bits. Record.push_back(D->getFunctionScopeIndex()); BitsPacker ParmVarDeclBits; ParmVarDeclBits.addBit(D->isObjCMethodParameter()); ParmVarDeclBits.addBits(D->getFunctionScopeDepth(), /*BitsWidth=*/7); // FIXME: stable encoding ParmVarDeclBits.addBits(D->getObjCDeclQualifier(), /*BitsWidth=*/7); ParmVarDeclBits.addBit(D->isKNRPromoted()); ParmVarDeclBits.addBit(D->hasInheritedDefaultArg()); ParmVarDeclBits.addBit(D->hasUninstantiatedDefaultArg()); ParmVarDeclBits.addBit(D->getExplicitObjectParamThisLoc().isValid()); Record.push_back(ParmVarDeclBits); if (D->hasUninstantiatedDefaultArg()) Record.AddStmt(D->getUninstantiatedDefaultArg()); if (D->getExplicitObjectParamThisLoc().isValid()) Record.AddSourceLocation(D->getExplicitObjectParamThisLoc()); Code = serialization::DECL_PARM_VAR; // If the assumptions about the DECL_PARM_VAR abbrev are true, use it. Here // we dynamically check for the properties that we optimize for, but don't // know are true of all PARM_VAR_DECLs. if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->hasExtInfo() && D->getStorageClass() == 0 && !D->isInvalidDecl() && !D->isTopLevelDeclInObjCContainer() && D->getInitStyle() == VarDecl::CInit && // Can params have anything else? D->getInit() == nullptr) // No default expr. AbbrevToUse = Writer.getDeclParmVarAbbrev(); // Check things we know are true of *every* PARM_VAR_DECL, which is more than // just us assuming it. assert(!D->getTSCSpec() && "PARM_VAR_DECL can't use TLS"); assert(!D->isThisDeclarationADemotedDefinition() && "PARM_VAR_DECL can't be demoted definition."); assert(D->getAccess() == AS_none && "PARM_VAR_DECL can't be public/private"); assert(!D->isExceptionVariable() && "PARM_VAR_DECL can't be exception var"); assert(D->getPreviousDecl() == nullptr && "PARM_VAR_DECL can't be redecl"); assert(!D->isStaticDataMember() && "PARM_VAR_DECL can't be static data member"); } void ASTDeclWriter::VisitDecompositionDecl(DecompositionDecl *D) { // Record the number of bindings first to simplify deserialization. 
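// Illustrative sketch only: several writers in this file (the bindings count
// pushed just below, friend template parameter lists, captured params,
// using-pack expansions) emit a count before the common Visit* fields so the
// reader sees it early and can allocate the trailing storage in one shot
// before filling it in.  The toy reader below shows the shape of that
// protocol; all names are hypothetical.
#include <cstddef>
#include <cstdint>
#include <vector>

using ToyRecord = std::vector<uint64_t>;

inline void writeWithLeadingCount(ToyRecord &Record,
                                  const std::vector<uint64_t> &Elements) {
  Record.push_back(Elements.size()); // count first ...
  // ... the decl's common fields would be written here ...
  for (uint64_t E : Elements)        // ... then the elements themselves
    Record.push_back(E);
}

inline std::vector<uint64_t> readWithLeadingCount(const ToyRecord &Record,
                                                  size_t &Idx) {
  size_t NumElements = Record[Idx++];
  std::vector<uint64_t> Elements;
  Elements.reserve(NumElements); // allocate once, before reading each element
  for (size_t I = 0; I != NumElements; ++I)
    Elements.push_back(Record[Idx++]);
  return Elements;
}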
Record.push_back(D->bindings().size()); VisitVarDecl(D); for (auto *B : D->bindings()) Record.AddDeclRef(B); Code = serialization::DECL_DECOMPOSITION; } void ASTDeclWriter::VisitBindingDecl(BindingDecl *D) { VisitValueDecl(D); Record.AddStmt(D->getBinding()); Code = serialization::DECL_BINDING; } void ASTDeclWriter::VisitFileScopeAsmDecl(FileScopeAsmDecl *D) { VisitDecl(D); Record.AddStmt(D->getAsmString()); Record.AddSourceLocation(D->getRParenLoc()); Code = serialization::DECL_FILE_SCOPE_ASM; } void ASTDeclWriter::VisitTopLevelStmtDecl(TopLevelStmtDecl *D) { VisitDecl(D); Record.AddStmt(D->getStmt()); Code = serialization::DECL_TOP_LEVEL_STMT_DECL; } void ASTDeclWriter::VisitEmptyDecl(EmptyDecl *D) { VisitDecl(D); Code = serialization::DECL_EMPTY; } void ASTDeclWriter::VisitLifetimeExtendedTemporaryDecl( LifetimeExtendedTemporaryDecl *D) { VisitDecl(D); Record.AddDeclRef(D->getExtendingDecl()); Record.AddStmt(D->getTemporaryExpr()); Record.push_back(static_cast(D->getValue())); if (D->getValue()) Record.AddAPValue(*D->getValue()); Record.push_back(D->getManglingNumber()); Code = serialization::DECL_LIFETIME_EXTENDED_TEMPORARY; } void ASTDeclWriter::VisitBlockDecl(BlockDecl *D) { VisitDecl(D); Record.AddStmt(D->getBody()); Record.AddTypeSourceInfo(D->getSignatureAsWritten()); Record.push_back(D->param_size()); for (ParmVarDecl *P : D->parameters()) Record.AddDeclRef(P); Record.push_back(D->isVariadic()); Record.push_back(D->blockMissingReturnType()); Record.push_back(D->isConversionFromLambda()); Record.push_back(D->doesNotEscape()); Record.push_back(D->canAvoidCopyToHeap()); Record.push_back(D->capturesCXXThis()); Record.push_back(D->getNumCaptures()); for (const auto &capture : D->captures()) { Record.AddDeclRef(capture.getVariable()); unsigned flags = 0; if (capture.isByRef()) flags |= 1; if (capture.isNested()) flags |= 2; if (capture.hasCopyExpr()) flags |= 4; Record.push_back(flags); if (capture.hasCopyExpr()) Record.AddStmt(capture.getCopyExpr()); } Code = serialization::DECL_BLOCK; } void ASTDeclWriter::VisitCapturedDecl(CapturedDecl *CD) { Record.push_back(CD->getNumParams()); VisitDecl(CD); Record.push_back(CD->getContextParamPosition()); Record.push_back(CD->isNothrow() ? 1 : 0); // Body is stored by VisitCapturedStmt. 
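// Illustrative sketch only: VisitBlockDecl above folds three capture
// properties into one small mask (1 = by-ref, 2 = nested, 4 = has copy
// expression) before writing it; decoding just tests the same masks.  The
// struct and helper names below are hypothetical.
#include <cstdint>

struct ToyCaptureFlags {
  bool IsByRef = false;
  bool IsNested = false;
  bool HasCopyExpr = false;
};

inline uint64_t encodeToyCapture(const ToyCaptureFlags &C) {
  uint64_t Flags = 0;
  if (C.IsByRef)
    Flags |= 1;
  if (C.IsNested)
    Flags |= 2;
  if (C.HasCopyExpr)
    Flags |= 4;
  return Flags;
}

inline ToyCaptureFlags decodeToyCapture(uint64_t Flags) {
  return {(Flags & 1) != 0, (Flags & 2) != 0, (Flags & 4) != 0};
}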
for (unsigned I = 0; I < CD->getNumParams(); ++I) Record.AddDeclRef(CD->getParam(I)); Code = serialization::DECL_CAPTURED; } void ASTDeclWriter::VisitLinkageSpecDecl(LinkageSpecDecl *D) { static_assert(DeclContext::NumLinkageSpecDeclBits == 17, "You need to update the serializer after you change the" "LinkageSpecDeclBits"); VisitDecl(D); Record.push_back(llvm::to_underlying(D->getLanguage())); Record.AddSourceLocation(D->getExternLoc()); Record.AddSourceLocation(D->getRBraceLoc()); Code = serialization::DECL_LINKAGE_SPEC; } void ASTDeclWriter::VisitExportDecl(ExportDecl *D) { VisitDecl(D); Record.AddSourceLocation(D->getRBraceLoc()); Code = serialization::DECL_EXPORT; } void ASTDeclWriter::VisitLabelDecl(LabelDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Code = serialization::DECL_LABEL; } void ASTDeclWriter::VisitNamespaceDecl(NamespaceDecl *D) { VisitRedeclarable(D); VisitNamedDecl(D); BitsPacker NamespaceDeclBits; NamespaceDeclBits.addBit(D->isInline()); NamespaceDeclBits.addBit(D->isNested()); Record.push_back(NamespaceDeclBits); Record.AddSourceLocation(D->getBeginLoc()); Record.AddSourceLocation(D->getRBraceLoc()); if (D->isOriginalNamespace()) Record.AddDeclRef(D->getAnonymousNamespace()); Code = serialization::DECL_NAMESPACE; if (Writer.hasChain() && D->isAnonymousNamespace() && D == D->getMostRecentDecl()) { // This is a most recent reopening of the anonymous namespace. If its parent // is in a previous PCH (or is the TU), mark that parent for update, because // the original namespace always points to the latest re-opening of its // anonymous namespace. Decl *Parent = cast( D->getParent()->getRedeclContext()->getPrimaryContext()); if (Parent->isFromASTFile() || isa(Parent)) { Writer.DeclUpdates[Parent].push_back( ASTWriter::DeclUpdate(UPD_CXX_ADDED_ANONYMOUS_NAMESPACE, D)); } } } void ASTDeclWriter::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) { VisitRedeclarable(D); VisitNamedDecl(D); Record.AddSourceLocation(D->getNamespaceLoc()); Record.AddSourceLocation(D->getTargetNameLoc()); Record.AddNestedNameSpecifierLoc(D->getQualifierLoc()); Record.AddDeclRef(D->getNamespace()); Code = serialization::DECL_NAMESPACE_ALIAS; } void ASTDeclWriter::VisitUsingDecl(UsingDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getUsingLoc()); Record.AddNestedNameSpecifierLoc(D->getQualifierLoc()); Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName()); Record.AddDeclRef(D->FirstUsingShadow.getPointer()); Record.push_back(D->hasTypename()); Record.AddDeclRef(Context.getInstantiatedFromUsingDecl(D)); Code = serialization::DECL_USING; } void ASTDeclWriter::VisitUsingEnumDecl(UsingEnumDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getUsingLoc()); Record.AddSourceLocation(D->getEnumLoc()); Record.AddTypeSourceInfo(D->getEnumType()); Record.AddDeclRef(D->FirstUsingShadow.getPointer()); Record.AddDeclRef(Context.getInstantiatedFromUsingEnumDecl(D)); Code = serialization::DECL_USING_ENUM; } void ASTDeclWriter::VisitUsingPackDecl(UsingPackDecl *D) { Record.push_back(D->NumExpansions); VisitNamedDecl(D); Record.AddDeclRef(D->getInstantiatedFromUsingDecl()); for (auto *E : D->expansions()) Record.AddDeclRef(E); Code = serialization::DECL_USING_PACK; } void ASTDeclWriter::VisitUsingShadowDecl(UsingShadowDecl *D) { VisitRedeclarable(D); VisitNamedDecl(D); Record.AddDeclRef(D->getTargetDecl()); Record.push_back(D->getIdentifierNamespace()); Record.AddDeclRef(D->UsingOrNextShadow); Record.AddDeclRef(Context.getInstantiatedFromUsingShadowDecl(D)); if 
(D->getDeclContext() == D->getLexicalDeclContext() && D->getFirstDecl() == D->getMostRecentDecl() && !D->hasAttrs() && !needsAnonymousDeclarationNumber(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclUsingShadowAbbrev(); Code = serialization::DECL_USING_SHADOW; } void ASTDeclWriter::VisitConstructorUsingShadowDecl( ConstructorUsingShadowDecl *D) { VisitUsingShadowDecl(D); Record.AddDeclRef(D->NominatedBaseClassShadowDecl); Record.AddDeclRef(D->ConstructedBaseClassShadowDecl); Record.push_back(D->IsVirtual); Code = serialization::DECL_CONSTRUCTOR_USING_SHADOW; } void ASTDeclWriter::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getUsingLoc()); Record.AddSourceLocation(D->getNamespaceKeyLocation()); Record.AddNestedNameSpecifierLoc(D->getQualifierLoc()); Record.AddDeclRef(D->getNominatedNamespace()); Record.AddDeclRef(dyn_cast(D->getCommonAncestor())); Code = serialization::DECL_USING_DIRECTIVE; } void ASTDeclWriter::VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D) { VisitValueDecl(D); Record.AddSourceLocation(D->getUsingLoc()); Record.AddNestedNameSpecifierLoc(D->getQualifierLoc()); Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName()); Record.AddSourceLocation(D->getEllipsisLoc()); Code = serialization::DECL_UNRESOLVED_USING_VALUE; } void ASTDeclWriter::VisitUnresolvedUsingTypenameDecl( UnresolvedUsingTypenameDecl *D) { VisitTypeDecl(D); Record.AddSourceLocation(D->getTypenameLoc()); Record.AddNestedNameSpecifierLoc(D->getQualifierLoc()); Record.AddSourceLocation(D->getEllipsisLoc()); Code = serialization::DECL_UNRESOLVED_USING_TYPENAME; } void ASTDeclWriter::VisitUnresolvedUsingIfExistsDecl( UnresolvedUsingIfExistsDecl *D) { VisitNamedDecl(D); Code = serialization::DECL_UNRESOLVED_USING_IF_EXISTS; } void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { VisitRecordDecl(D); enum { CXXRecNotTemplate = 0, CXXRecTemplate, CXXRecMemberSpecialization, CXXLambda }; if (ClassTemplateDecl *TemplD = D->getDescribedClassTemplate()) { Record.push_back(CXXRecTemplate); Record.AddDeclRef(TemplD); } else if (MemberSpecializationInfo *MSInfo = D->getMemberSpecializationInfo()) { Record.push_back(CXXRecMemberSpecialization); Record.AddDeclRef(MSInfo->getInstantiatedFrom()); Record.push_back(MSInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(MSInfo->getPointOfInstantiation()); } else if (D->isLambda()) { // For a lambda, we need some information early for merging. Record.push_back(CXXLambda); if (auto *Context = D->getLambdaContextDecl()) { Record.AddDeclRef(Context); Record.push_back(D->getLambdaIndexInContext()); } else { Record.push_back(0); } } else { Record.push_back(CXXRecNotTemplate); } Record.push_back(D->isThisDeclarationADefinition()); if (D->isThisDeclarationADefinition()) Record.AddCXXDefinitionData(D); // Store (what we currently believe to be) the key function to avoid // deserializing every method so we can compute it. if (D->isCompleteDefinition()) Record.AddDeclRef(Context.getCurrentKeyFunction(D)); Code = serialization::DECL_CXX_RECORD; } void ASTDeclWriter::VisitCXXMethodDecl(CXXMethodDecl *D) { VisitFunctionDecl(D); if (D->isCanonicalDecl()) { Record.push_back(D->size_overridden_methods()); for (const CXXMethodDecl *MD : D->overridden_methods()) Record.AddDeclRef(MD); } else { // We only need to record overridden methods once for the canonical decl. 
Record.push_back(0); } if (D->getDeclContext() == D->getLexicalDeclContext() && D->getFirstDecl() == D->getMostRecentDecl() && !D->isInvalidDecl() && !D->hasAttrs() && !D->isTopLevelDeclInObjCContainer() && D->getDeclName().getNameKind() == DeclarationName::Identifier && !shouldSkipCheckingODR(D) && !D->hasExtInfo() && !D->isExplicitlyDefaulted()) { if (D->getTemplatedKind() == FunctionDecl::TK_NonTemplate || D->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate || D->getTemplatedKind() == FunctionDecl::TK_MemberSpecialization || D->getTemplatedKind() == FunctionDecl::TK_DependentNonTemplate) AbbrevToUse = Writer.getDeclCXXMethodAbbrev(D->getTemplatedKind()); else if (D->getTemplatedKind() == FunctionDecl::TK_FunctionTemplateSpecialization) { FunctionTemplateSpecializationInfo *FTSInfo = D->getTemplateSpecializationInfo(); if (FTSInfo->TemplateArguments->size() == 1) { const TemplateArgument &TA = FTSInfo->TemplateArguments->get(0); if (TA.getKind() == TemplateArgument::Type && !FTSInfo->TemplateArgumentsAsWritten && !FTSInfo->getMemberSpecializationInfo()) AbbrevToUse = Writer.getDeclCXXMethodAbbrev(D->getTemplatedKind()); } } else if (D->getTemplatedKind() == FunctionDecl::TK_DependentFunctionTemplateSpecialization) { DependentFunctionTemplateSpecializationInfo *DFTSInfo = D->getDependentSpecializationInfo(); if (!DFTSInfo->TemplateArgumentsAsWritten) AbbrevToUse = Writer.getDeclCXXMethodAbbrev(D->getTemplatedKind()); } } Code = serialization::DECL_CXX_METHOD; } void ASTDeclWriter::VisitCXXConstructorDecl(CXXConstructorDecl *D) { static_assert(DeclContext::NumCXXConstructorDeclBits == 64, "You need to update the serializer after you change the " "CXXConstructorDeclBits"); Record.push_back(D->getTrailingAllocKind()); addExplicitSpecifier(D->getExplicitSpecifier(), Record); if (auto Inherited = D->getInheritedConstructor()) { Record.AddDeclRef(Inherited.getShadowDecl()); Record.AddDeclRef(Inherited.getConstructor()); } VisitCXXMethodDecl(D); Code = serialization::DECL_CXX_CONSTRUCTOR; } void ASTDeclWriter::VisitCXXDestructorDecl(CXXDestructorDecl *D) { VisitCXXMethodDecl(D); Record.AddDeclRef(D->getOperatorDelete()); if (D->getOperatorDelete()) Record.AddStmt(D->getOperatorDeleteThisArg()); Code = serialization::DECL_CXX_DESTRUCTOR; } void ASTDeclWriter::VisitCXXConversionDecl(CXXConversionDecl *D) { addExplicitSpecifier(D->getExplicitSpecifier(), Record); VisitCXXMethodDecl(D); Code = serialization::DECL_CXX_CONVERSION; } void ASTDeclWriter::VisitImportDecl(ImportDecl *D) { VisitDecl(D); Record.push_back(Writer.getSubmoduleID(D->getImportedModule())); ArrayRef IdentifierLocs = D->getIdentifierLocs(); Record.push_back(!IdentifierLocs.empty()); if (IdentifierLocs.empty()) { Record.AddSourceLocation(D->getEndLoc()); Record.push_back(1); } else { for (unsigned I = 0, N = IdentifierLocs.size(); I != N; ++I) Record.AddSourceLocation(IdentifierLocs[I]); Record.push_back(IdentifierLocs.size()); } // Note: the number of source locations must always be the last element in // the record. Code = serialization::DECL_IMPORT; } void ASTDeclWriter::VisitAccessSpecDecl(AccessSpecDecl *D) { VisitDecl(D); Record.AddSourceLocation(D->getColonLoc()); Code = serialization::DECL_ACCESS_SPEC; } void ASTDeclWriter::VisitFriendDecl(FriendDecl *D) { // Record the number of friend type template parameter lists here // so as to simplify memory allocation during deserialization. 
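// Illustrative sketch only: the AbbrevToUse selection in VisitCXXMethodDecl
// above (and in the other Visit* methods in this file) follows one pattern: a
// compact abbreviation hard-codes a set of "boring" field values, so it may
// only be chosen when the declaration really matches every one of those
// assumptions; any mismatch falls back to the fully general encoding.  The
// predicate below is a hypothetical reduction of that idea.
struct ToyDeclShape {
  bool HasAttrs = false;
  bool IsImplicit = false;
  bool IsInvalid = false;
  bool HasSeparateLexicalContext = false;
};

// True only when every field the compact abbreviation assumes to be absent
// really is absent.
inline bool canUseCompactAbbrev(const ToyDeclShape &D) {
  return !D.HasAttrs && !D.IsImplicit && !D.IsInvalid &&
         !D.HasSeparateLexicalContext;
}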
Record.push_back(D->NumTPLists); VisitDecl(D); bool hasFriendDecl = D->Friend.is(); Record.push_back(hasFriendDecl); if (hasFriendDecl) Record.AddDeclRef(D->getFriendDecl()); else Record.AddTypeSourceInfo(D->getFriendType()); for (unsigned i = 0; i < D->NumTPLists; ++i) Record.AddTemplateParameterList(D->getFriendTypeTemplateParameterList(i)); Record.AddDeclRef(D->getNextFriend()); Record.push_back(D->UnsupportedFriend); Record.AddSourceLocation(D->FriendLoc); Code = serialization::DECL_FRIEND; } void ASTDeclWriter::VisitFriendTemplateDecl(FriendTemplateDecl *D) { VisitDecl(D); Record.push_back(D->getNumTemplateParameters()); for (unsigned i = 0, e = D->getNumTemplateParameters(); i != e; ++i) Record.AddTemplateParameterList(D->getTemplateParameterList(i)); Record.push_back(D->getFriendDecl() != nullptr); if (D->getFriendDecl()) Record.AddDeclRef(D->getFriendDecl()); else Record.AddTypeSourceInfo(D->getFriendType()); Record.AddSourceLocation(D->getFriendLoc()); Code = serialization::DECL_FRIEND_TEMPLATE; } void ASTDeclWriter::VisitTemplateDecl(TemplateDecl *D) { VisitNamedDecl(D); Record.AddTemplateParameterList(D->getTemplateParameters()); Record.AddDeclRef(D->getTemplatedDecl()); } void ASTDeclWriter::VisitConceptDecl(ConceptDecl *D) { VisitTemplateDecl(D); Record.AddStmt(D->getConstraintExpr()); Code = serialization::DECL_CONCEPT; } void ASTDeclWriter::VisitImplicitConceptSpecializationDecl( ImplicitConceptSpecializationDecl *D) { Record.push_back(D->getTemplateArguments().size()); VisitDecl(D); for (const TemplateArgument &Arg : D->getTemplateArguments()) Record.AddTemplateArgument(Arg); Code = serialization::DECL_IMPLICIT_CONCEPT_SPECIALIZATION; } void ASTDeclWriter::VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D) { Code = serialization::DECL_REQUIRES_EXPR_BODY; } void ASTDeclWriter::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) { VisitRedeclarable(D); // Emit data to initialize CommonOrPrev before VisitTemplateDecl so that // getCommonPtr() can be used while this is still initializing. if (D->isFirstDecl()) { // This declaration owns the 'common' pointer, so serialize that data now. Record.AddDeclRef(D->getInstantiatedFromMemberTemplate()); if (D->getInstantiatedFromMemberTemplate()) Record.push_back(D->isMemberSpecialization()); } VisitTemplateDecl(D); Record.push_back(D->getIdentifierNamespace()); } void ASTDeclWriter::VisitClassTemplateDecl(ClassTemplateDecl *D) { VisitRedeclarableTemplateDecl(D); if (D->isFirstDecl()) AddTemplateSpecializations(D); Code = serialization::DECL_CLASS_TEMPLATE; } void ASTDeclWriter::VisitClassTemplateSpecializationDecl( ClassTemplateSpecializationDecl *D) { RegisterTemplateSpecialization(D->getSpecializedTemplate(), D); VisitCXXRecordDecl(D); llvm::PointerUnion InstFrom = D->getSpecializedTemplateOrPartial(); if (Decl *InstFromD = InstFrom.dyn_cast()) { Record.AddDeclRef(InstFromD); } else { Record.AddDeclRef(InstFrom.get()); Record.AddTemplateArgumentList(&D->getTemplateInstantiationArgs()); } Record.AddTemplateArgumentList(&D->getTemplateArgs()); Record.AddSourceLocation(D->getPointOfInstantiation()); Record.push_back(D->getSpecializationKind()); Record.push_back(D->isCanonicalDecl()); if (D->isCanonicalDecl()) { // When reading, we'll add it to the folding set of the following template. Record.AddDeclRef(D->getSpecializedTemplate()->getCanonicalDecl()); } // Explicit info. 
Record.AddTypeSourceInfo(D->getTypeAsWritten()); if (D->getTypeAsWritten()) { Record.AddSourceLocation(D->getExternLoc()); Record.AddSourceLocation(D->getTemplateKeywordLoc()); } Code = serialization::DECL_CLASS_TEMPLATE_SPECIALIZATION; } void ASTDeclWriter::VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D) { Record.AddTemplateParameterList(D->getTemplateParameters()); Record.AddASTTemplateArgumentListInfo(D->getTemplateArgsAsWritten()); VisitClassTemplateSpecializationDecl(D); // These are read/set from/to the first declaration. if (D->getPreviousDecl() == nullptr) { Record.AddDeclRef(D->getInstantiatedFromMember()); Record.push_back(D->isMemberSpecialization()); } Code = serialization::DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION; } void ASTDeclWriter::VisitVarTemplateDecl(VarTemplateDecl *D) { VisitRedeclarableTemplateDecl(D); if (D->isFirstDecl()) AddTemplateSpecializations(D); Code = serialization::DECL_VAR_TEMPLATE; } void ASTDeclWriter::VisitVarTemplateSpecializationDecl( VarTemplateSpecializationDecl *D) { RegisterTemplateSpecialization(D->getSpecializedTemplate(), D); llvm::PointerUnion InstFrom = D->getSpecializedTemplateOrPartial(); if (Decl *InstFromD = InstFrom.dyn_cast()) { Record.AddDeclRef(InstFromD); } else { Record.AddDeclRef(InstFrom.get()); Record.AddTemplateArgumentList(&D->getTemplateInstantiationArgs()); } // Explicit info. Record.AddTypeSourceInfo(D->getTypeAsWritten()); if (D->getTypeAsWritten()) { Record.AddSourceLocation(D->getExternLoc()); Record.AddSourceLocation(D->getTemplateKeywordLoc()); } Record.AddTemplateArgumentList(&D->getTemplateArgs()); Record.AddSourceLocation(D->getPointOfInstantiation()); Record.push_back(D->getSpecializationKind()); Record.push_back(D->IsCompleteDefinition); VisitVarDecl(D); Record.push_back(D->isCanonicalDecl()); if (D->isCanonicalDecl()) { // When reading, we'll add it to the folding set of the following template. Record.AddDeclRef(D->getSpecializedTemplate()->getCanonicalDecl()); } Code = serialization::DECL_VAR_TEMPLATE_SPECIALIZATION; } void ASTDeclWriter::VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D) { Record.AddTemplateParameterList(D->getTemplateParameters()); Record.AddASTTemplateArgumentListInfo(D->getTemplateArgsAsWritten()); VisitVarTemplateSpecializationDecl(D); // These are read/set from/to the first declaration. 
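// Illustrative sketch only: as the comment above says, the
// instantiated-from-member information is state shared by the whole
// redeclaration chain, so it is written only for the first declaration
// (getPreviousDecl() == nullptr) and later redeclarations recover it from the
// shared storage rather than re-serializing it.  A hypothetical restatement
// of that ownership rule:
struct ToyPartialSpecialization {
  const ToyPartialSpecialization *PreviousDecl = nullptr;

  bool isFirstDecl() const { return PreviousDecl == nullptr; }
  // Chain-wide data is emitted exactly once, by the first declaration.
  bool shouldWriteInstantiatedFromMember() const { return isFirstDecl(); }
};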
if (D->getPreviousDecl() == nullptr) { Record.AddDeclRef(D->getInstantiatedFromMember()); Record.push_back(D->isMemberSpecialization()); } Code = serialization::DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION; } void ASTDeclWriter::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { VisitRedeclarableTemplateDecl(D); if (D->isFirstDecl()) AddTemplateSpecializations(D); Code = serialization::DECL_FUNCTION_TEMPLATE; } void ASTDeclWriter::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) { Record.push_back(D->hasTypeConstraint()); VisitTypeDecl(D); Record.push_back(D->wasDeclaredWithTypename()); const TypeConstraint *TC = D->getTypeConstraint(); assert((bool)TC == D->hasTypeConstraint()); if (TC) { auto *CR = TC->getConceptReference(); Record.push_back(CR != nullptr); if (CR) Record.AddConceptReference(CR); Record.AddStmt(TC->getImmediatelyDeclaredConstraint()); Record.push_back(D->isExpandedParameterPack()); if (D->isExpandedParameterPack()) Record.push_back(D->getNumExpansionParameters()); } bool OwnsDefaultArg = D->hasDefaultArgument() && !D->defaultArgumentWasInherited(); Record.push_back(OwnsDefaultArg); if (OwnsDefaultArg) Record.AddTypeSourceInfo(D->getDefaultArgumentInfo()); if (!TC && !OwnsDefaultArg && D->getDeclContext() == D->getLexicalDeclContext() && !D->isInvalidDecl() && !D->hasAttrs() && !D->isTopLevelDeclInObjCContainer() && !D->isImplicit() && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclTemplateTypeParmAbbrev(); Code = serialization::DECL_TEMPLATE_TYPE_PARM; } void ASTDeclWriter::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) { // For an expanded parameter pack, record the number of expansion types here // so that it's easier for deserialization to allocate the right amount of // memory. Expr *TypeConstraint = D->getPlaceholderTypeConstraint(); Record.push_back(!!TypeConstraint); if (D->isExpandedParameterPack()) Record.push_back(D->getNumExpansionTypes()); VisitDeclaratorDecl(D); // TemplateParmPosition. Record.push_back(D->getDepth()); Record.push_back(D->getPosition()); if (TypeConstraint) Record.AddStmt(TypeConstraint); if (D->isExpandedParameterPack()) { for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) { Record.AddTypeRef(D->getExpansionType(I)); Record.AddTypeSourceInfo(D->getExpansionTypeSourceInfo(I)); } Code = serialization::DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK; } else { // Rest of NonTypeTemplateParmDecl. Record.push_back(D->isParameterPack()); bool OwnsDefaultArg = D->hasDefaultArgument() && !D->defaultArgumentWasInherited(); Record.push_back(OwnsDefaultArg); if (OwnsDefaultArg) Record.AddStmt(D->getDefaultArgument()); Code = serialization::DECL_NON_TYPE_TEMPLATE_PARM; } } void ASTDeclWriter::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) { // For an expanded parameter pack, record the number of expansion types here // so that it's easier for deserialization to allocate the right amount of // memory. if (D->isExpandedParameterPack()) Record.push_back(D->getNumExpansionTemplateParameters()); VisitTemplateDecl(D); // TemplateParmPosition. Record.push_back(D->getDepth()); Record.push_back(D->getPosition()); if (D->isExpandedParameterPack()) { for (unsigned I = 0, N = D->getNumExpansionTemplateParameters(); I != N; ++I) Record.AddTemplateParameterList(D->getExpansionTemplateParameters(I)); Code = serialization::DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK; } else { // Rest of TemplateTemplateParmDecl. 
Record.push_back(D->isParameterPack()); bool OwnsDefaultArg = D->hasDefaultArgument() && !D->defaultArgumentWasInherited(); Record.push_back(OwnsDefaultArg); if (OwnsDefaultArg) Record.AddTemplateArgumentLoc(D->getDefaultArgument()); Code = serialization::DECL_TEMPLATE_TEMPLATE_PARM; } } void ASTDeclWriter::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) { VisitRedeclarableTemplateDecl(D); Code = serialization::DECL_TYPE_ALIAS_TEMPLATE; } void ASTDeclWriter::VisitStaticAssertDecl(StaticAssertDecl *D) { VisitDecl(D); Record.AddStmt(D->getAssertExpr()); Record.push_back(D->isFailed()); Record.AddStmt(D->getMessage()); Record.AddSourceLocation(D->getRParenLoc()); Code = serialization::DECL_STATIC_ASSERT; } /// Emit the DeclContext part of a declaration context decl. void ASTDeclWriter::VisitDeclContext(DeclContext *DC) { static_assert(DeclContext::NumDeclContextBits == 13, "You need to update the serializer after you change the " "DeclContextBits"); Record.AddOffset(Writer.WriteDeclContextLexicalBlock(Context, DC)); Record.AddOffset(Writer.WriteDeclContextVisibleBlock(Context, DC)); } const Decl *ASTWriter::getFirstLocalDecl(const Decl *D) { assert(IsLocalDecl(D) && "expected a local declaration"); const Decl *Canon = D->getCanonicalDecl(); if (IsLocalDecl(Canon)) return Canon; const Decl *&CacheEntry = FirstLocalDeclCache[Canon]; if (CacheEntry) return CacheEntry; for (const Decl *Redecl = D; Redecl; Redecl = Redecl->getPreviousDecl()) if (IsLocalDecl(Redecl)) D = Redecl; return CacheEntry = D; } template void ASTDeclWriter::VisitRedeclarable(Redeclarable *D) { T *First = D->getFirstDecl(); T *MostRecent = First->getMostRecentDecl(); T *DAsT = static_cast(D); if (MostRecent != First) { assert(isRedeclarableDeclKind(DAsT->getKind()) && "Not considered redeclarable?"); Record.AddDeclRef(First); // Write out a list of local redeclarations of this declaration if it's the // first local declaration in the chain. const Decl *FirstLocal = Writer.getFirstLocalDecl(DAsT); if (DAsT == FirstLocal) { // Emit a list of all imported first declarations so that we can be sure // that all redeclarations visible to this module are before D in the // redecl chain. unsigned I = Record.size(); Record.push_back(0); if (Writer.Chain) AddFirstDeclFromEachModule(DAsT, /*IncludeLocal*/false); // This is the number of imported first declarations + 1. Record[I] = Record.size() - I; // Collect the set of local redeclarations of this declaration, from // newest to oldest. ASTWriter::RecordData LocalRedecls; ASTRecordWriter LocalRedeclWriter(Record, LocalRedecls); for (const Decl *Prev = FirstLocal->getMostRecentDecl(); Prev != FirstLocal; Prev = Prev->getPreviousDecl()) if (!Prev->isFromASTFile()) LocalRedeclWriter.AddDeclRef(Prev); // If we have any redecls, write them now as a separate record preceding // the declaration itself. if (LocalRedecls.empty()) Record.push_back(0); else Record.AddOffset(LocalRedeclWriter.Emit(LOCAL_REDECLARATIONS)); } else { Record.push_back(0); Record.AddDeclRef(FirstLocal); } // Make sure that we serialize both the previous and the most-recent // declarations, which (transitively) ensures that all declarations in the // chain get serialized. // // FIXME: This is not correct; when we reach an imported declaration we // won't emit its previous declaration. (void)Writer.GetDeclRef(D->getPreviousDecl()); (void)Writer.GetDeclRef(MostRecent); } else { // We use the sentinel value 0 to indicate an only declaration. 
Record.push_back(0); } } void ASTDeclWriter::VisitHLSLBufferDecl(HLSLBufferDecl *D) { VisitNamedDecl(D); VisitDeclContext(D); Record.push_back(D->isCBuffer()); Record.AddSourceLocation(D->getLocStart()); Record.AddSourceLocation(D->getLBraceLoc()); Record.AddSourceLocation(D->getRBraceLoc()); Code = serialization::DECL_HLSL_BUFFER; } void ASTDeclWriter::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) { Record.writeOMPChildren(D->Data); VisitDecl(D); Code = serialization::DECL_OMP_THREADPRIVATE; } void ASTDeclWriter::VisitOMPAllocateDecl(OMPAllocateDecl *D) { Record.writeOMPChildren(D->Data); VisitDecl(D); Code = serialization::DECL_OMP_ALLOCATE; } void ASTDeclWriter::VisitOMPRequiresDecl(OMPRequiresDecl *D) { Record.writeOMPChildren(D->Data); VisitDecl(D); Code = serialization::DECL_OMP_REQUIRES; } void ASTDeclWriter::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) { static_assert(DeclContext::NumOMPDeclareReductionDeclBits == 15, "You need to update the serializer after you change the " "NumOMPDeclareReductionDeclBits"); VisitValueDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Record.AddStmt(D->getCombinerIn()); Record.AddStmt(D->getCombinerOut()); Record.AddStmt(D->getCombiner()); Record.AddStmt(D->getInitOrig()); Record.AddStmt(D->getInitPriv()); Record.AddStmt(D->getInitializer()); Record.push_back(llvm::to_underlying(D->getInitializerKind())); Record.AddDeclRef(D->getPrevDeclInScope()); Code = serialization::DECL_OMP_DECLARE_REDUCTION; } void ASTDeclWriter::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { Record.writeOMPChildren(D->Data); VisitValueDecl(D); Record.AddDeclarationName(D->getVarName()); Record.AddDeclRef(D->getPrevDeclInScope()); Code = serialization::DECL_OMP_DECLARE_MAPPER; } void ASTDeclWriter::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) { VisitVarDecl(D); Code = serialization::DECL_OMP_CAPTUREDEXPR; } //===----------------------------------------------------------------------===// // ASTWriter Implementation //===----------------------------------------------------------------------===// namespace { template std::shared_ptr getFunctionDeclAbbrev(serialization::DeclCode Code) { using namespace llvm; auto Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(Code)); // RedeclarableDecl Abv->Add(BitCodeAbbrevOp(0)); // CanonicalDecl Abv->Add(BitCodeAbbrevOp(Kind)); if constexpr (Kind == FunctionDecl::TK_NonTemplate) { } else if constexpr (Kind == FunctionDecl::TK_FunctionTemplate) { // DescribedFunctionTemplate Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); } else if constexpr (Kind == FunctionDecl::TK_DependentNonTemplate) { // Instantiated From Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); } else if constexpr (Kind == FunctionDecl::TK_MemberSpecialization) { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InstantiatedFrom Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // TemplateSpecializationKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Specialized Location } else if constexpr (Kind == FunctionDecl::TK_FunctionTemplateSpecialization) { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Template Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // TemplateSpecializationKind Abv->Add(BitCodeAbbrevOp(1)); // Template Argument Size Abv->Add(BitCodeAbbrevOp(TemplateArgument::Type)); // Template Argument Kind Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Template Argument Type Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Is Defaulted Abv->Add(BitCodeAbbrevOp(0)); // 
TemplateArgumentsAsWritten Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation Abv->Add(BitCodeAbbrevOp(0)); Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Canonical Decl of template } else if constexpr (Kind == FunctionDecl:: TK_DependentFunctionTemplateSpecialization) { // Candidates of specialization Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(0)); // TemplateArgumentsAsWritten } else { llvm_unreachable("Unknown templated kind?"); } // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // Packed DeclBits: ModuleOwnershipKind, // isUsed, isReferenced, AccessSpecifier, // isImplicit // // The following bits should be 0: // HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, // isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(DeclarationName::Identifier)); // NameKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Identifier Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // ValueDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclaratorDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InnerLocStart Abv->Add(BitCodeAbbrevOp(0)); // HasExtInfo Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TSIType // FunctionDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 11)); // IDNS Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, - 27)); // Packed Function Bits: StorageClass, Inline, InlineSpecified, + 28)); // Packed Function Bits: StorageClass, Inline, InlineSpecified, // VirtualAsWritten, Pure, HasInheritedProto, HasWrittenProto, // Deleted, Trivial, TrivialForCall, Defaulted, ExplicitlyDefaulted, // IsIneligibleOrNotSelected, ImplicitReturnZero, Constexpr, // UsesSEHTry, SkippedBody, MultiVersion, LateParsed, - // FriendConstraintRefersToEnclosingTemplate, Linkage + // FriendConstraintRefersToEnclosingTemplate, Linkage, + // ShouldSkipCheckingODR Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LocEnd Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // ODRHash // This Array slurps the rest of the record. Fortunately we want to encode // (nearly) all the remaining (variable number of) fields in the same way. // // This is: // NumParams and Params[] from FunctionDecl, and // NumOverriddenMethods, OverriddenMethods[] from CXXMethodDecl. // // Add an AbbrevOp for 'size then elements' and use it here. 
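// Illustrative sketch only: the Fixed(28) operand above has to stay in sync
// with the number of bits VisitFunctionDecl packs into FunctionDeclBits.  A
// Fixed(N) abbreviation operand can carry only an N-bit value, which is why
// adding the ShouldSkipCheckingODR bit widens this operand from 27 to 28 (and
// the DECL_ENUM packed-bits operand from 19 to 20 further down).  The
// constants below are a hypothetical restatement of that invariant, not code
// from the patch.
#include <cstdint>

constexpr unsigned NumPackedFunctionDeclBits = 28;    // bits the writer packs
constexpr unsigned FunctionDeclBitsOperandWidth = 28; // Fixed(N) width above

static_assert(NumPackedFunctionDeclBits <= FunctionDeclBitsOperandWidth,
              "packed FunctionDecl bits would not fit the Fixed operand");

// Largest value an N-bit Fixed operand can represent.
constexpr uint64_t maxFixedOperandValue(unsigned N) {
  return N >= 64 ? ~uint64_t(0) : (uint64_t(1) << N) - 1;
}
static_assert(maxFixedOperandValue(27) < maxFixedOperandValue(28),
              "one extra packed bit needs one extra bit in the operand");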
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); return Abv; } template std::shared_ptr getCXXMethodAbbrev() { return getFunctionDeclAbbrev(serialization::DECL_CXX_METHOD); } } // namespace void ASTWriter::WriteDeclAbbrevs() { using namespace llvm; std::shared_ptr Abv; // Abbreviation for DECL_FIELD Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_FIELD)); // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // Packed DeclBits: ModuleOwnershipKind, // isUsed, isReferenced, AccessSpecifier, // // The following bits should be 0: // isImplicit, HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, // isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // ValueDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclaratorDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InnerStartLoc Abv->Add(BitCodeAbbrevOp(0)); // hasExtInfo Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TSIType // FieldDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isMutable Abv->Add(BitCodeAbbrevOp(0)); // StorageKind // Type Source Info Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc DeclFieldAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_OBJC_IVAR Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_OBJC_IVAR)); // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 12)); // Packed DeclBits: HasStandaloneLexicalDC, // isInvalidDecl, HasAttrs, isImplicit, isUsed, // isReferenced, TopLevelDeclInObjCContainer, // AccessSpecifier, ModuleOwnershipKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // ValueDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclaratorDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InnerStartLoc Abv->Add(BitCodeAbbrevOp(0)); // hasExtInfo Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TSIType // FieldDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isMutable Abv->Add(BitCodeAbbrevOp(0)); // InitStyle // ObjC Ivar Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getAccessControl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getSynthesize // Type Source Info Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc DeclObjCIvarAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_ENUM Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_ENUM)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // Packed DeclBits: ModuleOwnershipKind, // isUsed, isReferenced, AccessSpecifier, // // The following bits should be 0: // isImplicit, HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, // isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext 
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // TypeDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type Ref // TagDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // IdentifierNamespace Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 9)); // Packed Tag Decl Bits: getTagKind, isCompleteDefinition, // EmbeddedInDeclarator, IsFreeStanding, // isCompleteDefinitionRequired, ExtInfoKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation // EnumDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddTypeRef Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // IntegerType Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getPromotionType - Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 19)); // Enum Decl Bits + Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 20)); // Enum Decl Bits Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));// ODRHash Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InstantiatedMembEnum // DC Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LexicalOffset Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // VisibleOffset DeclEnumAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_RECORD Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_RECORD)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // Packed DeclBits: ModuleOwnershipKind, // isUsed, isReferenced, AccessSpecifier, // // The following bits should be 0: // isImplicit, HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, // isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // TypeDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type Ref // TagDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // IdentifierNamespace Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 9)); // Packed Tag Decl Bits: getTagKind, isCompleteDefinition, // EmbeddedInDeclarator, IsFreeStanding, // isCompleteDefinitionRequired, ExtInfoKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation // RecordDecl Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 13)); // Packed Record Decl Bits: FlexibleArrayMember, // AnonymousStructUnion, hasObjectMember, hasVolatileMember, // isNonTrivialToPrimitiveDefaultInitialize, // isNonTrivialToPrimitiveCopy, isNonTrivialToPrimitiveDestroy, // hasNonTrivialToPrimitiveDefaultInitializeCUnion, // hasNonTrivialToPrimitiveDestructCUnion, // hasNonTrivialToPrimitiveCopyCUnion, isParamDestroyedInCallee, // getArgPassingRestrictions // ODRHash Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 26)); // DC Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LexicalOffset Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // VisibleOffset 
DeclRecordAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_PARM_VAR Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_PARM_VAR)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // Packed DeclBits: ModuleOwnershipKind, isUsed, // isReferenced, AccessSpecifier, // HasStandaloneLexicalDC, HasAttrs, isImplicit, // TopLevelDeclInObjCContainer, // isInvalidDecl, Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // ValueDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclaratorDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InnerStartLoc Abv->Add(BitCodeAbbrevOp(0)); // hasExtInfo Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TSIType // VarDecl Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 12)); // Packed Var Decl bits: SClass, TSCSpec, InitStyle, // isARCPseudoStrong, Linkage, ModulesCodegen Abv->Add(BitCodeAbbrevOp(0)); // VarKind (local enum) // ParmVarDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ScopeIndex Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 19)); // Packed Parm Var Decl bits: IsObjCMethodParameter, ScopeDepth, // ObjCDeclQualifier, KNRPromoted, // HasInheritedDefaultArg, HasUninstantiatedDefaultArg // Type Source Info Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc DeclParmVarAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_TYPEDEF Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_TYPEDEF)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // Packed DeclBits: ModuleOwnershipKind, // isReferenced, isUsed, AccessSpecifier. 
Other // higher bits should be 0: isImplicit, // HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // TypeDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type Ref // TypedefDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc DeclTypedefAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_VAR Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_VAR)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 12)); // Packed DeclBits: HasStandaloneLexicalDC, // isInvalidDecl, HasAttrs, isImplicit, isUsed, // isReferenced, TopLevelDeclInObjCContainer, // AccessSpecifier, ModuleOwnershipKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // ValueDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclaratorDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InnerStartLoc Abv->Add(BitCodeAbbrevOp(0)); // hasExtInfo Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TSIType // VarDecl Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 21)); // Packed Var Decl bits: Linkage, ModulesCodegen, // SClass, TSCSpec, InitStyle, // isARCPseudoStrong, IsThisDeclarationADemotedDefinition, // isExceptionVariable, isNRVOVariable, isCXXForRangeDecl, // isInline, isInlineSpecified, isConstexpr, // isInitCapture, isPrevDeclInSameScope, // EscapingByref, HasDeducedType, ImplicitParamKind, isObjCForDecl Abv->Add(BitCodeAbbrevOp(0)); // VarKind (local enum) // Type Source Info Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc DeclVarAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_CXX_METHOD DeclCXXMethodAbbrev = Stream.EmitAbbrev(getCXXMethodAbbrev()); DeclTemplateCXXMethodAbbrev = Stream.EmitAbbrev( getCXXMethodAbbrev()); DeclDependentNonTemplateCXXMethodAbbrev = Stream.EmitAbbrev( getCXXMethodAbbrev()); DeclMemberSpecializedCXXMethodAbbrev = Stream.EmitAbbrev( getCXXMethodAbbrev()); DeclTemplateSpecializedCXXMethodAbbrev = Stream.EmitAbbrev( getCXXMethodAbbrev()); DeclDependentSpecializationCXXMethodAbbrev = Stream.EmitAbbrev( getCXXMethodAbbrev< FunctionDecl::TK_DependentFunctionTemplateSpecialization>()); // Abbreviation for DECL_TEMPLATE_TYPE_PARM Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_TEMPLATE_TYPE_PARM)); Abv->Add(BitCodeAbbrevOp(0)); // hasTypeConstraint // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // Packed DeclBits: ModuleOwnershipKind, // isReferenced, isUsed, AccessSpecifier. 
Other // higher bits should be 0: isImplicit, // HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // TypeDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type Ref // TemplateTypeParmDecl Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // wasDeclaredWithTypename Abv->Add(BitCodeAbbrevOp(0)); // OwnsDefaultArg DeclTemplateTypeParmAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_USING_SHADOW Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_USING_SHADOW)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 12)); // Packed DeclBits: HasStandaloneLexicalDC, // isInvalidDecl, HasAttrs, isImplicit, isUsed, // isReferenced, TopLevelDeclInObjCContainer, // AccessSpecifier, ModuleOwnershipKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // UsingShadowDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TargetDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 11)); // IDNS Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // UsingOrNextShadow Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InstantiatedFromUsingShadowDecl DeclUsingShadowAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_DECL_REF Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_DECL_REF)); // Stmt // Expr // PackingBits: DependenceKind, ValueKind. ObjectKind should be 0. Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclRefExpr // Packing Bits: , HadMultipleCandidates, RefersToEnclosingVariableOrCapture, // IsImmediateEscalating, NonOdrUseReason. // GetDeclFound, HasQualifier and ExplicitTemplateArgs should be 0. 
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclRef Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location DeclRefExprAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_INTEGER_LITERAL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_INTEGER_LITERAL)); //Stmt // Expr // DependenceKind, ValueKind, ObjectKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // Integer Literal Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location Abv->Add(BitCodeAbbrevOp(32)); // Bit Width Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Value IntegerLiteralAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_CHARACTER_LITERAL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_CHARACTER_LITERAL)); //Stmt // Expr // DependenceKind, ValueKind, ObjectKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // Character Literal Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getValue Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // getKind CharacterLiteralAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_IMPLICIT_CAST Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_IMPLICIT_CAST)); // Stmt // Expr // Packing Bits: DependenceKind, ValueKind, ObjectKind, Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // CastExpr Abv->Add(BitCodeAbbrevOp(0)); // PathSize // Packing Bits: CastKind, StoredFPFeatures, isPartOfExplicitCast Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 9)); // ImplicitCastExpr ExprImplicitCastAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_BINARY_OPERATOR Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_BINARY_OPERATOR)); // Stmt // Expr // Packing Bits: DependenceKind. ValueKind and ObjectKind should // be 0 in this case. Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // BinaryOperator Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpCode and HasFPFeatures Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location BinaryOperatorAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_COMPOUND_ASSIGN_OPERATOR Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_COMPOUND_ASSIGN_OPERATOR)); // Stmt // Expr // Packing Bits: DependenceKind. ValueKind and ObjectKind should // be 0 in this case. Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // BinaryOperator // Packing Bits: OpCode. 
The HasFPFeatures bit should be 0 Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpCode and HasFPFeatures Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location // CompoundAssignOperator Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHSType Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Result Type CompoundAssignOperatorAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_CALL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_CALL)); // Stmt // Expr // Packing Bits: DependenceKind, ValueKind, ObjectKind, Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // CallExpr Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // NumArgs Abv->Add(BitCodeAbbrevOp(0)); // ADLCallKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location CallExprAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_CXX_OPERATOR_CALL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_CXX_OPERATOR_CALL)); // Stmt // Expr // Packing Bits: DependenceKind, ValueKind, ObjectKind, Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // CallExpr Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // NumArgs Abv->Add(BitCodeAbbrevOp(0)); // ADLCallKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location // CXXOperatorCallExpr Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Operator Kind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location CXXOperatorCallExprAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_CXX_MEMBER_CALL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_CXX_MEMBER_CALL)); // Stmt // Expr // Packing Bits: DependenceKind, ValueKind, ObjectKind, Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // CallExpr Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // NumArgs Abv->Add(BitCodeAbbrevOp(0)); // ADLCallKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location // CXXMemberCallExpr CXXMemberCallExprAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for STMT_COMPOUND Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::STMT_COMPOUND)); // Stmt // CompoundStmt Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Num Stmts Abv->Add(BitCodeAbbrevOp(0)); // hasStoredFPFeatures Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location CompoundStmtAbbrev = Stream.EmitAbbrev(std::move(Abv)); Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_CONTEXT_LEXICAL)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); DeclContextLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv)); Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_CONTEXT_VISIBLE)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); DeclContextVisibleLookupAbbrev = Stream.EmitAbbrev(std::move(Abv)); } /// isRequiredDecl - Check if this is a "required" Decl, which must be seen by /// consumers of the AST. /// /// Such decls will always be deserialized from the AST file, so we would like /// this to be as restrictive as possible. 
Currently the predicate is driven by /// code generation requirements, if other clients have a different notion of /// what is "required" then we may have to consider an alternate scheme where /// clients can iterate over the top-level decls and get information on them, /// without necessary deserializing them. We could explicitly require such /// clients to use a separate API call to "realize" the decl. This should be /// relatively painless since they would presumably only do it for top-level /// decls. static bool isRequiredDecl(const Decl *D, ASTContext &Context, Module *WritingModule) { // Named modules have different semantics than header modules. Every named // module units owns a translation unit. So the importer of named modules // doesn't need to deserilize everything ahead of time. if (WritingModule && WritingModule->isNamedModule()) { // The PragmaCommentDecl and PragmaDetectMismatchDecl are MSVC's extension. // And the behavior of MSVC for such cases will leak this to the module // users. Given pragma is not a standard thing, the compiler has the space // to do their own decision. Let's follow MSVC here. if (isa(D)) return true; return false; } // An ObjCMethodDecl is never considered as "required" because its // implementation container always is. // File scoped assembly or obj-c or OMP declare target implementation must be // seen. if (isa(D)) return true; if (WritingModule && isPartOfPerModuleInitializer(D)) { // These declarations are part of the module initializer, and are emitted // if and when the module is imported, rather than being emitted eagerly. return false; } return Context.DeclMustBeEmitted(D); } void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) { PrettyDeclStackTraceEntry CrashInfo(Context, D, SourceLocation(), "serializing"); // Determine the ID for this declaration. serialization::DeclID ID; assert(!D->isFromASTFile() && "should not be emitting imported decl"); serialization::DeclID &IDR = DeclIDs[D]; if (IDR == 0) IDR = NextDeclID++; ID = IDR; assert(ID >= FirstDeclID && "invalid decl ID"); RecordData Record; ASTDeclWriter W(*this, Context, Record); // Build a record for this declaration W.Visit(D); // Emit this declaration to the bitstream. uint64_t Offset = W.Emit(D); // Record the offset for this declaration SourceLocation Loc = D->getLocation(); unsigned Index = ID - FirstDeclID; if (DeclOffsets.size() == Index) DeclOffsets.emplace_back(getAdjustedLocation(Loc), Offset, DeclTypesBlockStartOffset); else if (DeclOffsets.size() < Index) { // FIXME: Can/should this happen? DeclOffsets.resize(Index+1); DeclOffsets[Index].setLocation(getAdjustedLocation(Loc)); DeclOffsets[Index].setBitOffset(Offset, DeclTypesBlockStartOffset); } else { llvm_unreachable("declarations should be emitted in ID order"); } SourceManager &SM = Context.getSourceManager(); if (Loc.isValid() && SM.isLocalSourceLocation(Loc)) associateDeclWithFile(D, ID); // Note declarations that should be deserialized eagerly so that we can add // them to a record in the AST file later. if (isRequiredDecl(D, Context, WritingModule)) EagerlyDeserializedDecls.push_back(ID); } void ASTRecordWriter::AddFunctionDefinition(const FunctionDecl *FD) { // Switch case IDs are per function body. 
Writer->ClearSwitchCaseIDs(); assert(FD->doesThisDeclarationHaveABody()); bool ModulesCodegen = false; if (!FD->isDependentContext()) { std::optional Linkage; if (Writer->WritingModule && Writer->WritingModule->isInterfaceOrPartition()) { // When building a C++20 module interface unit or a partition unit, a // strong definition in the module interface is provided by the // compilation of that unit, not by its users. (Inline functions are still // emitted in module users.) Linkage = Writer->Context->GetGVALinkageForFunction(FD); ModulesCodegen = *Linkage >= GVA_StrongExternal; } if (Writer->Context->getLangOpts().ModulesCodegen || (FD->hasAttr() && Writer->Context->getLangOpts().BuildingPCHWithObjectFile)) { // Under -fmodules-codegen, codegen is performed for all non-internal, // non-always_inline functions, unless they are available elsewhere. if (!FD->hasAttr()) { if (!Linkage) Linkage = Writer->Context->GetGVALinkageForFunction(FD); ModulesCodegen = *Linkage != GVA_Internal && *Linkage != GVA_AvailableExternally; } } } Record->push_back(ModulesCodegen); if (ModulesCodegen) Writer->ModularCodegenDecls.push_back(Writer->GetDeclRef(FD)); if (auto *CD = dyn_cast(FD)) { Record->push_back(CD->getNumCtorInitializers()); if (CD->getNumCtorInitializers()) AddCXXCtorInitializers(llvm::ArrayRef(CD->init_begin(), CD->init_end())); } AddStmt(FD->getBody()); } diff --git a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c index 867ae73f0d3b..f3b457d786e6 100644 --- a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c @@ -1,1271 +1,1274 @@ /*===- InstrProfilingFile.c - Write instrumentation to a file -------------===*\ |* |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |* See https://llvm.org/LICENSE.txt for license information. |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |* \*===----------------------------------------------------------------------===*/ #if !defined(__Fuchsia__) #include #include #include #include #include #ifdef _MSC_VER /* For _alloca. */ #include #endif #if defined(_WIN32) #include "WindowsMMap.h" /* For _chsize_s */ #include #include #else #include #include #include #if defined(__linux__) #include #endif #endif #include "InstrProfiling.h" #include "InstrProfilingInternal.h" #include "InstrProfilingPort.h" #include "InstrProfilingUtil.h" /* From where is profile name specified. * The order the enumerators define their * precedence. Re-order them may lead to * runtime behavior change. */ typedef enum ProfileNameSpecifier { PNS_unknown = 0, PNS_default, PNS_command_line, PNS_environment, PNS_runtime_api } ProfileNameSpecifier; static const char *getPNSStr(ProfileNameSpecifier PNS) { switch (PNS) { case PNS_default: return "default setting"; case PNS_command_line: return "command line"; case PNS_environment: return "environment variable"; case PNS_runtime_api: return "runtime API"; default: return "Unknown"; } } #define MAX_PID_SIZE 16 /* Data structure holding the result of parsed filename pattern. */ typedef struct lprofFilename { /* File name string possibly with %p or %h specifiers. */ const char *FilenamePat; /* A flag indicating if FilenamePat's memory is allocated * by runtime. 
*/ unsigned OwnsFilenamePat; const char *ProfilePathPrefix; char PidChars[MAX_PID_SIZE]; char *TmpDir; char Hostname[COMPILER_RT_MAX_HOSTLEN]; unsigned NumPids; unsigned NumHosts; /* When in-process merging is enabled, this parameter specifies * the total number of profile data files shared by all the processes * spawned from the same binary. By default the value is 1. If merging * is not enabled, its value should be 0. This parameter is specified * by the %[0-9]m specifier. For instance %2m enables merging using * 2 profile data files. %1m is equivalent to %m. Also %m specifier * can only appear once at the end of the name pattern. */ unsigned MergePoolSize; ProfileNameSpecifier PNS; } lprofFilename; static lprofFilename lprofCurFilename = {0, 0, 0, {0}, NULL, {0}, 0, 0, 0, PNS_unknown}; static int ProfileMergeRequested = 0; static int getProfileFileSizeForMerging(FILE *ProfileFile, uint64_t *ProfileFileSize); #if defined(__APPLE__) static const int ContinuousModeSupported = 1; static const int UseBiasVar = 0; static const char *FileOpenMode = "a+b"; static void *BiasAddr = NULL; static void *BiasDefaultAddr = NULL; static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { /* Get the sizes of various profile data sections. Taken from * __llvm_profile_get_size_for_buffer(). */ const __llvm_profile_data *DataBegin = __llvm_profile_begin_data(); const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const char *CountersBegin = __llvm_profile_begin_counters(); const char *CountersEnd = __llvm_profile_end_counters(); const char *BitmapBegin = __llvm_profile_begin_bitmap(); const char *BitmapEnd = __llvm_profile_end_bitmap(); const char *NamesBegin = __llvm_profile_begin_names(); const char *NamesEnd = __llvm_profile_end_names(); const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char); uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); uint64_t CountersSize = __llvm_profile_get_counters_size(CountersBegin, CountersEnd); uint64_t NumBitmapBytes = __llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd); /* Check that the counter, bitmap, and data sections in this image are * page-aligned. */ unsigned PageSize = getpagesize(); if ((intptr_t)CountersBegin % PageSize != 0) { PROF_ERR("Counters section not page-aligned (start = %p, pagesz = %u).\n", CountersBegin, PageSize); return 1; } if ((intptr_t)BitmapBegin % PageSize != 0) { PROF_ERR("Bitmap section not page-aligned (start = %p, pagesz = %u).\n", BitmapBegin, PageSize); return 1; } if ((intptr_t)DataBegin % PageSize != 0) { PROF_ERR("Data section not page-aligned (start = %p, pagesz = %u).\n", DataBegin, PageSize); return 1; } int Fileno = fileno(File); /* Determine how much padding is needed before/after the counters and * after the names. 
*/ uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters, PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes; __llvm_profile_get_padding_sizes_for_counters( DataSize, CountersSize, NumBitmapBytes, NamesSize, &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters, &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames); uint64_t PageAlignedCountersLength = CountersSize + PaddingBytesAfterCounters; uint64_t FileOffsetToCounters = CurrentFileOffset + sizeof(__llvm_profile_header) + DataSize + PaddingBytesBeforeCounters; void *CounterMmap = mmap((void *)CountersBegin, PageAlignedCountersLength, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, Fileno, FileOffsetToCounters); if (CounterMmap != CountersBegin) { PROF_ERR( "Continuous counter sync mode is enabled, but mmap() failed (%s).\n" " - CountersBegin: %p\n" " - PageAlignedCountersLength: %" PRIu64 "\n" " - Fileno: %d\n" " - FileOffsetToCounters: %" PRIu64 "\n", strerror(errno), CountersBegin, PageAlignedCountersLength, Fileno, FileOffsetToCounters); return 1; } /* Also mmap MCDC bitmap bytes. If there aren't any bitmap bytes, mmap() * will fail with EINVAL. */ if (NumBitmapBytes == 0) return 0; uint64_t PageAlignedBitmapLength = NumBitmapBytes + PaddingBytesAfterBitmapBytes; uint64_t FileOffsetToBitmap = CurrentFileOffset + sizeof(__llvm_profile_header) + DataSize + PaddingBytesBeforeCounters + CountersSize + PaddingBytesAfterCounters; void *BitmapMmap = mmap((void *)BitmapBegin, PageAlignedBitmapLength, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, Fileno, FileOffsetToBitmap); if (BitmapMmap != BitmapBegin) { PROF_ERR( "Continuous counter sync mode is enabled, but mmap() failed (%s).\n" " - BitmapBegin: %p\n" " - PageAlignedBitmapLength: %" PRIu64 "\n" " - Fileno: %d\n" " - FileOffsetToBitmap: %" PRIu64 "\n", strerror(errno), BitmapBegin, PageAlignedBitmapLength, Fileno, FileOffsetToBitmap); return 1; } return 0; } #elif defined(__ELF__) || defined(_WIN32) #define INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR \ INSTR_PROF_CONCAT(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR, _default) COMPILER_RT_VISIBILITY intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR = 0; /* This variable is a weak external reference which could be used to detect * whether or not the compiler defined this symbol. */ #if defined(_MSC_VER) COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR; #if defined(_M_IX86) || defined(__i386__) #define WIN_SYM_PREFIX "_" #else #define WIN_SYM_PREFIX #endif #pragma comment( \ linker, "/alternatename:" WIN_SYM_PREFIX INSTR_PROF_QUOTE( \ INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" WIN_SYM_PREFIX \ INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR)) #else COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __attribute__((weak, alias(INSTR_PROF_QUOTE( INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR)))); #endif static const int ContinuousModeSupported = 1; static const int UseBiasVar = 1; /* TODO: If there are two DSOs, the second DSO initilization will truncate the * first profile file. */ static const char *FileOpenMode = "w+b"; /* This symbol is defined by the compiler when runtime counter relocation is * used and runtime provides a weak alias so we can check if it's defined. */ static void *BiasAddr = &INSTR_PROF_PROFILE_COUNTER_BIAS_VAR; static void *BiasDefaultAddr = &INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR; static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { /* Get the sizes of various profile data sections. 
Taken from * __llvm_profile_get_size_for_buffer(). */ const __llvm_profile_data *DataBegin = __llvm_profile_begin_data(); const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const char *CountersBegin = __llvm_profile_begin_counters(); const char *CountersEnd = __llvm_profile_end_counters(); const char *BitmapBegin = __llvm_profile_begin_bitmap(); const char *BitmapEnd = __llvm_profile_end_bitmap(); uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); /* Get the file size. */ uint64_t FileSize = 0; if (getProfileFileSizeForMerging(File, &FileSize)) return 1; /* Map the profile. */ char *Profile = (char *)mmap(NULL, FileSize, PROT_READ | PROT_WRITE, MAP_SHARED, fileno(File), 0); if (Profile == MAP_FAILED) { PROF_ERR("Unable to mmap profile: %s\n", strerror(errno)); return 1; } const uint64_t CountersOffsetInBiasMode = sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) + DataSize; /* Update the profile fields based on the current mapping. */ INSTR_PROF_PROFILE_COUNTER_BIAS_VAR = (intptr_t)Profile - (uintptr_t)CountersBegin + CountersOffsetInBiasMode; /* Return the memory allocated for counters to OS. */ lprofReleaseMemoryPagesToOS((uintptr_t)CountersBegin, (uintptr_t)CountersEnd); /* BIAS MODE not supported yet for Bitmap (MCDC). */ /* Return the memory allocated for counters to OS. */ lprofReleaseMemoryPagesToOS((uintptr_t)BitmapBegin, (uintptr_t)BitmapEnd); return 0; } #else static const int ContinuousModeSupported = 0; static const int UseBiasVar = 0; static const char *FileOpenMode = "a+b"; static void *BiasAddr = NULL; static void *BiasDefaultAddr = NULL; static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { return 0; } #endif static int isProfileMergeRequested(void) { return ProfileMergeRequested; } static void setProfileMergeRequested(int EnableMerge) { ProfileMergeRequested = EnableMerge; } static FILE *ProfileFile = NULL; static FILE *getProfileFile(void) { return ProfileFile; } static void setProfileFile(FILE *File) { ProfileFile = File; } static int getCurFilenameLength(void); static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf); static unsigned doMerging(void) { return lprofCurFilename.MergePoolSize || isProfileMergeRequested(); } /* Return 1 if there is an error, otherwise return 0. */ static uint32_t fileWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs, uint32_t NumIOVecs) { uint32_t I; FILE *File = (FILE *)This->WriterCtx; char Zeroes[sizeof(uint64_t)] = {0}; for (I = 0; I < NumIOVecs; I++) { if (IOVecs[I].Data) { if (fwrite(IOVecs[I].Data, IOVecs[I].ElmSize, IOVecs[I].NumElm, File) != IOVecs[I].NumElm) return 1; } else if (IOVecs[I].UseZeroPadding) { size_t BytesToWrite = IOVecs[I].ElmSize * IOVecs[I].NumElm; while (BytesToWrite > 0) { size_t PartialWriteLen = (sizeof(uint64_t) > BytesToWrite) ? BytesToWrite : sizeof(uint64_t); if (fwrite(Zeroes, sizeof(uint8_t), PartialWriteLen, File) != PartialWriteLen) { return 1; } BytesToWrite -= PartialWriteLen; } } else { if (fseek(File, IOVecs[I].ElmSize * IOVecs[I].NumElm, SEEK_CUR) == -1) return 1; } } return 0; } /* TODO: make buffer size controllable by an internal option, and compiler can pass the size to runtime via a variable. 
*/ static uint32_t orderFileWriter(FILE *File, const uint32_t *DataStart) { if (fwrite(DataStart, sizeof(uint32_t), INSTR_ORDER_FILE_BUFFER_SIZE, File) != INSTR_ORDER_FILE_BUFFER_SIZE) return 1; return 0; } static void initFileWriter(ProfDataWriter *This, FILE *File) { This->Write = fileWriter; This->WriterCtx = File; } COMPILER_RT_VISIBILITY ProfBufferIO * lprofCreateBufferIOInternal(void *File, uint32_t BufferSz) { FreeHook = &free; DynamicBufferIOBuffer = (uint8_t *)calloc(1, BufferSz); VPBufferSize = BufferSz; ProfDataWriter *fileWriter = (ProfDataWriter *)calloc(1, sizeof(ProfDataWriter)); initFileWriter(fileWriter, File); ProfBufferIO *IO = lprofCreateBufferIO(fileWriter); IO->OwnFileWriter = 1; return IO; } static void setupIOBuffer(void) { const char *BufferSzStr = 0; BufferSzStr = getenv("LLVM_VP_BUFFER_SIZE"); if (BufferSzStr && BufferSzStr[0]) { VPBufferSize = atoi(BufferSzStr); DynamicBufferIOBuffer = (uint8_t *)calloc(VPBufferSize, 1); } } /* Get the size of the profile file. If there are any errors, print the * message under the assumption that the profile is being read for merging * purposes, and return -1. Otherwise return the file size in the inout param * \p ProfileFileSize. */ static int getProfileFileSizeForMerging(FILE *ProfileFile, uint64_t *ProfileFileSize) { if (fseek(ProfileFile, 0L, SEEK_END) == -1) { PROF_ERR("Unable to merge profile data, unable to get size: %s\n", strerror(errno)); return -1; } *ProfileFileSize = ftell(ProfileFile); /* Restore file offset. */ if (fseek(ProfileFile, 0L, SEEK_SET) == -1) { PROF_ERR("Unable to merge profile data, unable to rewind: %s\n", strerror(errno)); return -1; } if (*ProfileFileSize > 0 && *ProfileFileSize < sizeof(__llvm_profile_header)) { PROF_WARN("Unable to merge profile data: %s\n", "source profile file is too small."); return -1; } return 0; } /* mmap() \p ProfileFile for profile merging purposes, assuming that an * exclusive lock is held on the file and that \p ProfileFileSize is the * length of the file. Return the mmap'd buffer in the inout variable * \p ProfileBuffer. Returns -1 on failure. On success, the caller is * responsible for unmapping the mmap'd buffer in \p ProfileBuffer. */ static int mmapProfileForMerging(FILE *ProfileFile, uint64_t ProfileFileSize, char **ProfileBuffer) { *ProfileBuffer = mmap(NULL, ProfileFileSize, PROT_READ, MAP_SHARED | MAP_FILE, fileno(ProfileFile), 0); if (*ProfileBuffer == MAP_FAILED) { PROF_ERR("Unable to merge profile data, mmap failed: %s\n", strerror(errno)); return -1; } if (__llvm_profile_check_compatibility(*ProfileBuffer, ProfileFileSize)) { (void)munmap(*ProfileBuffer, ProfileFileSize); PROF_WARN("Unable to merge profile data: %s\n", "source profile file is not compatible."); return -1; } return 0; } /* Read profile data in \c ProfileFile and merge with in-memory profile counters. Returns -1 if there is fatal error, otheriwse 0 is returned. Returning 0 does not mean merge is actually performed. If merge is actually done, *MergeDone is set to 1. */ static int doProfileMerging(FILE *ProfileFile, int *MergeDone) { uint64_t ProfileFileSize; char *ProfileBuffer; /* Get the size of the profile on disk. */ if (getProfileFileSizeForMerging(ProfileFile, &ProfileFileSize) == -1) return -1; /* Nothing to merge. */ if (!ProfileFileSize) return 0; /* mmap() the profile and check that it is compatible with the data in * the current image. 
*/ if (mmapProfileForMerging(ProfileFile, ProfileFileSize, &ProfileBuffer) == -1) return -1; /* Now start merging */ if (__llvm_profile_merge_from_buffer(ProfileBuffer, ProfileFileSize)) { PROF_ERR("%s\n", "Invalid profile data to merge"); (void)munmap(ProfileBuffer, ProfileFileSize); return -1; } // Truncate the file in case merging of value profile did not happen to // prevent from leaving garbage data at the end of the profile file. (void)COMPILER_RT_FTRUNCATE(ProfileFile, __llvm_profile_get_size_for_buffer()); (void)munmap(ProfileBuffer, ProfileFileSize); *MergeDone = 1; return 0; } /* Create the directory holding the file, if needed. */ static void createProfileDir(const char *Filename) { size_t Length = strlen(Filename); if (lprofFindFirstDirSeparator(Filename)) { char *Copy = (char *)COMPILER_RT_ALLOCA(Length + 1); strncpy(Copy, Filename, Length + 1); __llvm_profile_recursive_mkdir(Copy); } } /* Open the profile data for merging. It opens the file in r+b mode with * file locking. If the file has content which is compatible with the * current process, it also reads in the profile data in the file and merge * it with in-memory counters. After the profile data is merged in memory, * the original profile data is truncated and gets ready for the profile * dumper. With profile merging enabled, each executable as well as any of * its instrumented shared libraries dump profile data into their own data file. */ static FILE *openFileForMerging(const char *ProfileFileName, int *MergeDone) { FILE *ProfileFile = getProfileFile(); int rc; // initializeProfileForContinuousMode will lock the profile, but if // ProfileFile is set by user via __llvm_profile_set_file_object, it's assumed // unlocked at this point. if (ProfileFile && !__llvm_profile_is_continuous_mode_enabled()) { lprofLockFileHandle(ProfileFile); } if (!ProfileFile) { createProfileDir(ProfileFileName); ProfileFile = lprofOpenFileEx(ProfileFileName); } if (!ProfileFile) return NULL; rc = doProfileMerging(ProfileFile, MergeDone); if (rc || (!*MergeDone && COMPILER_RT_FTRUNCATE(ProfileFile, 0L)) || fseek(ProfileFile, 0L, SEEK_SET) == -1) { PROF_ERR("Profile Merging of file %s failed: %s\n", ProfileFileName, strerror(errno)); fclose(ProfileFile); return NULL; } return ProfileFile; } static FILE *getFileObject(const char *OutputName) { FILE *File; File = getProfileFile(); if (File != NULL) { return File; } return fopen(OutputName, "ab"); } /* Write profile data to file \c OutputName. */ static int writeFile(const char *OutputName) { int RetVal; FILE *OutputFile; int MergeDone = 0; VPMergeHook = &lprofMergeValueProfData; if (doMerging()) OutputFile = openFileForMerging(OutputName, &MergeDone); else OutputFile = getFileObject(OutputName); if (!OutputFile) return -1; FreeHook = &free; setupIOBuffer(); ProfDataWriter fileWriter; initFileWriter(&fileWriter, OutputFile); RetVal = lprofWriteData(&fileWriter, lprofGetVPDataReader(), MergeDone); if (OutputFile == getProfileFile()) { fflush(OutputFile); if (doMerging() && !__llvm_profile_is_continuous_mode_enabled()) { lprofUnlockFileHandle(OutputFile); } } else { fclose(OutputFile); } return RetVal; } /* Write order data to file \c OutputName. 
*/ static int writeOrderFile(const char *OutputName) { int RetVal; FILE *OutputFile; OutputFile = fopen(OutputName, "w"); if (!OutputFile) { PROF_WARN("can't open file with mode ab: %s\n", OutputName); return -1; } FreeHook = &free; setupIOBuffer(); const uint32_t *DataBegin = __llvm_profile_begin_orderfile(); RetVal = orderFileWriter(OutputFile, DataBegin); fclose(OutputFile); return RetVal; } #define LPROF_INIT_ONCE_ENV "__LLVM_PROFILE_RT_INIT_ONCE" static void truncateCurrentFile(void) { const char *Filename; char *FilenameBuf; FILE *File; int Length; Length = getCurFilenameLength(); FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1); Filename = getCurFilename(FilenameBuf, 0); if (!Filename) return; /* Only create the profile directory and truncate an existing profile once. * In continuous mode, this is necessary, as the profile is written-to by the * runtime initializer. */ int initialized = getenv(LPROF_INIT_ONCE_ENV) != NULL; if (initialized) return; #if defined(_WIN32) _putenv(LPROF_INIT_ONCE_ENV "=" LPROF_INIT_ONCE_ENV); #else setenv(LPROF_INIT_ONCE_ENV, LPROF_INIT_ONCE_ENV, 1); #endif /* Create the profile dir (even if online merging is enabled), so that * the profile file can be set up if continuous mode is enabled. */ createProfileDir(Filename); /* By pass file truncation to allow online raw profile merging. */ if (lprofCurFilename.MergePoolSize) return; /* Truncate the file. Later we'll reopen and append. */ File = fopen(Filename, "w"); if (!File) return; fclose(File); } /* Write a partial profile to \p Filename, which is required to be backed by * the open file object \p File. */ static int writeProfileWithFileObject(const char *Filename, FILE *File) { setProfileFile(File); int rc = writeFile(Filename); if (rc) PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno)); setProfileFile(NULL); return rc; } static void initializeProfileForContinuousMode(void) { if (!__llvm_profile_is_continuous_mode_enabled()) return; if (!ContinuousModeSupported) { PROF_ERR("%s\n", "continuous mode is unsupported on this platform"); return; } if (UseBiasVar && BiasAddr == BiasDefaultAddr) { PROF_ERR("%s\n", "__llvm_profile_counter_bias is undefined"); return; } /* Get the sizes of counter section. */ uint64_t CountersSize = __llvm_profile_get_counters_size( __llvm_profile_begin_counters(), __llvm_profile_end_counters()); int Length = getCurFilenameLength(); char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1); const char *Filename = getCurFilename(FilenameBuf, 0); if (!Filename) return; FILE *File = NULL; uint64_t CurrentFileOffset = 0; if (doMerging()) { /* We are merging profiles. Map the counter section as shared memory into * the profile, i.e. into each participating process. An increment in one * process should be visible to every other process with the same counter * section mapped. */ File = lprofOpenFileEx(Filename); if (!File) return; uint64_t ProfileFileSize = 0; if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1) { lprofUnlockFileHandle(File); fclose(File); return; } if (ProfileFileSize == 0) { /* Grow the profile so that mmap() can succeed. Leak the file handle, as * the file should stay open. */ if (writeProfileWithFileObject(Filename, File) != 0) { lprofUnlockFileHandle(File); fclose(File); return; } } else { /* The merged profile has a non-zero length. Check that it is compatible * with the data in this process. 
*/ char *ProfileBuffer; if (mmapProfileForMerging(File, ProfileFileSize, &ProfileBuffer) == -1) { lprofUnlockFileHandle(File); fclose(File); return; } (void)munmap(ProfileBuffer, ProfileFileSize); } } else { File = fopen(Filename, FileOpenMode); if (!File) return; /* Check that the offset within the file is page-aligned. */ CurrentFileOffset = ftell(File); unsigned PageSize = getpagesize(); if (CurrentFileOffset % PageSize != 0) { PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not" "page-aligned. CurrentFileOffset = %" PRIu64 ", pagesz = %u.\n", (uint64_t)CurrentFileOffset, PageSize); + fclose(File); return; } if (writeProfileWithFileObject(Filename, File) != 0) { fclose(File); return; } } /* mmap() the profile counters so long as there is at least one counter. * If there aren't any counters, mmap() would fail with EINVAL. */ if (CountersSize > 0) mmapForContinuousMode(CurrentFileOffset, File); if (doMerging()) { lprofUnlockFileHandle(File); + } + if (File != NULL) { fclose(File); } } static const char *DefaultProfileName = "default.profraw"; static void resetFilenameToDefault(void) { if (lprofCurFilename.FilenamePat && lprofCurFilename.OwnsFilenamePat) { #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wcast-qual" #elif defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wcast-qual" #endif free((void *)lprofCurFilename.FilenamePat); #ifdef __GNUC__ #pragma GCC diagnostic pop #elif defined(__clang__) #pragma clang diagnostic pop #endif } memset(&lprofCurFilename, 0, sizeof(lprofCurFilename)); lprofCurFilename.FilenamePat = DefaultProfileName; lprofCurFilename.PNS = PNS_default; } static unsigned getMergePoolSize(const char *FilenamePat, int *I) { unsigned J = 0, Num = 0; for (;; ++J) { char C = FilenamePat[*I + J]; if (C == 'm') { *I += J; return Num ? Num : 1; } if (C < '0' || C > '9') break; Num = Num * 10 + C - '0'; /* If FilenamePat[*I+J] is between '0' and '9', the next byte is guaranteed * to be in-bound as the string is null terminated. */ } return 0; } /* Assert that Idx does index past a string null terminator. Return the * result of the check. */ static int checkBounds(int Idx, int Strlen) { assert(Idx <= Strlen && "Indexing past string null terminator"); return Idx <= Strlen; } /* Parses the pattern string \p FilenamePat and stores the result to * lprofcurFilename structure. */ static int parseFilenamePattern(const char *FilenamePat, unsigned CopyFilenamePat) { int NumPids = 0, NumHosts = 0, I; char *PidChars = &lprofCurFilename.PidChars[0]; char *Hostname = &lprofCurFilename.Hostname[0]; int MergingEnabled = 0; int FilenamePatLen = strlen(FilenamePat); #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wcast-qual" #elif defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wcast-qual" #endif /* Clean up cached prefix and filename. */ if (lprofCurFilename.ProfilePathPrefix) free((void *)lprofCurFilename.ProfilePathPrefix); if (lprofCurFilename.FilenamePat && lprofCurFilename.OwnsFilenamePat) { free((void *)lprofCurFilename.FilenamePat); } #ifdef __GNUC__ #pragma GCC diagnostic pop #elif defined(__clang__) #pragma clang diagnostic pop #endif memset(&lprofCurFilename, 0, sizeof(lprofCurFilename)); if (!CopyFilenamePat) lprofCurFilename.FilenamePat = FilenamePat; else { lprofCurFilename.FilenamePat = strdup(FilenamePat); lprofCurFilename.OwnsFilenamePat = 1; } /* Check the filename for "%p", which indicates a pid-substitution. 
*/ for (I = 0; checkBounds(I, FilenamePatLen) && FilenamePat[I]; ++I) { if (FilenamePat[I] == '%') { ++I; /* Advance to the next character. */ if (!checkBounds(I, FilenamePatLen)) break; if (FilenamePat[I] == 'p') { if (!NumPids++) { if (snprintf(PidChars, MAX_PID_SIZE, "%ld", (long)getpid()) <= 0) { PROF_WARN("Unable to get pid for filename pattern %s. Using the " "default name.", FilenamePat); return -1; } } } else if (FilenamePat[I] == 'h') { if (!NumHosts++) if (COMPILER_RT_GETHOSTNAME(Hostname, COMPILER_RT_MAX_HOSTLEN)) { PROF_WARN("Unable to get hostname for filename pattern %s. Using " "the default name.", FilenamePat); return -1; } } else if (FilenamePat[I] == 't') { lprofCurFilename.TmpDir = getenv("TMPDIR"); if (!lprofCurFilename.TmpDir) { PROF_WARN("Unable to get the TMPDIR environment variable, referenced " "in %s. Using the default path.", FilenamePat); return -1; } } else if (FilenamePat[I] == 'c') { if (__llvm_profile_is_continuous_mode_enabled()) { PROF_WARN("%%c specifier can only be specified once in %s.\n", FilenamePat); __llvm_profile_disable_continuous_mode(); return -1; } #if defined(__APPLE__) || defined(__ELF__) || defined(_WIN32) __llvm_profile_set_page_size(getpagesize()); __llvm_profile_enable_continuous_mode(); #else PROF_WARN("%s", "Continous mode is currently only supported for Mach-O," " ELF and COFF formats."); return -1; #endif } else { unsigned MergePoolSize = getMergePoolSize(FilenamePat, &I); if (!MergePoolSize) continue; if (MergingEnabled) { PROF_WARN("%%m specifier can only be specified once in %s.\n", FilenamePat); return -1; } MergingEnabled = 1; lprofCurFilename.MergePoolSize = MergePoolSize; } } } lprofCurFilename.NumPids = NumPids; lprofCurFilename.NumHosts = NumHosts; return 0; } static void parseAndSetFilename(const char *FilenamePat, ProfileNameSpecifier PNS, unsigned CopyFilenamePat) { const char *OldFilenamePat = lprofCurFilename.FilenamePat; ProfileNameSpecifier OldPNS = lprofCurFilename.PNS; /* The old profile name specifier takes precedence over the old one. */ if (PNS < OldPNS) return; if (!FilenamePat) FilenamePat = DefaultProfileName; if (OldFilenamePat && !strcmp(OldFilenamePat, FilenamePat)) { lprofCurFilename.PNS = PNS; return; } /* When PNS >= OldPNS, the last one wins. */ if (!FilenamePat || parseFilenamePattern(FilenamePat, CopyFilenamePat)) resetFilenameToDefault(); lprofCurFilename.PNS = PNS; if (!OldFilenamePat) { if (getenv("LLVM_PROFILE_VERBOSE")) PROF_NOTE("Set profile file path to \"%s\" via %s.\n", lprofCurFilename.FilenamePat, getPNSStr(PNS)); } else { if (getenv("LLVM_PROFILE_VERBOSE")) PROF_NOTE("Override old profile path \"%s\" via %s to \"%s\" via %s.\n", OldFilenamePat, getPNSStr(OldPNS), lprofCurFilename.FilenamePat, getPNSStr(PNS)); } truncateCurrentFile(); if (__llvm_profile_is_continuous_mode_enabled()) initializeProfileForContinuousMode(); } /* Return buffer length that is required to store the current profile * filename with PID and hostname substitutions. 
*/ /* The length to hold uint64_t followed by 3 digits pool id including '_' */ #define SIGLEN 24 static int getCurFilenameLength(void) { int Len; if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0]) return 0; if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts || lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize)) return strlen(lprofCurFilename.FilenamePat); Len = strlen(lprofCurFilename.FilenamePat) + lprofCurFilename.NumPids * (strlen(lprofCurFilename.PidChars) - 2) + lprofCurFilename.NumHosts * (strlen(lprofCurFilename.Hostname) - 2) + (lprofCurFilename.TmpDir ? (strlen(lprofCurFilename.TmpDir) - 1) : 0); if (lprofCurFilename.MergePoolSize) Len += SIGLEN; return Len; } /* Return the pointer to the current profile file name (after substituting * PIDs and Hostnames in filename pattern. \p FilenameBuf is the buffer * to store the resulting filename. If no substitution is needed, the * current filename pattern string is directly returned, unless ForceUseBuf * is enabled. */ static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf) { int I, J, PidLength, HostNameLength, TmpDirLength, FilenamePatLength; const char *FilenamePat = lprofCurFilename.FilenamePat; if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0]) return 0; if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts || lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize || __llvm_profile_is_continuous_mode_enabled())) { if (!ForceUseBuf) return lprofCurFilename.FilenamePat; FilenamePatLength = strlen(lprofCurFilename.FilenamePat); memcpy(FilenameBuf, lprofCurFilename.FilenamePat, FilenamePatLength); FilenameBuf[FilenamePatLength] = '\0'; return FilenameBuf; } PidLength = strlen(lprofCurFilename.PidChars); HostNameLength = strlen(lprofCurFilename.Hostname); TmpDirLength = lprofCurFilename.TmpDir ? strlen(lprofCurFilename.TmpDir) : 0; /* Construct the new filename. */ for (I = 0, J = 0; FilenamePat[I]; ++I) if (FilenamePat[I] == '%') { if (FilenamePat[++I] == 'p') { memcpy(FilenameBuf + J, lprofCurFilename.PidChars, PidLength); J += PidLength; } else if (FilenamePat[I] == 'h') { memcpy(FilenameBuf + J, lprofCurFilename.Hostname, HostNameLength); J += HostNameLength; } else if (FilenamePat[I] == 't') { memcpy(FilenameBuf + J, lprofCurFilename.TmpDir, TmpDirLength); FilenameBuf[J + TmpDirLength] = DIR_SEPARATOR; J += TmpDirLength + 1; } else { if (!getMergePoolSize(FilenamePat, &I)) continue; char LoadModuleSignature[SIGLEN + 1]; int S; int ProfilePoolId = getpid() % lprofCurFilename.MergePoolSize; S = snprintf(LoadModuleSignature, SIGLEN + 1, "%" PRIu64 "_%d", lprofGetLoadModuleSignature(), ProfilePoolId); if (S == -1 || S > SIGLEN) S = SIGLEN; memcpy(FilenameBuf + J, LoadModuleSignature, S); J += S; } /* Drop any unknown substitutions. */ } else FilenameBuf[J++] = FilenamePat[I]; FilenameBuf[J] = 0; return FilenameBuf; } /* Returns the pointer to the environment variable * string. Returns null if the env var is not set. 
*/ static const char *getFilenamePatFromEnv(void) { const char *Filename = getenv("LLVM_PROFILE_FILE"); if (!Filename || !Filename[0]) return 0; return Filename; } COMPILER_RT_VISIBILITY const char *__llvm_profile_get_path_prefix(void) { int Length; char *FilenameBuf, *Prefix; const char *Filename, *PrefixEnd; if (lprofCurFilename.ProfilePathPrefix) return lprofCurFilename.ProfilePathPrefix; Length = getCurFilenameLength(); FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1); Filename = getCurFilename(FilenameBuf, 0); if (!Filename) return "\0"; PrefixEnd = lprofFindLastDirSeparator(Filename); if (!PrefixEnd) return "\0"; Length = PrefixEnd - Filename + 1; Prefix = (char *)malloc(Length + 1); if (!Prefix) { PROF_ERR("Failed to %s\n", "allocate memory."); return "\0"; } memcpy(Prefix, Filename, Length); Prefix[Length] = '\0'; lprofCurFilename.ProfilePathPrefix = Prefix; return Prefix; } COMPILER_RT_VISIBILITY const char *__llvm_profile_get_filename(void) { int Length; char *FilenameBuf; const char *Filename; Length = getCurFilenameLength(); FilenameBuf = (char *)malloc(Length + 1); if (!FilenameBuf) { PROF_ERR("Failed to %s\n", "allocate memory."); return "\0"; } Filename = getCurFilename(FilenameBuf, 1); if (!Filename) return "\0"; return FilenameBuf; } /* This API initializes the file handling, both user specified * profile path via -fprofile-instr-generate= and LLVM_PROFILE_FILE * environment variable can override this default value. */ COMPILER_RT_VISIBILITY void __llvm_profile_initialize_file(void) { const char *EnvFilenamePat; const char *SelectedPat = NULL; ProfileNameSpecifier PNS = PNS_unknown; int hasCommandLineOverrider = (INSTR_PROF_PROFILE_NAME_VAR[0] != 0); EnvFilenamePat = getFilenamePatFromEnv(); if (EnvFilenamePat) { /* Pass CopyFilenamePat = 1, to ensure that the filename would be valid at the moment when __llvm_profile_write_file() gets executed. */ parseAndSetFilename(EnvFilenamePat, PNS_environment, 1); return; } else if (hasCommandLineOverrider) { SelectedPat = INSTR_PROF_PROFILE_NAME_VAR; PNS = PNS_command_line; } else { SelectedPat = NULL; PNS = PNS_default; } parseAndSetFilename(SelectedPat, PNS, 0); } /* This method is invoked by the runtime initialization hook * InstrProfilingRuntime.o if it is linked in. */ COMPILER_RT_VISIBILITY void __llvm_profile_initialize(void) { __llvm_profile_initialize_file(); if (!__llvm_profile_is_continuous_mode_enabled()) __llvm_profile_register_write_file_atexit(); } /* This API is directly called by the user application code. It has the * highest precedence compared with LLVM_PROFILE_FILE environment variable * and command line option -fprofile-instr-generate=. */ COMPILER_RT_VISIBILITY void __llvm_profile_set_filename(const char *FilenamePat) { if (__llvm_profile_is_continuous_mode_enabled()) return; parseAndSetFilename(FilenamePat, PNS_runtime_api, 1); } /* The public API for writing profile data into the file with name * set by previous calls to __llvm_profile_set_filename or * __llvm_profile_override_default_filename or * __llvm_profile_initialize_file. */ COMPILER_RT_VISIBILITY int __llvm_profile_write_file(void) { int rc, Length; const char *Filename; char *FilenameBuf; // Temporarily suspend getting SIGKILL when the parent exits. 
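/* Illustrative user-side sketch (not part of this file): the name-selection
 * precedence described above runs, from lowest to highest, compile-time
 * default, LLVM_PROFILE_FILE in the environment, then an explicit
 * __llvm_profile_set_filename() call. The extern declarations are written out
 * by hand here because instrumented programs normally just link against the
 * profile runtime rather than include one of its headers. */
#if 0
extern void __llvm_profile_set_filename(const char *Pattern);
extern int __llvm_profile_write_file(void);

int main(void) {
  /* Highest precedence: the runtime API. %p expands to the process pid. */
  __llvm_profile_set_filename("myprog-%p.profraw");
  /* Force a write now instead of waiting for the atexit() handler. */
  return __llvm_profile_write_file();
}
#endif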
int PDeathSig = lprofSuspendSigKill(); if (lprofProfileDumped() || __llvm_profile_is_continuous_mode_enabled()) { PROF_NOTE("Profile data not written to file: %s.\n", "already written"); if (PDeathSig == 1) lprofRestoreSigKill(); return 0; } Length = getCurFilenameLength(); FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1); Filename = getCurFilename(FilenameBuf, 0); /* Check the filename. */ if (!Filename) { PROF_ERR("Failed to write file : %s\n", "Filename not set"); if (PDeathSig == 1) lprofRestoreSigKill(); return -1; } /* Check if there is llvm/runtime version mismatch. */ if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) { PROF_ERR("Runtime and instrumentation version mismatch : " "expected %d, but get %d\n", INSTR_PROF_RAW_VERSION, (int)GET_VERSION(__llvm_profile_get_version())); if (PDeathSig == 1) lprofRestoreSigKill(); return -1; } /* Write profile data to the file. */ rc = writeFile(Filename); if (rc) PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno)); // Restore SIGKILL. if (PDeathSig == 1) lprofRestoreSigKill(); return rc; } COMPILER_RT_VISIBILITY int __llvm_profile_dump(void) { if (!doMerging()) PROF_WARN("Later invocation of __llvm_profile_dump can lead to clobbering " " of previously dumped profile data : %s. Either use %%m " "in profile name or change profile name before dumping.\n", "online profile merging is not on"); int rc = __llvm_profile_write_file(); lprofSetProfileDumped(1); return rc; } /* Order file data will be saved in a file with suffx .order. */ static const char *OrderFileSuffix = ".order"; COMPILER_RT_VISIBILITY int __llvm_orderfile_write_file(void) { int rc, Length, LengthBeforeAppend, SuffixLength; const char *Filename; char *FilenameBuf; // Temporarily suspend getting SIGKILL when the parent exits. int PDeathSig = lprofSuspendSigKill(); SuffixLength = strlen(OrderFileSuffix); Length = getCurFilenameLength() + SuffixLength; FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1); Filename = getCurFilename(FilenameBuf, 1); /* Check the filename. */ if (!Filename) { PROF_ERR("Failed to write file : %s\n", "Filename not set"); if (PDeathSig == 1) lprofRestoreSigKill(); return -1; } /* Append order file suffix */ LengthBeforeAppend = strlen(Filename); memcpy(FilenameBuf + LengthBeforeAppend, OrderFileSuffix, SuffixLength); FilenameBuf[LengthBeforeAppend + SuffixLength] = '\0'; /* Check if there is llvm/runtime version mismatch. */ if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) { PROF_ERR("Runtime and instrumentation version mismatch : " "expected %d, but get %d\n", INSTR_PROF_RAW_VERSION, (int)GET_VERSION(__llvm_profile_get_version())); if (PDeathSig == 1) lprofRestoreSigKill(); return -1; } /* Write order data to the file. */ rc = writeOrderFile(Filename); if (rc) PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno)); // Restore SIGKILL. 
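/* Illustrative sketch (not part of this file): with a plain file name, a
 * second __llvm_profile_dump() call or a second process simply rewrites the
 * profile, which is what the warning in __llvm_profile_dump() above is about.
 * Including the %m (or %Nm, e.g. %2m) specifier in the pattern turns on
 * online merging, so writers lock the file and merge their counters into it
 * instead of clobbering it. Names below are made up for illustration. */
#if 0
extern void __llvm_profile_set_filename(const char *Pattern);
extern int __llvm_profile_dump(void);

static int dump_profile_mergeably(void) {
  /* %2m: share a pool of two profile files and merge into them on each dump. */
  __llvm_profile_set_filename("myprog-%2m.profraw");
  return __llvm_profile_dump();
}
#endif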
if (PDeathSig == 1) lprofRestoreSigKill(); return rc; } COMPILER_RT_VISIBILITY int __llvm_orderfile_dump(void) { int rc = __llvm_orderfile_write_file(); return rc; } static void writeFileWithoutReturn(void) { __llvm_profile_write_file(); } COMPILER_RT_VISIBILITY int __llvm_profile_register_write_file_atexit(void) { static int HasBeenRegistered = 0; if (HasBeenRegistered) return 0; lprofSetupValueProfiler(); HasBeenRegistered = 1; return atexit(writeFileWithoutReturn); } COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File, int EnableMerge) { if (__llvm_profile_is_continuous_mode_enabled()) { if (!EnableMerge) { PROF_WARN("__llvm_profile_set_file_object(fd=%d) not supported in " "continuous sync mode when merging is disabled\n", fileno(File)); return 1; } if (lprofLockFileHandle(File) != 0) { PROF_WARN("Data may be corrupted during profile merging : %s\n", "Fail to obtain file lock due to system limit."); } uint64_t ProfileFileSize = 0; if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1) { lprofUnlockFileHandle(File); return 1; } if (ProfileFileSize == 0) { FreeHook = &free; setupIOBuffer(); ProfDataWriter fileWriter; initFileWriter(&fileWriter, File); if (lprofWriteData(&fileWriter, 0, 0)) { lprofUnlockFileHandle(File); PROF_ERR("Failed to write file \"%d\": %s\n", fileno(File), strerror(errno)); return 1; } fflush(File); } else { /* The merged profile has a non-zero length. Check that it is compatible * with the data in this process. */ char *ProfileBuffer; if (mmapProfileForMerging(File, ProfileFileSize, &ProfileBuffer) == -1) { lprofUnlockFileHandle(File); return 1; } (void)munmap(ProfileBuffer, ProfileFileSize); } mmapForContinuousMode(0, File); lprofUnlockFileHandle(File); } else { setProfileFile(File); setProfileMergeRequested(EnableMerge); } return 0; } #endif diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h index af3ad822e0b0..0880f9c65aa4 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1,2403 +1,2410 @@ //===- llvm/Analysis/ScalarEvolution.h - Scalar Evolution -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // The ScalarEvolution class is an LLVM pass which can be used to analyze and // categorize scalar expressions in loops. It specializes in recognizing // general induction variables, representing them with the abstract and opaque // SCEV class. Given this analysis, trip counts of loops and other important // properties can be obtained. // // This analysis is primarily useful for induction variable substitution and // strength reduction. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_SCALAREVOLUTION_H #define LLVM_ANALYSIS_SCALAREVOLUTION_H #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" #include #include #include #include #include namespace llvm { class OverflowingBinaryOperator; class AssumptionCache; class BasicBlock; class Constant; class ConstantInt; class DataLayout; class DominatorTree; class Function; class GEPOperator; class Instruction; class LLVMContext; class Loop; class LoopInfo; class raw_ostream; class ScalarEvolution; class SCEVAddRecExpr; class SCEVUnknown; class StructType; class TargetLibraryInfo; class Type; class Value; enum SCEVTypes : unsigned short; extern bool VerifySCEV; /// This class represents an analyzed expression in the program. These are /// opaque objects that the client is not allowed to do much with directly. /// class SCEV : public FoldingSetNode { friend struct FoldingSetTrait; /// A reference to an Interned FoldingSetNodeID for this node. The /// ScalarEvolution's BumpPtrAllocator holds the data. FoldingSetNodeIDRef FastID; // The SCEV baseclass this node corresponds to const SCEVTypes SCEVType; protected: // Estimated complexity of this node's expression tree size. const unsigned short ExpressionSize; /// This field is initialized to zero and may be used in subclasses to store /// miscellaneous information. unsigned short SubclassData = 0; public: /// NoWrapFlags are bitfield indices into SubclassData. /// /// Add and Mul expressions may have no-unsigned-wrap or /// no-signed-wrap properties, which are derived from the IR /// operator. NSW is a misnomer that we use to mean no signed overflow or /// underflow. /// /// AddRec expressions may have a no-self-wraparound property if, in /// the integer domain, abs(step) * max-iteration(loop) <= /// unsigned-max(bitwidth). This means that the recurrence will never reach /// its start value if the step is non-zero. Computing the same value on /// each iteration is not considered wrapping, and recurrences with step = 0 /// are trivially . is independent of the sign of step and the /// value the add recurrence starts with. /// /// Note that NUW and NSW are also valid properties of a recurrence, and /// either implies NW. For convenience, NW will be set for a recurrence /// whenever either NUW or NSW are set. /// /// We require that the flag on a SCEV apply to the entire scope in which /// that SCEV is defined. A SCEV's scope is set of locations dominated by /// a defining location, which is in turn described by the following rules: /// * A SCEVUnknown is at the point of definition of the Value. /// * A SCEVConstant is defined at all points. /// * A SCEVAddRec is defined starting with the header of the associated /// loop. /// * All other SCEVs are defined at the earlest point all operands are /// defined. /// /// The above rules describe a maximally hoisted form (without regards to /// potential control dependence). A SCEV is defined anywhere a /// corresponding instruction could be defined in said maximally hoisted /// form. 
Note that SCEVUDivExpr (currently the only expression type which /// can trap) can be defined per these rules in regions where it would trap /// at runtime. A SCEV being defined does not require the existence of any /// instruction within the defined scope. enum NoWrapFlags { FlagAnyWrap = 0, // No guarantee. FlagNW = (1 << 0), // No self-wrap. FlagNUW = (1 << 1), // No unsigned wrap. FlagNSW = (1 << 2), // No signed wrap. NoWrapMask = (1 << 3) - 1 }; explicit SCEV(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, unsigned short ExpressionSize) : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {} SCEV(const SCEV &) = delete; SCEV &operator=(const SCEV &) = delete; SCEVTypes getSCEVType() const { return SCEVType; } /// Return the LLVM type of this SCEV expression. Type *getType() const; /// Return operands of this SCEV expression. ArrayRef operands() const; /// Return true if the expression is a constant zero. bool isZero() const; /// Return true if the expression is a constant one. bool isOne() const; /// Return true if the expression is a constant all-ones value. bool isAllOnesValue() const; /// Return true if the specified scev is negated, but not a constant. bool isNonConstantNegative() const; // Returns estimated size of the mathematical expression represented by this // SCEV. The rules of its calculation are following: // 1) Size of a SCEV without operands (like constants and SCEVUnknown) is 1; // 2) Size SCEV with operands Op1, Op2, ..., OpN is calculated by formula: // (1 + Size(Op1) + ... + Size(OpN)). // This value gives us an estimation of time we need to traverse through this // SCEV and all its operands recursively. We may use it to avoid performing // heavy transformations on SCEVs of excessive size for sake of saving the // compilation time. unsigned short getExpressionSize() const { return ExpressionSize; } /// Print out the internal representation of this scalar to the specified /// stream. This should really only be used for debugging purposes. void print(raw_ostream &OS) const; /// This method is used for debugging. void dump() const; }; // Specialize FoldingSetTrait for SCEV to avoid needing to compute // temporary FoldingSetNodeID values. template <> struct FoldingSetTrait : DefaultFoldingSetTrait { static void Profile(const SCEV &X, FoldingSetNodeID &ID) { ID = X.FastID; } static bool Equals(const SCEV &X, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID) { return ID == X.FastID; } static unsigned ComputeHash(const SCEV &X, FoldingSetNodeID &TempID) { return X.FastID.ComputeHash(); } }; inline raw_ostream &operator<<(raw_ostream &OS, const SCEV &S) { S.print(OS); return OS; } /// An object of this class is returned by queries that could not be answered. /// For example, if you ask for the number of iterations of a linked-list /// traversal loop, you will get one of these. None of the standard SCEV /// operations are valid on this class, it is just a marker. struct SCEVCouldNotCompute : public SCEV { SCEVCouldNotCompute(); /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const SCEV *S); }; /// This class represents an assumption made using SCEV expressions which can /// be checked at run-time. class SCEVPredicate : public FoldingSetNode { friend struct FoldingSetTrait; /// A reference to an Interned FoldingSetNodeID for this node. The /// ScalarEvolution's BumpPtrAllocator holds the data. 
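// Illustrative only: the NoWrapFlags values above are plain bit-field indices,
// so the usual bit tricks apply. Two compile-time checks that restate the
// layout documented by the enum (assuming the header is included as
// "llvm/Analysis/ScalarEvolution.h"):
#include "llvm/Analysis/ScalarEvolution.h"

static_assert(llvm::SCEV::FlagAnyWrap == 0, "FlagAnyWrap is the empty set");
static_assert((llvm::SCEV::FlagNW | llvm::SCEV::FlagNUW | llvm::SCEV::FlagNSW) ==
                  llvm::SCEV::NoWrapMask,
              "NoWrapMask covers exactly NW, NUW and NSW");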
FoldingSetNodeIDRef FastID; public: enum SCEVPredicateKind { P_Union, P_Compare, P_Wrap }; protected: SCEVPredicateKind Kind; ~SCEVPredicate() = default; SCEVPredicate(const SCEVPredicate &) = default; SCEVPredicate &operator=(const SCEVPredicate &) = default; public: SCEVPredicate(const FoldingSetNodeIDRef ID, SCEVPredicateKind Kind); SCEVPredicateKind getKind() const { return Kind; } /// Returns the estimated complexity of this predicate. This is roughly /// measured in the number of run-time checks required. virtual unsigned getComplexity() const { return 1; } /// Returns true if the predicate is always true. This means that no /// assumptions were made and nothing needs to be checked at run-time. virtual bool isAlwaysTrue() const = 0; /// Returns true if this predicate implies \p N. virtual bool implies(const SCEVPredicate *N) const = 0; /// Prints a textual representation of this predicate with an indentation of /// \p Depth. virtual void print(raw_ostream &OS, unsigned Depth = 0) const = 0; }; inline raw_ostream &operator<<(raw_ostream &OS, const SCEVPredicate &P) { P.print(OS); return OS; } // Specialize FoldingSetTrait for SCEVPredicate to avoid needing to compute // temporary FoldingSetNodeID values. template <> struct FoldingSetTrait : DefaultFoldingSetTrait { static void Profile(const SCEVPredicate &X, FoldingSetNodeID &ID) { ID = X.FastID; } static bool Equals(const SCEVPredicate &X, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID) { return ID == X.FastID; } static unsigned ComputeHash(const SCEVPredicate &X, FoldingSetNodeID &TempID) { return X.FastID.ComputeHash(); } }; /// This class represents an assumption that the expression LHS Pred RHS /// evaluates to true, and this can be checked at run-time. class SCEVComparePredicate final : public SCEVPredicate { /// We assume that LHS Pred RHS is true. const ICmpInst::Predicate Pred; const SCEV *LHS; const SCEV *RHS; public: SCEVComparePredicate(const FoldingSetNodeIDRef ID, const ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Implementation of the SCEVPredicate interface bool implies(const SCEVPredicate *N) const override; void print(raw_ostream &OS, unsigned Depth = 0) const override; bool isAlwaysTrue() const override; ICmpInst::Predicate getPredicate() const { return Pred; } /// Returns the left hand side of the predicate. const SCEV *getLHS() const { return LHS; } /// Returns the right hand side of the predicate. const SCEV *getRHS() const { return RHS; } /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const SCEVPredicate *P) { return P->getKind() == P_Compare; } }; /// This class represents an assumption made on an AddRec expression. Given an /// affine AddRec expression {a,+,b}, we assume that it has the nssw or nusw /// flags (defined below) in the first X iterations of the loop, where X is a /// SCEV expression returned by getPredicatedBackedgeTakenCount). /// /// Note that this does not imply that X is equal to the backedge taken /// count. This means that if we have a nusw predicate for i32 {0,+,1} with a /// predicated backedge taken count of X, we only guarantee that {0,+,1} has /// nusw in the first X iterations. {0,+,1} may still wrap in the loop if we /// have more than X iterations. class SCEVWrapPredicate final : public SCEVPredicate { public: /// Similar to SCEV::NoWrapFlags, but with slightly different semantics /// for FlagNUSW. 
The increment is considered to be signed, and a + b /// (where b is the increment) is considered to wrap if: /// zext(a + b) != zext(a) + sext(b) /// /// If Signed is a function that takes an n-bit tuple and maps to the /// integer domain as the tuples value interpreted as twos complement, /// and Unsigned a function that takes an n-bit tuple and maps to the /// integer domain as the base two value of input tuple, then a + b /// has IncrementNUSW iff: /// /// 0 <= Unsigned(a) + Signed(b) < 2^n /// /// The IncrementNSSW flag has identical semantics with SCEV::FlagNSW. /// /// Note that the IncrementNUSW flag is not commutative: if base + inc /// has IncrementNUSW, then inc + base doesn't neccessarily have this /// property. The reason for this is that this is used for sign/zero /// extending affine AddRec SCEV expressions when a SCEVWrapPredicate is /// assumed. A {base,+,inc} expression is already non-commutative with /// regards to base and inc, since it is interpreted as: /// (((base + inc) + inc) + inc) ... enum IncrementWrapFlags { IncrementAnyWrap = 0, // No guarantee. IncrementNUSW = (1 << 0), // No unsigned with signed increment wrap. IncrementNSSW = (1 << 1), // No signed with signed increment wrap // (equivalent with SCEV::NSW) IncrementNoWrapMask = (1 << 2) - 1 }; /// Convenient IncrementWrapFlags manipulation methods. [[nodiscard]] static SCEVWrapPredicate::IncrementWrapFlags clearFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, SCEVWrapPredicate::IncrementWrapFlags OffFlags) { assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!"); assert((OffFlags & IncrementNoWrapMask) == OffFlags && "Invalid flags value!"); return (SCEVWrapPredicate::IncrementWrapFlags)(Flags & ~OffFlags); } [[nodiscard]] static SCEVWrapPredicate::IncrementWrapFlags maskFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, int Mask) { assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!"); assert((Mask & IncrementNoWrapMask) == Mask && "Invalid mask value!"); return (SCEVWrapPredicate::IncrementWrapFlags)(Flags & Mask); } [[nodiscard]] static SCEVWrapPredicate::IncrementWrapFlags setFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, SCEVWrapPredicate::IncrementWrapFlags OnFlags) { assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!"); assert((OnFlags & IncrementNoWrapMask) == OnFlags && "Invalid flags value!"); return (SCEVWrapPredicate::IncrementWrapFlags)(Flags | OnFlags); } /// Returns the set of SCEVWrapPredicate no wrap flags implied by a /// SCEVAddRecExpr. [[nodiscard]] static SCEVWrapPredicate::IncrementWrapFlags getImpliedFlags(const SCEVAddRecExpr *AR, ScalarEvolution &SE); private: const SCEVAddRecExpr *AR; IncrementWrapFlags Flags; public: explicit SCEVWrapPredicate(const FoldingSetNodeIDRef ID, const SCEVAddRecExpr *AR, IncrementWrapFlags Flags); /// Returns the set assumed no overflow flags. IncrementWrapFlags getFlags() const { return Flags; } /// Implementation of the SCEVPredicate interface const SCEVAddRecExpr *getExpr() const; bool implies(const SCEVPredicate *N) const override; void print(raw_ostream &OS, unsigned Depth = 0) const override; bool isAlwaysTrue() const override; /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const SCEVPredicate *P) { return P->getKind() == P_Wrap; } }; /// This class represents a composition of other SCEV predicates, and is the /// class that most clients will interact with. This is equivalent to a /// logical "AND" of all the predicates in the union. /// /// NB! 
Unlike other SCEVPredicate sub-classes this class does not live in the /// ScalarEvolution::Preds folding set. This is why the \c add function is sound. class SCEVUnionPredicate final : public SCEVPredicate { private: using PredicateMap = DenseMap>; /// Vector with references to all predicates in this union. SmallVector Preds; /// Adds a predicate to this union. void add(const SCEVPredicate *N); public: SCEVUnionPredicate(ArrayRef Preds); const SmallVectorImpl &getPredicates() const { return Preds; } /// Implementation of the SCEVPredicate interface bool isAlwaysTrue() const override; bool implies(const SCEVPredicate *N) const override; void print(raw_ostream &OS, unsigned Depth) const override; /// We estimate the complexity of a union predicate as the size number of /// predicates in the union. unsigned getComplexity() const override { return Preds.size(); } /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const SCEVPredicate *P) { return P->getKind() == P_Union; } }; /// The main scalar evolution driver. Because client code (intentionally) /// can't do much with the SCEV objects directly, they must ask this class /// for services. class ScalarEvolution { friend class ScalarEvolutionsTest; public: /// An enum describing the relationship between a SCEV and a loop. enum LoopDisposition { LoopVariant, ///< The SCEV is loop-variant (unknown). LoopInvariant, ///< The SCEV is loop-invariant. LoopComputable ///< The SCEV varies predictably with the loop. }; /// An enum describing the relationship between a SCEV and a basic block. enum BlockDisposition { DoesNotDominateBlock, ///< The SCEV does not dominate the block. DominatesBlock, ///< The SCEV dominates the block. ProperlyDominatesBlock ///< The SCEV properly dominates the block. }; /// Convenient NoWrapFlags manipulation that hides enum casts and is /// visible in the ScalarEvolution name space. [[nodiscard]] static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask) { return (SCEV::NoWrapFlags)(Flags & Mask); } [[nodiscard]] static SCEV::NoWrapFlags setFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OnFlags) { return (SCEV::NoWrapFlags)(Flags | OnFlags); } [[nodiscard]] static SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags) { return (SCEV::NoWrapFlags)(Flags & ~OffFlags); } [[nodiscard]] static bool hasFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags TestFlags) { return TestFlags == maskFlags(Flags, TestFlags); }; ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI); ScalarEvolution(ScalarEvolution &&Arg); ~ScalarEvolution(); LLVMContext &getContext() const { return F.getContext(); } /// Test if values of the given type are analyzable within the SCEV /// framework. This primarily includes integer types, and it can optionally /// include pointer types if the ScalarEvolution class has access to /// target-specific information. bool isSCEVable(Type *Ty) const; /// Return the size in bits of the specified type, for which isSCEVable must /// return true. uint64_t getTypeSizeInBits(Type *Ty) const; /// Return a type with the same bitwidth as the given type and which /// represents how SCEV will treat the given type, for which isSCEVable must /// return true. For pointer types, this is the pointer-sized integer type. Type *getEffectiveSCEVType(Type *Ty) const; // Returns a wider type among {Ty1, Ty2}. 
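// Illustrative only: the static ScalarEvolution flag helpers above are pure
// bit manipulation, so they can be exercised without any IR. A small sketch:
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

void noWrapFlagExamples() {
  SCEV::NoWrapFlags F = SCEV::FlagAnyWrap;
  F = ScalarEvolution::setFlags(F, SCEV::FlagNUW);           // F now carries NUW
  bool HasNUW = ScalarEvolution::hasFlags(F, SCEV::FlagNUW); // true
  F = ScalarEvolution::clearFlags(F, SCEV::FlagNUW);         // back to FlagAnyWrap
  (void)HasNUW;
}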
  Type *getWiderType(Type *Ty1, Type *Ty2) const;

  /// Return true if there exists a point in the program at which both
  /// A and B could be operands to the same instruction.
  /// SCEV expressions are generally assumed to correspond to instructions
  /// which could exist in IR. In general, this requires that there exists
  /// a use point in the program where all operands dominate the use.
  ///
  /// Example:
  /// loop {
  ///   if
  ///     loop { v1 = load @global1; }
  ///   else
  ///     loop { v2 = load @global2; }
  /// }
  /// No SCEV with operands v1 and v2 can exist in this program.
  bool instructionCouldExistWithOperands(const SCEV *A, const SCEV *B);

  /// Return true if the SCEV is a scAddRecExpr or it contains
  /// scAddRecExpr. The result will be cached in HasRecMap.
  bool containsAddRecurrence(const SCEV *S);

  /// Does operation \p BinOp between \p LHS and \p RHS provably not have
  /// a signed/unsigned overflow (\p Signed)? If \p CtxI is specified, the
  /// no-overflow fact should be true in the context of this instruction.
  bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
                       const SCEV *LHS, const SCEV *RHS,
                       const Instruction *CtxI = nullptr);

  /// Parse NSW/NUW flags from add/sub/mul IR binary operation \p Op into
  /// SCEV no-wrap flags, and deduce flag[s] that aren't known yet.
  /// Does not mutate the original instruction. Returns std::nullopt if it
  /// could not deduce more precise flags than the instruction already has,
  /// otherwise returns proven flags.
  std::optional<SCEV::NoWrapFlags>
  getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO);

  /// Notify this ScalarEvolution that \p User directly uses SCEVs in \p Ops.
  void registerUser(const SCEV *User, ArrayRef<const SCEV *> Ops);

  /// Return true if the SCEV expression contains an undef value.
  bool containsUndefs(const SCEV *S) const;

  /// Return true if the SCEV expression contains a Value that has been
  /// optimised out and is now a nullptr.
  bool containsErasedValue(const SCEV *S) const;

  /// Return a SCEV expression for the full generality of the specified
  /// expression.
  const SCEV *getSCEV(Value *V);

  /// Return an existing SCEV for V if there is one, otherwise return nullptr.
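// Illustrative only: a minimal sketch of asking ScalarEvolution for the SCEV of
// a value. It assumes the caller already has a ScalarEvolution &SE for the
// enclosing function (e.g. obtained from an analysis manager) and a Value *V.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Value.h"
using namespace llvm;

const SCEV *describeValue(ScalarEvolution &SE, Value *V) {
  // Only integer (and, where supported, pointer) types are analyzable.
  if (!SE.isSCEVable(V->getType()))
    return nullptr;
  return SE.getSCEV(V); // may still be just a SCEVUnknown wrapper around V
}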
const SCEV *getExistingSCEV(Value *V); const SCEV *getConstant(ConstantInt *V); const SCEV *getConstant(const APInt &Val); const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false); const SCEV *getLosslessPtrToIntExpr(const SCEV *Op, unsigned Depth = 0); const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty); const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getVScale(Type *Ty); const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getSignExtendExprImpl(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getCastExpr(SCEVTypes Kind, const SCEV *Op, Type *Ty); const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); const SCEV *getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, unsigned Depth = 0); const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, unsigned Depth = 0) { SmallVector Ops = {LHS, RHS}; return getAddExpr(Ops, Flags, Depth); } const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, unsigned Depth = 0) { SmallVector Ops = {Op0, Op1, Op2}; return getAddExpr(Ops, Flags, Depth); } const SCEV *getMulExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, unsigned Depth = 0); const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, unsigned Depth = 0) { SmallVector Ops = {LHS, RHS}; return getMulExpr(Ops, Flags, Depth); } const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, unsigned Depth = 0) { SmallVector Ops = {Op0, Op1, Op2}; return getMulExpr(Ops, Flags, Depth); } const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getUDivExactExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getURemExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags); const SCEV *getAddRecExpr(SmallVectorImpl &Operands, const Loop *L, SCEV::NoWrapFlags Flags); const SCEV *getAddRecExpr(const SmallVectorImpl &Operands, const Loop *L, SCEV::NoWrapFlags Flags) { SmallVector NewOp(Operands.begin(), Operands.end()); return getAddRecExpr(NewOp, L, Flags); } /// Checks if \p SymbolicPHI can be rewritten as an AddRecExpr under some /// Predicates. If successful return these ; /// The function is intended to be called from PSCEV (the caller will decide /// whether to actually add the predicates and carry out the rewrites). std::optional>> createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI); /// Returns an expression for a GEP /// /// \p GEP The GEP. The indices contained in the GEP itself are ignored, /// instead we use IndexExprs. /// \p IndexExprs The expressions for the indices. 
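// Illustrative only: building composite expressions with the factory methods
// above. Assumes an existing ScalarEvolution &SE, two integer SCEVs X and Y of
// the same type that are invariant in the loop L, and a Loop *L whose induction
// we want to model.
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

const SCEV *buildExamples(ScalarEvolution &SE, const SCEV *X, const SCEV *Y,
                          const Loop *L) {
  // 2*X + Y, with no wrap information claimed.
  const SCEV *Two = SE.getConstant(X->getType(), 2);
  const SCEV *Sum = SE.getAddExpr(SE.getMulExpr(Two, X), Y);

  // The affine recurrence {Y,+,1}<L>: starts at Y and steps by 1 on every
  // iteration of L.
  const SCEV *One = SE.getConstant(X->getType(), 1);
  return SE.getAddExpr(Sum, SE.getAddRecExpr(Y, One, L, SCEV::FlagAnyWrap));
}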
const SCEV *getGEPExpr(GEPOperator *GEP, const SmallVectorImpl &IndexExprs); const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW); const SCEV *getMinMaxExpr(SCEVTypes Kind, SmallVectorImpl &Operands); const SCEV *getSequentialMinMaxExpr(SCEVTypes Kind, SmallVectorImpl &Operands); const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getSMaxExpr(SmallVectorImpl &Operands); const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getUMaxExpr(SmallVectorImpl &Operands); const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getSMinExpr(SmallVectorImpl &Operands); const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential = false); const SCEV *getUMinExpr(SmallVectorImpl &Operands, bool Sequential = false); const SCEV *getUnknown(Value *V); const SCEV *getCouldNotCompute(); /// Return a SCEV for the constant 0 of a specific type. const SCEV *getZero(Type *Ty) { return getConstant(Ty, 0); } /// Return a SCEV for the constant 1 of a specific type. const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); } /// Return a SCEV for the constant \p Power of two. const SCEV *getPowerOfTwo(Type *Ty, unsigned Power) { assert(Power < getTypeSizeInBits(Ty) && "Power out of range"); return getConstant(APInt::getOneBitSet(getTypeSizeInBits(Ty), Power)); } /// Return a SCEV for the constant -1 of a specific type. const SCEV *getMinusOne(Type *Ty) { return getConstant(Ty, -1, /*isSigned=*/true); } /// Return an expression for a TypeSize. const SCEV *getSizeOfExpr(Type *IntTy, TypeSize Size); /// Return an expression for the alloc size of AllocTy that is type IntTy const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy); /// Return an expression for the store size of StoreTy that is type IntTy const SCEV *getStoreSizeOfExpr(Type *IntTy, Type *StoreTy); /// Return an expression for offsetof on the given field with type IntTy const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo); /// Return the SCEV object corresponding to -V. const SCEV *getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); /// Return the SCEV object corresponding to ~V. const SCEV *getNotSCEV(const SCEV *V); /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1. /// /// If the LHS and RHS are pointers which don't share a common base /// (according to getPointerBase()), this returns a SCEVCouldNotCompute. /// To compute the difference between two unrelated pointers, you can /// explicitly convert the arguments using getPtrToIntExpr(), for pointer /// types that support it. const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, unsigned Depth = 0); /// Compute ceil(N / D). N and D are treated as unsigned values. /// /// Since SCEV doesn't have native ceiling division, this generates a /// SCEV expression of the following form: /// /// umin(N, 1) + floor((N - umin(N, 1)) / D) /// /// A denominator of zero or poison is handled the same way as getUDivExpr(). const SCEV *getUDivCeilSCEV(const SCEV *N, const SCEV *D); /// Return a SCEV corresponding to a conversion of the input value to the /// specified type. If the type must be extended, it is zero extended. const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth = 0); /// Return a SCEV corresponding to a conversion of the input value to the /// specified type. If the type must be extended, it is sign extended. 
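// Illustrative only: a pointer-difference sketch using getMinusSCEV, which the
// comment above notes yields SCEVCouldNotCompute for pointers without a common
// base. Assumes an existing ScalarEvolution &SE and two pointer-typed values.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Value.h"
using namespace llvm;

const SCEV *pointerDistance(ScalarEvolution &SE, Value *PtrA, Value *PtrB) {
  const SCEV *Diff = SE.getMinusSCEV(SE.getSCEV(PtrA), SE.getSCEV(PtrB));
  if (isa<SCEVCouldNotCompute>(Diff))
    return nullptr; // unrelated bases; a getPtrToIntExpr()-based fallback could be tried
  return Diff;
}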
const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth = 0); /// Return a SCEV corresponding to a conversion of the input value to the /// specified type. If the type must be extended, it is zero extended. The /// conversion must not be narrowing. const SCEV *getNoopOrZeroExtend(const SCEV *V, Type *Ty); /// Return a SCEV corresponding to a conversion of the input value to the /// specified type. If the type must be extended, it is sign extended. The /// conversion must not be narrowing. const SCEV *getNoopOrSignExtend(const SCEV *V, Type *Ty); /// Return a SCEV corresponding to a conversion of the input value to the /// specified type. If the type must be extended, it is extended with /// unspecified bits. The conversion must not be narrowing. const SCEV *getNoopOrAnyExtend(const SCEV *V, Type *Ty); /// Return a SCEV corresponding to a conversion of the input value to the /// specified type. The conversion must not be widening. const SCEV *getTruncateOrNoop(const SCEV *V, Type *Ty); /// Promote the operands to the wider of the types using zero-extension, and /// then perform a umax operation with them. const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); /// Promote the operands to the wider of the types using zero-extension, and /// then perform a umin operation with them. const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential = false); /// Promote the operands to the wider of the types using zero-extension, and /// then perform a umin operation with them. N-ary function. const SCEV *getUMinFromMismatchedTypes(SmallVectorImpl &Ops, bool Sequential = false); /// Transitively follow the chain of pointer-type operands until reaching a /// SCEV that does not have a single pointer operand. This returns a /// SCEVUnknown pointer for well-formed pointer-type expressions, but corner /// cases do exist. const SCEV *getPointerBase(const SCEV *V); /// Compute an expression equivalent to S - getPointerBase(S). const SCEV *removePointerBase(const SCEV *S); /// Return a SCEV expression for the specified value at the specified scope /// in the program. The L value specifies a loop nest to evaluate the /// expression at, where null is the top-level or a specified loop is /// immediately inside of the loop. /// /// This method can be used to compute the exit value for a variable defined /// in a loop by querying what the value will hold in the parent loop. /// /// In the case that a relevant loop exit value cannot be computed, the /// original value V is returned. const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L); /// This is a convenience function which does getSCEVAtScope(getSCEV(V), L). const SCEV *getSCEVAtScope(Value *V, const Loop *L); /// Test whether entry to the loop is protected by a conditional between LHS /// and RHS. This is used to help avoid max expressions in loop trip /// counts, and to eliminate casts. bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Test whether entry to the basic block is protected by a conditional /// between LHS and RHS. bool isBasicBlockEntryGuardedByCond(const BasicBlock *BB, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Test whether the backedge of the loop is protected by a conditional /// between LHS and RHS. This is used to eliminate casts. 
bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// A version of getTripCountFromExitCount below which always picks an /// evaluation type which can not result in overflow. const SCEV *getTripCountFromExitCount(const SCEV *ExitCount); /// Convert from an "exit count" (i.e. "backedge taken count") to a "trip /// count". A "trip count" is the number of times the header of the loop /// will execute if an exit is taken after the specified number of backedges /// have been taken. (e.g. TripCount = ExitCount + 1). Note that the /// expression can overflow if ExitCount = UINT_MAX. If EvalTy is not wide /// enough to hold the result without overflow, result unsigned wraps with /// 2s-complement semantics. ex: EC = 255 (i8), TC = 0 (i8) const SCEV *getTripCountFromExitCount(const SCEV *ExitCount, Type *EvalTy, const Loop *L); /// Returns the exact trip count of the loop if we can compute it, and /// the result is a small constant. '0' is used to represent an unknown /// or non-constant trip count. Note that a trip count is simply one more /// than the backedge taken count for the loop. unsigned getSmallConstantTripCount(const Loop *L); /// Return the exact trip count for this loop if we exit through ExitingBlock. /// '0' is used to represent an unknown or non-constant trip count. Note /// that a trip count is simply one more than the backedge taken count for /// the same exit. /// This "trip count" assumes that control exits via ExitingBlock. More /// precisely, it is the number of times that control will reach ExitingBlock /// before taking the branch. For loops with multiple exits, it may not be /// the number times that the loop header executes if the loop exits /// prematurely via another branch. unsigned getSmallConstantTripCount(const Loop *L, const BasicBlock *ExitingBlock); /// Returns the upper bound of the loop trip count as a normal unsigned /// value. /// Returns 0 if the trip count is unknown or not constant. unsigned getSmallConstantMaxTripCount(const Loop *L); /// Returns the largest constant divisor of the trip count as a normal /// unsigned value, if possible. This means that the actual trip count is /// always a multiple of the returned value. Returns 1 if the trip count is /// unknown or not guaranteed to be the multiple of a constant., Will also /// return 1 if the trip count is very large (>= 2^32). /// Note that the argument is an exit count for loop L, NOT a trip count. unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount); /// Returns the largest constant divisor of the trip count of the /// loop. Will return 1 if no trip count could be computed, or if a /// divisor could not be found. unsigned getSmallConstantTripMultiple(const Loop *L); /// Returns the largest constant divisor of the trip count of this loop as a /// normal unsigned value, if possible. This means that the actual trip /// count is always a multiple of the returned value (don't forget the trip /// count could very well be zero as well!). As explained in the comments /// for getSmallConstantTripCount, this assumes that control exits the loop /// via ExitingBlock. unsigned getSmallConstantTripMultiple(const Loop *L, const BasicBlock *ExitingBlock); /// The terms "backedge taken count" and "exit count" are used /// interchangeably to refer to the number of times the backedge of a loop /// has executed before the loop is exited. 
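// Illustrative only: querying the small-constant trip count helpers declared
// above. Assumes an existing ScalarEvolution &SE and a Loop *L.
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

void reportTripInfo(ScalarEvolution &SE, const Loop *L) {
  // 0 means "unknown or not a small constant"; otherwise this is the
  // backedge-taken count plus one.
  unsigned TC = SE.getSmallConstantTripCount(L);

  // Always a divisor of the real trip count; 1 when nothing better is known.
  unsigned Multiple = SE.getSmallConstantTripMultiple(L);
  (void)TC;
  (void)Multiple;
}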
  enum ExitCountKind {
    /// An expression exactly describing the number of times the backedge has
    /// executed when a loop is exited.
    Exact,
    /// A constant which provides an upper bound on the exact trip count.
    ConstantMaximum,
    /// An expression which provides an upper bound on the exact trip count.
    SymbolicMaximum,
  };

  /// Return the number of times the backedge executes before the given exit
  /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
  /// For a single exit loop, this value is equivalent to the result of
  /// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit)
  /// before the backedge is executed (ExitCount + 1) times. Note that there
  /// is no guarantee about *which* exit is taken on the exiting iteration.
  const SCEV *getExitCount(const Loop *L, const BasicBlock *ExitingBlock,
                           ExitCountKind Kind = Exact);

  /// If the specified loop has a predictable backedge-taken count, return it,
  /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count
  /// is the number of times the loop header will be branched to from within
  /// the loop, assuming there are no abnormal exits like exception throws.
  /// This is one less than the trip count of the loop, since it doesn't count
  /// the first iteration, when the header is branched to from outside the
  /// loop.
  ///
  /// Note that it is not valid to call this method on a loop without a
  /// loop-invariant backedge-taken count (see
  /// hasLoopInvariantBackedgeTakenCount).
  const SCEV *getBackedgeTakenCount(const Loop *L, ExitCountKind Kind = Exact);

  /// Similar to getBackedgeTakenCount, except it will add a set of
  /// SCEV predicates to Predicates that are required to be true in order for
  /// the answer to be correct. Predicates can be checked with run-time
  /// checks and can be used to perform loop versioning.
  const SCEV *getPredicatedBackedgeTakenCount(
      const Loop *L, SmallVector<const SCEVPredicate *, 4> &Predicates);

  /// When successful, this returns a SCEVConstant that is greater than or
  /// equal to (i.e. a "conservative over-approximation" of) the value returned
  /// by getBackedgeTakenCount. If such a value cannot be computed, it returns
  /// the SCEVCouldNotCompute object.
  const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L) {
    return getBackedgeTakenCount(L, ConstantMaximum);
  }

  /// When successful, this returns a SCEV that is greater than or equal
  /// to (i.e. a "conservative over-approximation" of) the value returned by
  /// getBackedgeTakenCount. If such a value cannot be computed, it returns
  /// the SCEVCouldNotCompute object.
  const SCEV *getSymbolicMaxBackedgeTakenCount(const Loop *L) {
    return getBackedgeTakenCount(L, SymbolicMaximum);
  }

  /// Return true if the backedge taken count is either the value returned by
  /// getConstantMaxBackedgeTakenCount or zero.
  bool isBackedgeTakenCountMaxOrZero(const Loop *L);

  /// Return true if the specified loop has an analyzable loop-invariant
  /// backedge-taken count.
  bool hasLoopInvariantBackedgeTakenCount(const Loop *L);

  // This method should be called by the client when it made any change that
  // would invalidate SCEV's answers, and the client wants to remove all loop
  // information held internally by ScalarEvolution. This is intended to be
  // used when the alternative to forget a loop is too expensive (i.e. large
  // loop bodies).
  void forgetAllLoops();

  /// This method should be called by the client when it has changed a loop in
  /// a way that may affect ScalarEvolution's ability to compute a trip count,
  /// or if the loop is deleted.
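// Illustrative only: distinguishing the exact and constant-max backedge-taken
// counts described above. Assumes an existing ScalarEvolution &SE and Loop *L.
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

void inspectBackedge(ScalarEvolution &SE, const Loop *L) {
  const SCEV *Exact = SE.getBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(Exact)) {
    // Loop-invariant expression for the number of backedge executions.
  }

  // Conservative upper bound; may itself be SCEVCouldNotCompute.
  const SCEV *Max = SE.getConstantMaxBackedgeTakenCount(L);
  (void)Max;
}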
This call is potentially expensive for large /// loop bodies. void forgetLoop(const Loop *L); // This method invokes forgetLoop for the outermost loop of the given loop // \p L, making ScalarEvolution forget about all this subtree. This needs to // be done whenever we make a transform that may affect the parameters of the // outer loop, such as exit counts for branches. void forgetTopmostLoop(const Loop *L); /// This method should be called by the client when it has changed a value /// in a way that may effect its value, or which may disconnect it from a /// def-use chain linking it to a loop. void forgetValue(Value *V); /// Forget LCSSA phi node V of loop L to which a new predecessor was added, /// such that it may no longer be trivial. void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V); /// Called when the client has changed the disposition of values in /// this loop. /// /// We don't have a way to invalidate per-loop dispositions. Clear and /// recompute is simpler. void forgetLoopDispositions(); /// Called when the client has changed the disposition of values in /// a loop or block. /// /// We don't have a way to invalidate per-loop/per-block dispositions. Clear /// and recompute is simpler. void forgetBlockAndLoopDispositions(Value *V = nullptr); /// Determine the minimum number of zero bits that S is guaranteed to end in /// (at every loop iteration). It is, at the same time, the minimum number /// of times S is divisible by 2. For example, given {4,+,8} it returns 2. /// If S is guaranteed to be 0, it returns the bitwidth of S. uint32_t getMinTrailingZeros(const SCEV *S); /// Returns the max constant multiple of S. APInt getConstantMultiple(const SCEV *S); // Returns the max constant multiple of S. If S is exactly 0, return 1. APInt getNonZeroConstantMultiple(const SCEV *S); /// Determine the unsigned range for a particular SCEV. /// NOTE: This returns a copy of the reference returned by getRangeRef. ConstantRange getUnsignedRange(const SCEV *S) { return getRangeRef(S, HINT_RANGE_UNSIGNED); } /// Determine the min of the unsigned range for a particular SCEV. APInt getUnsignedRangeMin(const SCEV *S) { return getRangeRef(S, HINT_RANGE_UNSIGNED).getUnsignedMin(); } /// Determine the max of the unsigned range for a particular SCEV. APInt getUnsignedRangeMax(const SCEV *S) { return getRangeRef(S, HINT_RANGE_UNSIGNED).getUnsignedMax(); } /// Determine the signed range for a particular SCEV. /// NOTE: This returns a copy of the reference returned by getRangeRef. ConstantRange getSignedRange(const SCEV *S) { return getRangeRef(S, HINT_RANGE_SIGNED); } /// Determine the min of the signed range for a particular SCEV. APInt getSignedRangeMin(const SCEV *S) { return getRangeRef(S, HINT_RANGE_SIGNED).getSignedMin(); } /// Determine the max of the signed range for a particular SCEV. APInt getSignedRangeMax(const SCEV *S) { return getRangeRef(S, HINT_RANGE_SIGNED).getSignedMax(); } /// Test if the given expression is known to be negative. bool isKnownNegative(const SCEV *S); /// Test if the given expression is known to be positive. bool isKnownPositive(const SCEV *S); /// Test if the given expression is known to be non-negative. bool isKnownNonNegative(const SCEV *S); /// Test if the given expression is known to be non-positive. bool isKnownNonPositive(const SCEV *S); /// Test if the given expression is known to be non-zero. bool isKnownNonZero(const SCEV *S); /// Splits SCEV expression \p S into two SCEVs. 
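// Illustrative only: two common follow-ups to the APIs above, a range/sign
// query and explicit invalidation after a transform. Assumes an existing
// ScalarEvolution &SE; the transform itself is left abstract.
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

void afterRewritingLoop(ScalarEvolution &SE, const Loop *L, const SCEV *S) {
  // Range and sign facts are answered from cached ConstantRanges.
  bool NonNeg = SE.isKnownNonNegative(S);
  APInt UMax = SE.getUnsignedRangeMax(S);
  (void)NonNeg;
  (void)UMax;

  // After a transform that changes the loop's exit conditions, cached trip
  // counts must be dropped explicitly.
  SE.forgetLoop(L);
}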
One of them is obtained from /// \p S by substitution of all AddRec sub-expression related to loop \p L /// with initial value of that SCEV. The second is obtained from \p S by /// substitution of all AddRec sub-expressions related to loop \p L with post /// increment of this AddRec in the loop \p L. In both cases all other AddRec /// sub-expressions (not related to \p L) remain the same. /// If the \p S contains non-invariant unknown SCEV the function returns /// CouldNotCompute SCEV in both values of std::pair. /// For example, for SCEV S={0, +, 1} + {0, +, 1} and loop L=L1 /// the function returns pair: /// first = {0, +, 1} /// second = {1, +, 1} + {0, +, 1} /// We can see that for the first AddRec sub-expression it was replaced with /// 0 (initial value) for the first element and to {1, +, 1} (post /// increment value) for the second one. In both cases AddRec expression /// related to L2 remains the same. std::pair SplitIntoInitAndPostInc(const Loop *L, const SCEV *S); /// We'd like to check the predicate on every iteration of the most dominated /// loop between loops used in LHS and RHS. /// To do this we use the following list of steps: /// 1. Collect set S all loops on which either LHS or RHS depend. /// 2. If S is non-empty /// a. Let PD be the element of S which is dominated by all other elements. /// b. Let E(LHS) be value of LHS on entry of PD. /// To get E(LHS), we should just take LHS and replace all AddRecs that are /// attached to PD on with their entry values. /// Define E(RHS) in the same way. /// c. Let B(LHS) be value of L on backedge of PD. /// To get B(LHS), we should just take LHS and replace all AddRecs that are /// attached to PD on with their backedge values. /// Define B(RHS) in the same way. /// d. Note that E(LHS) and E(RHS) are automatically available on entry of PD, /// so we can assert on that. /// e. Return true if isLoopEntryGuardedByCond(Pred, E(LHS), E(RHS)) && /// isLoopBackedgeGuardedByCond(Pred, B(LHS), B(RHS)) bool isKnownViaInduction(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Test if the given expression is known to satisfy the condition described /// by Pred, LHS, and RHS. bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Check whether the condition described by Pred, LHS, and RHS is true or /// false. If we know it, return the evaluation of this condition. If neither /// is proved, return std::nullopt. std::optional evaluatePredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Test if the given expression is known to satisfy the condition described /// by Pred, LHS, and RHS in the given Context. bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI); /// Check whether the condition described by Pred, LHS, and RHS is true or /// false in the given \p Context. If we know it, return the evaluation of /// this condition. If neither is proved, return std::nullopt. std::optional evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI); /// Test if the condition described by Pred, LHS, RHS is known to be true on /// every iteration of the loop of the recurrency LHS. bool isKnownOnEveryIteration(ICmpInst::Predicate Pred, const SCEVAddRecExpr *LHS, const SCEV *RHS); /// Information about the number of loop iterations for which a loop exit's /// branch condition evaluates to the not-taken path. 
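// Illustrative only: proving a comparison between two SCEVs with the predicate
// queries declared above. Assumes an existing ScalarEvolution &SE and two SCEVs
// of the same integer type.
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

bool provablyLess(ScalarEvolution &SE, const SCEV *LHS, const SCEV *RHS) {
  // Returns true only when LHS <s RHS holds on every execution; "don't know"
  // comes back as false.
  return SE.isKnownPredicate(ICmpInst::ICMP_SLT, LHS, RHS);
}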
This is a temporary /// pair of exact and max expressions that are eventually summarized in /// ExitNotTakenInfo and BackedgeTakenInfo. struct ExitLimit { const SCEV *ExactNotTaken; // The exit is not taken exactly this many times const SCEV *ConstantMaxNotTaken; // The exit is not taken at most this many // times const SCEV *SymbolicMaxNotTaken; // Not taken either exactly ConstantMaxNotTaken or zero times bool MaxOrZero = false; /// A set of predicate guards for this ExitLimit. The result is only valid /// if all of the predicates in \c Predicates evaluate to 'true' at /// run-time. SmallPtrSet Predicates; void addPredicate(const SCEVPredicate *P) { assert(!isa(P) && "Only add leaf predicates here!"); Predicates.insert(P); } /// Construct either an exact exit limit from a constant, or an unknown /// one from a SCEVCouldNotCompute. No other types of SCEVs are allowed /// as arguments and asserts enforce that internally. /*implicit*/ ExitLimit(const SCEV *E); ExitLimit( const SCEV *E, const SCEV *ConstantMaxNotTaken, const SCEV *SymbolicMaxNotTaken, bool MaxOrZero, ArrayRef *> PredSetList = std::nullopt); ExitLimit(const SCEV *E, const SCEV *ConstantMaxNotTaken, const SCEV *SymbolicMaxNotTaken, bool MaxOrZero, const SmallPtrSetImpl &PredSet); /// Test whether this ExitLimit contains any computed information, or /// whether it's all SCEVCouldNotCompute values. bool hasAnyInfo() const { return !isa(ExactNotTaken) || !isa(ConstantMaxNotTaken); } /// Test whether this ExitLimit contains all information. bool hasFullInfo() const { return !isa(ExactNotTaken); } }; /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of ExitCond. /// /// \p ControlsOnlyExit is true if ExitCond directly controls the only exit /// branch. In this case, we can assume that the loop exits only if the /// condition is true and can infer that failing to meet the condition prior /// to integer wraparound results in undefined behavior. /// /// If \p AllowPredicates is set, this call will try to use a minimal set of /// SCEV predicates in order to return an exact answer. ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates = false); /// A predicate is said to be monotonically increasing if may go from being /// false to being true as the loop iterates, but never the other way /// around. A predicate is said to be monotonically decreasing if may go /// from being true to being false as the loop iterates, but never the other /// way around. enum MonotonicPredicateType { MonotonicallyIncreasing, MonotonicallyDecreasing }; /// If, for all loop invariant X, the predicate "LHS `Pred` X" is /// monotonically increasing or decreasing, returns /// Some(MonotonicallyIncreasing) and Some(MonotonicallyDecreasing) /// respectively. If we could not prove either of these facts, returns /// std::nullopt. std::optional getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred); struct LoopInvariantPredicate { ICmpInst::Predicate Pred; const SCEV *LHS; const SCEV *RHS; LoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) : Pred(Pred), LHS(LHS), RHS(RHS) {} }; /// If the result of the predicate LHS `Pred` RHS is loop invariant with /// respect to L, return a LoopInvariantPredicate with LHS and RHS being /// invariants, available at L's entry. Otherwise, return std::nullopt. 
std::optional getLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI = nullptr); /// If the result of the predicate LHS `Pred` RHS is loop invariant with /// respect to L at given Context during at least first MaxIter iterations, /// return a LoopInvariantPredicate with LHS and RHS being invariants, /// available at L's entry. Otherwise, return std::nullopt. The predicate /// should be the loop's exit condition. std::optional getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter); std::optional getLoopInvariantExitCondDuringFirstIterationsImpl( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter); /// Simplify LHS and RHS in a comparison with predicate Pred. Return true /// iff any changes were made. If the operands are provably equal or /// unequal, LHS and RHS are set to the same value and Pred is set to either /// ICMP_EQ or ICMP_NE. bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS, const SCEV *&RHS, unsigned Depth = 0); /// Return the "disposition" of the given SCEV with respect to the given /// loop. LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L); /// Return true if the value of the given SCEV is unchanging in the /// specified loop. bool isLoopInvariant(const SCEV *S, const Loop *L); /// Determine if the SCEV can be evaluated at loop's entry. It is true if it /// doesn't depend on a SCEVUnknown of an instruction which is dominated by /// the header of loop L. bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L); /// Return true if the given SCEV changes value in a known way in the /// specified loop. This property being true implies that the value is /// variant in the loop AND that we can emit an expression to compute the /// value of the expression at any particular loop iteration. bool hasComputableLoopEvolution(const SCEV *S, const Loop *L); /// Return the "disposition" of the given SCEV with respect to the given /// block. BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB); /// Return true if elements that makes up the given SCEV dominate the /// specified basic block. bool dominates(const SCEV *S, const BasicBlock *BB); /// Return true if elements that makes up the given SCEV properly dominate /// the specified basic block. bool properlyDominates(const SCEV *S, const BasicBlock *BB); /// Test whether the given SCEV has Op as a direct or indirect operand. bool hasOperand(const SCEV *S, const SCEV *Op) const; /// Return the size of an element read or written by Inst. const SCEV *getElementSize(Instruction *Inst); void print(raw_ostream &OS) const; void verify() const; bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv); /// Return the DataLayout associated with the module this SCEV instance is /// operating on. const DataLayout &getDataLayout() const { return F.getParent()->getDataLayout(); } const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS); const SCEVPredicate *getComparePredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); const SCEVPredicate * getWrapPredicate(const SCEVAddRecExpr *AR, SCEVWrapPredicate::IncrementWrapFlags AddedFlags); /// Re-writes the SCEV according to the Predicates in \p A. 
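// Illustrative only: a hoisting-style legality question answered with the
// disposition queries above. Assumes an existing ScalarEvolution &SE and that
// Preheader is the block the caller intends to expand S into; real clients
// typically perform additional safety checks before expansion.
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

bool safeToMaterializeInPreheader(ScalarEvolution &SE, const SCEV *S,
                                  const Loop *L, const BasicBlock *Preheader) {
  // The expression must not vary with L, and everything it is built from must
  // properly dominate the insertion block.
  return SE.isLoopInvariant(S, L) && SE.properlyDominates(S, Preheader);
}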
const SCEV *rewriteUsingPredicate(const SCEV *S, const Loop *L, const SCEVPredicate &A); /// Tries to convert the \p S expression to an AddRec expression, /// adding additional predicates to \p Preds as required. const SCEVAddRecExpr *convertSCEVToAddRecWithPredicates( const SCEV *S, const Loop *L, SmallPtrSetImpl &Preds); /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a /// constant, and std::nullopt if it isn't. /// /// This is intended to be a cheaper version of getMinusSCEV. We can be /// frugal here since we just bail out of actually constructing and /// canonicalizing an expression in the cases where the result isn't going /// to be a constant. std::optional computeConstantDifference(const SCEV *LHS, const SCEV *RHS); /// Update no-wrap flags of an AddRec. This may drop the cached info about /// this AddRec (such as range info) in case if new flags may potentially /// sharpen it. void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags); /// Try to apply information from loop guards for \p L to \p Expr. const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L); /// Return true if the loop has no abnormal exits. That is, if the loop /// is not infinite, it must exit through an explicit edge in the CFG. /// (As opposed to either a) throwing out of the function or b) entering a /// well defined infinite loop in some callee.) bool loopHasNoAbnormalExits(const Loop *L) { return getLoopProperties(L).HasNoAbnormalExits; } /// Return true if this loop is finite by assumption. That is, /// to be infinite, it must also be undefined. bool loopIsFiniteByAssumption(const Loop *L); /// Return the set of Values that, if poison, will definitively result in S /// being poison as well. The returned set may be incomplete, i.e. there can /// be additional Values that also result in S being poison. void getPoisonGeneratingValues(SmallPtrSetImpl &Result, const SCEV *S); + /// Check whether it is poison-safe to represent the expression S using the + /// instruction I. If such a replacement is performed, the poison flags of + /// instructions in DropPoisonGeneratingInsts must be dropped. + bool canReuseInstruction( + const SCEV *S, Instruction *I, + SmallVectorImpl &DropPoisonGeneratingInsts); + class FoldID { const SCEV *Op = nullptr; const Type *Ty = nullptr; unsigned short C; public: FoldID(SCEVTypes C, const SCEV *Op, const Type *Ty) : Op(Op), Ty(Ty), C(C) { assert(Op); assert(Ty); } FoldID(unsigned short C) : C(C) {} unsigned computeHash() const { return detail::combineHashValue( C, detail::combineHashValue(reinterpret_cast(Op), reinterpret_cast(Ty))); } bool operator==(const FoldID &RHS) const { return std::tie(Op, Ty, C) == std::tie(RHS.Op, RHS.Ty, RHS.C); } }; private: /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a /// Value is deleted. class SCEVCallbackVH final : public CallbackVH { ScalarEvolution *SE; void deleted() override; void allUsesReplacedWith(Value *New) override; public: SCEVCallbackVH(Value *V, ScalarEvolution *SE = nullptr); }; friend class SCEVCallbackVH; friend class SCEVExpander; friend class SCEVUnknown; /// The function we are analyzing. Function &F; /// Does the module have any calls to the llvm.experimental.guard intrinsic /// at all? If this is false, we avoid doing work that will only help if /// thare are guards present in the IR. bool HasGuards; /// The target library information for the target we are targeting. TargetLibraryInfo &TLI; /// The tracker for \@llvm.assume intrinsics in this function. 
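// Illustrative only, and more of an assumption than the examples above: a
// sketch of how a caller might use the canReuseInstruction() API added in this
// change. Assumes an existing ScalarEvolution &SE; dropPoisonGeneratingFlags()
// is the generic llvm::Instruction helper, not something defined in this
// header, and real callers may need to drop metadata as well.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

Instruction *tryReuse(ScalarEvolution &SE, const SCEV *S, Instruction *I) {
  SmallVector<Instruction *> ToStrip;
  if (!SE.canReuseInstruction(S, I, ToStrip))
    return nullptr;
  // Per the documentation above, reuse is only poison-safe once these
  // instructions have their poison-generating flags dropped.
  for (Instruction *PI : ToStrip)
    PI->dropPoisonGeneratingFlags();
  return I;
}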
AssumptionCache &AC; /// The dominator tree. DominatorTree &DT; /// The loop information for the function we are currently analyzing. LoopInfo &LI; /// This SCEV is used to represent unknown trip counts and things. std::unique_ptr CouldNotCompute; /// The type for HasRecMap. using HasRecMapType = DenseMap; /// This is a cache to record whether a SCEV contains any scAddRecExpr. HasRecMapType HasRecMap; /// The type for ExprValueMap. using ValueSetVector = SmallSetVector; using ExprValueMapType = DenseMap; /// ExprValueMap -- This map records the original values from which /// the SCEV expr is generated from. ExprValueMapType ExprValueMap; /// The type for ValueExprMap. using ValueExprMapType = DenseMap>; /// This is a cache of the values we have analyzed so far. ValueExprMapType ValueExprMap; /// This is a cache for expressions that got folded to a different existing /// SCEV. DenseMap FoldCache; DenseMap> FoldCacheUser; /// Mark predicate values currently being processed by isImpliedCond. SmallPtrSet PendingLoopPredicates; /// Mark SCEVUnknown Phis currently being processed by getRangeRef. SmallPtrSet PendingPhiRanges; /// Mark SCEVUnknown Phis currently being processed by getRangeRefIter. SmallPtrSet PendingPhiRangesIter; // Mark SCEVUnknown Phis currently being processed by isImpliedViaMerge. SmallPtrSet PendingMerges; /// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of /// conditions dominating the backedge of a loop. bool WalkingBEDominatingConds = false; /// Set to true by isKnownPredicateViaSplitting when we're trying to prove a /// predicate by splitting it into a set of independent predicates. bool ProvingSplitPredicate = false; /// Memoized values for the getConstantMultiple DenseMap ConstantMultipleCache; /// Return the Value set from which the SCEV expr is generated. ArrayRef getSCEVValues(const SCEV *S); /// Private helper method for the getConstantMultiple method. APInt getConstantMultipleImpl(const SCEV *S); /// Information about the number of times a particular loop exit may be /// reached before exiting the loop. struct ExitNotTakenInfo { PoisoningVH ExitingBlock; const SCEV *ExactNotTaken; const SCEV *ConstantMaxNotTaken; const SCEV *SymbolicMaxNotTaken; SmallPtrSet Predicates; explicit ExitNotTakenInfo( PoisoningVH ExitingBlock, const SCEV *ExactNotTaken, const SCEV *ConstantMaxNotTaken, const SCEV *SymbolicMaxNotTaken, const SmallPtrSet &Predicates) : ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken), ConstantMaxNotTaken(ConstantMaxNotTaken), SymbolicMaxNotTaken(SymbolicMaxNotTaken), Predicates(Predicates) {} bool hasAlwaysTruePredicate() const { return Predicates.empty(); } }; /// Information about the backedge-taken count of a loop. This currently /// includes an exact count and a maximum count. /// class BackedgeTakenInfo { friend class ScalarEvolution; /// A list of computable exits and their not-taken counts. Loops almost /// never have more than one computable exit. SmallVector ExitNotTaken; /// Expression indicating the least constant maximum backedge-taken count of /// the loop that is known, or a SCEVCouldNotCompute. This expression is /// only valid if the redicates associated with all loop exits are true. const SCEV *ConstantMax = nullptr; /// Indicating if \c ExitNotTaken has an element for every exiting block in /// the loop. bool IsComplete = false; /// Expression indicating the least maximum backedge-taken count of the loop /// that is known, or a SCEVCouldNotCompute. Lazily computed on first query. 
const SCEV *SymbolicMax = nullptr; /// True iff the backedge is taken either exactly Max or zero times. bool MaxOrZero = false; bool isComplete() const { return IsComplete; } const SCEV *getConstantMax() const { return ConstantMax; } public: BackedgeTakenInfo() = default; BackedgeTakenInfo(BackedgeTakenInfo &&) = default; BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default; using EdgeExitInfo = std::pair; /// Initialize BackedgeTakenInfo from a list of exact exit counts. BackedgeTakenInfo(ArrayRef ExitCounts, bool IsComplete, const SCEV *ConstantMax, bool MaxOrZero); /// Test whether this BackedgeTakenInfo contains any computed information, /// or whether it's all SCEVCouldNotCompute values. bool hasAnyInfo() const { return !ExitNotTaken.empty() || !isa(getConstantMax()); } /// Test whether this BackedgeTakenInfo contains complete information. bool hasFullInfo() const { return isComplete(); } /// Return an expression indicating the exact *backedge-taken* /// count of the loop if it is known or SCEVCouldNotCompute /// otherwise. If execution makes it to the backedge on every /// iteration (i.e. there are no abnormal exists like exception /// throws and thread exits) then this is the number of times the /// loop header will execute minus one. /// /// If the SCEV predicate associated with the answer can be different /// from AlwaysTrue, we must add a (non null) Predicates argument. /// The SCEV predicate associated with the answer will be added to /// Predicates. A run-time check needs to be emitted for the SCEV /// predicate in order for the answer to be valid. /// /// Note that we should always know if we need to pass a predicate /// argument or not from the way the ExitCounts vector was computed. /// If we allowed SCEV predicates to be generated when populating this /// vector, this information can contain them and therefore a /// SCEVPredicate argument should be added to getExact. const SCEV *getExact(const Loop *L, ScalarEvolution *SE, SmallVector *Predicates = nullptr) const; /// Return the number of times this loop exit may fall through to the back /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via /// this block before this number of iterations, but may exit via another /// block. const SCEV *getExact(const BasicBlock *ExitingBlock, ScalarEvolution *SE) const; /// Get the constant max backedge taken count for the loop. const SCEV *getConstantMax(ScalarEvolution *SE) const; /// Get the constant max backedge taken count for the particular loop exit. const SCEV *getConstantMax(const BasicBlock *ExitingBlock, ScalarEvolution *SE) const; /// Get the symbolic max backedge taken count for the loop. const SCEV *getSymbolicMax(const Loop *L, ScalarEvolution *SE); /// Get the symbolic max backedge taken count for the particular loop exit. const SCEV *getSymbolicMax(const BasicBlock *ExitingBlock, ScalarEvolution *SE) const; /// Return true if the number of times this backedge is taken is either the /// value returned by getConstantMax or zero. bool isConstantMaxOrZero(ScalarEvolution *SE) const; }; /// Cache the backedge-taken count of the loops for this function as they /// are computed. DenseMap BackedgeTakenCounts; /// Cache the predicated backedge-taken count of the loops for this /// function as they are computed. DenseMap PredicatedBackedgeTakenCounts; /// Loops whose backedge taken counts directly use this non-constant SCEV. 
DenseMap, 4>> BECountUsers; /// This map contains entries for all of the PHI instructions that we /// attempt to compute constant evolutions for. This allows us to avoid /// potentially expensive recomputation of these properties. An instruction /// maps to null if we are unable to compute its exit value. DenseMap ConstantEvolutionLoopExitValue; /// This map contains entries for all the expressions that we attempt to /// compute getSCEVAtScope information for, which can be expensive in /// extreme cases. DenseMap, 2>> ValuesAtScopes; /// Reverse map for invalidation purposes: Stores of which SCEV and which /// loop this is the value-at-scope of. DenseMap, 2>> ValuesAtScopesUsers; /// Memoized computeLoopDisposition results. DenseMap, 2>> LoopDispositions; struct LoopProperties { /// Set to true if the loop contains no instruction that can abnormally exit /// the loop (i.e. via throwing an exception, by terminating the thread /// cleanly or by infinite looping in a called function). Strictly /// speaking, the last one is not leaving the loop, but is identical to /// leaving the loop for reasoning about undefined behavior. bool HasNoAbnormalExits; /// Set to true if the loop contains no instruction that can have side /// effects (i.e. via throwing an exception, volatile or atomic access). bool HasNoSideEffects; }; /// Cache for \c getLoopProperties. DenseMap LoopPropertiesCache; /// Return a \c LoopProperties instance for \p L, creating one if necessary. LoopProperties getLoopProperties(const Loop *L); bool loopHasNoSideEffects(const Loop *L) { return getLoopProperties(L).HasNoSideEffects; } /// Compute a LoopDisposition value. LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L); /// Memoized computeBlockDisposition results. DenseMap< const SCEV *, SmallVector, 2>> BlockDispositions; /// Compute a BlockDisposition value. BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB); /// Stores all SCEV that use a given SCEV as its direct operand. DenseMap > SCEVUsers; /// Memoized results from getRange DenseMap UnsignedRanges; /// Memoized results from getRange DenseMap SignedRanges; /// Used to parameterize getRange enum RangeSignHint { HINT_RANGE_UNSIGNED, HINT_RANGE_SIGNED }; /// Set the memoized range for the given SCEV. const ConstantRange &setRange(const SCEV *S, RangeSignHint Hint, ConstantRange CR) { DenseMap &Cache = Hint == HINT_RANGE_UNSIGNED ? UnsignedRanges : SignedRanges; auto Pair = Cache.try_emplace(S, std::move(CR)); if (!Pair.second) Pair.first->second = std::move(CR); return Pair.first->second; } /// Determine the range for a particular SCEV. /// NOTE: This returns a reference to an entry in a cache. It must be /// copied if its needed for longer. const ConstantRange &getRangeRef(const SCEV *S, RangeSignHint Hint, unsigned Depth = 0); /// Determine the range for a particular SCEV, but evaluates ranges for /// operands iteratively first. const ConstantRange &getRangeRefIter(const SCEV *S, RangeSignHint Hint); /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Step}. /// Helper for \c getRange. ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Step, const APInt &MaxBECount); /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p /// Start,+,\p Step}. 
ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth, RangeSignHint SignHint); /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p /// Step} by "factoring out" a ternary expression from the add recurrence. /// Helper called by \c getRange. ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Step, const APInt &MaxBECount); /// If the unknown expression U corresponds to a simple recurrence, return /// a constant range which represents the entire recurrence. Note that /// *add* recurrences with loop invariant steps aren't represented by /// SCEVUnknowns and thus don't use this mechanism. ConstantRange getRangeForUnknownRecurrence(const SCEVUnknown *U); /// We know that there is no SCEV for the specified value. Analyze the /// expression recursively. const SCEV *createSCEV(Value *V); /// We know that there is no SCEV for the specified value. Create a new SCEV /// for \p V iteratively. const SCEV *createSCEVIter(Value *V); /// Collect operands of \p V for which SCEV expressions should be constructed /// first. Returns a SCEV directly if it can be constructed trivially for \p /// V. const SCEV *getOperandsToCreate(Value *V, SmallVectorImpl &Ops); /// Provide the special handling we need to analyze PHI SCEVs. const SCEV *createNodeForPHI(PHINode *PN); /// Helper function called from createNodeForPHI. const SCEV *createAddRecFromPHI(PHINode *PN); /// A helper function for createAddRecFromPHI to handle simple cases. const SCEV *createSimpleAffineAddRec(PHINode *PN, Value *BEValueV, Value *StartValueV); /// Helper function called from createNodeForPHI. const SCEV *createNodeFromSelectLikePHI(PHINode *PN); /// Provide special handling for a select-like instruction (currently this /// is either a select instruction or a phi node). \p Ty is the type of the /// instruction being processed, that is assumed equivalent to /// "Cond ? TrueVal : FalseVal". std::optional createNodeForSelectOrPHIInstWithICmpInstCond(Type *Ty, ICmpInst *Cond, Value *TrueVal, Value *FalseVal); /// See if we can model this select-like instruction via umin_seq expression. const SCEV *createNodeForSelectOrPHIViaUMinSeq(Value *I, Value *Cond, Value *TrueVal, Value *FalseVal); /// Given a value \p V, which is a select-like instruction (currently this is /// either a select instruction or a phi node), which is assumed equivalent to /// Cond ? TrueVal : FalseVal /// see if we can model it as a SCEV expression. const SCEV *createNodeForSelectOrPHI(Value *V, Value *Cond, Value *TrueVal, Value *FalseVal); /// Provide the special handling we need to analyze GEP SCEVs. const SCEV *createNodeForGEP(GEPOperator *GEP); /// Implementation code for getSCEVAtScope; called at most once for each /// SCEV+Loop pair. const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L); /// Return the BackedgeTakenInfo for the given loop, lazily computing new /// values if the loop hasn't been analyzed yet. The returned result is /// guaranteed not to be predicated. BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L); /// Similar to getBackedgeTakenInfo, but will add predicates as required /// with the purpose of returning complete information. const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L); /// Compute the number of times the specified loop will iterate. /// If AllowPredicates is set, we will create new SCEV predicates as /// necessary in order to return an exact answer. 
BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L, bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if it exits via the specified block. If AllowPredicates is set, /// this call will try to use a minimal set of SCEV predicates in order to /// return an exact answer. ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates = false); /// Return a symbolic upper bound for the backedge taken count of the loop. /// This is more general than getConstantMaxBackedgeTakenCount as it returns /// an arbitrary expression as opposed to only constants. const SCEV *computeSymbolicMaxBackedgeTakenCount(const Loop *L); // Helper functions for computeExitLimitFromCond to avoid exponential time // complexity. class ExitLimitCache { // It may look like we need key on the whole (L, ExitIfTrue, // ControlsOnlyExit, AllowPredicates) tuple, but recursive calls to // computeExitLimitFromCondCached from computeExitLimitFromCondImpl only // vary the in \c ExitCond and \c ControlsOnlyExit parameters. We remember // the initial values of the other values to assert our assumption. SmallDenseMap, ExitLimit> TripCountMap; const Loop *L; bool ExitIfTrue; bool AllowPredicates; public: ExitLimitCache(const Loop *L, bool ExitIfTrue, bool AllowPredicates) : L(L), ExitIfTrue(ExitIfTrue), AllowPredicates(AllowPredicates) {} std::optional find(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates); void insert(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates, const ExitLimit &EL); }; using ExitLimitCacheTy = ExitLimitCache; ExitLimit computeExitLimitFromCondCached(ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates); ExitLimit computeExitLimitFromCondImpl(ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates); std::optional computeExitLimitFromCondFromBinOp( ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of the ICmpInst /// ExitCond and ExitIfTrue. If AllowPredicates is set, this call will try /// to use a minimal set of SCEV predicates in order to return an exact /// answer. ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, bool ExitIfTrue, bool IsSubExpr, bool AllowPredicates = false); /// Variant of previous which takes the components representing an ICmp /// as opposed to the ICmpInst itself. Note that the prior version can /// return more precise results in some cases and is preferred when caller /// has a materialized ICmp. ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, bool IsSubExpr, bool AllowPredicates = false); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a switch with a single exiting case /// to ExitingBB. ExitLimit computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, BasicBlock *ExitingBB, bool IsSubExpr); /// Compute the exit limit of a loop that is controlled by a /// "(IV >> 1) != 0" type comparison. 
We cannot compute the exact trip /// count in these cases (since SCEV has no way of expressing them), but we /// can still sometimes compute an upper bound. /// /// Return an ExitLimit for a loop whose backedge is guarded by `LHS Pred /// RHS`. ExitLimit computeShiftCompareExitLimit(Value *LHS, Value *RHS, const Loop *L, ICmpInst::Predicate Pred); /// If the loop is known to execute a constant number of times (the /// condition evolves only from constants), try to evaluate a few iterations /// of the loop until we get the exit condition gets a value of ExitWhen /// (true or false). If we cannot evaluate the exit count of the loop, /// return CouldNotCompute. const SCEV *computeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen); /// Return the number of times an exit condition comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. /// If AllowPredicates is set, this call will try to use a minimal set of /// SCEV predicates in order to return an exact answer. ExitLimit howFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr, bool AllowPredicates = false); /// Return the number of times an exit condition checking the specified /// value for nonzero will execute. If not computable, return /// CouldNotCompute. ExitLimit howFarToNonZero(const SCEV *V, const Loop *L); /// Return the number of times an exit condition containing the specified /// less-than comparison will execute. If not computable, return /// CouldNotCompute. /// /// \p isSigned specifies whether the less-than is signed. /// /// \p ControlsOnlyExit is true when the LHS < RHS condition directly controls /// the branch (loops exits only if condition is true). In this case, we can /// use NoWrapFlags to skip overflow checks. /// /// If \p AllowPredicates is set, this call will try to use a minimal set of /// SCEV predicates in order to return an exact answer. ExitLimit howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned, bool ControlsOnlyExit, bool AllowPredicates = false); ExitLimit howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned, bool IsSubExpr, bool AllowPredicates = false); /// Return a predecessor of BB (which may not be an immediate predecessor) /// which has exactly one successor from which BB is reachable, or null if /// no such block is found. std::pair getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) const; /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the given FoundCondValue value evaluates to true in given /// Context. If Context is nullptr, then the found predicate is true /// everywhere. LHS and FoundLHS may have different type width. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Value *FoundCondValue, bool Inverse, const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the given FoundCondValue value evaluates to true in given /// Context. If Context is nullptr, then the found predicate is true /// everywhere. LHS and FoundLHS must have same type width. bool isImpliedCondBalancedTypes(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *CtxI); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is /// true in given Context. 
If Context is nullptr, then the found predicate is /// true everywhere. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true in given Context. If Context is nullptr, then the found predicate is /// true everywhere. bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. Here LHS is an operation that includes FoundLHS as one of its /// arguments. bool isImpliedViaOperations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, unsigned Depth = 0); /// Test whether the condition described by Pred, LHS, and RHS is true. /// Use only simple non-recursive types of checks, such as range analysis etc. bool isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. Utility function used by isImpliedCondOperands. Tries to get /// cases like "X `sgt` 0 => X - 1 `sgt` -1". bool isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS); /// Return true if the condition denoted by \p LHS \p Pred \p RHS is implied /// by a call to @llvm.experimental.guard in \p BB. bool isImpliedViaGuard(const BasicBlock *BB, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. /// /// This routine tries to rule out certain kinds of integer overflow, and /// then tries to reason about arithmetic properties of the predicates. bool isImpliedCondOperandsViaNoOverflow(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. /// /// This routine tries to weaken the known condition basing on fact that /// FoundLHS is an AddRec. bool isImpliedCondOperandsViaAddRecStart(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *CtxI); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. /// /// This routine tries to figure out predicate for Phis which are SCEVUnknown /// if it is true for every possible incoming value from their respective /// basic blocks. 
bool isImpliedViaMerge(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, unsigned Depth); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. /// /// This routine tries to reason about shifts. bool isImpliedCondOperandsViaShift(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS); /// If we know that the specified Phi is in the header of its containing /// loop, we know the loop executes a constant number of times, and the PHI /// node is just a recurrence involving constants, fold it. Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt &BEs, const Loop *L); /// Test if the given expression is known to satisfy the condition described /// by Pred and the known constant ranges of LHS and RHS. bool isKnownPredicateViaConstantRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Try to prove the condition described by "LHS Pred RHS" by ruling out /// integer overflow. /// /// For instance, this will return true for "A s< (A + C)" if C is /// positive. bool isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Try to split Pred LHS RHS into logical conjunctions (and's) and try to /// prove them individually. bool isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Try to match the Expr as "(L + R)". bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, SCEV::NoWrapFlags &Flags); /// Forget predicated/non-predicated backedge taken counts for the given loop. void forgetBackedgeTakenCounts(const Loop *L, bool Predicated); /// Drop memoized information for all \p SCEVs. void forgetMemoizedResults(ArrayRef SCEVs); /// Helper for forgetMemoizedResults. void forgetMemoizedResultsImpl(const SCEV *S); /// Iterate over instructions in \p Worklist and their users. Erase entries /// from ValueExprMap and collect SCEV expressions in \p ToForget void visitAndClearUsers(SmallVectorImpl &Worklist, SmallPtrSetImpl &Visited, SmallVectorImpl &ToForget); /// Erase Value from ValueExprMap and ExprValueMap. void eraseValueFromMap(Value *V); /// Insert V to S mapping into ValueExprMap and ExprValueMap. void insertValueToMap(Value *V, const SCEV *S); /// Return false iff given SCEV contains a SCEVUnknown with NULL value- /// pointer. bool checkValidity(const SCEV *S) const; /// Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be /// equal to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is /// equivalent to proving no signed (resp. unsigned) wrap in /// {`Start`,+,`Step`} if `ExtendOpTy` is `SCEVSignExtendExpr` /// (resp. `SCEVZeroExtendExpr`). template bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, const Loop *L); /// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation. SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR); /// Try to prove NSW on \p AR by proving facts about conditions known on /// entry and backedge. SCEV::NoWrapFlags proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR); /// Try to prove NUW on \p AR by proving facts about conditions known on /// entry and backedge. 
SCEV::NoWrapFlags proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR); std::optional getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred); /// Return SCEV no-wrap flags that can be proven based on reasoning about /// how poison produced from no-wrap flags on this value (e.g. a nuw add) /// would trigger undefined behavior on overflow. SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V); /// Return a scope which provides an upper bound on the defining scope of /// 'S'. Specifically, return the first instruction in said bounding scope. /// Return nullptr if the scope is trivial (function entry). /// (See scope definition rules associated with flag discussion above) const Instruction *getNonTrivialDefiningScopeBound(const SCEV *S); /// Return a scope which provides an upper bound on the defining scope for /// a SCEV with the operands in Ops. The outparam Precise is set if the /// bound found is a precise bound (i.e. must be the defining scope.) const Instruction *getDefiningScopeBound(ArrayRef Ops, bool &Precise); /// Wrapper around the above for cases which don't care if the bound /// is precise. const Instruction *getDefiningScopeBound(ArrayRef Ops); /// Given two instructions in the same function, return true if we can /// prove B must execute given A executes. bool isGuaranteedToTransferExecutionTo(const Instruction *A, const Instruction *B); /// Return true if the SCEV corresponding to \p I is never poison. Proving /// this is more complex than proving that just \p I is never poison, since /// SCEV commons expressions across control flow, and you can have cases /// like: /// /// idx0 = a + b; /// ptr[idx0] = 100; /// if () { /// idx1 = a +nsw b; /// ptr[idx1] = 200; /// } /// /// where the SCEV expression (+ a b) is guaranteed to not be poison (and /// hence not sign-overflow) only if "" is true. Since both /// `idx0` and `idx1` will be mapped to the same SCEV expression, (+ a b), /// it is not okay to annotate (+ a b) with in the above example. bool isSCEVExprNeverPoison(const Instruction *I); /// This is like \c isSCEVExprNeverPoison but it specifically works for /// instructions that will get mapped to SCEV add recurrences. Return true /// if \p I will never generate poison under the assumption that \p I is an /// add recurrence on the loop \p L. bool isAddRecNeverPoison(const Instruction *I, const Loop *L); /// Similar to createAddRecFromPHI, but with the additional flexibility of /// suggesting runtime overflow checks in case casts are encountered. /// If successful, the analysis records that for this loop, \p SymbolicPHI, /// which is the UnknownSCEV currently representing the PHI, can be rewritten /// into an AddRec, assuming some predicates; The function then returns the /// AddRec and the predicates as a pair, and caches this pair in /// PredicatedSCEVRewrites. /// If the analysis is not successful, a mapping from the \p SymbolicPHI to /// itself (with no predicates) is recorded, and a nullptr with an empty /// predicates vector is returned as a pair. std::optional>> createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI); /// Compute the maximum backedge count based on the range of values /// permitted by Start, End, and Stride. This is for loops of the form /// {Start, +, Stride} LT End. /// /// Preconditions: /// * the induction variable is known to be positive. /// * the induction variable is assumed not to overflow (i.e. 
either it /// actually doesn't, or we'd have to immediately execute UB) /// We *don't* assert these preconditions so please be careful. const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride, const SCEV *End, unsigned BitWidth, bool IsSigned); /// Verify if an linear IV with positive stride can overflow when in a /// less-than comparison, knowing the invariant term of the comparison, /// the stride. bool canIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned); /// Verify if an linear IV with negative stride can overflow when in a /// greater-than comparison, knowing the invariant term of the comparison, /// the stride. bool canIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned); /// Get add expr already created or create a new one. const SCEV *getOrCreateAddExpr(ArrayRef Ops, SCEV::NoWrapFlags Flags); /// Get mul expr already created or create a new one. const SCEV *getOrCreateMulExpr(ArrayRef Ops, SCEV::NoWrapFlags Flags); // Get addrec expr already created or create a new one. const SCEV *getOrCreateAddRecExpr(ArrayRef Ops, const Loop *L, SCEV::NoWrapFlags Flags); /// Return x if \p Val is f(x) where f is a 1-1 function. const SCEV *stripInjectiveFunctions(const SCEV *Val) const; /// Find all of the loops transitively used in \p S, and fill \p LoopsUsed. /// A loop is considered "used" by an expression if it contains /// an add rec on said loop. void getUsedLoops(const SCEV *S, SmallPtrSetImpl &LoopsUsed); /// Try to match the pattern generated by getURemExpr(A, B). If successful, /// Assign A and B to LHS and RHS, respectively. bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS); /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in /// `UniqueSCEVs`. Return if found, else nullptr. SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef Ops); /// Get reachable blocks in this function, making limited use of SCEV /// reasoning about conditions. void getReachableBlocks(SmallPtrSetImpl &Reachable, Function &F); /// Return the given SCEV expression with a new set of operands. /// This preserves the origial nowrap flags. const SCEV *getWithOperands(const SCEV *S, SmallVectorImpl &NewOps); FoldingSet UniqueSCEVs; FoldingSet UniquePreds; BumpPtrAllocator SCEVAllocator; /// This maps loops to a list of addrecs that directly use said loop. DenseMap> LoopUsers; /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression /// they can be rewritten into under certain predicates. DenseMap, std::pair>> PredicatedSCEVRewrites; /// Set of AddRecs for which proving NUW via an induction has already been /// tried. SmallPtrSet UnsignedWrapViaInductionTried; /// Set of AddRecs for which proving NSW via an induction has already been /// tried. SmallPtrSet SignedWrapViaInductionTried; /// The head of a linked list of all SCEVUnknown values that have been /// allocated. This is used by releaseMemory to locate them all and call /// their destructors. SCEVUnknown *FirstUnknown = nullptr; }; /// Analysis pass that exposes the \c ScalarEvolution for a function. class ScalarEvolutionAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; static AnalysisKey Key; public: using Result = ScalarEvolution; ScalarEvolution run(Function &F, FunctionAnalysisManager &AM); }; /// Verifier pass for the \c ScalarEvolutionAnalysis results. 
class ScalarEvolutionVerifierPass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); static bool isRequired() { return true; } }; /// Printer pass for the \c ScalarEvolutionAnalysis results. class ScalarEvolutionPrinterPass : public PassInfoMixin { raw_ostream &OS; public: explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); static bool isRequired() { return true; } }; class ScalarEvolutionWrapperPass : public FunctionPass { std::unique_ptr SE; public: static char ID; ScalarEvolutionWrapperPass(); ScalarEvolution &getSE() { return *SE; } const ScalarEvolution &getSE() const { return *SE; } bool runOnFunction(Function &F) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &AU) const override; void print(raw_ostream &OS, const Module * = nullptr) const override; void verifyAnalysis() const override; }; /// An interface layer with SCEV used to manage how we see SCEV expressions /// for values in the context of existing predicates. We can add new /// predicates, but we cannot remove them. /// /// This layer has multiple purposes: /// - provides a simple interface for SCEV versioning. /// - guarantees that the order of transformations applied on a SCEV /// expression for a single Value is consistent across two different /// getSCEV calls. This means that, for example, once we've obtained /// an AddRec expression for a certain value through expression /// rewriting, we will continue to get an AddRec expression for that /// Value. /// - lowers the number of expression rewrites. class PredicatedScalarEvolution { public: PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L); const SCEVPredicate &getPredicate() const; /// Returns the SCEV expression of V, in the context of the current SCEV /// predicate. The order of transformations applied on the expression of V /// returned by ScalarEvolution is guaranteed to be preserved, even when /// adding new predicates. const SCEV *getSCEV(Value *V); /// Get the (predicated) backedge count for the analyzed loop. const SCEV *getBackedgeTakenCount(); /// Adds a new predicate. void addPredicate(const SCEVPredicate &Pred); /// Attempts to produce an AddRecExpr for V by adding additional SCEV /// predicates. If we can't transform the expression into an AddRecExpr we /// return nullptr and not add additional SCEV predicates to the current /// context. const SCEVAddRecExpr *getAsAddRec(Value *V); /// Proves that V doesn't overflow by adding SCEV predicate. void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags); /// Returns true if we've proved that V doesn't wrap by means of a SCEV /// predicate. bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags); /// Returns the ScalarEvolution analysis used. ScalarEvolution *getSE() const { return &SE; } /// We need to explicitly define the copy constructor because of FlagsMap. PredicatedScalarEvolution(const PredicatedScalarEvolution &); /// Print the SCEV mappings done by the Predicated Scalar Evolution. /// The printed text is indented by \p Depth. void print(raw_ostream &OS, unsigned Depth) const; /// Check if \p AR1 and \p AR2 are equal, while taking into account /// Equal predicates in Preds. bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const; private: /// Increments the version number of the predicate. This needs to be called /// every time the SCEV predicate changes. 
void updateGeneration(); /// Holds a SCEV and the version number of the SCEV predicate used to /// perform the rewrite of the expression. using RewriteEntry = std::pair; /// Maps a SCEV to the rewrite result of that SCEV at a certain version /// number. If this number doesn't match the current Generation, we will /// need to do a rewrite. To preserve the transformation order of previous /// rewrites, we will rewrite the previous result instead of the original /// SCEV. DenseMap RewriteMap; /// Records what NoWrap flags we've added to a Value *. ValueMap FlagsMap; /// The ScalarEvolution analysis. ScalarEvolution &SE; /// The analyzed Loop. const Loop &L; /// The SCEVPredicate that forms our context. We will rewrite all /// expressions assuming that this predicate true. std::unique_ptr Preds; /// Marks the version of the SCEV predicate used. When rewriting a SCEV /// expression we mark it with the version of the predicate. We use this to /// figure out if the predicate has changed from the last rewrite of the /// SCEV. If so, we need to perform a new rewrite. unsigned Generation = 0; /// The backedge taken count. const SCEV *BackedgeCount = nullptr; }; template <> struct DenseMapInfo { static inline ScalarEvolution::FoldID getEmptyKey() { ScalarEvolution::FoldID ID(0); return ID; } static inline ScalarEvolution::FoldID getTombstoneKey() { ScalarEvolution::FoldID ID(1); return ID; } static unsigned getHashValue(const ScalarEvolution::FoldID &Val) { return Val.computeHash(); } static bool isEqual(const ScalarEvolution::FoldID &LHS, const ScalarEvolution::FoldID &RHS) { return LHS == RHS; } }; } // end namespace llvm #endif // LLVM_ANALYSIS_SCALAREVOLUTION_H diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp index 2acb45837c48..4b2db80bc1ec 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1,15369 +1,15431 @@ //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains the implementation of the scalar evolution analysis // engine, which is used primarily to analyze expressions involving induction // variables in loops. // // There are several aspects to this library. First is the representation of // scalar expressions, which are represented as subclasses of the SCEV class. // These classes are used to represent certain types of subexpressions that we // can handle. We only create one SCEV of a particular shape, so // pointer-comparisons for equality are legal. // // One important aspect of the SCEV objects is that they are never cyclic, even // if there is a cycle in the dataflow for an expression (ie, a PHI node). If // the PHI node is one of the idioms that we can represent (e.g., a polynomial // recurrence) then we represent it directly as a recurrence node, otherwise we // represent it as a SCEVUnknown node. // // In addition to being able to represent expressions of various types, we also // have folders that are used to build the *canonical* representation for a // particular expression. These folders are capable of using a variety of // rewrite rules to simplify the expressions. 
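// A minimal illustration of that canonicalization (the IR names %iv and
// %loop below are hypothetical, not taken from this file): for an induction
// variable %iv that starts at 0 and is incremented by 1 on each iteration of
// %loop, both the PHI %iv itself and a syntactically different value such as
// ((%iv + 2) - 2) fold to the same chain-of-recurrences expression
// {0,+,1}<%loop>, so the two getSCEV() results compare equal by pointer.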
// // Once the folders are defined, we can implement the more interesting // higher-level code, such as the code that recognizes PHI nodes of various // types, computes the execution count of a loop, etc. // // TODO: We should use these routines and value representations to implement // dependence analysis! // //===----------------------------------------------------------------------===// // // There are several good references for the techniques used in this analysis. // // Chains of recurrences -- a method to expedite the evaluation // of closed-form functions // Olaf Bachmann, Paul S. Wang, Eugene V. Zima // // On computational properties of chains of recurrences // Eugene V. Zima // // Symbolic Evaluation of Chains of Recurrences for Loop Optimization // Robert A. van Engelen // // Efficient Symbolic Analysis for Optimizing Compilers // Robert A. van Engelen // // Using the chains of recurrences algebra for data dependence testing and // induction variable substitution // MS Thesis, Johnie Birch // //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include #include #include #include #include #include using namespace llvm; using namespace PatternMatch; #define DEBUG_TYPE "scalar-evolution" STATISTIC(NumExitCountsComputed, "Number of loop exits with predictable exit counts"); STATISTIC(NumExitCountsNotComputed, "Number of loop exits without predictable 
exit counts"); STATISTIC(NumBruteForceTripCountsComputed, "Number of loops with trip counts computed by force"); #ifdef EXPENSIVE_CHECKS bool llvm::VerifySCEV = true; #else bool llvm::VerifySCEV = false; #endif static cl::opt MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, cl::desc("Maximum number of iterations SCEV will " "symbolically execute a constant " "derived loop"), cl::init(100)); static cl::opt VerifySCEVOpt( "verify-scev", cl::Hidden, cl::location(VerifySCEV), cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); static cl::opt VerifySCEVStrict( "verify-scev-strict", cl::Hidden, cl::desc("Enable stricter verification with -verify-scev is passed")); static cl::opt VerifyIR( "scev-verify-ir", cl::Hidden, cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"), cl::init(false)); static cl::opt MulOpsInlineThreshold( "scev-mulops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining multiplication operands into a SCEV"), cl::init(32)); static cl::opt AddOpsInlineThreshold( "scev-addops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining addition operands into a SCEV"), cl::init(500)); static cl::opt MaxSCEVCompareDepth( "scalar-evolution-max-scev-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive SCEV complexity comparisons"), cl::init(32)); static cl::opt MaxSCEVOperationsImplicationDepth( "scalar-evolution-max-scev-operations-implication-depth", cl::Hidden, cl::desc("Maximum depth of recursive SCEV operations implication analysis"), cl::init(2)); static cl::opt MaxValueCompareDepth( "scalar-evolution-max-value-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive value complexity comparisons"), cl::init(2)); static cl::opt MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden, cl::desc("Maximum depth of recursive arithmetics"), cl::init(32)); static cl::opt MaxConstantEvolvingDepth( "scalar-evolution-max-constant-evolving-depth", cl::Hidden, cl::desc("Maximum depth of recursive constant evolving"), cl::init(32)); static cl::opt MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden, cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"), cl::init(8)); static cl::opt MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, cl::desc("Max coefficients in AddRec during evolving"), cl::init(8)); static cl::opt HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden, cl::desc("Size of the expression which is considered huge"), cl::init(4096)); static cl::opt RangeIterThreshold( "scev-range-iter-threshold", cl::Hidden, cl::desc("Threshold for switching to iteratively computing SCEV ranges"), cl::init(32)); static cl::opt ClassifyExpressions("scalar-evolution-classify-expressions", cl::Hidden, cl::init(true), cl::desc("When printing analysis, include information on every instruction")); static cl::opt UseExpensiveRangeSharpening( "scalar-evolution-use-expensive-range-sharpening", cl::Hidden, cl::init(false), cl::desc("Use more powerful methods of sharpening expression ranges. 
May " "be costly in terms of compile time")); static cl::opt MaxPhiSCCAnalysisSize( "scalar-evolution-max-scc-analysis-depth", cl::Hidden, cl::desc("Maximum amount of nodes to process while searching SCEVUnknown " "Phi strongly connected components"), cl::init(8)); static cl::opt EnableFiniteLoopControl("scalar-evolution-finite-loop", cl::Hidden, cl::desc("Handle <= and >= in finite loops"), cl::init(true)); static cl::opt UseContextForNoWrapFlagInference( "scalar-evolution-use-context-for-no-wrap-flag-strenghening", cl::Hidden, cl::desc("Infer nuw/nsw flags using context where suitable"), cl::init(true)); //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Implementation of the SCEV class. // #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } #endif void SCEV::print(raw_ostream &OS) const { switch (getSCEVType()) { case scConstant: cast(this)->getValue()->printAsOperand(OS, false); return; case scVScale: OS << "vscale"; return; case scPtrToInt: { const SCEVPtrToIntExpr *PtrToInt = cast(this); const SCEV *Op = PtrToInt->getOperand(); OS << "(ptrtoint " << *Op->getType() << " " << *Op << " to " << *PtrToInt->getType() << ")"; return; } case scTruncate: { const SCEVTruncateExpr *Trunc = cast(this); const SCEV *Op = Trunc->getOperand(); OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Trunc->getType() << ")"; return; } case scZeroExtend: { const SCEVZeroExtendExpr *ZExt = cast(this); const SCEV *Op = ZExt->getOperand(); OS << "(zext " << *Op->getType() << " " << *Op << " to " << *ZExt->getType() << ")"; return; } case scSignExtend: { const SCEVSignExtendExpr *SExt = cast(this); const SCEV *Op = SExt->getOperand(); OS << "(sext " << *Op->getType() << " " << *Op << " to " << *SExt->getType() << ")"; return; } case scAddRecExpr: { const SCEVAddRecExpr *AR = cast(this); OS << "{" << *AR->getOperand(0); for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) OS << ",+," << *AR->getOperand(i); OS << "}<"; if (AR->hasNoUnsignedWrap()) OS << "nuw><"; if (AR->hasNoSignedWrap()) OS << "nsw><"; if (AR->hasNoSelfWrap() && !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) OS << "nw><"; AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ">"; return; } case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: case scSequentialUMinExpr: { const SCEVNAryExpr *NAry = cast(this); const char *OpStr = nullptr; switch (NAry->getSCEVType()) { case scAddExpr: OpStr = " + "; break; case scMulExpr: OpStr = " * "; break; case scUMaxExpr: OpStr = " umax "; break; case scSMaxExpr: OpStr = " smax "; break; case scUMinExpr: OpStr = " umin "; break; case scSMinExpr: OpStr = " smin "; break; case scSequentialUMinExpr: OpStr = " umin_seq "; break; default: llvm_unreachable("There are no other nary expression types."); } OS << "("; ListSeparator LS(OpStr); for (const SCEV *Op : NAry->operands()) OS << LS << *Op; OS << ")"; switch (NAry->getSCEVType()) { case scAddExpr: case scMulExpr: if (NAry->hasNoUnsignedWrap()) OS << ""; if (NAry->hasNoSignedWrap()) OS << ""; break; default: // Nothing to print for other nary expressions. 
break; } return; } case scUDivExpr: { const SCEVUDivExpr *UDiv = cast(this); OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")"; return; } case scUnknown: cast(this)->getValue()->printAsOperand(OS, false); return; case scCouldNotCompute: OS << "***COULDNOTCOMPUTE***"; return; } llvm_unreachable("Unknown SCEV kind!"); } Type *SCEV::getType() const { switch (getSCEVType()) { case scConstant: return cast(this)->getType(); case scVScale: return cast(this)->getType(); case scPtrToInt: case scTruncate: case scZeroExtend: case scSignExtend: return cast(this)->getType(); case scAddRecExpr: return cast(this)->getType(); case scMulExpr: return cast(this)->getType(); case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: return cast(this)->getType(); case scSequentialUMinExpr: return cast(this)->getType(); case scAddExpr: return cast(this)->getType(); case scUDivExpr: return cast(this)->getType(); case scUnknown: return cast(this)->getType(); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } ArrayRef SCEV::operands() const { switch (getSCEVType()) { case scConstant: case scVScale: case scUnknown: return {}; case scPtrToInt: case scTruncate: case scZeroExtend: case scSignExtend: return cast(this)->operands(); case scAddRecExpr: case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: case scSequentialUMinExpr: return cast(this)->operands(); case scUDivExpr: return cast(this)->operands(); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } bool SCEV::isZero() const { if (const SCEVConstant *SC = dyn_cast(this)) return SC->getValue()->isZero(); return false; } bool SCEV::isOne() const { if (const SCEVConstant *SC = dyn_cast(this)) return SC->getValue()->isOne(); return false; } bool SCEV::isAllOnesValue() const { if (const SCEVConstant *SC = dyn_cast(this)) return SC->getValue()->isMinusOne(); return false; } bool SCEV::isNonConstantNegative() const { const SCEVMulExpr *Mul = dyn_cast(this); if (!Mul) return false; // If there is a constant factor, it will be first. const SCEVConstant *SC = dyn_cast(Mul->getOperand(0)); if (!SC) return false; // Return true if the value is negative, this matches things like (-42 * V). 
return SC->getAPInt().isNegative(); } SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {} bool SCEVCouldNotCompute::classof(const SCEV *S) { return S->getSCEVType() == scCouldNotCompute; } const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { FoldingSetNodeID ID; ID.AddInteger(scConstant); ID.AddPointer(V); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); return S; } const SCEV *ScalarEvolution::getConstant(const APInt &Val) { return getConstant(ConstantInt::get(getContext(), Val)); } const SCEV * ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { IntegerType *ITy = cast(getEffectiveSCEVType(Ty)); return getConstant(ConstantInt::get(ITy, V, isSigned)); } const SCEV *ScalarEvolution::getVScale(Type *Ty) { FoldingSetNodeID ID; ID.AddInteger(scVScale); ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVVScale(ID.Intern(SCEVAllocator), Ty); UniqueSCEVs.InsertNode(S, IP); return S; } SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, const SCEV *op, Type *ty) : SCEV(ID, SCEVTy, computeExpressionSize(op)), Op(op), Ty(ty) {} SCEVPtrToIntExpr::SCEVPtrToIntExpr(const FoldingSetNodeIDRef ID, const SCEV *Op, Type *ITy) : SCEVCastExpr(ID, scPtrToInt, Op, ITy) { assert(getOperand()->getType()->isPointerTy() && Ty->isIntegerTy() && "Must be a non-bit-width-changing pointer-to-integer cast!"); } SCEVIntegralCastExpr::SCEVIntegralCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, const SCEV *op, Type *ty) : SCEVCastExpr(ID, SCEVTy, op, ty) {} SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVIntegralCastExpr(ID, scTruncate, op, ty) { assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate non-integer value!"); } SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVIntegralCastExpr(ID, scZeroExtend, op, ty) { assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot zero extend non-integer value!"); } SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) : SCEVIntegralCastExpr(ID, scSignExtend, op, ty) { assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot sign extend non-integer value!"); } void SCEVUnknown::deleted() { // Clear this SCEVUnknown from various maps. SE->forgetMemoizedResults(this); // Remove this SCEVUnknown from the uniquing map. SE->UniqueSCEVs.RemoveNode(this); // Release the value. setValPtr(nullptr); } void SCEVUnknown::allUsesReplacedWith(Value *New) { // Clear this SCEVUnknown from various maps. SE->forgetMemoizedResults(this); // Remove this SCEVUnknown from the uniquing map. SE->UniqueSCEVs.RemoveNode(this); // Replace the value pointer in case someone is still using this SCEVUnknown. setValPtr(New); } //===----------------------------------------------------------------------===// // SCEV Utilities //===----------------------------------------------------------------------===// /// Compare the two values \p LV and \p RV in terms of their "complexity" where /// "complexity" is a partial (and somewhat ad-hoc) relation used to order /// operands in SCEV expressions. 
\p EqCache is a set of pairs of values that /// have been previously deemed to be "equally complex" by this routine. It is /// intended to avoid exponential time complexity in cases like: /// /// %a = f(%x, %y) /// %b = f(%a, %a) /// %c = f(%b, %b) /// /// %d = f(%x, %y) /// %e = f(%d, %d) /// %f = f(%e, %e) /// /// CompareValueComplexity(%f, %c) /// /// Since we do not continue running this routine on expression trees once we /// have seen unequal values, there is no need to track them in the cache. static int CompareValueComplexity(EquivalenceClasses &EqCacheValue, const LoopInfo *const LI, Value *LV, Value *RV, unsigned Depth) { if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV)) return 0; // Order pointer values after integer values. This helps SCEVExpander form // GEPs. bool LIsPointer = LV->getType()->isPointerTy(), RIsPointer = RV->getType()->isPointerTy(); if (LIsPointer != RIsPointer) return (int)LIsPointer - (int)RIsPointer; // Compare getValueID values. unsigned LID = LV->getValueID(), RID = RV->getValueID(); if (LID != RID) return (int)LID - (int)RID; // Sort arguments by their position. if (const auto *LA = dyn_cast(LV)) { const auto *RA = cast(RV); unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); return (int)LArgNo - (int)RArgNo; } if (const auto *LGV = dyn_cast(LV)) { const auto *RGV = cast(RV); const auto IsGVNameSemantic = [&](const GlobalValue *GV) { auto LT = GV->getLinkage(); return !(GlobalValue::isPrivateLinkage(LT) || GlobalValue::isInternalLinkage(LT)); }; // Use the names to distinguish the two values, but only if the // names are semantically important. if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV)) return LGV->getName().compare(RGV->getName()); } // For instructions, compare their loop depth, and their operand count. This // is pretty loose. if (const auto *LInst = dyn_cast(LV)) { const auto *RInst = cast(RV); // Compare loop depths. const BasicBlock *LParent = LInst->getParent(), *RParent = RInst->getParent(); if (LParent != RParent) { unsigned LDepth = LI->getLoopDepth(LParent), RDepth = LI->getLoopDepth(RParent); if (LDepth != RDepth) return (int)LDepth - (int)RDepth; } // Compare the number of operands. unsigned LNumOps = LInst->getNumOperands(), RNumOps = RInst->getNumOperands(); if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; for (unsigned Idx : seq(LNumOps)) { int Result = CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx), RInst->getOperand(Idx), Depth + 1); if (Result != 0) return Result; } } EqCacheValue.unionSets(LV, RV); return 0; } // Return negative, zero, or positive, if LHS is less than, equal to, or greater // than RHS, respectively. A three-way result allows recursive comparisons to be // more efficient. // If the max analysis depth was reached, return std::nullopt, assuming we do // not know if they are equivalent for sure. static std::optional CompareSCEVComplexity(EquivalenceClasses &EqCacheSCEV, EquivalenceClasses &EqCacheValue, const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, DominatorTree &DT, unsigned Depth = 0) { // Fast-path: SCEVs are uniqued so we can do a quick equality check. if (LHS == RHS) return 0; // Primarily, sort the SCEVs by their getSCEVType(). 
SCEVTypes LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); if (LType != RType) return (int)LType - (int)RType; if (EqCacheSCEV.isEquivalent(LHS, RHS)) return 0; if (Depth > MaxSCEVCompareDepth) return std::nullopt; // Aside from the getSCEVType() ordering, the particular ordering // isn't very important except that it's beneficial to be consistent, // so that (a + b) and (b + a) don't end up as different expressions. switch (LType) { case scUnknown: { const SCEVUnknown *LU = cast(LHS); const SCEVUnknown *RU = cast(RHS); int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(), RU->getValue(), Depth + 1); if (X == 0) EqCacheSCEV.unionSets(LHS, RHS); return X; } case scConstant: { const SCEVConstant *LC = cast(LHS); const SCEVConstant *RC = cast(RHS); // Compare constant values. const APInt &LA = LC->getAPInt(); const APInt &RA = RC->getAPInt(); unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); if (LBitWidth != RBitWidth) return (int)LBitWidth - (int)RBitWidth; return LA.ult(RA) ? -1 : 1; } case scVScale: { const auto *LTy = cast(cast(LHS)->getType()); const auto *RTy = cast(cast(RHS)->getType()); return LTy->getBitWidth() - RTy->getBitWidth(); } case scAddRecExpr: { const SCEVAddRecExpr *LA = cast(LHS); const SCEVAddRecExpr *RA = cast(RHS); // There is always a dominance between two recs that are used by one SCEV, // so we can safely sort recs by loop header dominance. We require such // order in getAddExpr. const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); if (LLoop != RLoop) { const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader(); assert(LHead != RHead && "Two loops share the same header?"); if (DT.dominates(LHead, RHead)) return 1; assert(DT.dominates(RHead, LHead) && "No dominance between recurrences used by one SCEV?"); return -1; } [[fallthrough]]; } case scTruncate: case scZeroExtend: case scSignExtend: case scPtrToInt: case scAddExpr: case scMulExpr: case scUDivExpr: case scSMaxExpr: case scUMaxExpr: case scSMinExpr: case scUMinExpr: case scSequentialUMinExpr: { ArrayRef LOps = LHS->operands(); ArrayRef ROps = RHS->operands(); // Lexicographically compare n-ary-like expressions. unsigned LNumOps = LOps.size(), RNumOps = ROps.size(); if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; for (unsigned i = 0; i != LNumOps; ++i) { auto X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LOps[i], ROps[i], DT, Depth + 1); if (X != 0) return X; } EqCacheSCEV.unionSets(LHS, RHS); return 0; } case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } /// Given a list of SCEV objects, order them by their complexity, and group /// objects of the same complexity together by value. When this routine is /// finished, we know that any duplicates in the vector are consecutive and that /// complexity is monotonically increasing. /// /// Note that we go take special precautions to ensure that we get deterministic /// results from this routine. In other words, we don't want the results of /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. static void GroupByComplexity(SmallVectorImpl &Ops, LoopInfo *LI, DominatorTree &DT) { if (Ops.size() < 2) return; // Noop EquivalenceClasses EqCacheSCEV; EquivalenceClasses EqCacheValue; // Whether LHS has provably less complexity than RHS. 
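  // Note that a std::nullopt result from CompareSCEVComplexity (the maximum
  // comparison depth was reached) is treated below as "not provably less
  // complex", so such pairs are simply considered equivalent by the stable
  // sort.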
auto IsLessComplex = [&](const SCEV *LHS, const SCEV *RHS) { auto Complexity = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT); return Complexity && *Complexity < 0; }; if (Ops.size() == 2) { // This is the common case, which also happens to be trivially simple. // Special case it. const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; if (IsLessComplex(RHS, LHS)) std::swap(LHS, RHS); return; } // Do the rough sort by complexity. llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) { return IsLessComplex(LHS, RHS); }); // Now that we are sorted by complexity, group elements of the same // complexity. Note that this is, at worst, N^2, but the vector is likely to // be extremely short in practice. Note that we take this approach because we // do not want to depend on the addresses of the objects we are grouping. for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) { const SCEV *S = Ops[i]; unsigned Complexity = S->getSCEVType(); // If there are any objects of the same complexity and same value as this // one, group them. for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) { if (Ops[j] == S) { // Found a duplicate. // Move it to immediately after i'th element. std::swap(Ops[i+1], Ops[j]); ++i; // no need to rescan it. if (i == e-2) return; // Done! } } } } /// Returns true if \p Ops contains a huge SCEV (the subtree of S contains at /// least HugeExprThreshold nodes). static bool hasHugeExpression(ArrayRef Ops) { return any_of(Ops, [](const SCEV *S) { return S->getExpressionSize() >= HugeExprThreshold; }); } //===----------------------------------------------------------------------===// // Simple SCEV method implementations //===----------------------------------------------------------------------===// /// Compute BC(It, K). The result has width W. Assume, K > 0. static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, ScalarEvolution &SE, Type *ResultTy) { // Handle the simplest case efficiently. if (K == 1) return SE.getTruncateOrZeroExtend(It, ResultTy); // We are using the following formula for BC(It, K): // // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K! // // Suppose, W is the bitwidth of the return value. We must be prepared for // overflow. Hence, we must assure that the result of our computation is // equal to the accurate one modulo 2^W. Unfortunately, division isn't // safe in modular arithmetic. // // However, this code doesn't use exactly that formula; the formula it uses // is something like the following, where T is the number of factors of 2 in // K! (i.e. trailing zeros in the binary representation of K!), and ^ is // exponentiation: // // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T) // // This formula is trivially equivalent to the previous formula. However, // this formula can be implemented much more efficiently. The trick is that // K! / 2^T is odd, and exact division by an odd number *is* safe in modular // arithmetic. To do exact division in modular arithmetic, all we have // to do is multiply by the inverse. Therefore, this step can be done at // width W. // // The next issue is how to safely do the division by 2^T. The way this // is done is by doing the multiplication step at a width of at least W + T // bits. This way, the bottom W+T bits of the product are accurate. Then, // when we perform the division by 2^T (which is equivalent to a right shift // by T), the bottom W bits are accurate. Extra bits are okay; they'll get // truncated out after the division by 2^T. 
// // In comparison to just directly using the first formula, this technique // is much more efficient; using the first formula requires W * K bits, // but this formula less than W + K bits. Also, the first formula requires // a division step, whereas this formula only requires multiplies and shifts. // // It doesn't matter whether the subtraction step is done in the calculation // width or the input iteration count's width; if the subtraction overflows, // the result must be zero anyway. We prefer here to do it in the width of // the induction variable because it helps a lot for certain cases; CodeGen // isn't smart enough to ignore the overflow, which leads to much less // efficient code if the width of the subtraction is wider than the native // register width. // // (It's possible to not widen at all by pulling out factors of 2 before // the multiplication; for example, K=2 can be calculated as // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires // extra arithmetic, so it's not an obvious win, and it gets // much more complicated for K > 3.) // Protection from insane SCEVs; this bound is conservative, // but it probably doesn't matter. if (K > 1000) return SE.getCouldNotCompute(); unsigned W = SE.getTypeSizeInBits(ResultTy); // Calculate K! / 2^T and T; we divide out the factors of two before // multiplying for calculating K! / 2^T to avoid overflow. // Other overflow doesn't matter because we only care about the bottom // W bits of the result. APInt OddFactorial(W, 1); unsigned T = 1; for (unsigned i = 3; i <= K; ++i) { APInt Mult(W, i); unsigned TwoFactors = Mult.countr_zero(); T += TwoFactors; Mult.lshrInPlace(TwoFactors); OddFactorial *= Mult; } // We need at least W + T bits for the multiplication step unsigned CalculationBits = W + T; // Calculate 2^T, at width T+W. APInt DivFactor = APInt::getOneBitSet(CalculationBits, T); // Calculate the multiplicative inverse of K! / 2^T; // this multiplication factor will perform the exact division by // K! / 2^T. APInt Mod = APInt::getSignedMinValue(W+1); APInt MultiplyFactor = OddFactorial.zext(W+1); MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod); MultiplyFactor = MultiplyFactor.trunc(W); // Calculate the product, at width T+W IntegerType *CalculationTy = IntegerType::get(SE.getContext(), CalculationBits); const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); for (unsigned i = 1; i != K; ++i) { const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i)); Dividend = SE.getMulExpr(Dividend, SE.getTruncateOrZeroExtend(S, CalculationTy)); } // Divide by 2^T const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); // Truncate the result, and divide by K! / 2^T. return SE.getMulExpr(SE.getConstant(MultiplyFactor), SE.getTruncateOrZeroExtend(DivResult, ResultTy)); } /// Return the value of this chain of recurrences at the specified iteration /// number. We can evaluate this recurrence by multiplying each element in the /// chain by the binomial coefficient corresponding to it. In other words, we /// can evaluate {A,+,B,+,C,+,D} as: /// /// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) /// /// where BC(It, k) stands for binomial coefficient. 
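///
/// For example, the recurrence {0,+,1,+,1} evaluated at iteration n is
///
///   0*BC(n,0) + 1*BC(n,1) + 1*BC(n,2) = n + n*(n-1)/2,
///
/// i.e. the sequence 0, 1, 3, 6, 10, ... produced by adding 1, 2, 3, 4, ...
/// on successive iterations.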
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const { return evaluateAtIteration(operands(), It, SE); } const SCEV * SCEVAddRecExpr::evaluateAtIteration(ArrayRef Operands, const SCEV *It, ScalarEvolution &SE) { assert(Operands.size() > 0); const SCEV *Result = Operands[0]; for (unsigned i = 1, e = Operands.size(); i != e; ++i) { // The computation is correct in the face of overflow provided that the // multiplication is performed _after_ the evaluation of the binomial // coefficient. const SCEV *Coeff = BinomialCoefficient(It, i, SE, Result->getType()); if (isa(Coeff)) return Coeff; Result = SE.getAddExpr(Result, SE.getMulExpr(Operands[i], Coeff)); } return Result; } //===----------------------------------------------------------------------===// // SCEV Expression folder implementations //===----------------------------------------------------------------------===// const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op, unsigned Depth) { assert(Depth <= 1 && "getLosslessPtrToIntExpr() should self-recurse at most once."); // We could be called with an integer-typed operands during SCEV rewrites. // Since the operand is an integer already, just perform zext/trunc/self cast. if (!Op->getType()->isPointerTy()) return Op; // What would be an ID for such a SCEV cast expression? FoldingSetNodeID ID; ID.AddInteger(scPtrToInt); ID.AddPointer(Op); void *IP = nullptr; // Is there already an expression for such a cast? if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // It isn't legal for optimizations to construct new ptrtoint expressions // for non-integral pointers. if (getDataLayout().isNonIntegralPointerType(Op->getType())) return getCouldNotCompute(); Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType()); // We can only trivially model ptrtoint if SCEV's effective (integer) type // is sufficiently wide to represent all possible pointer values. // We could theoretically teach SCEV to truncate wider pointers, but // that isn't implemented for now. if (getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(Op->getType())) != getDataLayout().getTypeSizeInBits(IntPtrTy)) return getCouldNotCompute(); // If not, is this expression something we can't reduce any further? if (auto *U = dyn_cast(Op)) { // Perform some basic constant folding. If the operand of the ptr2int cast // is a null pointer, don't create a ptr2int SCEV expression (that will be // left as-is), but produce a zero constant. // NOTE: We could handle a more general case, but lack motivational cases. if (isa(U->getValue())) return getZero(IntPtrTy); // Create an explicit cast node. // We can reuse the existing insert position since if we get here, // we won't have made any changes which would invalidate it. SCEV *S = new (SCEVAllocator) SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy); UniqueSCEVs.InsertNode(S, IP); registerUser(S, Op); return S; } assert(Depth == 0 && "getLosslessPtrToIntExpr() should not self-recurse for " "non-SCEVUnknown's."); // Otherwise, we've got some expression that is more complex than just a // single SCEVUnknown. But we don't want to have a SCEVPtrToIntExpr of an // arbitrary expression, we want to have SCEVPtrToIntExpr of an SCEVUnknown // only, and the expressions must otherwise be integer-typed. // So sink the cast down to the SCEVUnknown's. 
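  // For instance, given a pointer-typed SCEV such as (%base + 4 * %i), the
  // rewriter below yields ((ptrtoint %base) + 4 * %i): the cast ends up only
  // on the SCEVUnknown leaf, and all of the arithmetic is performed on
  // integers.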
/// The SCEVPtrToIntSinkingRewriter takes a scalar evolution expression, /// which computes a pointer-typed value, and rewrites the whole expression /// tree so that *all* the computations are done on integers, and the only /// pointer-typed operands in the expression are SCEVUnknown. class SCEVPtrToIntSinkingRewriter : public SCEVRewriteVisitor { using Base = SCEVRewriteVisitor; public: SCEVPtrToIntSinkingRewriter(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {} static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE) { SCEVPtrToIntSinkingRewriter Rewriter(SE); return Rewriter.visit(Scev); } const SCEV *visit(const SCEV *S) { Type *STy = S->getType(); // If the expression is not pointer-typed, just keep it as-is. if (!STy->isPointerTy()) return S; // Else, recursively sink the cast down into it. return Base::visit(S); } const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { SmallVector Operands; bool Changed = false; for (const auto *Op : Expr->operands()) { Operands.push_back(visit(Op)); Changed |= Op != Operands.back(); } return !Changed ? Expr : SE.getAddExpr(Operands, Expr->getNoWrapFlags()); } const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { SmallVector Operands; bool Changed = false; for (const auto *Op : Expr->operands()) { Operands.push_back(visit(Op)); Changed |= Op != Operands.back(); } return !Changed ? Expr : SE.getMulExpr(Operands, Expr->getNoWrapFlags()); } const SCEV *visitUnknown(const SCEVUnknown *Expr) { assert(Expr->getType()->isPointerTy() && "Should only reach pointer-typed SCEVUnknown's."); return SE.getLosslessPtrToIntExpr(Expr, /*Depth=*/1); } }; // And actually perform the cast sinking. const SCEV *IntOp = SCEVPtrToIntSinkingRewriter::rewrite(Op, *this); assert(IntOp->getType()->isIntegerTy() && "We must have succeeded in sinking the cast, " "and ending up with an integer-typed expression!"); return IntOp; } const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty) { assert(Ty->isIntegerTy() && "Target type must be an integer type!"); const SCEV *IntOp = getLosslessPtrToIntExpr(Op); if (isa(IntOp)) return IntOp; return getTruncateOrZeroExtend(IntOp, Ty); } const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && "This is not a truncating conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); assert(!Op->getType()->isPointerTy() && "Can't truncate pointer!"); Ty = getEffectiveSCEVType(Ty); FoldingSetNodeID ID; ID.AddInteger(scTruncate); ID.AddPointer(Op); ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( cast(ConstantExpr::getTrunc(SC->getValue(), Ty))); // trunc(trunc(x)) --> trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) return getTruncateExpr(ST->getOperand(), Ty, Depth + 1); // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1); // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1); if (Depth > MaxCastDepth) { SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); registerUser(S, Op); return S; } // trunc(x1 + ... + xN) --> trunc(x1) + ... 
+ trunc(xN) and // trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN), // if after transforming we have at most one truncate, not counting truncates // that replace other casts. if (isa(Op) || isa(Op)) { auto *CommOp = cast(Op); SmallVector Operands; unsigned numTruncs = 0; for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2; ++i) { const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1); if (!isa(CommOp->getOperand(i)) && isa(S)) numTruncs++; Operands.push_back(S); } if (numTruncs < 2) { if (isa(Op)) return getAddExpr(Operands); if (isa(Op)) return getMulExpr(Operands); llvm_unreachable("Unexpected SCEV type for Op."); } // Although we checked in the beginning that ID is not in the cache, it is // possible that during recursion and different modification ID was inserted // into the cache. So if we find it, just return it. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; } // If the input value is a chrec scev, truncate the chrec's operands. if (const SCEVAddRecExpr *AddRec = dyn_cast(Op)) { SmallVector Operands; for (const SCEV *Op : AddRec->operands()) Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1)); return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } // Return zero if truncating to known zeros. uint32_t MinTrailingZeros = getMinTrailingZeros(Op); if (MinTrailingZeros >= getTypeSizeInBits(Ty)) return getZero(Ty); // The cast wasn't folded; create an explicit cast node. We can reuse // the existing insert position since if we get here, we won't have // made any changes which would invalidate it. SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); registerUser(S, Op); return S; } // Get the limit of a recurrence such that incrementing by Step cannot cause // signed overflow as long as the value of the recurrence within the // loop does not exceed this limit before incrementing. static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); if (SE->isKnownPositive(Step)) { *Pred = ICmpInst::ICMP_SLT; return SE->getConstant(APInt::getSignedMinValue(BitWidth) - SE->getSignedRangeMax(Step)); } if (SE->isKnownNegative(Step)) { *Pred = ICmpInst::ICMP_SGT; return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - SE->getSignedRangeMin(Step)); } return nullptr; } // Get the limit of a recurrence such that incrementing by Step cannot cause // unsigned overflow as long as the value of the recurrence within the loop does // not exceed this limit before incrementing. static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); *Pred = ICmpInst::ICMP_ULT; return SE->getConstant(APInt::getMinValue(BitWidth) - SE->getUnsignedRangeMax(Step)); } namespace { struct ExtendOpTraitsBase { typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *, unsigned); }; // Used to make code generic over signed and unsigned overflow. 
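// For the signed instantiation, getOverflowLimitForStep returns
// SIGNED_MIN - max(Step) together with an SLT predicate; e.g. for an i8 step
// known to lie in [1, 3], the limit is -128 - 3 == 125 (mod 2^8), so an
// increment cannot sign-overflow while the recurrence value is still slt 125
// (124 + 3 == 127 is fine, 125 + 3 would wrap).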
template struct ExtendOpTraits { // Members present: // // static const SCEV::NoWrapFlags WrapType; // // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr; // // static const SCEV *getOverflowLimitForStep(const SCEV *Step, // ICmpInst::Predicate *Pred, // ScalarEvolution *SE); }; template <> struct ExtendOpTraits : public ExtendOpTraitsBase { static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW; static const GetExtendExprTy GetExtendExpr; static const SCEV *getOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { return getSignedOverflowLimitForStep(Step, Pred, SE); } }; const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr; template <> struct ExtendOpTraits : public ExtendOpTraitsBase { static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW; static const GetExtendExprTy GetExtendExpr; static const SCEV *getOverflowLimitForStep(const SCEV *Step, ICmpInst::Predicate *Pred, ScalarEvolution *SE) { return getUnsignedOverflowLimitForStep(Step, Pred, SE); } }; const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr; } // end anonymous namespace // The recurrence AR has been shown to have no signed/unsigned wrap or something // close to it. Typically, if we can prove NSW/NUW for AR, then we can just as // easily prove NSW/NUW for its preincrement or postincrement sibling. This // allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step + // Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the // expression "Step + sext/zext(PreIncAR)" is congruent with // "sext/zext(PostIncAR)" template static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, ScalarEvolution *SE, unsigned Depth) { auto WrapType = ExtendOpTraits::WrapType; auto GetExtendExpr = ExtendOpTraits::GetExtendExpr; const Loop *L = AR->getLoop(); const SCEV *Start = AR->getStart(); const SCEV *Step = AR->getStepRecurrence(*SE); // Check for a simple looking step prior to loop entry. const SCEVAddExpr *SA = dyn_cast(Start); if (!SA) return nullptr; // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV // subtraction is expensive. For this purpose, perform a quick and dirty // difference, by checking for Step in the operand list. Note, that // SA might have repeated ops, like %a + %a + ..., so only remove one. SmallVector DiffOps(SA->operands()); for (auto It = DiffOps.begin(); It != DiffOps.end(); ++It) if (*It == Step) { DiffOps.erase(It); break; } if (DiffOps.size() == SA->getNumOperands()) return nullptr; // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` + // `Step`: // 1. NSW/NUW flags on the step increment. auto PreStartFlags = ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW); const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags); const SCEVAddRecExpr *PreAR = dyn_cast( SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); // "{S,+,X} is /" and "the backedge is taken at least once" implies // "S+X does not sign/unsign-overflow". // const SCEV *BECount = SE->getBackedgeTakenCount(L); if (PreAR && PreAR->getNoWrapFlags(WrapType) && !isa(BECount) && SE->isKnownPositive(BECount)) return PreStart; // 2. Direct overflow check on the step operation's expression. 
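  //    (That is, check in a type twice as wide whether
  //    ext(Start) == ext(PreStart) + ext(Step); since Start == PreStart + Step
  //    by construction, equality shows the narrow addition cannot wrap.)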
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); const SCEV *OperandExtendedStart = SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth), (SE->*GetExtendExpr)(Step, WideTy, Depth)); if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) { if (PreAR && AR->getNoWrapFlags(WrapType)) { // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact. SE->setNoWrapFlags(const_cast(PreAR), WrapType); } return PreStart; } // 3. Loop precondition. ICmpInst::Predicate Pred; const SCEV *OverflowLimit = ExtendOpTraits::getOverflowLimitForStep(Step, &Pred, SE); if (OverflowLimit && SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) return PreStart; return nullptr; } // Get the normalized zero or sign extended expression for this AddRec's Start. template static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, ScalarEvolution *SE, unsigned Depth) { auto GetExtendExpr = ExtendOpTraits::GetExtendExpr; const SCEV *PreStart = getPreStartForExtend(AR, Ty, SE, Depth); if (!PreStart) return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth); return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, Depth), (SE->*GetExtendExpr)(PreStart, Ty, Depth)); } // Try to prove away overflow by looking at "nearby" add recurrences. A // motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it // does not itself wrap then we can conclude that `{1,+,4}` is `nuw`. // // Formally: // // {S,+,X} == {S-T,+,X} + T // => Ext({S,+,X}) == Ext({S-T,+,X} + T) // // If ({S-T,+,X} + T) does not overflow ... (1) // // RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T) // // If {S-T,+,X} does not overflow ... (2) // // RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T) // == {Ext(S-T)+Ext(T),+,Ext(X)} // // If (S-T)+T does not overflow ... (3) // // RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)} // == {Ext(S),+,Ext(X)} == LHS // // Thus, if (1), (2) and (3) are true for some T, then // Ext({S,+,X}) == {Ext(S),+,Ext(X)} // // (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T) // does not overflow" restricted to the 0th iteration. Therefore we only need // to check for (1) and (2). // // In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T // is `Delta` (defined below). template bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, const Loop *L) { auto WrapType = ExtendOpTraits::WrapType; // We restrict `Start` to a constant to prevent SCEV from spending too much // time here. It is correct (but more expensive) to continue with a // non-constant `Start` and do a general SCEV subtraction to compute // `PreStart` below. const SCEVConstant *StartC = dyn_cast(Start); if (!StartC) return false; APInt StartAI = StartC->getAPInt(); for (unsigned Delta : {-2, -1, 1, 2}) { const SCEV *PreStart = getConstant(StartAI - Delta); FoldingSetNodeID ID; ID.AddInteger(scAddRecExpr); ID.AddPointer(PreStart); ID.AddPointer(Step); ID.AddPointer(L); void *IP = nullptr; const auto *PreAR = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); // Give up if we don't already have the add recurrence we need because // actually constructing an add recurrence is relatively expensive. 
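    // (For an i32 recurrence with Start == 1, for example, the pre-start
    //  constants probed here are 3, 2, 0 and -1, i.e. Start - Delta for
    //  Delta in {-2, -1, 1, 2}.)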
    if (PreAR && PreAR->getNoWrapFlags(WrapType)) {  // proves (2)
      const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
      ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
      const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
          DeltaS, &Pred, this);
      if (Limit && isKnownPredicate(Pred, PreAR, Limit))  // proves (1)
        return true;
    }
  }

  return false;
}

// Finds an integer D for an expression (C + x + y + ...) such that the top
// level addition in (D + (C - D + x + y + ...)) would not wrap (signed or
// unsigned) and the number of trailing zeros of (C - D + x + y + ...) is
// maximized, where C is the \p ConstantTerm, x, y, ... are arbitrary SCEVs, and
// the (C + x + y + ...) expression is \p WholeAddExpr.
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
                                            const SCEVConstant *ConstantTerm,
                                            const SCEVAddExpr *WholeAddExpr) {
  const APInt &C = ConstantTerm->getAPInt();
  const unsigned BitWidth = C.getBitWidth();
  // Find number of trailing zeros of (x + y + ...) w/o the C first:
  uint32_t TZ = BitWidth;
  for (unsigned I = 1, E = WholeAddExpr->getNumOperands(); I < E && TZ; ++I)
    TZ = std::min(TZ, SE.getMinTrailingZeros(WholeAddExpr->getOperand(I)));
  if (TZ) {
    // Set D to be as many least significant bits of C as possible while still
    // guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap:
    return TZ < BitWidth ? C.trunc(TZ).zext(BitWidth) : C;
  }
  return APInt(BitWidth, 0);
}

// Finds an integer D for an affine AddRec expression {C,+,x} such that the top
// level addition in (D + {C-D,+,x}) would not wrap (signed or unsigned) and the
// number of trailing zeros of (C - D + x * n) is maximized, where C is the \p
// ConstantStart, x is an arbitrary \p Step, and n is the loop trip count.
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
                                            const APInt &ConstantStart,
                                            const SCEV *Step) {
  const unsigned BitWidth = ConstantStart.getBitWidth();
  const uint32_t TZ = SE.getMinTrailingZeros(Step);
  if (TZ)
    return TZ < BitWidth ? ConstantStart.trunc(TZ).zext(BitWidth)
                         : ConstantStart;
  return APInt(BitWidth, 0);
}

static void insertFoldCacheEntry(
    const ScalarEvolution::FoldID &ID, const SCEV *S,
    DenseMap<ScalarEvolution::FoldID, const SCEV *> &FoldCache,
    DenseMap<const SCEV *, SmallVector<ScalarEvolution::FoldID, 2>>
        &FoldCacheUser) {
  auto I = FoldCache.insert({ID, S});
  if (!I.second) {
    // Remove FoldCacheUser entry for ID when replacing an existing FoldCache
    // entry.
auto &UserIDs = FoldCacheUser[I.first->second]; assert(count(UserIDs, ID) == 1 && "unexpected duplicates in UserIDs"); for (unsigned I = 0; I != UserIDs.size(); ++I) if (UserIDs[I] == ID) { std::swap(UserIDs[I], UserIDs.back()); break; } UserIDs.pop_back(); I.first->second = S; } auto R = FoldCacheUser.insert({S, {}}); R.first->second.push_back(ID); } const SCEV * ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); assert(!Op->getType()->isPointerTy() && "Can't extend pointer!"); Ty = getEffectiveSCEVType(Ty); FoldID ID(scZeroExtend, Op, Ty); auto Iter = FoldCache.find(ID); if (Iter != FoldCache.end()) return Iter->second; const SCEV *S = getZeroExtendExprImpl(Op, Ty, Depth); if (!isa(S)) insertFoldCacheEntry(ID, S, FoldCache, FoldCacheUser); return S; } const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); assert(!Op->getType()->isPointerTy() && "Can't extend pointer!"); // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant(SC->getAPInt().zext(getTypeSizeInBits(Ty))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. FoldingSetNodeID ID; ID.AddInteger(scZeroExtend); ID.AddPointer(Op); ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; if (Depth > MaxCastDepth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); registerUser(S, Op); return S; } // zext(trunc(x)) --> zext(x) or x or trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) { // It's possible the bits taken off by the truncate were all zero bits. If // so, we should be able to simplify this further. const SCEV *X = ST->getOperand(); ConstantRange CR = getUnsignedRange(X); unsigned TruncBits = getTypeSizeInBits(ST->getType()); unsigned NewBits = getTypeSizeInBits(Ty); if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( CR.zextOrTrunc(NewBits))) return getTruncateOrZeroExtend(X, Ty, Depth); } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can zero extend all of the // operands (often constants). This allows analysis of something like // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } if (const SCEVAddRecExpr *AR = dyn_cast(Op)) if (AR->isAffine()) { const SCEV *Start = AR->getStart(); const SCEV *Step = AR->getStepRecurrence(*this); unsigned BitWidth = getTypeSizeInBits(AR->getType()); const Loop *L = AR->getLoop(); // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->hasNoUnsignedWrap()) { Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getZeroExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } // Check whether the backedge-taken count is SCEVCouldNotCompute. 
// Note that this serves two purposes: It filters out loops that are // simply not analyzable, and it covers the case where this code is // being called from within backedge-taken count analysis, such that // attempting to ask for the backedge-taken count would likely result // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for overflow. // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. const SCEV *CastedMaxBECount = getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( CastedMaxBECount, MaxBECount->getType(), Depth); if (MaxBECount == RecastedMaxBECount) { Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step, SCEV::FlagAnyWrap, Depth + 1); const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul, SCEV::FlagAnyWrap, Depth + 1), WideTy, Depth + 1); const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1); const SCEV *WideMaxBECount = getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); const SCEV *OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NUW, which is propagated to this AddRec. setNoWrapFlags(const_cast(AR), SCEV::FlagNUW); // Return the expression with the addrec on the outside. Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getZeroExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NW, which is propagated to this AddRec. // Negative step causes unsigned wrap, but it still can't self-wrap. setNoWrapFlags(const_cast(AR), SCEV::FlagNW); // Return the expression with the addrec on the outside. Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getSignExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } } } // Normally, in the cases we can prove no-overflow via a // backedge guarding condition, we can also compute a backedge // taken count for the loop. The exceptions are assumptions and // guards present in the loop -- SCEV is not great at exploiting // these to compute max backedge taken counts, but can still use // these to prove lack of overflow. Use this fact to avoid // doing extra work that may not pay off. if (!isa(MaxBECount) || HasGuards || !AC.assumptions().empty()) { auto NewFlags = proveNoUnsignedWrapViaInduction(AR); setNoWrapFlags(const_cast(AR), NewFlags); if (AR->hasNoUnsignedWrap()) { // Same as nuw case above - duplicated here to avoid a compile time // issue. It's not clear that the order of checks does matter, but // it's one of two issue possible causes for a change which was // reverted. 
Be conservative for the moment. Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getZeroExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } // For a negative step, we can extend the operands iff doing so only // traverses values in the range zext([0,UINT_MAX]). if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - getSignedRangeMin(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)) { // Cache knowledge of AR NW, which is propagated to this // AddRec. Negative step causes unsigned wrap, but it // still can't self-wrap. setNoWrapFlags(const_cast(AR), SCEV::FlagNW); // Return the expression with the addrec on the outside. Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getSignExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } } } // zext({C,+,Step}) --> (zext(D) + zext({C-D,+,Step})) // if D + (C - D + Step * n) could be proven to not unsigned wrap // where D maximizes the number of trailing zeros of (C - D + Step * n) if (const auto *SC = dyn_cast(Start)) { const APInt &C = SC->getAPInt(); const APInt &D = extractConstantWithoutWrapping(*this, C, Step); if (D != 0) { const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth); const SCEV *SResidual = getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags()); const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1); return getAddExpr(SZExtD, SZExtR, (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), Depth + 1); } } if (proveNoWrapByVaryingStart(Start, Step, L)) { setNoWrapFlags(const_cast(AR), SCEV::FlagNUW); Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getZeroExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } } // zext(A % B) --> zext(A) % zext(B) { const SCEV *LHS; const SCEV *RHS; if (matchURem(Op, LHS, RHS)) return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1), getZeroExtendExpr(RHS, Ty, Depth + 1)); } // zext(A / B) --> zext(A) / zext(B). if (auto *Div = dyn_cast(Op)) return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1), getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1)); if (auto *SA = dyn_cast(Op)) { // zext((A + B + ...)) --> (zext(A) + zext(B) + ...) if (SA->hasNoUnsignedWrap()) { // If the addition does not unsign overflow then we can, by definition, // commute the zero extension with the addition operation. SmallVector Ops; for (const auto *Op : SA->operands()) Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1); } // zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...)) // if D + (C - D + x + y + ...) could be proven to not unsigned wrap // where D maximizes the number of trailing zeros of (C - D + x + y + ...) // // Often address arithmetics contain expressions like // (zext (add (shl X, C1), C2)), for instance, (zext (5 + (4 * X))). // This transformation is useful while proving that such expressions are // equal or differ by a small constant amount, see LoadStoreVectorizer pass. 
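    // Concretely, for (zext i8 (5 + 4 * %x) to i32), and assuming nothing more
    // is known about %x, the non-constant part keeps at least two trailing
    // zero bits, so D = 1 (the low two bits of 5) can be added back without
    // wrapping, and the whole expression becomes
    // 1 + (zext i8 (4 + 4 * %x) to i32).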
if (const auto *SC = dyn_cast(SA->getOperand(0))) { const APInt &D = extractConstantWithoutWrapping(*this, SC, SA); if (D != 0) { const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth); const SCEV *SResidual = getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth); const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1); return getAddExpr(SZExtD, SZExtR, (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), Depth + 1); } } } if (auto *SM = dyn_cast(Op)) { // zext((A * B * ...)) --> (zext(A) * zext(B) * ...) if (SM->hasNoUnsignedWrap()) { // If the multiply does not unsign overflow then we can, by definition, // commute the zero extension with the multiply operation. SmallVector Ops; for (const auto *Op : SM->operands()) Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); return getMulExpr(Ops, SCEV::FlagNUW, Depth + 1); } // zext(2^K * (trunc X to iN)) to iM -> // 2^K * (zext(trunc X to i{N-K}) to iM) // // Proof: // // zext(2^K * (trunc X to iN)) to iM // = zext((trunc X to iN) << K) to iM // = zext((trunc X to i{N-K}) << K) to iM // (because shl removes the top K bits) // = zext((2^K * (trunc X to i{N-K}))) to iM // = (2^K * (zext(trunc X to i{N-K}) to iM)). // if (SM->getNumOperands() == 2) if (auto *MulLHS = dyn_cast(SM->getOperand(0))) if (MulLHS->getAPInt().isPowerOf2()) if (auto *TruncRHS = dyn_cast(SM->getOperand(1))) { int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) - MulLHS->getAPInt().logBase2(); Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits); return getMulExpr( getZeroExtendExpr(MulLHS, Ty), getZeroExtendExpr( getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty), SCEV::FlagNUW, Depth + 1); } } // zext(umin(x, y)) -> umin(zext(x), zext(y)) // zext(umax(x, y)) -> umax(zext(x), zext(y)) if (isa(Op) || isa(Op)) { auto *MinMax = cast(Op); SmallVector Operands; for (auto *Operand : MinMax->operands()) Operands.push_back(getZeroExtendExpr(Operand, Ty)); if (isa(MinMax)) return getUMinExpr(Operands); return getUMaxExpr(Operands); } // zext(umin_seq(x, y)) -> umin_seq(zext(x), zext(y)) if (auto *MinMax = dyn_cast(Op)) { assert(isa(MinMax) && "Not supported!"); SmallVector Operands; for (auto *Operand : MinMax->operands()) Operands.push_back(getZeroExtendExpr(Operand, Ty)); return getUMinExpr(Operands, /*Sequential*/ true); } // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. 
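  // (Recursive calls above such as getZeroExtendExpr, getAddExpr and
  //  getMulExpr may have inserted new nodes into UniqueSCEVs, so the IP
  //  computed at the top of this function can be stale.)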
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); registerUser(S, Op); return S; } const SCEV * ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); assert(!Op->getType()->isPointerTy() && "Can't extend pointer!"); Ty = getEffectiveSCEVType(Ty); FoldID ID(scSignExtend, Op, Ty); auto Iter = FoldCache.find(ID); if (Iter != FoldCache.end()) return Iter->second; const SCEV *S = getSignExtendExprImpl(Op, Ty, Depth); if (!isa(S)) insertFoldCacheEntry(ID, S, FoldCache, FoldCacheUser); return S; } const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); assert(!Op->getType()->isPointerTy() && "Can't extend pointer!"); Ty = getEffectiveSCEVType(Ty); // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant(SC->getAPInt().sext(getTypeSizeInBits(Ty))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1); // sext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. FoldingSetNodeID ID; ID.AddInteger(scSignExtend); ID.AddPointer(Op); ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Limit recursion depth. if (Depth > MaxCastDepth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); registerUser(S, Op); return S; } // sext(trunc(x)) --> sext(x) or x or trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) { // It's possible the bits taken off by the truncate were all sign bits. If // so, we should be able to simplify this further. const SCEV *X = ST->getOperand(); ConstantRange CR = getSignedRange(X); unsigned TruncBits = getTypeSizeInBits(ST->getType()); unsigned NewBits = getTypeSizeInBits(Ty); if (CR.truncate(TruncBits).signExtend(NewBits).contains( CR.sextOrTrunc(NewBits))) return getTruncateOrSignExtend(X, Ty, Depth); } if (auto *SA = dyn_cast(Op)) { // sext((A + B + ...)) --> (sext(A) + sext(B) + ...) if (SA->hasNoSignedWrap()) { // If the addition does not sign overflow then we can, by definition, // commute the sign extension with the addition operation. SmallVector Ops; for (const auto *Op : SA->operands()) Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1)); return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1); } // sext(C + x + y + ...) --> (sext(D) + sext((C - D) + x + y + ...)) // if D + (C - D + x + y + ...) could be proven to not signed wrap // where D maximizes the number of trailing zeros of (C - D + x + y + ...) 
// // For instance, this will bring two seemingly different expressions: // 1 + sext(5 + 20 * %x + 24 * %y) and // sext(6 + 20 * %x + 24 * %y) // to the same form: // 2 + sext(4 + 20 * %x + 24 * %y) if (const auto *SC = dyn_cast(SA->getOperand(0))) { const APInt &D = extractConstantWithoutWrapping(*this, SC, SA); if (D != 0) { const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth); const SCEV *SResidual = getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth); const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1); return getAddExpr(SSExtD, SSExtR, (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), Depth + 1); } } } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the // operands (often constants). This allows analysis of something like // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } if (const SCEVAddRecExpr *AR = dyn_cast(Op)) if (AR->isAffine()) { const SCEV *Start = AR->getStart(); const SCEV *Step = AR->getStepRecurrence(*this); unsigned BitWidth = getTypeSizeInBits(AR->getType()); const Loop *L = AR->getLoop(); // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->hasNoSignedWrap()) { Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getSignExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, SCEV::FlagNSW); } // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are // simply not analyzable, and it covers the case where this code is // being called from within backedge-taken count analysis, such that // attempting to ask for the backedge-taken count would likely result // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. const SCEV *CastedMaxBECount = getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( CastedMaxBECount, MaxBECount->getType(), Depth); if (MaxBECount == RecastedMaxBECount) { Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step, SCEV::FlagAnyWrap, Depth + 1); const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul, SCEV::FlagAnyWrap, Depth + 1), WideTy, Depth + 1); const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1); const SCEV *WideMaxBECount = getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); const SCEV *OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. setNoWrapFlags(const_cast(AR), SCEV::FlagNSW); // Return the expression with the addrec on the outside. 
Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getSignExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. OperandExtendedAdd = getAddExpr(WideStart, getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy, Depth + 1), SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1); if (SAdd == OperandExtendedAdd) { // If AR wraps around then // // abs(Step) * MaxBECount > unsigned-max(AR->getType()) // => SAdd != OperandExtendedAdd // // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=> // (SAdd == OperandExtendedAdd => AR is NW) setNoWrapFlags(const_cast(AR), SCEV::FlagNW); // Return the expression with the addrec on the outside. Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getZeroExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } } } auto NewFlags = proveNoSignedWrapViaInduction(AR); setNoWrapFlags(const_cast(AR), NewFlags); if (AR->hasNoSignedWrap()) { // Same as nsw case above - duplicated here to avoid a compile time // issue. It's not clear that the order of checks does matter, but // it's one of two issue possible causes for a change which was // reverted. Be conservative for the moment. Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getSignExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step})) // if D + (C - D + Step * n) could be proven to not signed wrap // where D maximizes the number of trailing zeros of (C - D + Step * n) if (const auto *SC = dyn_cast(Start)) { const APInt &C = SC->getAPInt(); const APInt &D = extractConstantWithoutWrapping(*this, C, Step); if (D != 0) { const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth); const SCEV *SResidual = getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags()); const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1); return getAddExpr(SSExtD, SSExtR, (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), Depth + 1); } } if (proveNoWrapByVaryingStart(Start, Step, L)) { setNoWrapFlags(const_cast(AR), SCEV::FlagNSW); Start = getExtendAddRecStart(AR, Ty, this, Depth + 1); Step = getSignExtendExpr(Step, Ty, Depth + 1); return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags()); } } // If the input value is provably positive and we could not simplify // away the sext build a zext instead. if (isKnownNonNegative(Op)) return getZeroExtendExpr(Op, Ty, Depth + 1); // sext(smin(x, y)) -> smin(sext(x), sext(y)) // sext(smax(x, y)) -> smax(sext(x), sext(y)) if (isa(Op) || isa(Op)) { auto *MinMax = cast(Op); SmallVector Operands; for (auto *Operand : MinMax->operands()) Operands.push_back(getSignExtendExpr(Operand, Ty)); if (isa(MinMax)) return getSMinExpr(Operands); return getSMaxExpr(Operands); } // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. 
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); registerUser(S, { Op }); return S; } const SCEV *ScalarEvolution::getCastExpr(SCEVTypes Kind, const SCEV *Op, Type *Ty) { switch (Kind) { case scTruncate: return getTruncateExpr(Op, Ty); case scZeroExtend: return getZeroExtendExpr(Op, Ty); case scSignExtend: return getSignExtendExpr(Op, Ty); case scPtrToInt: return getPtrToIntExpr(Op, Ty); default: llvm_unreachable("Not a SCEV cast expression!"); } } /// getAnyExtendExpr - Return a SCEV for the given operand extended with /// unspecified bits out to the given type. const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); // Sign-extend negative constants. if (const SCEVConstant *SC = dyn_cast(Op)) if (SC->getAPInt().isNegative()) return getSignExtendExpr(Op, Ty); // Peel off a truncate cast. if (const SCEVTruncateExpr *T = dyn_cast(Op)) { const SCEV *NewOp = T->getOperand(); if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) return getAnyExtendExpr(NewOp, Ty); return getTruncateOrNoop(NewOp, Ty); } // Next try a zext cast. If the cast is folded, use it. const SCEV *ZExt = getZeroExtendExpr(Op, Ty); if (!isa(ZExt)) return ZExt; // Next try a sext cast. If the cast is folded, use it. const SCEV *SExt = getSignExtendExpr(Op, Ty); if (!isa(SExt)) return SExt; // Force the cast to be folded into the operands of an addrec. if (const SCEVAddRecExpr *AR = dyn_cast(Op)) { SmallVector Ops; for (const SCEV *Op : AR->operands()) Ops.push_back(getAnyExtendExpr(Op, Ty)); return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } // If the expression is obviously signed, use the sext cast value. if (isa(Op)) return SExt; // Absent any other information, use the zext cast value. return ZExt; } /// Process the given Ops list, which is a list of operands to be added under /// the given scale, update the given map. This is a helper function for /// getAddRecExpr. As an example of what it does, given a sequence of operands /// that would form an add expression like this: /// /// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r) /// /// where A and B are constants, update the map with these values: /// /// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) /// /// and add 13 + A*B*29 to AccumulatedConstant. /// This will allow getAddRecExpr to produce this: /// /// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) /// /// This form often exposes folding opportunities that are hidden in /// the original operand list. /// /// Return true iff it appears that any interesting folding opportunities /// may be exposed. This helps getAddRecExpr short-circuit extra work in /// the common case where no interesting opportunities are present, and /// is also used as a check to avoid infinite recursion. static bool CollectAddOperandsWithScales(DenseMap &M, SmallVectorImpl &NewOps, APInt &AccumulatedConstant, ArrayRef Ops, const APInt &Scale, ScalarEvolution &SE) { bool Interesting = false; // Iterate over the add operands. They are sorted, with constants first. unsigned i = 0; while (const SCEVConstant *C = dyn_cast(Ops[i])) { ++i; // Pull a buried constant out to the outside. 
if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) Interesting = true; AccumulatedConstant += Scale * C->getAPInt(); } // Next comes everything else. We're especially interested in multiplies // here, but they're in the middle, so just visit the rest with one loop. for (; i != Ops.size(); ++i) { const SCEVMulExpr *Mul = dyn_cast(Ops[i]); if (Mul && isa(Mul->getOperand(0))) { APInt NewScale = Scale * cast(Mul->getOperand(0))->getAPInt(); if (Mul->getNumOperands() == 2 && isa(Mul->getOperand(1))) { // A multiplication of a constant with another add; recurse. const SCEVAddExpr *Add = cast(Mul->getOperand(1)); Interesting |= CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, Add->operands(), NewScale, SE); } else { // A multiplication of a constant with some other value. Update // the map. SmallVector MulOps(drop_begin(Mul->operands())); const SCEV *Key = SE.getMulExpr(MulOps); auto Pair = M.insert({Key, NewScale}); if (Pair.second) { NewOps.push_back(Pair.first->first); } else { Pair.first->second += NewScale; // The map already had an entry for this value, which may indicate // a folding opportunity. Interesting = true; } } } else { // An ordinary operand. Update the map. std::pair::iterator, bool> Pair = M.insert({Ops[i], Scale}); if (Pair.second) { NewOps.push_back(Pair.first->first); } else { Pair.first->second += Scale; // The map already had an entry for this value, which may indicate // a folding opportunity. Interesting = true; } } } return Interesting; } bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI) { const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned); switch (BinOp) { default: llvm_unreachable("Unsupported binary op"); case Instruction::Add: Operation = &ScalarEvolution::getAddExpr; break; case Instruction::Sub: Operation = &ScalarEvolution::getMinusSCEV; break; case Instruction::Mul: Operation = &ScalarEvolution::getMulExpr; break; } const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) = Signed ? &ScalarEvolution::getSignExtendExpr : &ScalarEvolution::getZeroExtendExpr; // Check ext(LHS op RHS) == ext(LHS) op ext(RHS) auto *NarrowTy = cast(LHS->getType()); auto *WideTy = IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); const SCEV *A = (this->*Extension)( (this->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), WideTy, 0); const SCEV *LHSB = (this->*Extension)(LHS, WideTy, 0); const SCEV *RHSB = (this->*Extension)(RHS, WideTy, 0); const SCEV *B = (this->*Operation)(LHSB, RHSB, SCEV::FlagAnyWrap, 0); if (A == B) return true; // Can we use context to prove the fact we need? if (!CtxI) return false; // TODO: Support mul. if (BinOp == Instruction::Mul) return false; auto *RHSC = dyn_cast(RHS); // TODO: Lift this limitation. if (!RHSC) return false; APInt C = RHSC->getAPInt(); unsigned NumBits = C.getBitWidth(); bool IsSub = (BinOp == Instruction::Sub); bool IsNegativeConst = (Signed && C.isNegative()); // Compute the direction and magnitude by which we need to check overflow. bool OverflowDown = IsSub ^ IsNegativeConst; APInt Magnitude = C; if (IsNegativeConst) { if (C == APInt::getSignedMinValue(NumBits)) // TODO: SINT_MIN on inversion gives the same negative value, we don't // want to deal with that. return false; Magnitude = -C; } ICmpInst::Predicate Pred = Signed ? 
ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; if (OverflowDown) { // To avoid overflow down, we need to make sure that MIN + Magnitude <= LHS. APInt Min = Signed ? APInt::getSignedMinValue(NumBits) : APInt::getMinValue(NumBits); APInt Limit = Min + Magnitude; return isKnownPredicateAt(Pred, getConstant(Limit), LHS, CtxI); } else { // To avoid overflow up, we need to make sure that LHS <= MAX - Magnitude. APInt Max = Signed ? APInt::getSignedMaxValue(NumBits) : APInt::getMaxValue(NumBits); APInt Limit = Max - Magnitude; return isKnownPredicateAt(Pred, LHS, getConstant(Limit), CtxI); } } std::optional ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp( const OverflowingBinaryOperator *OBO) { // It cannot be done any better. if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap()) return std::nullopt; SCEV::NoWrapFlags Flags = SCEV::NoWrapFlags::FlagAnyWrap; if (OBO->hasNoUnsignedWrap()) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); if (OBO->hasNoSignedWrap()) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); bool Deduced = false; if (OBO->getOpcode() != Instruction::Add && OBO->getOpcode() != Instruction::Sub && OBO->getOpcode() != Instruction::Mul) return std::nullopt; const SCEV *LHS = getSCEV(OBO->getOperand(0)); const SCEV *RHS = getSCEV(OBO->getOperand(1)); const Instruction *CtxI = UseContextForNoWrapFlagInference ? dyn_cast(OBO) : nullptr; if (!OBO->hasNoUnsignedWrap() && willNotOverflow((Instruction::BinaryOps)OBO->getOpcode(), /* Signed */ false, LHS, RHS, CtxI)) { Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); Deduced = true; } if (!OBO->hasNoSignedWrap() && willNotOverflow((Instruction::BinaryOps)OBO->getOpcode(), /* Signed */ true, LHS, RHS, CtxI)) { Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); Deduced = true; } if (Deduced) return Flags; return std::nullopt; } // We're trying to construct a SCEV of type `Type' with `Ops' as operands and // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of // can't-overflow flags for the operation if possible. static SCEV::NoWrapFlags StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, const ArrayRef Ops, SCEV::NoWrapFlags Flags) { using namespace std::placeholders; using OBO = OverflowingBinaryOperator; bool CanAnalyze = Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr; (void)CanAnalyze; assert(CanAnalyze && "don't call from other places!"); int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; SCEV::NoWrapFlags SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. auto IsKnownNonNegative = [&](const SCEV *S) { return SE->isKnownNonNegative(S); }; if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative)) Flags = ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); if (SignOrUnsignWrap != SignOrUnsignMask && (Type == scAddExpr || Type == scMulExpr) && Ops.size() == 2 && isa(Ops[0])) { auto Opcode = [&] { switch (Type) { case scAddExpr: return Instruction::Add; case scMulExpr: return Instruction::Mul; default: llvm_unreachable("Unexpected SCEV op."); } }(); const APInt &C = cast(Ops[0])->getAPInt(); // (A C) --> (A C) if the op doesn't sign overflow. 
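    // Illustration (assuming i8 operands): for an add with C = 100, the
    // guaranteed no-signed-wrap region is the signed range [-128, 28), i.e.
    // the values X for which X + 100 cannot sign-overflow. If the signed range
    // of the variable operand fits inside that region, nsw can be set below;
    // the nuw case is handled the same way with the unsigned no-wrap region.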
if (!(SignOrUnsignWrap & SCEV::FlagNSW)) { auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( Opcode, C, OBO::NoSignedWrap); if (NSWRegion.contains(SE->getSignedRange(Ops[1]))) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); } // (A C) --> (A C) if the op doesn't unsign overflow. if (!(SignOrUnsignWrap & SCEV::FlagNUW)) { auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( Opcode, C, OBO::NoUnsignedWrap); if (NUWRegion.contains(SE->getUnsignedRange(Ops[1]))) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); } } // <0,+,nonnegative> is also nuw // TODO: Add corresponding nsw case if (Type == scAddRecExpr && ScalarEvolution::hasFlags(Flags, SCEV::FlagNW) && !ScalarEvolution::hasFlags(Flags, SCEV::FlagNUW) && Ops.size() == 2 && Ops[0]->isZero() && IsKnownNonNegative(Ops[1])) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); // both (udiv X, Y) * Y and Y * (udiv X, Y) are always NUW if (Type == scMulExpr && !ScalarEvolution::hasFlags(Flags, SCEV::FlagNUW) && Ops.size() == 2) { if (auto *UDiv = dyn_cast(Ops[0])) if (UDiv->getOperand(1) == Ops[1]) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); if (auto *UDiv = dyn_cast(Ops[1])) if (UDiv->getOperand(1) == Ops[0]) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); } return Flags; } bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) { return isLoopInvariant(S, L) && properlyDominates(S, L->getHeader()); } /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags OrigFlags, unsigned Depth) { assert(!(OrigFlags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVAddExpr operand types don't match!"); unsigned NumPtrs = count_if( Ops, [](const SCEV *Op) { return Op->getType()->isPointerTy(); }); assert(NumPtrs <= 1 && "add has at most one pointer operand"); #endif // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt()); if (Ops.size() == 2) return Ops[0]; Ops.erase(Ops.begin()+1); // Erase the folded element LHSC = cast(Ops[0]); } // If we are left with a constant zero being added, strip it off. if (LHSC->getValue()->isZero()) { Ops.erase(Ops.begin()); --Idx; } if (Ops.size() == 1) return Ops[0]; } // Delay expensive flag strengthening until necessary. auto ComputeFlags = [this, OrigFlags](const ArrayRef Ops) { return StrengthenNoWrapFlags(this, scAddExpr, Ops, OrigFlags); }; // Limit recursion calls depth. if (Depth > MaxArithDepth || hasHugeExpression(Ops)) return getOrCreateAddExpr(Ops, ComputeFlags(Ops)); if (SCEV *S = findExistingSCEVInCache(scAddExpr, Ops)) { // Don't strengthen flags if we have no new information. SCEVAddExpr *Add = static_cast(S); if (Add->getNoWrapFlags(OrigFlags) != OrigFlags) Add->setNoWrapFlags(ComputeFlags(Ops)); return S; } // Okay, check to see if the same value occurs in the operand list more than // once. 
If so, merge them together into an multiply expression. Since we // sorted the list, these values are required to be adjacent. Type *Ty = Ops[0]->getType(); bool FoundMatch = false; for (unsigned i = 0, e = Ops.size(); i != e-1; ++i) if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 // Scan ahead to count how many equal operands there are. unsigned Count = 2; while (i+Count != e && Ops[i+Count] == Ops[i]) ++Count; // Merge the values into a multiply. const SCEV *Scale = getConstant(Ty, Count); const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == Count) return Mul; Ops[i] = Mul; Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count); --i; e -= Count - 1; FoundMatch = true; } if (FoundMatch) return getAddExpr(Ops, OrigFlags, Depth + 1); // Check for truncates. If all the operands are truncated from the same // type, see if factoring out the truncate would permit the result to be // folded. eg., n*trunc(x) + m*trunc(y) --> trunc(trunc(m)*x + trunc(n)*y) // if the contents of the resulting outer trunc fold to something simple. auto FindTruncSrcType = [&]() -> Type * { // We're ultimately looking to fold an addrec of truncs and muls of only // constants and truncs, so if we find any other types of SCEV // as operands of the addrec then we bail and return nullptr here. // Otherwise, we return the type of the operand of a trunc that we find. if (auto *T = dyn_cast(Ops[Idx])) return T->getOperand()->getType(); if (const auto *Mul = dyn_cast(Ops[Idx])) { const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1); if (const auto *T = dyn_cast(LastOp)) return T->getOperand()->getType(); } return nullptr; }; if (auto *SrcType = FindTruncSrcType()) { SmallVector LargeOps; bool Ok = true; // Check all the operands to see if they can be represented in the // source type of the truncate. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { if (const SCEVTruncateExpr *T = dyn_cast(Ops[i])) { if (T->getOperand()->getType() != SrcType) { Ok = false; break; } LargeOps.push_back(T->getOperand()); } else if (const SCEVConstant *C = dyn_cast(Ops[i])) { LargeOps.push_back(getAnyExtendExpr(C, SrcType)); } else if (const SCEVMulExpr *M = dyn_cast(Ops[i])) { SmallVector LargeMulOps; for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { if (const SCEVTruncateExpr *T = dyn_cast(M->getOperand(j))) { if (T->getOperand()->getType() != SrcType) { Ok = false; break; } LargeMulOps.push_back(T->getOperand()); } else if (const auto *C = dyn_cast(M->getOperand(j))) { LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); } else { Ok = false; break; } } if (Ok) LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1)); } else { Ok = false; break; } } if (Ok) { // Evaluate the expression in the larger type. const SCEV *Fold = getAddExpr(LargeOps, SCEV::FlagAnyWrap, Depth + 1); // If it folds to something simple, use it. Otherwise, don't. if (isa(Fold) || isa(Fold)) return getTruncateExpr(Fold, Ty); } } if (Ops.size() == 2) { // Check if we have an expression of the form ((X + C1) - C2), where C1 and // C2 can be folded in a way that allows retaining wrapping flags of (X + // C1). 
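    // For example (illustrative constants): given (X + 10)<nuw> plus -3, the
    // folded constant is 7, which is ule 10, so nuw is kept and the result is
    // (X + 7)<nuw>. Adding -20 instead would give -10, which is not ule 10 in
    // the unsigned sense, so nuw would have to be dropped.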
const SCEV *A = Ops[0]; const SCEV *B = Ops[1]; auto *AddExpr = dyn_cast(B); auto *C = dyn_cast(A); if (AddExpr && C && isa(AddExpr->getOperand(0))) { auto C1 = cast(AddExpr->getOperand(0))->getAPInt(); auto C2 = C->getAPInt(); SCEV::NoWrapFlags PreservedFlags = SCEV::FlagAnyWrap; APInt ConstAdd = C1 + C2; auto AddFlags = AddExpr->getNoWrapFlags(); // Adding a smaller constant is NUW if the original AddExpr was NUW. if (ScalarEvolution::hasFlags(AddFlags, SCEV::FlagNUW) && ConstAdd.ule(C1)) { PreservedFlags = ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNUW); } // Adding a constant with the same sign and small magnitude is NSW, if the // original AddExpr was NSW. if (ScalarEvolution::hasFlags(AddFlags, SCEV::FlagNSW) && C1.isSignBitSet() == ConstAdd.isSignBitSet() && ConstAdd.abs().ule(C1.abs())) { PreservedFlags = ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNSW); } if (PreservedFlags != SCEV::FlagAnyWrap) { SmallVector NewOps(AddExpr->operands()); NewOps[0] = getConstant(ConstAdd); return getAddExpr(NewOps, PreservedFlags); } } } // Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y) if (Ops.size() == 2) { const SCEVMulExpr *Mul = dyn_cast(Ops[0]); if (Mul && Mul->getNumOperands() == 2 && Mul->getOperand(0)->isAllOnesValue()) { const SCEV *X; const SCEV *Y; if (matchURem(Mul->getOperand(1), X, Y) && X == Ops[1]) { return getMulExpr(Y, getUDivExpr(X, Y)); } } } // Skip past any other cast SCEVs. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) ++Idx; // If there are add operands they would be next. if (Idx < Ops.size()) { bool DeletedAdd = false; // If the original flags and all inlined SCEVAddExprs are NUW, use the // common NUW flag for expression after inlining. Other flags cannot be // preserved, because they may depend on the original order of operations. SCEV::NoWrapFlags CommonFlags = maskFlags(OrigFlags, SCEV::FlagNUW); while (const SCEVAddExpr *Add = dyn_cast(Ops[Idx])) { if (Ops.size() > AddOpsInlineThreshold || Add->getNumOperands() > AddOpsInlineThreshold) break; // If we have an add, expand the add operands onto the end of the operands // list. Ops.erase(Ops.begin()+Idx); append_range(Ops, Add->operands()); DeletedAdd = true; CommonFlags = maskFlags(CommonFlags, Add->getNoWrapFlags()); } // If we deleted at least one add, we added operands to the end of the list, // and they are not necessarily sorted. Recurse to resort and resimplify // any operands we just acquired. if (DeletedAdd) return getAddExpr(Ops, CommonFlags, Depth + 1); } // Skip over the add expression until we get to a multiply. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) ++Idx; // Check to see if there are any folding opportunities present with // operands multiplied by constant values. if (Idx < Ops.size() && isa(Ops[Idx])) { uint64_t BitWidth = getTypeSizeInBits(Ty); DenseMap M; SmallVector NewOps; APInt AccumulatedConstant(BitWidth, 0); if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, Ops, APInt(BitWidth, 1), *this)) { struct APIntCompare { bool operator()(const APInt &LHS, const APInt &RHS) const { return LHS.ult(RHS); } }; // Some interesting folding opportunity is present, so its worthwhile to // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. std::map, APIntCompare> MulOpLists; for (const SCEV *NewOp : NewOps) MulOpLists[M.find(NewOp)->second].push_back(NewOp); // Re-generate the operands list. 
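      // Illustration (made-up operands): for a list like (2*x, x) the map ends
      // up holding x with an accumulated scale of 3, so the sum is regenerated
      // as a single 3*x; constants buried under a scale, e.g. 3*(y + 2),
      // surface into AccumulatedConstant as 6.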
Ops.clear(); if (AccumulatedConstant != 0) Ops.push_back(getConstant(AccumulatedConstant)); for (auto &MulOp : MulOpLists) { if (MulOp.first == 1) { Ops.push_back(getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1)); } else if (MulOp.first != 0) { Ops.push_back(getMulExpr( getConstant(MulOp.first), getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1), SCEV::FlagAnyWrap, Depth + 1)); } } if (Ops.empty()) return getZero(Ty); if (Ops.size() == 1) return Ops[0]; return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } } // If we are adding something to a multiply expression, make sure the // something is not already an operand of the multiply. If so, merge it into // the multiply. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { const SCEVMulExpr *Mul = cast(Ops[Idx]); for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { const SCEV *MulOpSCEV = Mul->getOperand(MulOp); if (isa(MulOpSCEV)) continue; for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) if (MulOpSCEV == Ops[AddOp]) { // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) const SCEV *InnerMul = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { // If the multiply has more than two operands, we must get the // Y*Z term. SmallVector MulOps( Mul->operands().take_front(MulOp)); append_range(MulOps, Mul->operands().drop_front(MulOp + 1)); InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector TwoOps = {getOne(Ty), InnerMul}; const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV, SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { Ops.erase(Ops.begin()+AddOp); Ops.erase(Ops.begin()+Idx-1); } else { Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+AddOp-1); } Ops.push_back(OuterMul); return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Check this multiply against other multiplies being added together. for (unsigned OtherMulIdx = Idx+1; OtherMulIdx < Ops.size() && isa(Ops[OtherMulIdx]); ++OtherMulIdx) { const SCEVMulExpr *OtherMul = cast(Ops[OtherMulIdx]); // If MulOp occurs in OtherMul, we can fold the two multiplies // together. for (unsigned OMulOp = 0, e = OtherMul->getNumOperands(); OMulOp != e; ++OMulOp) if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { SmallVector MulOps( Mul->operands().take_front(MulOp)); append_range(MulOps, Mul->operands().drop_front(MulOp+1)); InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { SmallVector MulOps( OtherMul->operands().take_front(OMulOp)); append_range(MulOps, OtherMul->operands().drop_front(OMulOp+1)); InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector TwoOps = {InnerMul1, InnerMul2}; const SCEV *InnerMulSum = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum, SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+OtherMulIdx-1); Ops.push_back(OuterMul); return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } } } } // If there are any add recurrences in the operands list, see if any other // added values are loop invariant. If so, we can fold them into the // recurrence. 
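  // For example (illustrative): c + {a,+,s}<L>, with c available at the entry
  // of L, folds to {a+c,+,s}<L>; the scope check further below decides whether
  // nowrap flags computed for the c + a addition can be kept on the new start
  // value.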
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) ++Idx; // Scan over all recurrences, trying to fold loop invariants into them. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { // Scan all of the other operands to this add and add them to the vector if // they are loop invariant w.r.t. the recurrence. SmallVector LIOps; const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; } // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // Compute nowrap flags for the addition of the loop-invariant ops and // the addrec. Temporarily push it as an operand for that purpose. These // flags are valid in the scope of the addrec only. LIOps.push_back(AddRec); SCEV::NoWrapFlags Flags = ComputeFlags(LIOps); LIOps.pop_back(); // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} LIOps.push_back(AddRec->getStart()); SmallVector AddRecOps(AddRec->operands()); // It is not in general safe to propagate flags valid on an add within // the addrec scope to one outside it. We must prove that the inner // scope is guaranteed to execute if the outer one does to be able to // safely propagate. We know the program is undefined if poison is // produced on the inner scoped addrec. We also know that *for this use* // the outer scoped add can't overflow (because of the flags we just // computed for the inner scoped add) without the program being undefined. // Proving that entry to the outer scope neccesitates entry to the inner // scope, thus proves the program undefined if the flags would be violated // in the outer scope. SCEV::NoWrapFlags AddFlags = Flags; if (AddFlags != SCEV::FlagAnyWrap) { auto *DefI = getDefiningScopeBound(LIOps); auto *ReachI = &*AddRecLoop->getHeader()->begin(); if (!isGuaranteedToTransferExecutionTo(DefI, ReachI)) AddFlags = SCEV::FlagAnyWrap; } AddRecOps[0] = getAddExpr(LIOps, AddFlags, Depth + 1); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer add and the inner addrec are guaranteed to have no overflow. // Always propagate NW. Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW)); const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; // Otherwise, add the folded AddRec by the non-invariant parts. for (unsigned i = 0;; ++i) if (Ops[i] == AddRec) { Ops[i] = NewRec; break; } return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Okay, if there weren't any loop invariants to be folded, check to see if // there are multiple AddRec's with the same loop induction variable being // added together. If so, we can fold them. for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { // We expect the AddRecExpr's to be sorted in reverse dominance order, // so that the 1st found AddRecExpr is dominated by all others. 
assert(DT.dominates( cast(Ops[OtherIdx])->getLoop()->getHeader(), AddRec->getLoop()->getHeader()) && "AddRecExprs are not sorted in reverse dominance order?"); if (AddRecLoop == cast(Ops[OtherIdx])->getLoop()) { // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} SmallVector AddRecOps(AddRec->operands()); for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { const auto *OtherAddRec = cast(Ops[OtherIdx]); if (OtherAddRec->getLoop() == AddRecLoop) { for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) { if (i >= AddRecOps.size()) { append_range(AddRecOps, OtherAddRec->operands().drop_front(i)); break; } SmallVector TwoOps = { AddRecOps[i], OtherAddRec->getOperand(i)}; AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); } Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; } } // Step size has changed, so we cannot guarantee no self-wraparound. Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } } // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. } // Okay, it looks like we really DO need an add expr. Check to see if we // already have one, otherwise create a new one. return getOrCreateAddExpr(Ops, ComputeFlags(Ops)); } const SCEV * ScalarEvolution::getOrCreateAddExpr(ArrayRef Ops, SCEV::NoWrapFlags Flags) { FoldingSetNodeID ID; ID.AddInteger(scAddExpr); for (const SCEV *Op : Ops) ID.AddPointer(Op); void *IP = nullptr; SCEVAddExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); registerUser(S, Ops); } S->setNoWrapFlags(Flags); return S; } const SCEV * ScalarEvolution::getOrCreateAddRecExpr(ArrayRef Ops, const Loop *L, SCEV::NoWrapFlags Flags) { FoldingSetNodeID ID; ID.AddInteger(scAddRecExpr); for (const SCEV *Op : Ops) ID.AddPointer(Op); ID.AddPointer(L); void *IP = nullptr; SCEVAddRecExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L); UniqueSCEVs.InsertNode(S, IP); LoopUsers[L].push_back(S); registerUser(S, Ops); } setNoWrapFlags(S, Flags); return S; } const SCEV * ScalarEvolution::getOrCreateMulExpr(ArrayRef Ops, SCEV::NoWrapFlags Flags) { FoldingSetNodeID ID; ID.AddInteger(scMulExpr); for (const SCEV *Op : Ops) ID.AddPointer(Op); void *IP = nullptr; SCEVMulExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); registerUser(S, Ops); } S->setNoWrapFlags(Flags); return S; } static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { uint64_t k = i*j; if (j > 1 && k / j != i) Overflow = true; return k; } /// Compute the result of "n choose k", the binomial coefficient. If an /// intermediate computation overflows, Overflow will be set and the return will /// be garbage. Overflow is not cleared on absence of overflow. static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { // We use the multiplicative formula: // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 . 
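    // For instance, Choose(5, 2) runs two iterations: r = 5 after dividing by
    // 1, then r = 5 * 4 / 2 = 10, matching C(5, 2) without ever forming the
    // full factorials.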
// At each iteration, we take the n-th term of the numeral and divide by the // (k-n)th term of the denominator. This division will always produce an // integral result, and helps reduce the chance of overflow in the // intermediate computations. However, we can still overflow even when the // final result would fit. if (n == 0 || n == k) return 1; if (k > n) return 0; if (k > n/2) k = n-k; uint64_t r = 1; for (uint64_t i = 1; i <= k; ++i) { r = umul_ov(r, n-(i-1), Overflow); r /= i; } return r; } /// Determine if any of the operands in this SCEV are a constant or if /// any of the add or multiply expressions in this SCEV contain a constant. static bool containsConstantInAddMulChain(const SCEV *StartExpr) { struct FindConstantInAddMulChain { bool FoundConstant = false; bool follow(const SCEV *S) { FoundConstant |= isa(S); return isa(S) || isa(S); } bool isDone() const { return FoundConstant; } }; FindConstantInAddMulChain F; SCEVTraversal ST(F); ST.visitAll(StartExpr); return F.FoundConstant; } /// Get a canonical multiply expression, or something simpler if possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags OrigFlags, unsigned Depth) { assert(OrigFlags == maskFlags(OrigFlags, SCEV::FlagNUW | SCEV::FlagNSW) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty mul!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = Ops[0]->getType(); assert(!ETy->isPointerTy()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(Ops[i]->getType() == ETy && "SCEVMulExpr operand types don't match!"); #endif // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! Ops[0] = getConstant(LHSC->getAPInt() * RHSC->getAPInt()); if (Ops.size() == 2) return Ops[0]; Ops.erase(Ops.begin()+1); // Erase the folded element LHSC = cast(Ops[0]); } // If we have a multiply of zero, it will always be zero. if (LHSC->getValue()->isZero()) return LHSC; // If we are left with a constant one being multiplied, strip it off. if (LHSC->getValue()->isOne()) { Ops.erase(Ops.begin()); --Idx; } if (Ops.size() == 1) return Ops[0]; } // Delay expensive flag strengthening until necessary. auto ComputeFlags = [this, OrigFlags](const ArrayRef Ops) { return StrengthenNoWrapFlags(this, scMulExpr, Ops, OrigFlags); }; // Limit recursion calls depth. if (Depth > MaxArithDepth || hasHugeExpression(Ops)) return getOrCreateMulExpr(Ops, ComputeFlags(Ops)); if (SCEV *S = findExistingSCEVInCache(scMulExpr, Ops)) { // Don't strengthen flags if we have no new information. SCEVMulExpr *Mul = static_cast(S); if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags) Mul->setNoWrapFlags(ComputeFlags(Ops)); return S; } if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { if (Ops.size() == 2) { // C1*(C2+V) -> C1*C2 + C1*V if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) // If any of Add's ops are Adds or Muls with a constant, apply this // transformation as well. // // TODO: There are some cases where this transformation is not // profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of // this transformation should be narrowed down. 
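      // For example (illustrative): 3 * (5 + x) distributes to 15 + 3*x, which
      // can then fold further; the containsConstantInAddMulChain check below
      // is what gates the transform so that purely symbolic sums are left
      // alone.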
if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add)) { const SCEV *LHS = getMulExpr(LHSC, Add->getOperand(0), SCEV::FlagAnyWrap, Depth + 1); const SCEV *RHS = getMulExpr(LHSC, Add->getOperand(1), SCEV::FlagAnyWrap, Depth + 1); return getAddExpr(LHS, RHS, SCEV::FlagAnyWrap, Depth + 1); } if (Ops[0]->isAllOnesValue()) { // If we have a mul by -1 of an add, try distributing the -1 among the // add operands. if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) { SmallVector NewOps; bool AnyFolded = false; for (const SCEV *AddOp : Add->operands()) { const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap, Depth + 1); if (!isa(Mul)) AnyFolded = true; NewOps.push_back(Mul); } if (AnyFolded) return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1); } else if (const auto *AddRec = dyn_cast(Ops[1])) { // Negation preserves a recurrence's no self-wrap property. SmallVector Operands; for (const SCEV *AddRecOp : AddRec->operands()) Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap, Depth + 1)); // Let M be the minimum representable signed value. AddRec with nsw // multiplied by -1 can have signed overflow if and only if it takes a // value of M: M * (-1) would stay M and (M + 1) * (-1) would be the // maximum signed value. In all other cases signed overflow is // impossible. auto FlagsMask = SCEV::FlagNW; if (hasFlags(AddRec->getNoWrapFlags(), SCEV::FlagNSW)) { auto MinInt = APInt::getSignedMinValue(getTypeSizeInBits(AddRec->getType())); if (getSignedRangeMin(AddRec) != MinInt) FlagsMask = setFlags(FlagsMask, SCEV::FlagNSW); } return getAddRecExpr(Operands, AddRec->getLoop(), AddRec->getNoWrapFlags(FlagsMask)); } } } } // Skip over the add expression until we get to a multiply. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) ++Idx; // If there are mul operands inline them all into this expression. if (Idx < Ops.size()) { bool DeletedMul = false; while (const SCEVMulExpr *Mul = dyn_cast(Ops[Idx])) { if (Ops.size() > MulOpsInlineThreshold) break; // If we have an mul, expand the mul operands onto the end of the // operands list. Ops.erase(Ops.begin()+Idx); append_range(Ops, Mul->operands()); DeletedMul = true; } // If we deleted at least one mul, we added operands to the end of the // list, and they are not necessarily sorted. Recurse to resort and // resimplify any operands we just acquired. if (DeletedMul) return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // If there are any add recurrences in the operands list, see if any other // added values are loop invariant. If so, we can fold them into the // recurrence. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) ++Idx; // Scan over all recurrences, trying to fold loop invariants into them. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { // Scan all of the other operands to this mul and add them to the vector // if they are loop invariant w.r.t. the recurrence. SmallVector LIOps; const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (isAvailableAtLoopEntry(Ops[i], AddRec->getLoop())) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; } // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} SmallVector NewOps; NewOps.reserve(AddRec->getNumOperands()); const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1); // If both the mul and addrec are nuw, we can preserve nuw. 
// If both the mul and addrec are nsw, we can only preserve nsw if either // a) they are also nuw, or // b) all multiplications of addrec operands with scale are nsw. SCEV::NoWrapFlags Flags = AddRec->getNoWrapFlags(ComputeFlags({Scale, AddRec})); for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i), SCEV::FlagAnyWrap, Depth + 1)); if (hasFlags(Flags, SCEV::FlagNSW) && !hasFlags(Flags, SCEV::FlagNUW)) { ConstantRange NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( Instruction::Mul, getSignedRange(Scale), OverflowingBinaryOperator::NoSignedWrap); if (!NSWRegion.contains(getSignedRange(AddRec->getOperand(i)))) Flags = clearFlags(Flags, SCEV::FlagNSW); } } const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(), Flags); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; // Otherwise, multiply the folded AddRec by the non-invariant parts. for (unsigned i = 0;; ++i) if (Ops[i] == AddRec) { Ops[i] = NewRec; break; } return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Okay, if there weren't any loop invariants to be folded, check to see // if there are multiple AddRec's with the same loop induction variable // being multiplied together. If so, we can fold them. // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z // ]]],+,...up to x=2n}. // Note that the arguments to choose() are always integers with values // known at compile time, never SCEV objects. // // The implementation avoids pointless extra computations when the two // addrec's are of different length (mathematically, it's equivalent to // an infinite stream of zeros on the right). bool OpsModified = false; for (unsigned OtherIdx = Idx+1; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { const SCEVAddRecExpr *OtherAddRec = dyn_cast(Ops[OtherIdx]); if (!OtherAddRec || OtherAddRec->getLoop() != AddRec->getLoop()) continue; // Limit max number of arguments to avoid creation of unreasonably big // SCEVAddRecs with very complex operands. 
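      // As a small instance of the formula above (illustrative):
      // {a,+,b} * {c,+,d} over the same loop comes out as
      // {a*c,+,a*d+b*c+b*d,+,2*b*d}; e.g. {0,+,1} squared is {0,+,1,+,2},
      // i.e. the sequence of i^2.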
if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 > MaxAddRecSize || hasHugeExpression({AddRec, OtherAddRec})) continue; bool Overflow = false; Type *Ty = AddRec->getType(); bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; SmallVector AddRecOps; for (int x = 0, xe = AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { SmallVector SumOps; for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); z < ze && !Overflow; ++z) { uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); uint64_t Coeff; if (LargerThan64Bits) Coeff = umul_ov(Coeff1, Coeff2, Overflow); else Coeff = Coeff1*Coeff2; const SCEV *CoeffTerm = getConstant(Ty, Coeff); const SCEV *Term1 = AddRec->getOperand(y-z); const SCEV *Term2 = OtherAddRec->getOperand(z); SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2, SCEV::FlagAnyWrap, Depth + 1)); } } if (SumOps.empty()) SumOps.push_back(getZero(Ty)); AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1)); } if (!Overflow) { const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), SCEV::FlagAnyWrap); if (Ops.size() == 2) return NewAddRec; Ops[Idx] = NewAddRec; Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; OpsModified = true; AddRec = dyn_cast(NewAddRec); if (!AddRec) break; } } if (OpsModified) return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. } // Okay, it looks like we really DO need an mul expr. Check to see if we // already have one, otherwise create a new one. return getOrCreateMulExpr(Ops, ComputeFlags(Ops)); } /// Represents an unsigned remainder expression based on unsigned division. const SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS, const SCEV *RHS) { assert(getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType(RHS->getType()) && "SCEVURemExpr operand types don't match!"); // Short-circuit easy cases if (const SCEVConstant *RHSC = dyn_cast(RHS)) { // If constant is one, the result is trivial if (RHSC->getValue()->isOne()) return getZero(LHS->getType()); // X urem 1 --> 0 // If constant is a power of two, fold into a zext(trunc(LHS)). if (RHSC->getAPInt().isPowerOf2()) { Type *FullTy = LHS->getType(); Type *TruncTy = IntegerType::get(getContext(), RHSC->getAPInt().logBase2()); return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy); } } // Fallback to %a == %x urem %y == %x - ((%x udiv %y) * %y) const SCEV *UDiv = getUDivExpr(LHS, RHS); const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW); return getMinusSCEV(LHS, Mult, SCEV::FlagNUW); } /// Get a canonical unsigned division expression, or something simpler if /// possible. const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, const SCEV *RHS) { assert(!LHS->getType()->isPointerTy() && "SCEVUDivExpr operand can't be pointer!"); assert(LHS->getType() == RHS->getType() && "SCEVUDivExpr operand types don't match!"); FoldingSetNodeID ID; ID.AddInteger(scUDivExpr); ID.AddPointer(LHS); ID.AddPointer(RHS); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // 0 udiv Y == 0 if (const SCEVConstant *LHSC = dyn_cast(LHS)) if (LHSC->getValue()->isZero()) return LHS; if (const SCEVConstant *RHSC = dyn_cast(RHS)) { if (RHSC->getValue()->isOne()) return LHS; // X udiv 1 --> x // If the denominator is zero, the result of the udiv is undefined. 
Don't // try to analyze it, because the resolution chosen here may differ from // the resolution chosen in other parts of the compiler. if (!RHSC->getValue()->isZero()) { // Determine if the division can be folded into the operands of // its operands. // TODO: Generalize this to non-constants by using known-bits information. Type *Ty = LHS->getType(); unsigned LZ = RHSC->getAPInt().countl_zero(); unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; // For non-power-of-two values, effectively round the value up to the // nearest power of two. if (!RHSC->getAPInt().isPowerOf2()) ++MaxShiftAmt; IntegerType *ExtTy = IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); if (const SCEVAddRecExpr *AR = dyn_cast(LHS)) if (const SCEVConstant *Step = dyn_cast(AR->getStepRecurrence(*this))) { // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. const APInt &StepInt = Step->getAPInt(); const APInt &DivInt = RHSC->getAPInt(); if (!StepInt.urem(DivInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { SmallVector Operands; for (const SCEV *Op : AR->operands()) Operands.push_back(getUDivExpr(Op, RHS)); return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW); } /// Get a canonical UDivExpr for a recurrence. /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. // We can currently only fold X%N if X is constant. const SCEVConstant *StartC = dyn_cast(AR->getStart()); if (StartC && !DivInt.urem(StepInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { const APInt &StartInt = StartC->getAPInt(); const APInt &StartRem = StartInt.urem(StepInt); if (StartRem != 0) { const SCEV *NewLHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, AR->getLoop(), SCEV::FlagNW); if (LHS != NewLHS) { LHS = NewLHS; // Reset the ID to include the new LHS, and check if it is // already cached. ID.clear(); ID.AddInteger(scUDivExpr); ID.AddPointer(LHS); ID.AddPointer(RHS); IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; } } } } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast(LHS)) { SmallVector Operands; for (const SCEV *Op : M->operands()) Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) // Find an operand that's safely divisible. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { const SCEV *Op = M->getOperand(i); const SCEV *Div = getUDivExpr(Op, RHSC); if (!isa(Div) && getMulExpr(Div, RHSC) == Op) { Operands = SmallVector(M->operands()); Operands[i] = Div; return getMulExpr(Operands); } } } // (A/B)/C --> A/(B*C) if safe and B*C can be folded. if (const SCEVUDivExpr *OtherDiv = dyn_cast(LHS)) { if (auto *DivisorConstant = dyn_cast(OtherDiv->getRHS())) { bool Overflow = false; APInt NewRHS = DivisorConstant->getAPInt().umul_ov(RHSC->getAPInt(), Overflow); if (Overflow) { return getConstant(RHSC->getType(), 0, false); } return getUDivExpr(OtherDiv->getLHS(), getConstant(NewRHS)); } } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. 
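      // For example (illustrative): (8*x + 4) /u 4 becomes 2*x + 1, but only
      // once the zero-extend comparison below shows the narrow add cannot
      // wrap; without that proof, dividing the operands separately need not
      // equal dividing the sum.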
if (const SCEVAddExpr *A = dyn_cast(LHS)) { SmallVector Operands; for (const SCEV *Op : A->operands()) Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { Operands.clear(); for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); if (isa(Op) || getMulExpr(Op, RHS) != A->getOperand(i)) break; Operands.push_back(Op); } if (Operands.size() == A->getNumOperands()) return getAddExpr(Operands); } } // Fold if both operands are constant. if (const SCEVConstant *LHSC = dyn_cast(LHS)) return getConstant(LHSC->getAPInt().udiv(RHSC->getAPInt())); } } // The Insertion Point (IP) might be invalid by now (due to UniqueSCEVs // changes). Make sure we get a new one. IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); UniqueSCEVs.InsertNode(S, IP); registerUser(S, {LHS, RHS}); return S; } APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { APInt A = C1->getAPInt().abs(); APInt B = C2->getAPInt().abs(); uint32_t ABW = A.getBitWidth(); uint32_t BBW = B.getBitWidth(); if (ABW > BBW) B = B.zext(ABW); else if (ABW < BBW) A = A.zext(BBW); return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B)); } /// Get a canonical unsigned division expression, or something simpler if /// possible. There is no representation for an exact udiv in SCEV IR, but we /// can attempt to remove factors from the LHS and RHS. We can't do this when /// it's not exact because the udiv may be clearing bits. const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, const SCEV *RHS) { // TODO: we could try to find factors in all sorts of things, but for now we // just deal with u/exact (multiply, constant). See SCEVDivision towards the // end of this file for inspiration. const SCEVMulExpr *Mul = dyn_cast(LHS); if (!Mul || !Mul->hasNoUnsignedWrap()) return getUDivExpr(LHS, RHS); if (const SCEVConstant *RHSCst = dyn_cast(RHS)) { // If the mulexpr multiplies by a constant, then that constant must be the // first element of the mulexpr. if (const auto *LHSCst = dyn_cast(Mul->getOperand(0))) { if (LHSCst == RHSCst) { SmallVector Operands(drop_begin(Mul->operands())); return getMulExpr(Operands); } // We can't just assume that LHSCst divides RHSCst cleanly, it could be // that there's a factor provided by one of the other terms. We need to // check. APInt Factor = gcd(LHSCst, RHSCst); if (!Factor.isIntN(1)) { LHSCst = cast(getConstant(LHSCst->getAPInt().udiv(Factor))); RHSCst = cast(getConstant(RHSCst->getAPInt().udiv(Factor))); SmallVector Operands; Operands.push_back(LHSCst); append_range(Operands, Mul->operands().drop_front()); LHS = getMulExpr(Operands); RHS = RHSCst; Mul = dyn_cast(LHS); if (!Mul) return getUDivExactExpr(LHS, RHS); } } } for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) { if (Mul->getOperand(i) == RHS) { SmallVector Operands; append_range(Operands, Mul->operands().take_front(i)); append_range(Operands, Mul->operands().drop_front(i + 1)); return getMulExpr(Operands); } } return getUDivExpr(LHS, RHS); } /// Get an add recurrence expression for the specified loop. Simplify the /// expression as much as possible. 
const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags) { SmallVector Operands; Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast(Step)) if (StepChrec->getLoop() == L) { append_range(Operands, StepChrec->operands()); return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW)); } Operands.push_back(Step); return getAddRecExpr(Operands, L, Flags); } /// Get an add recurrence expression for the specified loop. Simplify the /// expression as much as possible. const SCEV * ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, const Loop *L, SCEV::NoWrapFlags Flags) { if (Operands.size() == 1) return Operands[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); for (unsigned i = 1, e = Operands.size(); i != e; ++i) { assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy && "SCEVAddRecExpr operand types don't match!"); assert(!Operands[i]->getType()->isPointerTy() && "Step must be integer"); } for (unsigned i = 0, e = Operands.size(); i != e; ++i) assert(isAvailableAtLoopEntry(Operands[i], L) && "SCEVAddRecExpr operand is not available at loop entry!"); #endif if (Operands.back()->isZero()) { Operands.pop_back(); return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X } // It's tempting to want to call getConstantMaxBackedgeTakenCount count here and // use that information to infer NUW and NSW flags. However, computing a // BE count requires calling getAddRecExpr, so we may not yet have a // meaningful BE count at this point (and if we don't, we'd be stuck // with a SCEVCouldNotCompute as the cached BE count). Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); // Canonicalize nested AddRecs in by nesting them in order of loop depth. if (const SCEVAddRecExpr *NestedAR = dyn_cast(Operands[0])) { const Loop *NestedLoop = NestedAR->getLoop(); if (L->contains(NestedLoop) ? (L->getLoopDepth() < NestedLoop->getLoopDepth()) : (!NestedLoop->contains(L) && DT.dominates(L->getHeader(), NestedLoop->getHeader()))) { SmallVector NestedOperands(NestedAR->operands()); Operands[0] = NestedAR->getStart(); // AddRecs require their operands be loop-invariant with respect to their // loops. Don't perform this transformation if it would break this // requirement. bool AllInvariant = all_of( Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); }); if (AllInvariant) { // Create a recurrence for the outer loop with the same step size. // // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the // inner recurrence has the same property. SCEV::NoWrapFlags OuterFlags = maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags()); NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) { return isLoopInvariant(Op, NestedLoop); }); if (AllInvariant) { // Ok, both add recurrences are valid after the transformation. // // The inner recurrence keeps its NW flag but only keeps NUW/NSW if // the outer recurrence has the same property. SCEV::NoWrapFlags InnerFlags = maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags); return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags); } } // Reset Operands to its original state. Operands[0] = NestedAR; } } // Okay, it looks like we really DO need an addrec expr. Check to see if we // already have one, otherwise create a new one. 
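  // Note: getOrCreateAddRecExpr profiles the node on the operand pointers plus
  // the loop, so structurally identical recurrences over the same loop share
  // one SCEV node; the nowrap flags are not part of the profile and are
  // (re)applied to the node afterwards.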
return getOrCreateAddRecExpr(Operands, L, Flags); } const SCEV * ScalarEvolution::getGEPExpr(GEPOperator *GEP, const SmallVectorImpl &IndexExprs) { const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand()); // getSCEV(Base)->getType() has the same address space as Base->getType() // because SCEV::getType() preserves the address space. Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType()); const bool AssumeInBoundsFlags = [&]() { if (!GEP->isInBounds()) return false; // We'd like to propagate flags from the IR to the corresponding SCEV nodes, // but to do that, we have to ensure that said flag is valid in the entire // defined scope of the SCEV. auto *GEPI = dyn_cast(GEP); // TODO: non-instructions have global scope. We might be able to prove // some global scope cases return GEPI && isSCEVExprNeverPoison(GEPI); }(); SCEV::NoWrapFlags OffsetWrap = AssumeInBoundsFlags ? SCEV::FlagNSW : SCEV::FlagAnyWrap; Type *CurTy = GEP->getType(); bool FirstIter = true; SmallVector Offsets; for (const SCEV *IndexExpr : IndexExprs) { // Compute the (potentially symbolic) offset in bytes for this index. if (StructType *STy = dyn_cast(CurTy)) { // For a struct, add the member offset. ConstantInt *Index = cast(IndexExpr)->getValue(); unsigned FieldNo = Index->getZExtValue(); const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo); Offsets.push_back(FieldOffset); // Update CurTy to the type of the field at Index. CurTy = STy->getTypeAtIndex(Index); } else { // Update CurTy to its element type. if (FirstIter) { assert(isa(CurTy) && "The first index of a GEP indexes a pointer"); CurTy = GEP->getSourceElementType(); FirstIter = false; } else { CurTy = GetElementPtrInst::getTypeAtIndex(CurTy, (uint64_t)0); } // For an array, add the element offset, explicitly scaled. const SCEV *ElementSize = getSizeOfExpr(IntIdxTy, CurTy); // Getelementptr indices are signed. IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy); // Multiply the index by the element size to compute the element offset. const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap); Offsets.push_back(LocalOffset); } } // Handle degenerate case of GEP without offsets. if (Offsets.empty()) return BaseExpr; // Add the offsets together, assuming nsw if inbounds. const SCEV *Offset = getAddExpr(Offsets, OffsetWrap); // Add the base address and the offset. We cannot use the nsw flag, as the // base address is unsigned. However, if we know that the offset is // non-negative, we can use nuw. SCEV::NoWrapFlags BaseWrap = AssumeInBoundsFlags && isKnownNonNegative(Offset) ? SCEV::FlagNUW : SCEV::FlagAnyWrap; auto *GEPExpr = getAddExpr(BaseExpr, Offset, BaseWrap); assert(BaseExpr->getType() == GEPExpr->getType() && "GEP should not change type mid-flight."); return GEPExpr; } SCEV *ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef Ops) { FoldingSetNodeID ID; ID.AddInteger(SCEVType); for (const SCEV *Op : Ops) ID.AddPointer(Op); void *IP = nullptr; return UniqueSCEVs.FindNodeOrInsertPos(ID, IP); } const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) { SCEV::NoWrapFlags Flags = IsNSW ? 
SCEV::FlagNSW : SCEV::FlagAnyWrap; return getSMaxExpr(Op, getNegativeSCEV(Op, Flags)); } const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind, SmallVectorImpl &Ops) { assert(SCEVMinMaxExpr::isMinMaxType(Kind) && "Not a SCEVMinMaxExpr!"); assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) { assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "Operand types don't match!"); assert(Ops[0]->getType()->isPointerTy() == Ops[i]->getType()->isPointerTy() && "min/max should be consistently pointerish"); } #endif bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr; bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr; // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); // Check if we have created the same expression before. if (const SCEV *S = findExistingSCEVInCache(Kind, Ops)) { return S; } // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); auto FoldOp = [&](const APInt &LHS, const APInt &RHS) { switch (Kind) { case scSMaxExpr: return APIntOps::smax(LHS, RHS); case scSMinExpr: return APIntOps::smin(LHS, RHS); case scUMaxExpr: return APIntOps::umax(LHS, RHS); case scUMinExpr: return APIntOps::umin(LHS, RHS); default: llvm_unreachable("Unknown SCEV min/max opcode"); } }; while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! ConstantInt *Fold = ConstantInt::get( getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; LHSC = cast(Ops[0]); } bool IsMinV = LHSC->getValue()->isMinValue(IsSigned); bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned); if (IsMax ? IsMinV : IsMaxV) { // If we are left with a constant minimum(/maximum)-int, strip it off. Ops.erase(Ops.begin()); --Idx; } else if (IsMax ? IsMaxV : IsMinV) { // If we have a max(/min) with a constant maximum(/minimum)-int, // it will always be the extremum. return LHSC; } if (Ops.size() == 1) return Ops[0]; } // Find the first operation of the same kind while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind) ++Idx; // Check to see if one of the operands is of the same kind. If so, expand its // operands onto our operand list, and recurse to simplify. if (Idx < Ops.size()) { bool DeletedAny = false; while (Ops[Idx]->getSCEVType() == Kind) { const SCEVMinMaxExpr *SMME = cast(Ops[Idx]); Ops.erase(Ops.begin()+Idx); append_range(Ops, SMME->operands()); DeletedAny = true; } if (DeletedAny) return getMinMaxExpr(Kind, Ops); } // Okay, check to see if the same value occurs in the operand list twice. If // so, delete one. Since we sorted the list, these values are required to // be adjacent. llvm::CmpInst::Predicate GEPred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; llvm::CmpInst::Predicate LEPred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred; llvm::CmpInst::Predicate SecondPred = IsMax ? 
LEPred : GEPred; for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) { if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) { // X op Y op Y --> X op Y // X op Y --> X, if we know X, Y are ordered appropriately Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2); --i; --e; } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i], Ops[i + 1])) { // X op Y --> Y, if we know X, Y are ordered appropriately Ops.erase(Ops.begin() + i, Ops.begin() + i + 1); --i; --e; } } if (Ops.size() == 1) return Ops[0]; assert(!Ops.empty() && "Reduced smax down to nothing!"); // Okay, it looks like we really DO need an expr. Check to see if we // already have one, otherwise create a new one. FoldingSetNodeID ID; ID.AddInteger(Kind); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = nullptr; const SCEV *ExistingSCEV = UniqueSCEVs.FindNodeOrInsertPos(ID, IP); if (ExistingSCEV) return ExistingSCEV; const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); registerUser(S, Ops); return S; } namespace { class SCEVSequentialMinMaxDeduplicatingVisitor final : public SCEVVisitor> { using RetVal = std::optional; using Base = SCEVVisitor; ScalarEvolution &SE; const SCEVTypes RootKind; // Must be a sequential min/max expression. const SCEVTypes NonSequentialRootKind; // Non-sequential variant of RootKind. SmallPtrSet SeenOps; bool canRecurseInto(SCEVTypes Kind) const { // We can only recurse into the SCEV expression of the same effective type // as the type of our root SCEV expression. return RootKind == Kind || NonSequentialRootKind == Kind; }; RetVal visitAnyMinMaxExpr(const SCEV *S) { assert((isa(S) || isa(S)) && "Only for min/max expressions."); SCEVTypes Kind = S->getSCEVType(); if (!canRecurseInto(Kind)) return S; auto *NAry = cast(S); SmallVector NewOps; bool Changed = visit(Kind, NAry->operands(), NewOps); if (!Changed) return S; if (NewOps.empty()) return std::nullopt; return isa(S) ? SE.getSequentialMinMaxExpr(Kind, NewOps) : SE.getMinMaxExpr(Kind, NewOps); } RetVal visit(const SCEV *S) { // Has the whole operand been seen already? 
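    // For example (illustrative): deduplicating (x umin_seq y umin_seq x)
    // visits x twice; the second visit hits the SeenOps check below, returns
    // std::nullopt, and the duplicate is dropped from the rebuilt operand
    // list, leaving (x umin_seq y).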
if (!SeenOps.insert(S).second) return std::nullopt; return Base::visit(S); } public: SCEVSequentialMinMaxDeduplicatingVisitor(ScalarEvolution &SE, SCEVTypes RootKind) : SE(SE), RootKind(RootKind), NonSequentialRootKind( SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType( RootKind)) {} bool /*Changed*/ visit(SCEVTypes Kind, ArrayRef OrigOps, SmallVectorImpl &NewOps) { bool Changed = false; SmallVector Ops; Ops.reserve(OrigOps.size()); for (const SCEV *Op : OrigOps) { RetVal NewOp = visit(Op); if (NewOp != Op) Changed = true; if (NewOp) Ops.emplace_back(*NewOp); } if (Changed) NewOps = std::move(Ops); return Changed; } RetVal visitConstant(const SCEVConstant *Constant) { return Constant; } RetVal visitVScale(const SCEVVScale *VScale) { return VScale; } RetVal visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) { return Expr; } RetVal visitTruncateExpr(const SCEVTruncateExpr *Expr) { return Expr; } RetVal visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { return Expr; } RetVal visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { return Expr; } RetVal visitAddExpr(const SCEVAddExpr *Expr) { return Expr; } RetVal visitMulExpr(const SCEVMulExpr *Expr) { return Expr; } RetVal visitUDivExpr(const SCEVUDivExpr *Expr) { return Expr; } RetVal visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; } RetVal visitSMaxExpr(const SCEVSMaxExpr *Expr) { return visitAnyMinMaxExpr(Expr); } RetVal visitUMaxExpr(const SCEVUMaxExpr *Expr) { return visitAnyMinMaxExpr(Expr); } RetVal visitSMinExpr(const SCEVSMinExpr *Expr) { return visitAnyMinMaxExpr(Expr); } RetVal visitUMinExpr(const SCEVUMinExpr *Expr) { return visitAnyMinMaxExpr(Expr); } RetVal visitSequentialUMinExpr(const SCEVSequentialUMinExpr *Expr) { return visitAnyMinMaxExpr(Expr); } RetVal visitUnknown(const SCEVUnknown *Expr) { return Expr; } RetVal visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { return Expr; } }; } // namespace static bool scevUnconditionallyPropagatesPoisonFromOperands(SCEVTypes Kind) { switch (Kind) { case scConstant: case scVScale: case scTruncate: case scZeroExtend: case scSignExtend: case scPtrToInt: case scAddExpr: case scMulExpr: case scUDivExpr: case scAddRecExpr: case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: case scUnknown: // If any operand is poison, the whole expression is poison. return true; case scSequentialUMinExpr: // FIXME: if the *first* operand is poison, the whole expression is poison. return false; // Pessimistically, say that it does not propagate poison. case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } namespace { // The only way poison may be introduced in a SCEV expression is from a // poison SCEVUnknown (ConstantExprs are also represented as SCEVUnknown, // not SCEVConstant). Notably, nowrap flags in SCEV nodes can *not* // introduce poison -- they encode guaranteed, non-speculated knowledge. // // Additionally, all SCEV nodes propagate poison from inputs to outputs, // with the notable exception of umin_seq, where only poison from the first // operand is (unconditionally) propagated. 
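// As an informal illustration: %x umin_seq %y behaves like
// "%x == 0 ? 0 : umin(%x, %y)", so when %x is zero the second operand is never
// meaningfully used and poison in %y need not leak into the result, while
// poison in %x always does.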
struct SCEVPoisonCollector { bool LookThroughMaybePoisonBlocking; SmallPtrSet MaybePoison; SCEVPoisonCollector(bool LookThroughMaybePoisonBlocking) : LookThroughMaybePoisonBlocking(LookThroughMaybePoisonBlocking) {} bool follow(const SCEV *S) { if (!LookThroughMaybePoisonBlocking && !scevUnconditionallyPropagatesPoisonFromOperands(S->getSCEVType())) return false; if (auto *SU = dyn_cast(S)) { if (!isGuaranteedNotToBePoison(SU->getValue())) MaybePoison.insert(SU); } return true; } bool isDone() const { return false; } }; } // namespace /// Return true if V is poison given that AssumedPoison is already poison. static bool impliesPoison(const SCEV *AssumedPoison, const SCEV *S) { // First collect all SCEVs that might result in AssumedPoison to be poison. // We need to look through potentially poison-blocking operations here, // because we want to find all SCEVs that *might* result in poison, not only // those that are *required* to. SCEVPoisonCollector PC1(/* LookThroughMaybePoisonBlocking */ true); visitAll(AssumedPoison, PC1); // AssumedPoison is never poison. As the assumption is false, the implication // is true. Don't bother walking the other SCEV in this case. if (PC1.MaybePoison.empty()) return true; // Collect all SCEVs in S that, if poison, *will* result in S being poison // as well. We cannot look through potentially poison-blocking operations // here, as their arguments only *may* make the result poison. SCEVPoisonCollector PC2(/* LookThroughMaybePoisonBlocking */ false); visitAll(S, PC2); // Make sure that no matter which SCEV in PC1.MaybePoison is actually poison, // it will also make S poison by being part of PC2.MaybePoison. return all_of(PC1.MaybePoison, [&](const SCEVUnknown *S) { return PC2.MaybePoison.contains(S); }); } void ScalarEvolution::getPoisonGeneratingValues( SmallPtrSetImpl &Result, const SCEV *S) { SCEVPoisonCollector PC(/* LookThroughMaybePoisonBlocking */ false); visitAll(S, PC); for (const SCEVUnknown *SU : PC.MaybePoison) Result.insert(SU->getValue()); } +bool ScalarEvolution::canReuseInstruction( + const SCEV *S, Instruction *I, + SmallVectorImpl &DropPoisonGeneratingInsts) { + // If the instruction cannot be poison, it's always safe to reuse. + if (programUndefinedIfPoison(I)) + return true; + + // Otherwise, it is possible that I is more poisonous than S. Collect the + // poison-contributors of S, and then check whether I has any additional + // poison-contributors. Poison that is contributed through poison-generating + // flags is handled by dropping those flags instead. + SmallPtrSet PoisonVals; + getPoisonGeneratingValues(PoisonVals, S); + + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(I); + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + if (!Visited.insert(V).second) + continue; + + // Avoid walking large instruction graphs. + if (Visited.size() > 16) + return false; + + // Either the value can't be poison, or the S would also be poison if it + // is. + if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V)) + continue; + + auto *I = dyn_cast(V); + if (!I) + return false; + + // Disjoint or instructions are interpreted as adds by SCEV. However, we + // can't replace an arbitrary add with disjoint or, even if we drop the + // flag. We would need to convert the or into an add. + if (auto *PDI = dyn_cast(I)) + if (PDI->isDisjoint()) + return false; + + // FIXME: Ignore vscale, even though it technically could be poison. Do this + // because SCEV currently assumes it can't be poison.
Remove this special + // case once we proper model when vscale can be poison. + if (auto *II = dyn_cast(I); + II && II->getIntrinsicID() == Intrinsic::vscale) + continue; + + if (canCreatePoison(cast(I), /*ConsiderFlagsAndMetadata*/ false)) + return false; + + // If the instruction can't create poison, we can recurse to its operands. + if (I->hasPoisonGeneratingFlagsOrMetadata()) + DropPoisonGeneratingInsts.push_back(I); + + for (Value *Op : I->operands()) + Worklist.push_back(Op); + } + return true; +} + const SCEV * ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind, SmallVectorImpl &Ops) { assert(SCEVSequentialMinMaxExpr::isSequentialMinMaxType(Kind) && "Not a SCEVSequentialMinMaxExpr!"); assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) { assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "Operand types don't match!"); assert(Ops[0]->getType()->isPointerTy() == Ops[i]->getType()->isPointerTy() && "min/max should be consistently pointerish"); } #endif // Note that SCEVSequentialMinMaxExpr is *NOT* commutative, // so we can *NOT* do any kind of sorting of the expressions! // Check if we have created the same expression before. if (const SCEV *S = findExistingSCEVInCache(Kind, Ops)) return S; // FIXME: there are *some* simplifications that we can do here. // Keep only the first instance of an operand. { SCEVSequentialMinMaxDeduplicatingVisitor Deduplicator(*this, Kind); bool Changed = Deduplicator.visit(Kind, Ops, Ops); if (Changed) return getSequentialMinMaxExpr(Kind, Ops); } // Check to see if one of the operands is of the same kind. If so, expand its // operands onto our operand list, and recurse to simplify. { unsigned Idx = 0; bool DeletedAny = false; while (Idx < Ops.size()) { if (Ops[Idx]->getSCEVType() != Kind) { ++Idx; continue; } const auto *SMME = cast(Ops[Idx]); Ops.erase(Ops.begin() + Idx); Ops.insert(Ops.begin() + Idx, SMME->operands().begin(), SMME->operands().end()); DeletedAny = true; } if (DeletedAny) return getSequentialMinMaxExpr(Kind, Ops); } const SCEV *SaturationPoint; ICmpInst::Predicate Pred; switch (Kind) { case scSequentialUMinExpr: SaturationPoint = getZero(Ops[0]->getType()); Pred = ICmpInst::ICMP_ULE; break; default: llvm_unreachable("Not a sequential min/max type."); } for (unsigned i = 1, e = Ops.size(); i != e; ++i) { // We can replace %x umin_seq %y with %x umin %y if either: // * %y being poison implies %x is also poison. // * %x cannot be the saturating value (e.g. zero for umin). if (::impliesPoison(Ops[i], Ops[i - 1]) || isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_NE, Ops[i - 1], SaturationPoint)) { SmallVector SeqOps = {Ops[i - 1], Ops[i]}; Ops[i - 1] = getMinMaxExpr( SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(Kind), SeqOps); Ops.erase(Ops.begin() + i); return getSequentialMinMaxExpr(Kind, Ops); } // Fold %x umin_seq %y to %x if %x ule %y. // TODO: We might be able to prove the predicate for a later operand. if (isKnownViaNonRecursiveReasoning(Pred, Ops[i - 1], Ops[i])) { Ops.erase(Ops.begin() + i); return getSequentialMinMaxExpr(Kind, Ops); } } // Okay, it looks like we really DO need an expr. Check to see if we // already have one, otherwise create a new one. 
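// Uniquing sketch: the expression is keyed on (Kind, operand pointers) in a
// FoldingSet, so a structurally identical umin_seq built later returns the
// same SCEV node instead of allocating a new one.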
FoldingSetNodeID ID; ID.AddInteger(Kind); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = nullptr; const SCEV *ExistingSCEV = UniqueSCEVs.FindNodeOrInsertPos(ID, IP); if (ExistingSCEV) return ExistingSCEV; const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); SCEV *S = new (SCEVAllocator) SCEVSequentialMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); registerUser(S, Ops); return S; } const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector Ops = {LHS, RHS}; return getSMaxExpr(Ops); } const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { return getMinMaxExpr(scSMaxExpr, Ops); } const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector Ops = {LHS, RHS}; return getUMaxExpr(Ops); } const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { return getMinMaxExpr(scUMaxExpr, Ops); } const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector Ops = { LHS, RHS }; return getSMinExpr(Ops); } const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl &Ops) { return getMinMaxExpr(scSMinExpr, Ops); } const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential) { SmallVector Ops = { LHS, RHS }; return getUMinExpr(Ops, Sequential); } const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl &Ops, bool Sequential) { return Sequential ? getSequentialMinMaxExpr(scSequentialUMinExpr, Ops) : getMinMaxExpr(scUMinExpr, Ops); } const SCEV * ScalarEvolution::getSizeOfExpr(Type *IntTy, TypeSize Size) { const SCEV *Res = getConstant(IntTy, Size.getKnownMinValue()); if (Size.isScalable()) Res = getMulExpr(Res, getVScale(IntTy)); return Res; } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { return getSizeOfExpr(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); } const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) { return getSizeOfExpr(IntTy, getDataLayout().getTypeStoreSize(StoreTy)); } const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo) { // We can bypass creating a target-independent constant expression and then // folding it back into a ConstantInt. This is just a compile-time // optimization. const StructLayout *SL = getDataLayout().getStructLayout(STy); assert(!SL->getSizeInBits().isScalable() && "Cannot get offset for structure containing scalable vector types"); return getConstant(IntTy, SL->getElementOffset(FieldNo)); } const SCEV *ScalarEvolution::getUnknown(Value *V) { // Don't attempt to do anything other than create a SCEVUnknown object // here. createSCEV only calls getUnknown after checking for all other // interesting possibilities, and any other code that calls getUnknown // is doing so in order to hide a value from SCEV canonicalization. FoldingSetNodeID ID; ID.AddInteger(scUnknown); ID.AddPointer(V); void *IP = nullptr; if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { assert(cast(S)->getValue() == V && "Stale SCEVUnknown in uniquing map!"); return S; } SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this, FirstUnknown); FirstUnknown = cast(S); UniqueSCEVs.InsertNode(S, IP); return S; } //===----------------------------------------------------------------------===// // Basic SCEV Analysis and PHI Idiom Recognition Code // /// Test if values of the given type are analyzable within the SCEV /// framework. 
This primarily includes integer types, and it can optionally /// include pointer types if the ScalarEvolution class has access to /// target-specific information. bool ScalarEvolution::isSCEVable(Type *Ty) const { // Integers and pointers are always SCEVable. return Ty->isIntOrPtrTy(); } /// Return the size in bits of the specified type, for which isSCEVable must /// return true. uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); if (Ty->isPointerTy()) return getDataLayout().getIndexTypeSizeInBits(Ty); return getDataLayout().getTypeSizeInBits(Ty); } /// Return a type with the same bitwidth as the given type and which represents /// how SCEV will treat the given type, for which isSCEVable must return /// true. For pointer types, this is the pointer index sized integer type. Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); if (Ty->isIntegerTy()) return Ty; // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); return getDataLayout().getIndexType(Ty); } Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const { return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2; } bool ScalarEvolution::instructionCouldExistWithOperands(const SCEV *A, const SCEV *B) { /// For a valid use point to exist, the defining scope of one operand /// must dominate the other. bool PreciseA, PreciseB; auto *ScopeA = getDefiningScopeBound({A}, PreciseA); auto *ScopeB = getDefiningScopeBound({B}, PreciseB); if (!PreciseA || !PreciseB) // Can't tell. return false; return (ScopeA == ScopeB) || DT.dominates(ScopeA, ScopeB) || DT.dominates(ScopeB, ScopeA); } const SCEV *ScalarEvolution::getCouldNotCompute() { return CouldNotCompute.get(); } bool ScalarEvolution::checkValidity(const SCEV *S) const { bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) { auto *SU = dyn_cast(S); return SU && SU->getValue() == nullptr; }); return !ContainsNulls; } bool ScalarEvolution::containsAddRecurrence(const SCEV *S) { HasRecMapType::iterator I = HasRecMap.find(S); if (I != HasRecMap.end()) return I->second; bool FoundAddRec = SCEVExprContains(S, [](const SCEV *S) { return isa(S); }); HasRecMap.insert({S, FoundAddRec}); return FoundAddRec; } /// Return the ValueOffsetPair set for \p S. \p S can be represented /// by the value and offset from any ValueOffsetPair in the set. ArrayRef ScalarEvolution::getSCEVValues(const SCEV *S) { ExprValueMapType::iterator SI = ExprValueMap.find_as(S); if (SI == ExprValueMap.end()) return std::nullopt; return SI->second.getArrayRef(); } /// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V) /// cannot be used separately. eraseValueFromMap should be used to remove /// V from ValueExprMap and ExprValueMap at the same time. void ScalarEvolution::eraseValueFromMap(Value *V) { ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { auto EVIt = ExprValueMap.find(I->second); bool Removed = EVIt->second.remove(V); (void) Removed; assert(Removed && "Value not in ExprValueMap?"); ValueExprMap.erase(I); } } void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) { // A recursive query may have already computed the SCEV. It should be // equivalent, but may not necessarily be exactly the same, e.g. due to lazily // inferred nowrap flags. 
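// Hence the insertion below is a no-op if V is already mapped: the first SCEV
// computed for V wins.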
auto It = ValueExprMap.find_as(V); if (It == ValueExprMap.end()) { ValueExprMap.insert({SCEVCallbackVH(V, this), S}); ExprValueMap[S].insert(V); } } /// Return an existing SCEV if it exists, otherwise analyze the expression and /// create a new one. const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); if (const SCEV *S = getExistingSCEV(V)) return S; return createSCEVIter(V); } const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; assert(checkValidity(S) && "existing SCEV has not been properly invalidated"); return S; } return nullptr; } /// Return a SCEV corresponding to -V = -1*V const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags) { if (const SCEVConstant *VC = dyn_cast(V)) return getConstant( cast(ConstantExpr::getNeg(VC->getValue()))); Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); return getMulExpr(V, getMinusOne(Ty), Flags); } /// If Expr computes ~A, return A else return nullptr static const SCEV *MatchNotExpr(const SCEV *Expr) { const SCEVAddExpr *Add = dyn_cast(Expr); if (!Add || Add->getNumOperands() != 2 || !Add->getOperand(0)->isAllOnesValue()) return nullptr; const SCEVMulExpr *AddRHS = dyn_cast(Add->getOperand(1)); if (!AddRHS || AddRHS->getNumOperands() != 2 || !AddRHS->getOperand(0)->isAllOnesValue()) return nullptr; return AddRHS->getOperand(1); } /// Return a SCEV corresponding to ~V = -1-V const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { assert(!V->getType()->isPointerTy() && "Can't negate pointer"); if (const SCEVConstant *VC = dyn_cast(V)) return getConstant( cast(ConstantExpr::getNot(VC->getValue()))); // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y) if (const SCEVMinMaxExpr *MME = dyn_cast(V)) { auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) { SmallVector MatchedOperands; for (const SCEV *Operand : MME->operands()) { const SCEV *Matched = MatchNotExpr(Operand); if (!Matched) return (const SCEV *)nullptr; MatchedOperands.push_back(Matched); } return getMinMaxExpr(SCEVMinMaxExpr::negate(MME->getSCEVType()), MatchedOperands); }; if (const SCEV *Replaced = MatchMinMaxNegation(MME)) return Replaced; } Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); return getMinusSCEV(getMinusOne(Ty), V); } const SCEV *ScalarEvolution::removePointerBase(const SCEV *P) { assert(P->getType()->isPointerTy()); if (auto *AddRec = dyn_cast(P)) { // The base of an AddRec is the first operand. SmallVector Ops{AddRec->operands()}; Ops[0] = removePointerBase(Ops[0]); // Don't try to transfer nowrap flags for now. We could in some cases // (for example, if pointer operand of the AddRec is a SCEVUnknown). return getAddRecExpr(Ops, AddRec->getLoop(), SCEV::FlagAnyWrap); } if (auto *Add = dyn_cast(P)) { // The base of an Add is the pointer operand. SmallVector Ops{Add->operands()}; const SCEV **PtrOp = nullptr; for (const SCEV *&AddOp : Ops) { if (AddOp->getType()->isPointerTy()) { assert(!PtrOp && "Cannot have multiple pointer ops"); PtrOp = &AddOp; } } *PtrOp = removePointerBase(*PtrOp); // Don't try to transfer nowrap flags for now. We could in some cases // (for example, if the pointer operand of the Add is a SCEVUnknown). return getAddExpr(Ops); } // Any other expression must be a pointer base. 
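// (Illustrative: removePointerBase(%p) is 0, so removePointerBase(4 + %p)
// reduces to the offset 4 via the add case above.)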
return getZero(P->getType()); } const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags, unsigned Depth) { // Fast path: X - X --> 0. if (LHS == RHS) return getZero(LHS->getType()); // If we subtract two pointers with different pointer bases, bail. // Eventually, we're going to add an assertion to getMulExpr that we // can't multiply by a pointer. if (RHS->getType()->isPointerTy()) { if (!LHS->getType()->isPointerTy() || getPointerBase(LHS) != getPointerBase(RHS)) return getCouldNotCompute(); LHS = removePointerBase(LHS); RHS = removePointerBase(RHS); } // We represent LHS - RHS as LHS + (-1)*RHS. This transformation // makes it so that we cannot make much use of NUW. auto AddFlags = SCEV::FlagAnyWrap; const bool RHSIsNotMinSigned = !getSignedRangeMin(RHS).isMinSignedValue(); if (hasFlags(Flags, SCEV::FlagNSW)) { // Let M be the minimum representable signed value. Then (-1)*RHS // signed-wraps if and only if RHS is M. That can happen even for // a NSW subtraction because e.g. (-1)*M signed-wraps even though // -1 - M does not. So to transfer NSW from LHS - RHS to LHS + // (-1)*RHS, we need to prove that RHS != M. // // If LHS is non-negative and we know that LHS - RHS does not // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap // either by proving that RHS > M or that LHS >= 0. if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) { AddFlags = SCEV::FlagNSW; } } // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS - // RHS is NSW and LHS >= 0. // // The difficulty here is that the NSW flag may have been proven // relative to a loop that is to be found in a recurrence in LHS and // not in RHS. Applying NSW to (-1)*M may then let the NSW have a // larger scope than intended. auto NegFlags = RHSIsNotMinSigned ? 
SCEV::FlagNSW : SCEV::FlagAnyWrap; return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth); } const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth) { Type *SrcTy = V->getType(); assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) return getTruncateExpr(V, Ty, Depth); return getZeroExtendExpr(V, Ty, Depth); } const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth) { Type *SrcTy = V->getType(); assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) return getTruncateExpr(V, Ty, Depth); return getSignExtendExpr(V, Ty, Depth); } const SCEV * ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) { Type *SrcTy = V->getType(); assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot noop or zero extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrZeroExtend cannot truncate!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion return getZeroExtendExpr(V, Ty); } const SCEV * ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) { Type *SrcTy = V->getType(); assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot noop or sign extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrSignExtend cannot truncate!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion return getSignExtendExpr(V, Ty); } const SCEV * ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) { Type *SrcTy = V->getType(); assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot noop or any extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrAnyExtend cannot truncate!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion return getAnyExtendExpr(V, Ty); } const SCEV * ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) { Type *SrcTy = V->getType(); assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate or noop with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && "getTruncateOrNoop cannot extend!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion return getTruncateExpr(V, Ty); } const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS) { const SCEV *PromotedLHS = LHS; const SCEV *PromotedRHS = RHS; if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); else PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); return getUMaxExpr(PromotedLHS, PromotedRHS); } const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS, bool Sequential) { SmallVector Ops = { LHS, RHS }; return getUMinFromMismatchedTypes(Ops, Sequential); } const SCEV * ScalarEvolution::getUMinFromMismatchedTypes(SmallVectorImpl &Ops, bool Sequential) { assert(!Ops.empty() && "At least one operand must be!"); // Trivial case. 
if (Ops.size() == 1) return Ops[0]; // Find the max type first. Type *MaxType = nullptr; for (const auto *S : Ops) if (MaxType) MaxType = getWiderType(MaxType, S->getType()); else MaxType = S->getType(); assert(MaxType && "Failed to find maximum type!"); // Extend all ops to max type. SmallVector PromotedOps; for (const auto *S : Ops) PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType)); // Generate umin. return getUMinExpr(PromotedOps, Sequential); } const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { // A pointer operand may evaluate to a nonpointer expression, such as null. if (!V->getType()->isPointerTy()) return V; while (true) { if (auto *AddRec = dyn_cast(V)) { V = AddRec->getStart(); } else if (auto *Add = dyn_cast(V)) { const SCEV *PtrOp = nullptr; for (const SCEV *AddOp : Add->operands()) { if (AddOp->getType()->isPointerTy()) { assert(!PtrOp && "Cannot have multiple pointer ops"); PtrOp = AddOp; } } assert(PtrOp && "Must have pointer op"); V = PtrOp; } else // Not something we can look further into. return V; } } /// Push users of the given Instruction onto the given Worklist. static void PushDefUseChildren(Instruction *I, SmallVectorImpl &Worklist, SmallPtrSetImpl &Visited) { // Push the def-use children onto the Worklist stack. for (User *U : I->users()) { auto *UserInsn = cast(U); if (Visited.insert(UserInsn).second) Worklist.push_back(UserInsn); } } namespace { /// Takes SCEV S and Loop L. For each AddRec sub-expression, use its start /// expression in case its Loop is L. If it is not L then /// if IgnoreOtherLoops is true then use AddRec itself /// otherwise rewrite cannot be done. /// If SCEV contains non-invariant unknown SCEV rewrite cannot be done. class SCEVInitRewriter : public SCEVRewriteVisitor { public: static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool IgnoreOtherLoops = true) { SCEVInitRewriter Rewriter(L, SE); const SCEV *Result = Rewriter.visit(S); if (Rewriter.hasSeenLoopVariantSCEVUnknown()) return SE.getCouldNotCompute(); return Rewriter.hasSeenOtherLoops() && !IgnoreOtherLoops ? SE.getCouldNotCompute() : Result; } const SCEV *visitUnknown(const SCEVUnknown *Expr) { if (!SE.isLoopInvariant(Expr, L)) SeenLoopVariantSCEVUnknown = true; return Expr; } const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { // Only re-write AddRecExprs for this loop. if (Expr->getLoop() == L) return Expr->getStart(); SeenOtherLoops = true; return Expr; } bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; } bool hasSeenOtherLoops() { return SeenOtherLoops; } private: explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE) : SCEVRewriteVisitor(SE), L(L) {} const Loop *L; bool SeenLoopVariantSCEVUnknown = false; bool SeenOtherLoops = false; }; /// Takes SCEV S and Loop L. For each AddRec sub-expression, use its post /// increment expression in case its Loop is L. If it is not L then /// use AddRec itself. /// If SCEV contains non-invariant unknown SCEV rewrite cannot be done. class SCEVPostIncRewriter : public SCEVRewriteVisitor { public: static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) { SCEVPostIncRewriter Rewriter(L, SE); const SCEV *Result = Rewriter.visit(S); return Rewriter.hasSeenLoopVariantSCEVUnknown() ? 
SE.getCouldNotCompute() : Result; } const SCEV *visitUnknown(const SCEVUnknown *Expr) { if (!SE.isLoopInvariant(Expr, L)) SeenLoopVariantSCEVUnknown = true; return Expr; } const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { // Only re-write AddRecExprs for this loop. if (Expr->getLoop() == L) return Expr->getPostIncExpr(SE); SeenOtherLoops = true; return Expr; } bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; } bool hasSeenOtherLoops() { return SeenOtherLoops; } private: explicit SCEVPostIncRewriter(const Loop *L, ScalarEvolution &SE) : SCEVRewriteVisitor(SE), L(L) {} const Loop *L; bool SeenLoopVariantSCEVUnknown = false; bool SeenOtherLoops = false; }; /// This class evaluates the compare condition by matching it against the /// condition of loop latch. If there is a match we assume a true value /// for the condition while building SCEV nodes. class SCEVBackedgeConditionFolder : public SCEVRewriteVisitor { public: static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) { bool IsPosBECond = false; Value *BECond = nullptr; if (BasicBlock *Latch = L->getLoopLatch()) { BranchInst *BI = dyn_cast(Latch->getTerminator()); if (BI && BI->isConditional()) { assert(BI->getSuccessor(0) != BI->getSuccessor(1) && "Both outgoing branches should not target same header!"); BECond = BI->getCondition(); IsPosBECond = BI->getSuccessor(0) == L->getHeader(); } else { return S; } } SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE); return Rewriter.visit(S); } const SCEV *visitUnknown(const SCEVUnknown *Expr) { const SCEV *Result = Expr; bool InvariantF = SE.isLoopInvariant(Expr, L); if (!InvariantF) { Instruction *I = cast(Expr->getValue()); switch (I->getOpcode()) { case Instruction::Select: { SelectInst *SI = cast(I); std::optional Res = compareWithBackedgeCondition(SI->getCondition()); if (Res) { bool IsOne = cast(*Res)->getValue()->isOne(); Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue()); } break; } default: { std::optional Res = compareWithBackedgeCondition(I); if (Res) Result = *Res; break; } } } return Result; } private: explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond, bool IsPosBECond, ScalarEvolution &SE) : SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond), IsPositiveBECond(IsPosBECond) {} std::optional compareWithBackedgeCondition(Value *IC); const Loop *L; /// Loop back condition. Value *BackedgeCond = nullptr; /// Set to true if loop back is on positive branch condition. bool IsPositiveBECond; }; std::optional SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) { // If value matches the backedge condition for loop latch, // then return a constant evolution node based on loopback // branch taken. if (BackedgeCond == IC) return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext())) : SE.getZero(Type::getInt1Ty(SE.getContext())); return std::nullopt; } class SCEVShiftRewriter : public SCEVRewriteVisitor { public: static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) { SCEVShiftRewriter Rewriter(L, SE); const SCEV *Result = Rewriter.visit(S); return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); } const SCEV *visitUnknown(const SCEVUnknown *Expr) { // Only allow AddRecExprs for this loop. 
if (!SE.isLoopInvariant(Expr, L)) Valid = false; return Expr; } const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { if (Expr->getLoop() == L && Expr->isAffine()) return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE)); Valid = false; return Expr; } bool isValid() { return Valid; } private: explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE) : SCEVRewriteVisitor(SE), L(L) {} const Loop *L; bool Valid = true; }; } // end anonymous namespace SCEV::NoWrapFlags ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { if (!AR->isAffine()) return SCEV::FlagAnyWrap; using OBO = OverflowingBinaryOperator; SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap; if (!AR->hasNoSelfWrap()) { const SCEV *BECount = getConstantMaxBackedgeTakenCount(AR->getLoop()); if (const SCEVConstant *BECountMax = dyn_cast(BECount)) { ConstantRange StepCR = getSignedRange(AR->getStepRecurrence(*this)); const APInt &BECountAP = BECountMax->getAPInt(); unsigned NoOverflowBitWidth = BECountAP.getActiveBits() + StepCR.getMinSignedBits(); if (NoOverflowBitWidth <= getTypeSizeInBits(AR->getType())) Result = ScalarEvolution::setFlags(Result, SCEV::FlagNW); } } if (!AR->hasNoSignedWrap()) { ConstantRange AddRecRange = getSignedRange(AR); ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this)); auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( Instruction::Add, IncRange, OBO::NoSignedWrap); if (NSWRegion.contains(AddRecRange)) Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW); } if (!AR->hasNoUnsignedWrap()) { ConstantRange AddRecRange = getUnsignedRange(AR); ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this)); auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( Instruction::Add, IncRange, OBO::NoUnsignedWrap); if (NUWRegion.contains(AddRecRange)) Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW); } return Result; } SCEV::NoWrapFlags ScalarEvolution::proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR) { SCEV::NoWrapFlags Result = AR->getNoWrapFlags(); if (AR->hasNoSignedWrap()) return Result; if (!AR->isAffine()) return Result; // This function can be expensive, only try to prove NSW once per AddRec. if (!SignedWrapViaInductionTried.insert(AR).second) return Result; const SCEV *Step = AR->getStepRecurrence(*this); const Loop *L = AR->getLoop(); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are // simply not analyzable, and it covers the case where this code is // being called from within backedge-taken count analysis, such that // attempting to ask for the backedge-taken count would likely result // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); // Normally, in the cases we can prove no-overflow via a // backedge guarding condition, we can also compute a backedge // taken count for the loop. The exceptions are assumptions and // guards present in the loop -- SCEV is not great at exploiting // these to compute max backedge taken counts, but can still use // these to prove lack of overflow. Use this fact to avoid // doing extra work that may not pay off. if (isa(MaxBECount) && !HasGuards && AC.assumptions().empty()) return Result; // If the backedge is guarded by a comparison with the pre-inc value the // addrec is safe. 
Also, if the entry is guarded by a comparison with the // start value and the backedge is guarded by a comparison with the post-inc // value, the addrec is safe. ICmpInst::Predicate Pred; const SCEV *OverflowLimit = getSignedOverflowLimitForStep(Step, &Pred, this); if (OverflowLimit && (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || isKnownOnEveryIteration(Pred, AR, OverflowLimit))) { Result = setFlags(Result, SCEV::FlagNSW); } return Result; } SCEV::NoWrapFlags ScalarEvolution::proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR) { SCEV::NoWrapFlags Result = AR->getNoWrapFlags(); if (AR->hasNoUnsignedWrap()) return Result; if (!AR->isAffine()) return Result; // This function can be expensive, only try to prove NUW once per AddRec. if (!UnsignedWrapViaInductionTried.insert(AR).second) return Result; const SCEV *Step = AR->getStepRecurrence(*this); unsigned BitWidth = getTypeSizeInBits(AR->getType()); const Loop *L = AR->getLoop(); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are // simply not analyzable, and it covers the case where this code is // being called from within backedge-taken count analysis, such that // attempting to ask for the backedge-taken count would likely result // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); // Normally, in the cases we can prove no-overflow via a // backedge guarding condition, we can also compute a backedge // taken count for the loop. The exceptions are assumptions and // guards present in the loop -- SCEV is not great at exploiting // these to compute max backedge taken counts, but can still use // these to prove lack of overflow. Use this fact to avoid // doing extra work that may not pay off. if (isa(MaxBECount) && !HasGuards && AC.assumptions().empty()) return Result; // If the backedge is guarded by a comparison with the pre-inc value the // addrec is safe. Also, if the entry is guarded by a comparison with the // start value and the backedge is guarded by a comparison with the post-inc // value, the addrec is safe. if (isKnownPositive(Step)) { const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - getUnsignedRangeMax(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) { Result = setFlags(Result, SCEV::FlagNUW); } } return Result; } namespace { /// Represents an abstract binary operation. This may exist as a /// normal instruction or constant expression, or may have been /// derived from an expression tree. struct BinaryOp { unsigned Opcode; Value *LHS; Value *RHS; bool IsNSW = false; bool IsNUW = false; /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or /// constant expression. Operator *Op = nullptr; explicit BinaryOp(Operator *Op) : Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)), Op(Op) { if (auto *OBO = dyn_cast(Op)) { IsNSW = OBO->hasNoSignedWrap(); IsNUW = OBO->hasNoUnsignedWrap(); } } explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false, bool IsNUW = false) : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {} }; } // end anonymous namespace /// Try to map \p V into a BinaryOp, and return \c std::nullopt on failure. 
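/// A few of the mappings performed below, for illustration: `or disjoint` is
/// treated as `add nuw nsw`, `xor` with a sign-mask constant is treated as a
/// plain `add`, and `lshr %x, C` (with C less than the bit width) is treated
/// as a `udiv` by 2^C.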
static std::optional MatchBinaryOp(Value *V, const DataLayout &DL, AssumptionCache &AC, const DominatorTree &DT, const Instruction *CxtI) { auto *Op = dyn_cast(V); if (!Op) return std::nullopt; // Implementation detail: all the cleverness here should happen without // creating new SCEV expressions -- our caller knowns tricks to avoid creating // SCEV expressions when possible, and we should not break that. switch (Op->getOpcode()) { case Instruction::Add: case Instruction::Sub: case Instruction::Mul: case Instruction::UDiv: case Instruction::URem: case Instruction::And: case Instruction::AShr: case Instruction::Shl: return BinaryOp(Op); case Instruction::Or: { // Convert or disjoint into add nuw nsw. if (cast(Op)->isDisjoint()) return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1), /*IsNSW=*/true, /*IsNUW=*/true); return BinaryOp(Op); } case Instruction::Xor: if (auto *RHSC = dyn_cast(Op->getOperand(1))) // If the RHS of the xor is a signmask, then this is just an add. // Instcombine turns add of signmask into xor as a strength reduction step. if (RHSC->getValue().isSignMask()) return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1)); // Binary `xor` is a bit-wise `add`. if (V->getType()->isIntegerTy(1)) return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1)); return BinaryOp(Op); case Instruction::LShr: // Turn logical shift right of a constant into a unsigned divide. if (ConstantInt *SA = dyn_cast(Op->getOperand(1))) { uint32_t BitWidth = cast(Op->getType())->getBitWidth(); // If the shift count is not less than the bitwidth, the result of // the shift is undefined. Don't try to analyze it, because the // resolution chosen here may differ from the resolution chosen in // other parts of the compiler. if (SA->getValue().ult(BitWidth)) { Constant *X = ConstantInt::get(SA->getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); return BinaryOp(Instruction::UDiv, Op->getOperand(0), X); } } return BinaryOp(Op); case Instruction::ExtractValue: { auto *EVI = cast(Op); if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0) break; auto *WO = dyn_cast(EVI->getAggregateOperand()); if (!WO) break; Instruction::BinaryOps BinOp = WO->getBinaryOp(); bool Signed = WO->isSigned(); // TODO: Should add nuw/nsw flags for mul as well. if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT)) return BinaryOp(BinOp, WO->getLHS(), WO->getRHS()); // Now that we know that all uses of the arithmetic-result component of // CI are guarded by the overflow check, we can go ahead and pretend // that the arithmetic is non-overflowing. return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(), /* IsNSW = */ Signed, /* IsNUW = */ !Signed); } default: break; } // Recognise intrinsic loop.decrement.reg, and as this has exactly the same // semantics as a Sub, return a binary sub expression. if (auto *II = dyn_cast(V)) if (II->getIntrinsicID() == Intrinsic::loop_decrement_reg) return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1)); return std::nullopt; } /// Helper function to createAddRecFromPHIWithCasts. We have a phi /// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via /// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the /// way. 
This function checks if \p Op, an operand of this SCEVAddExpr, /// follows one of the following patterns: /// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) /// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) /// If the SCEV expression of \p Op conforms with one of the expected patterns /// we return the type of the truncation operation, and indicate whether the /// truncated type should be treated as signed/unsigned by setting /// \p Signed to true/false, respectively. static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI, bool &Signed, ScalarEvolution &SE) { // The case where Op == SymbolicPHI (that is, with no type conversions on // the way) is handled by the regular add recurrence creating logic and // would have already been triggered in createAddRecForPHI. Reaching it here // means that createAddRecFromPHI had failed for this PHI before (e.g., // because one of the other operands of the SCEVAddExpr updating this PHI is // not invariant). // // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in // this case predicates that allow us to prove that Op == SymbolicPHI will // be added. if (Op == SymbolicPHI) return nullptr; unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType()); unsigned NewBits = SE.getTypeSizeInBits(Op->getType()); if (SourceBits != NewBits) return nullptr; const SCEVSignExtendExpr *SExt = dyn_cast(Op); const SCEVZeroExtendExpr *ZExt = dyn_cast(Op); if (!SExt && !ZExt) return nullptr; const SCEVTruncateExpr *Trunc = SExt ? dyn_cast(SExt->getOperand()) : dyn_cast(ZExt->getOperand()); if (!Trunc) return nullptr; const SCEV *X = Trunc->getOperand(); if (X != SymbolicPHI) return nullptr; Signed = SExt != nullptr; return Trunc->getType(); } static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) { if (!PN->getType()->isIntegerTy()) return nullptr; const Loop *L = LI.getLoopFor(PN->getParent()); if (!L || L->getHeader() != PN->getParent()) return nullptr; return L; } // Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the // computation that updates the phi follows the following pattern: // (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum // which correspond to a phi->trunc->sext/zext->add->phi update chain. // If so, try to see if it can be rewritten as an AddRecExpr under some // Predicates. If successful, return them as a pair. Also cache the results // of the analysis. // // Example usage scenario: // Say the Rewriter is called for the following SCEV: // 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step) // where: // %X = phi i64 (%Start, %BEValue) // It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X), // and call this function with %SymbolicPHI = %X. // // The analysis will find that the value coming around the backedge has // the following SCEV: // BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step) // Upon concluding that this matches the desired pattern, the function // will return the pair {NewAddRec, SmallPredsVec} where: // NewAddRec = {%Start,+,%Step} // SmallPredsVec = {P1, P2, P3} as follows: // P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)} Flags: // P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64) // P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64) // The returned pair means that SymbolicPHI can be rewritten into NewAddRec // under the predicates {P1,P2,P3}. 
// This predicated rewrite will be cached in PredicatedSCEVRewrites: // PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)} // // TODO's: // // 1) Extend the Induction descriptor to also support inductions that involve // casts: When needed (namely, when we are called in the context of the // vectorizer induction analysis), a Set of cast instructions will be // populated by this method, and provided back to isInductionPHI. This is // needed to allow the vectorizer to properly record them to be ignored by // the cost model and to avoid vectorizing them (otherwise these casts, // which are redundant under the runtime overflow checks, will be // vectorized, which can be costly). // // 2) Support additional induction/PHISCEV patterns: We also want to support // inductions where the sext-trunc / zext-trunc operations (partly) occur // after the induction update operation (the induction increment): // // (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix) // which correspond to a phi->add->trunc->sext/zext->phi update chain. // // (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix) // which correspond to a phi->trunc->add->sext/zext->phi update chain. // // 3) Outline common code with createAddRecFromPHI to avoid duplication. std::optional>> ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) { SmallVector Predicates; // *** Part1: Analyze if we have a phi-with-cast pattern for which we can // return an AddRec expression under some predicate. auto *PN = cast(SymbolicPHI->getValue()); const Loop *L = isIntegerLoopHeaderPHI(PN, LI); assert(L && "Expecting an integer loop header phi"); // The loop may have multiple entrances or multiple exits; we can analyze // this phi as an addrec if it has a unique entry value and a unique // backedge value. Value *BEValueV = nullptr, *StartValueV = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); if (L->contains(PN->getIncomingBlock(i))) { if (!BEValueV) { BEValueV = V; } else if (BEValueV != V) { BEValueV = nullptr; break; } } else if (!StartValueV) { StartValueV = V; } else if (StartValueV != V) { StartValueV = nullptr; break; } } if (!BEValueV || !StartValueV) return std::nullopt; const SCEV *BEValue = getSCEV(BEValueV); // If the value coming around the backedge is an add with the symbolic // value we just inserted, possibly with casts that we can ignore under // an appropriate runtime guard, then we found a simple induction variable! const auto *Add = dyn_cast(BEValue); if (!Add) return std::nullopt; // If there is a single occurrence of the symbolic value, possibly // casted, replace it with a recurrence. unsigned FoundIndex = Add->getNumOperands(); Type *TruncTy = nullptr; bool Signed; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) if ((TruncTy = isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this))) if (FoundIndex == e) { FoundIndex = i; break; } if (FoundIndex == Add->getNumOperands()) return std::nullopt; // Create an add with everything but the specified operand. SmallVector Ops; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) if (i != FoundIndex) Ops.push_back(Add->getOperand(i)); const SCEV *Accum = getAddExpr(Ops); // The runtime checks will not be valid if the step amount is // varying inside the loop. 
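// A loop-variant step could not be covered by the loop-entry runtime checks
// built in Part2 below, so give up in that case.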
if (!isLoopInvariant(Accum, L)) return std::nullopt; // *** Part2: Create the predicates // Analysis was successful: we have a phi-with-cast pattern for which we // can return an AddRec expression under the following predicates: // // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum) // fits within the truncated type (does not overflow) for i = 0 to n-1. // P2: An Equal predicate that guarantees that // Start = (Ext ix (Trunc iy (Start) to ix) to iy) // P3: An Equal predicate that guarantees that // Accum = (Ext ix (Trunc iy (Accum) to ix) to iy) // // As we next prove, the above predicates guarantee that: // Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy) // // // More formally, we want to prove that: // Expr(i+1) = Start + (i+1) * Accum // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum // // Given that: // 1) Expr(0) = Start // 2) Expr(1) = Start + Accum // = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2 // 3) Induction hypothesis (step i): // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum // // Proof: // Expr(i+1) = // = Start + (i+1)*Accum // = (Start + i*Accum) + Accum // = Expr(i) + Accum // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum // :: from step i // // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum // // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) // + (Ext ix (Trunc iy (Accum) to ix) to iy) // + Accum :: from P3 // // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy) // + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y) // // = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum // // By induction, the same applies to all iterations 1<=i(PHISCEV)) { SCEVWrapPredicate::IncrementWrapFlags AddedFlags = Signed ? SCEVWrapPredicate::IncrementNSSW : SCEVWrapPredicate::IncrementNUSW; const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags); Predicates.push_back(AddRecPred); } // Create the Equal Predicates P2,P3: // It is possible that the predicates P2 and/or P3 are computable at // compile time due to StartVal and/or Accum being constants. // If either one is, then we can check that now and escape if either P2 // or P3 is false. // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy) // for each of StartVal and Accum auto getExtendedExpr = [&](const SCEV *Expr, bool CreateSignExtend) -> const SCEV * { assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant"); const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy); const SCEV *ExtendedExpr = CreateSignExtend ? 
getSignExtendExpr(TruncatedExpr, Expr->getType()) : getZeroExtendExpr(TruncatedExpr, Expr->getType()); return ExtendedExpr; }; // Given: // ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy // = getExtendedExpr(Expr) // Determine whether the predicate P: Expr == ExtendedExpr // is known to be false at compile time auto PredIsKnownFalse = [&](const SCEV *Expr, const SCEV *ExtendedExpr) -> bool { return Expr != ExtendedExpr && isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr); }; const SCEV *StartExtended = getExtendedExpr(StartVal, Signed); if (PredIsKnownFalse(StartVal, StartExtended)) { LLVM_DEBUG(dbgs() << "P2 is compile-time false\n";); return std::nullopt; } // The Step is always Signed (because the overflow checks are either // NSSW or NUSW) const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true); if (PredIsKnownFalse(Accum, AccumExtended)) { LLVM_DEBUG(dbgs() << "P3 is compile-time false\n";); return std::nullopt; } auto AppendPredicate = [&](const SCEV *Expr, const SCEV *ExtendedExpr) -> void { if (Expr != ExtendedExpr && !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) { const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr); LLVM_DEBUG(dbgs() << "Added Predicate: " << *Pred); Predicates.push_back(Pred); } }; AppendPredicate(StartVal, StartExtended); AppendPredicate(Accum, AccumExtended); // *** Part3: Predicates are ready. Now go ahead and create the new addrec in // which the casts had been folded away. The caller can rewrite SymbolicPHI // into NewAR if it will also add the runtime overflow checks specified in // Predicates. auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap); std::pair> PredRewrite = std::make_pair(NewAR, Predicates); // Remember the result of the analysis for this SCEV at this location. PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite; return PredRewrite; } std::optional>> ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { auto *PN = cast(SymbolicPHI->getValue()); const Loop *L = isIntegerLoopHeaderPHI(PN, LI); if (!L) return std::nullopt; // Check to see if we already analyzed this PHI. auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L}); if (I != PredicatedSCEVRewrites.end()) { std::pair> Rewrite = I->second; // Analysis was done before and failed to create an AddRec: if (Rewrite.first == SymbolicPHI) return std::nullopt; // Analysis was done before and succeeded to create an AddRec under // a predicate: assert(isa(Rewrite.first) && "Expected an AddRec"); assert(!(Rewrite.second).empty() && "Expected to find Predicates"); return Rewrite; } std::optional>> Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI); // Record in the cache that the analysis failed if (!Rewrite) { SmallVector Predicates; PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates}; return std::nullopt; } return Rewrite; } // FIXME: This utility is currently required because the Rewriter currently // does not rewrite this expression: // {0, +, (sext ix (trunc iy to ix) to iy)} // into {0, +, %step}, // even when the following Equal predicate exists: // "%step == (sext ix (trunc iy to ix) to iy)".
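// Until that is fixed, areAddRecsEqualWithPreds below treats two affine
// AddRecs as equal when their starts and steps are identical or related by an
// equality predicate implied by the current predicate set (e.g. the
// {0,+,(sext ix (trunc iy to ix) to iy)} and {0,+,%step} pair quoted above).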
bool PredicatedScalarEvolution::areAddRecsEqualWithPreds( const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const { if (AR1 == AR2) return true; auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool { if (Expr1 != Expr2 && !Preds->implies(SE.getEqualPredicate(Expr1, Expr2)) && !Preds->implies(SE.getEqualPredicate(Expr2, Expr1))) return false; return true; }; if (!areExprsEqual(AR1->getStart(), AR2->getStart()) || !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE))) return false; return true; } /// A helper function for createAddRecFromPHI to handle simple cases. /// /// This function tries to find an AddRec expression for the simplest (yet most /// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)). /// If it fails, createAddRecFromPHI will use a more general, but slow, /// technique for finding the AddRec expression. const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN, Value *BEValueV, Value *StartValueV) { const Loop *L = LI.getLoopFor(PN->getParent()); assert(L && L->getHeader() == PN->getParent()); assert(BEValueV && StartValueV); auto BO = MatchBinaryOp(BEValueV, getDataLayout(), AC, DT, PN); if (!BO) return nullptr; if (BO->Opcode != Instruction::Add) return nullptr; const SCEV *Accum = nullptr; if (BO->LHS == PN && L->isLoopInvariant(BO->RHS)) Accum = getSCEV(BO->RHS); else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS)) Accum = getSCEV(BO->LHS); if (!Accum) return nullptr; SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; if (BO->IsNUW) Flags = setFlags(Flags, SCEV::FlagNUW); if (BO->IsNSW) Flags = setFlags(Flags, SCEV::FlagNSW); const SCEV *StartVal = getSCEV(StartValueV); const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); insertValueToMap(PN, PHISCEV); if (auto *AR = dyn_cast(PHISCEV)) { setNoWrapFlags(const_cast(AR), (SCEV::NoWrapFlags)(AR->getNoWrapFlags() | proveNoWrapViaConstantRanges(AR))); } // We can add Flags to the post-inc expression only if we // know that it is *undefined behavior* for BEValueV to // overflow. if (auto *BEInst = dyn_cast(BEValueV)) { assert(isLoopInvariant(Accum, L) && "Accum is defined outside L, but is not invariant?"); if (isAddRecNeverPoison(BEInst, L)) (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); } return PHISCEV; } const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { const Loop *L = LI.getLoopFor(PN->getParent()); if (!L || L->getHeader() != PN->getParent()) return nullptr; // The loop may have multiple entrances or multiple exits; we can analyze // this phi as an addrec if it has a unique entry value and a unique // backedge value. Value *BEValueV = nullptr, *StartValueV = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); if (L->contains(PN->getIncomingBlock(i))) { if (!BEValueV) { BEValueV = V; } else if (BEValueV != V) { BEValueV = nullptr; break; } } else if (!StartValueV) { StartValueV = V; } else if (StartValueV != V) { StartValueV = nullptr; break; } } if (!BEValueV || !StartValueV) return nullptr; assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && "PHI node already processed?"); // First, try to find AddRec expression without creating a fictitious symbolic // value for PN. if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV)) return S; // Handle PHI node value symbolically. const SCEV *SymbolicName = getUnknown(PN); insertValueToMap(PN, SymbolicName); // Using this symbolic name for the PHI, analyze the value coming around // the back-edge.
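// (Illustrative: for a header phi %i whose backedge value is
//  %i.next = add %i, %step, BEValue below is (%SymbolicName + %step), and the
//  loop that follows peels %step off as the accumulator.)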
const SCEV *BEValue = getSCEV(BEValueV); // NOTE: If BEValue is loop invariant, we know that the PHI node just // has a special value for the first iteration of the loop. // If the value coming around the backedge is an add with the symbolic // value we just inserted, then we found a simple induction variable! if (const SCEVAddExpr *Add = dyn_cast(BEValue)) { // If there is a single occurrence of the symbolic value, replace it // with a recurrence. unsigned FoundIndex = Add->getNumOperands(); for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) if (Add->getOperand(i) == SymbolicName) if (FoundIndex == e) { FoundIndex = i; break; } if (FoundIndex != Add->getNumOperands()) { // Create an add with everything but the specified operand. SmallVector Ops; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) if (i != FoundIndex) Ops.push_back(SCEVBackedgeConditionFolder::rewrite(Add->getOperand(i), L, *this)); const SCEV *Accum = getAddExpr(Ops); // This is not a valid addrec if the step amount is varying each // loop iteration, but is not itself an addrec in this loop. if (isLoopInvariant(Accum, L) || (isa(Accum) && cast(Accum)->getLoop() == L)) { SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; if (auto BO = MatchBinaryOp(BEValueV, getDataLayout(), AC, DT, PN)) { if (BO->Opcode == Instruction::Add && BO->LHS == PN) { if (BO->IsNUW) Flags = setFlags(Flags, SCEV::FlagNUW); if (BO->IsNSW) Flags = setFlags(Flags, SCEV::FlagNSW); } } else if (GEPOperator *GEP = dyn_cast(BEValueV)) { // If the increment is an inbounds GEP, then we know the address // space cannot be wrapped around. We cannot make any guarantee // about signed or unsigned overflow because pointers are // unsigned but we may have a negative index from the base // pointer. We can guarantee that no unsigned wrap occurs if the // indices form a positive value. if (GEP->isInBounds() && GEP->getOperand(0) == PN) { Flags = setFlags(Flags, SCEV::FlagNW); if (isKnownPositive(Accum)) Flags = setFlags(Flags, SCEV::FlagNUW); } // We cannot transfer nuw and nsw flags from subtraction // operations -- sub nuw X, Y is not the same as add nuw X, -Y // for instance. } const SCEV *StartVal = getSCEV(StartValueV); const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the // entries for the scalars that use the symbolic expression. forgetMemoizedResults(SymbolicName); insertValueToMap(PN, PHISCEV); if (auto *AR = dyn_cast(PHISCEV)) { setNoWrapFlags(const_cast(AR), (SCEV::NoWrapFlags)(AR->getNoWrapFlags() | proveNoWrapViaConstantRanges(AR))); } // We can add Flags to the post-inc expression only if we // know that it is *undefined behavior* for BEValueV to // overflow. if (auto *BEInst = dyn_cast(BEValueV)) if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); return PHISCEV; } } } else { // Otherwise, this could be a loop like this: // i = 0; for (j = 1; ..; ++j) { .... i = j; } // In this case, j = {1,+,1} and BEValue is j. // Because the other in-value of i (0) fits the evolution of BEValue // i really is an addrec evolution. 
// // We can generalize this saying that i is the shifted value of BEValue // by one iteration: // PHI(f(0), f({1,+,1})) --> f({0,+,1}) const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false); if (Shifted != getCouldNotCompute() && Start != getCouldNotCompute()) { const SCEV *StartVal = getSCEV(StartValueV); if (Start == StartVal) { // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the // entries for the scalars that use the symbolic expression. forgetMemoizedResults(SymbolicName); insertValueToMap(PN, Shifted); return Shifted; } } } // Remove the temporary PHI node SCEV that has been inserted while intending // to create an AddRecExpr for this PHI node. We can not keep this temporary // as it will prevent later (possibly simpler) SCEV expressions to be added // to the ValueExprMap. eraseValueFromMap(PN); return nullptr; } // Try to match a control flow sequence that branches out at BI and merges back // at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful // match. static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge, Value *&C, Value *&LHS, Value *&RHS) { C = BI->getCondition(); BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0)); BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1)); if (!LeftEdge.isSingleEdge()) return false; assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()"); Use &LeftUse = Merge->getOperandUse(0); Use &RightUse = Merge->getOperandUse(1); if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) { LHS = LeftUse; RHS = RightUse; return true; } if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) { LHS = RightUse; RHS = LeftUse; return true; } return false; } const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) { auto IsReachable = [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); }; if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) { // Try to match // // br %cond, label %left, label %right // left: // br label %merge // right: // br label %merge // merge: // V = phi [ %x, %left ], [ %y, %right ] // // as "select %cond, %x, %y" BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock(); assert(IDom && "At least the entry block should dominate PN"); auto *BI = dyn_cast(IDom->getTerminator()); Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr; if (BI && BI->isConditional() && BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) && properlyDominates(getSCEV(LHS), PN->getParent()) && properlyDominates(getSCEV(RHS), PN->getParent())) return createNodeForSelectOrPHI(PN, Cond, LHS, RHS); } return nullptr; } const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (const SCEV *S = createAddRecFromPHI(PN)) return S; if (Value *V = simplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC})) return getSCEV(V); if (const SCEV *S = createNodeFromSelectLikePHI(PN)) return S; // If it's not a loop phi, we can't handle it yet. return getUnknown(PN); } bool SCEVMinMaxExprContains(const SCEV *Root, const SCEV *OperandToFind, SCEVTypes RootKind) { struct FindClosure { const SCEV *OperandToFind; const SCEVTypes RootKind; // Must be a sequential min/max expression. const SCEVTypes NonSequentialRootKind; // Non-seq variant of RootKind. 
bool Found = false; bool canRecurseInto(SCEVTypes Kind) const { // We can only recurse into the SCEV expression of the same effective type // as the type of our root SCEV expression, and into zero-extensions. return RootKind == Kind || NonSequentialRootKind == Kind || scZeroExtend == Kind; }; FindClosure(const SCEV *OperandToFind, SCEVTypes RootKind) : OperandToFind(OperandToFind), RootKind(RootKind), NonSequentialRootKind( SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType( RootKind)) {} bool follow(const SCEV *S) { Found = S == OperandToFind; return !isDone() && canRecurseInto(S->getSCEVType()); } bool isDone() const { return Found; } }; FindClosure FC(OperandToFind, RootKind); visitAll(Root, FC); return FC.Found; } std::optional ScalarEvolution::createNodeForSelectOrPHIInstWithICmpInstCond(Type *Ty, ICmpInst *Cond, Value *TrueVal, Value *FalseVal) { // Try to match some simple smax or umax patterns. auto *ICI = Cond; Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); switch (ICI->getPredicate()) { case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: // a > b ? a+x : b+x -> max(a, b)+x // a > b ? b+x : a+x -> min(a, b)+x if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(Ty)) { bool Signed = ICI->isSigned(); const SCEV *LA = getSCEV(TrueVal); const SCEV *RA = getSCEV(FalseVal); const SCEV *LS = getSCEV(LHS); const SCEV *RS = getSCEV(RHS); if (LA->getType()->isPointerTy()) { // FIXME: Handle cases where LS/RS are pointers not equal to LA/RA. // Need to make sure we can't produce weird expressions involving // negated pointers. if (LA == LS && RA == RS) return Signed ? getSMaxExpr(LS, RS) : getUMaxExpr(LS, RS); if (LA == RS && RA == LS) return Signed ? getSMinExpr(LS, RS) : getUMinExpr(LS, RS); } auto CoerceOperand = [&](const SCEV *Op) -> const SCEV * { if (Op->getType()->isPointerTy()) { Op = getLosslessPtrToIntExpr(Op); if (isa(Op)) return Op; } if (Signed) Op = getNoopOrSignExtend(Op, Ty); else Op = getNoopOrZeroExtend(Op, Ty); return Op; }; LS = CoerceOperand(LS); RS = CoerceOperand(RS); if (isa(LS) || isa(RS)) break; const SCEV *LDiff = getMinusSCEV(LA, LS); const SCEV *RDiff = getMinusSCEV(RA, RS); if (LDiff == RDiff) return getAddExpr(Signed ? getSMaxExpr(LS, RS) : getUMaxExpr(LS, RS), LDiff); LDiff = getMinusSCEV(LA, RS); RDiff = getMinusSCEV(RA, LS); if (LDiff == RDiff) return getAddExpr(Signed ? getSMinExpr(LS, RS) : getUMinExpr(LS, RS), LDiff); } break; case ICmpInst::ICMP_NE: // x != 0 ? x+y : C+y -> x == 0 ? C+y : x+y std::swap(TrueVal, FalseVal); [[fallthrough]]; case ICmpInst::ICMP_EQ: // x == 0 ? C+y : x+y -> umax(x, C)+y iff C u<= 1 if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(Ty) && isa(RHS) && cast(RHS)->isZero()) { const SCEV *X = getNoopOrZeroExtend(getSCEV(LHS), Ty); const SCEV *TrueValExpr = getSCEV(TrueVal); // C+y const SCEV *FalseValExpr = getSCEV(FalseVal); // x+y const SCEV *Y = getMinusSCEV(FalseValExpr, X); // y = (x+y)-x const SCEV *C = getMinusSCEV(TrueValExpr, Y); // C = (C+y)-y if (isa(C) && cast(C)->getAPInt().ule(1)) return getAddExpr(getUMaxExpr(X, C), Y); } // x == 0 ? 0 : umin (..., x, ...) -> umin_seq(x, umin (...)) // x == 0 ? 0 : umin_seq(..., x, ...) -> umin_seq(x, umin_seq(...)) // x == 0 ? 0 : umin (..., umin_seq(..., x, ...), ...) 
// -> umin_seq(x, umin (..., umin_seq(...), ...)) if (isa(RHS) && cast(RHS)->isZero() && isa(TrueVal) && cast(TrueVal)->isZero()) { const SCEV *X = getSCEV(LHS); while (auto *ZExt = dyn_cast(X)) X = ZExt->getOperand(); if (getTypeSizeInBits(X->getType()) <= getTypeSizeInBits(Ty)) { const SCEV *FalseValExpr = getSCEV(FalseVal); if (SCEVMinMaxExprContains(FalseValExpr, X, scSequentialUMinExpr)) return getUMinExpr(getNoopOrZeroExtend(X, Ty), FalseValExpr, /*Sequential=*/true); } } break; default: break; } return std::nullopt; } static std::optional createNodeForSelectViaUMinSeq(ScalarEvolution *SE, const SCEV *CondExpr, const SCEV *TrueExpr, const SCEV *FalseExpr) { assert(CondExpr->getType()->isIntegerTy(1) && TrueExpr->getType() == FalseExpr->getType() && TrueExpr->getType()->isIntegerTy(1) && "Unexpected operands of a select."); // i1 cond ? i1 x : i1 C --> C + (i1 cond ? (i1 x - i1 C) : i1 0) // --> C + (umin_seq cond, x - C) // // i1 cond ? i1 C : i1 x --> C + (i1 cond ? i1 0 : (i1 x - i1 C)) // --> C + (i1 ~cond ? (i1 x - i1 C) : i1 0) // --> C + (umin_seq ~cond, x - C) // FIXME: while we can't legally model the case where both of the hands // are fully variable, we only require that the *difference* is constant. if (!isa(TrueExpr) && !isa(FalseExpr)) return std::nullopt; const SCEV *X, *C; if (isa(TrueExpr)) { CondExpr = SE->getNotSCEV(CondExpr); X = FalseExpr; C = TrueExpr; } else { X = TrueExpr; C = FalseExpr; } return SE->getAddExpr(C, SE->getUMinExpr(CondExpr, SE->getMinusSCEV(X, C), /*Sequential=*/true)); } static std::optional createNodeForSelectViaUMinSeq(ScalarEvolution *SE, Value *Cond, Value *TrueVal, Value *FalseVal) { if (!isa(TrueVal) && !isa(FalseVal)) return std::nullopt; const auto *SECond = SE->getSCEV(Cond); const auto *SETrue = SE->getSCEV(TrueVal); const auto *SEFalse = SE->getSCEV(FalseVal); return createNodeForSelectViaUMinSeq(SE, SECond, SETrue, SEFalse); } const SCEV *ScalarEvolution::createNodeForSelectOrPHIViaUMinSeq( Value *V, Value *Cond, Value *TrueVal, Value *FalseVal) { assert(Cond->getType()->isIntegerTy(1) && "Select condition is not an i1?"); assert(TrueVal->getType() == FalseVal->getType() && V->getType() == TrueVal->getType() && "Types of select hands and of the result must match."); // For now, only deal with i1-typed `select`s. if (!V->getType()->isIntegerTy(1)) return getUnknown(V); if (std::optional S = createNodeForSelectViaUMinSeq(this, Cond, TrueVal, FalseVal)) return *S; return getUnknown(V); } const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Value *V, Value *Cond, Value *TrueVal, Value *FalseVal) { // Handle "constant" branch or select. This can occur for instance when a // loop pass transforms an inner loop and moves on to process the outer loop. if (auto *CI = dyn_cast(Cond)) return getSCEV(CI->isOne() ? TrueVal : FalseVal); if (auto *I = dyn_cast(V)) { if (auto *ICI = dyn_cast(Cond)) { if (std::optional S = createNodeForSelectOrPHIInstWithICmpInstCond(I->getType(), ICI, TrueVal, FalseVal)) return *S; } } return createNodeForSelectOrPHIViaUMinSeq(V, Cond, TrueVal, FalseVal); } /// Expand GEP instructions into add and multiply operations. This allows them /// to be analyzed by regular SCEV code. 
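/// For example (hypothetical IR names %base and %i): a GEP such as
///   getelementptr inbounds i32, ptr %base, i64 %i
/// is modeled roughly as (%base + 4 * %i): each index is scaled by the size
/// of the type it steps over and the scaled offsets are added to the base
/// pointer.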
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { assert(GEP->getSourceElementType()->isSized() && "GEP source element type must be sized"); SmallVector IndexExprs; for (Value *Index : GEP->indices()) IndexExprs.push_back(getSCEV(Index)); return getGEPExpr(GEP, IndexExprs); } APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) { uint64_t BitWidth = getTypeSizeInBits(S->getType()); auto GetShiftedByZeros = [BitWidth](uint32_t TrailingZeros) { return TrailingZeros >= BitWidth ? APInt::getZero(BitWidth) : APInt::getOneBitSet(BitWidth, TrailingZeros); }; auto GetGCDMultiple = [this](const SCEVNAryExpr *N) { // The result is GCD of all operands results. APInt Res = getConstantMultiple(N->getOperand(0)); for (unsigned I = 1, E = N->getNumOperands(); I < E && Res != 1; ++I) Res = APIntOps::GreatestCommonDivisor( Res, getConstantMultiple(N->getOperand(I))); return Res; }; switch (S->getSCEVType()) { case scConstant: return cast(S)->getAPInt(); case scPtrToInt: return getConstantMultiple(cast(S)->getOperand()); case scUDivExpr: case scVScale: return APInt(BitWidth, 1); case scTruncate: { // Only multiples that are a power of 2 will hold after truncation. const SCEVTruncateExpr *T = cast(S); uint32_t TZ = getMinTrailingZeros(T->getOperand()); return GetShiftedByZeros(TZ); } case scZeroExtend: { const SCEVZeroExtendExpr *Z = cast(S); return getConstantMultiple(Z->getOperand()).zext(BitWidth); } case scSignExtend: { const SCEVSignExtendExpr *E = cast(S); return getConstantMultiple(E->getOperand()).sext(BitWidth); } case scMulExpr: { const SCEVMulExpr *M = cast(S); if (M->hasNoUnsignedWrap()) { // The result is the product of all operand results. APInt Res = getConstantMultiple(M->getOperand(0)); for (const SCEV *Operand : M->operands().drop_front()) Res = Res * getConstantMultiple(Operand); return Res; } // If there are no wrap guarentees, find the trailing zeros, which is the // sum of trailing zeros for all its operands. uint32_t TZ = 0; for (const SCEV *Operand : M->operands()) TZ += getMinTrailingZeros(Operand); return GetShiftedByZeros(TZ); } case scAddExpr: case scAddRecExpr: { const SCEVNAryExpr *N = cast(S); if (N->hasNoUnsignedWrap()) return GetGCDMultiple(N); // Find the trailing bits, which is the minimum of its operands. uint32_t TZ = getMinTrailingZeros(N->getOperand(0)); for (const SCEV *Operand : N->operands().drop_front()) TZ = std::min(TZ, getMinTrailingZeros(Operand)); return GetShiftedByZeros(TZ); } case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: case scSequentialUMinExpr: return GetGCDMultiple(cast(S)); case scUnknown: { // ask ValueTracking for known bits const SCEVUnknown *U = cast(S); unsigned Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT) .countMinTrailingZeros(); return GetShiftedByZeros(Known); } case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } APInt ScalarEvolution::getConstantMultiple(const SCEV *S) { auto I = ConstantMultipleCache.find(S); if (I != ConstantMultipleCache.end()) return I->second; APInt Result = getConstantMultipleImpl(S); auto InsertPair = ConstantMultipleCache.insert({S, Result}); assert(InsertPair.second && "Should insert a new key"); return InsertPair.first->second; } APInt ScalarEvolution::getNonZeroConstantMultiple(const SCEV *S) { APInt Multiple = getConstantMultiple(S); return Multiple == 0 ? 
APInt(Multiple.getBitWidth(), 1) : Multiple; } uint32_t ScalarEvolution::getMinTrailingZeros(const SCEV *S) { return std::min(getConstantMultiple(S).countTrailingZeros(), (unsigned)getTypeSizeInBits(S->getType())); } /// Helper method to assign a range to V from metadata present in the IR. static std::optional GetRangeFromMetadata(Value *V) { if (Instruction *I = dyn_cast(V)) if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) return getConstantRangeFromMetadata(*MD); return std::nullopt; } void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags) { if (AddRec->getNoWrapFlags(Flags) != Flags) { AddRec->setNoWrapFlags(Flags); UnsignedRanges.erase(AddRec); SignedRanges.erase(AddRec); ConstantMultipleCache.erase(AddRec); } } ConstantRange ScalarEvolution:: getRangeForUnknownRecurrence(const SCEVUnknown *U) { const DataLayout &DL = getDataLayout(); unsigned BitWidth = getTypeSizeInBits(U->getType()); const ConstantRange FullSet(BitWidth, /*isFullSet=*/true); // Match a simple recurrence of the form: , and then // use information about the trip count to improve our available range. Note // that the trip count independent cases are already handled by known bits. // WARNING: The definition of recurrence used here is subtly different than // the one used by AddRec (and thus most of this file). Step is allowed to // be arbitrarily loop varying here, where AddRec allows only loop invariant // and other addrecs in the same loop (for non-affine addrecs). The code // below intentionally handles the case where step is not loop invariant. auto *P = dyn_cast(U->getValue()); if (!P) return FullSet; // Make sure that no Phi input comes from an unreachable block. Otherwise, // even the values that are not available in these blocks may come from them, // and this leads to false-positive recurrence test. for (auto *Pred : predecessors(P->getParent())) if (!DT.isReachableFromEntry(Pred)) return FullSet; BinaryOperator *BO; Value *Start, *Step; if (!matchSimpleRecurrence(P, BO, Start, Step)) return FullSet; // If we found a recurrence in reachable code, we must be in a loop. Note // that BO might be in some subloop of L, and that's completely okay. auto *L = LI.getLoopFor(P->getParent()); assert(L && L->getHeader() == P->getParent()); if (!L->contains(BO->getParent())) // NOTE: This bailout should be an assert instead. However, asserting // the condition here exposes a case where LoopFusion is querying SCEV // with malformed loop information during the midst of the transform. // There doesn't appear to be an obvious fix, so for the moment bailout // until the caller issue can be fixed. PR49566 tracks the bug. return FullSet; // TODO: Extend to other opcodes such as mul, and div switch (BO->getOpcode()) { default: return FullSet; case Instruction::AShr: case Instruction::LShr: case Instruction::Shl: break; }; if (BO->getOperand(0) != P) // TODO: Handle the power function forms some day. return FullSet; unsigned TC = getSmallConstantMaxTripCount(L); if (!TC || TC >= BitWidth) return FullSet; auto KnownStart = computeKnownBits(Start, DL, 0, &AC, nullptr, &DT); auto KnownStep = computeKnownBits(Step, DL, 0, &AC, nullptr, &DT); assert(KnownStart.getBitWidth() == BitWidth && KnownStep.getBitWidth() == BitWidth); // Compute total shift amount, being careful of overflow and bitwidths. 
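  // For instance (illustrative numbers): with a constant step of 1 and a
  // constant max trip count of 4, TotalShift below is at most 3; for an i8
  // lshr recurrence whose start value has its top bit known zero, the lshr
  // case then yields roughly [0, 128) instead of the full set.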
auto MaxShiftAmt = KnownStep.getMaxValue(); APInt TCAP(BitWidth, TC-1); bool Overflow = false; auto TotalShift = MaxShiftAmt.umul_ov(TCAP, Overflow); if (Overflow) return FullSet; switch (BO->getOpcode()) { default: llvm_unreachable("filtered out above"); case Instruction::AShr: { // For each ashr, three cases: // shift = 0 => unchanged value // saturation => 0 or -1 // other => a value closer to zero (of the same sign) // Thus, the end value is closer to zero than the start. auto KnownEnd = KnownBits::ashr(KnownStart, KnownBits::makeConstant(TotalShift)); if (KnownStart.isNonNegative()) // Analogous to lshr (simply not yet canonicalized) return ConstantRange::getNonEmpty(KnownEnd.getMinValue(), KnownStart.getMaxValue() + 1); if (KnownStart.isNegative()) // End >=u Start && End <=s Start return ConstantRange::getNonEmpty(KnownStart.getMinValue(), KnownEnd.getMaxValue() + 1); break; } case Instruction::LShr: { // For each lshr, three cases: // shift = 0 => unchanged value // saturation => 0 // other => a smaller positive number // Thus, the low end of the unsigned range is the last value produced. auto KnownEnd = KnownBits::lshr(KnownStart, KnownBits::makeConstant(TotalShift)); return ConstantRange::getNonEmpty(KnownEnd.getMinValue(), KnownStart.getMaxValue() + 1); } case Instruction::Shl: { // Iff no bits are shifted out, value increases on every shift. auto KnownEnd = KnownBits::shl(KnownStart, KnownBits::makeConstant(TotalShift)); if (TotalShift.ult(KnownStart.countMinLeadingZeros())) return ConstantRange(KnownStart.getMinValue(), KnownEnd.getMaxValue() + 1); break; } }; return FullSet; } const ConstantRange & ScalarEvolution::getRangeRefIter(const SCEV *S, ScalarEvolution::RangeSignHint SignHint) { DenseMap &Cache = SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges : SignedRanges; SmallVector WorkList; SmallPtrSet Seen; // Add Expr to the worklist, if Expr is either an N-ary expression or a // SCEVUnknown PHI node. auto AddToWorklist = [&WorkList, &Seen, &Cache](const SCEV *Expr) { if (!Seen.insert(Expr).second) return; if (Cache.contains(Expr)) return; switch (Expr->getSCEVType()) { case scUnknown: if (!isa(cast(Expr)->getValue())) break; [[fallthrough]]; case scConstant: case scVScale: case scTruncate: case scZeroExtend: case scSignExtend: case scPtrToInt: case scAddExpr: case scMulExpr: case scUDivExpr: case scAddRecExpr: case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: case scSequentialUMinExpr: WorkList.push_back(Expr); break; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } }; AddToWorklist(S); // Build worklist by queuing operands of N-ary expressions and phi nodes. for (unsigned I = 0; I != WorkList.size(); ++I) { const SCEV *P = WorkList[I]; auto *UnknownS = dyn_cast(P); // If it is not a `SCEVUnknown`, just recurse into operands. if (!UnknownS) { for (const SCEV *Op : P->operands()) AddToWorklist(Op); continue; } // `SCEVUnknown`'s require special treatment. if (const PHINode *P = dyn_cast(UnknownS->getValue())) { if (!PendingPhiRangesIter.insert(P).second) continue; for (auto &Op : reverse(P->operands())) AddToWorklist(getSCEV(Op)); } } if (!WorkList.empty()) { // Use getRangeRef to compute ranges for items in the worklist in reverse // order. This will force ranges for earlier operands to be computed before // their users in most cases. 
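  // For instance (illustrative), for S = zext((%phi1 + %phi2)) the worklist
  // roughly contains the zext, the add, and the phi unknowns (plus their
  // incoming values); the reverse walk below computes the phi ranges first,
  // then the add, and the trailing getRangeRef call handles S itself.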
for (const SCEV *P : reverse(drop_begin(WorkList))) { getRangeRef(P, SignHint); if (auto *UnknownS = dyn_cast(P)) if (const PHINode *P = dyn_cast(UnknownS->getValue())) PendingPhiRangesIter.erase(P); } } return getRangeRef(S, SignHint, 0); } /// Determine the range for a particular SCEV. If SignHint is /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges /// with a "cleaner" unsigned (resp. signed) representation. const ConstantRange &ScalarEvolution::getRangeRef( const SCEV *S, ScalarEvolution::RangeSignHint SignHint, unsigned Depth) { DenseMap &Cache = SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges : SignedRanges; ConstantRange::PreferredRangeType RangeType = SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? ConstantRange::Unsigned : ConstantRange::Signed; // See if we've computed this range already. DenseMap::iterator I = Cache.find(S); if (I != Cache.end()) return I->second; if (const SCEVConstant *C = dyn_cast(S)) return setRange(C, SignHint, ConstantRange(C->getAPInt())); // Switch to iteratively computing the range for S, if it is part of a deeply // nested expression. if (Depth > RangeIterThreshold) return getRangeRefIter(S, SignHint); unsigned BitWidth = getTypeSizeInBits(S->getType()); ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); using OBO = OverflowingBinaryOperator; // If the value has known zeros, the maximum value will have those known zeros // as well. if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { APInt Multiple = getNonZeroConstantMultiple(S); APInt Remainder = APInt::getMaxValue(BitWidth).urem(Multiple); if (!Remainder.isZero()) ConservativeResult = ConstantRange(APInt::getMinValue(BitWidth), APInt::getMaxValue(BitWidth) - Remainder + 1); } else { uint32_t TZ = getMinTrailingZeros(S); if (TZ != 0) { ConservativeResult = ConstantRange( APInt::getSignedMinValue(BitWidth), APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1); } } switch (S->getSCEVType()) { case scConstant: llvm_unreachable("Already handled above."); case scVScale: return setRange(S, SignHint, getVScaleRange(&F, BitWidth)); case scTruncate: { const SCEVTruncateExpr *Trunc = cast(S); ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint, Depth + 1); return setRange( Trunc, SignHint, ConservativeResult.intersectWith(X.truncate(BitWidth), RangeType)); } case scZeroExtend: { const SCEVZeroExtendExpr *ZExt = cast(S); ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint, Depth + 1); return setRange( ZExt, SignHint, ConservativeResult.intersectWith(X.zeroExtend(BitWidth), RangeType)); } case scSignExtend: { const SCEVSignExtendExpr *SExt = cast(S); ConstantRange X = getRangeRef(SExt->getOperand(), SignHint, Depth + 1); return setRange( SExt, SignHint, ConservativeResult.intersectWith(X.signExtend(BitWidth), RangeType)); } case scPtrToInt: { const SCEVPtrToIntExpr *PtrToInt = cast(S); ConstantRange X = getRangeRef(PtrToInt->getOperand(), SignHint, Depth + 1); return setRange(PtrToInt, SignHint, X); } case scAddExpr: { const SCEVAddExpr *Add = cast(S); ConstantRange X = getRangeRef(Add->getOperand(0), SignHint, Depth + 1); unsigned WrapType = OBO::AnyWrap; if (Add->hasNoSignedWrap()) WrapType |= OBO::NoSignedWrap; if (Add->hasNoUnsignedWrap()) WrapType |= OBO::NoUnsignedWrap; for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) X = X.addWithNoWrap(getRangeRef(Add->getOperand(i), SignHint, Depth + 1), WrapType, RangeType); return setRange(Add, SignHint, ConservativeResult.intersectWith(X, RangeType)); } case scMulExpr: { 
const SCEVMulExpr *Mul = cast(S); ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint, Depth + 1); for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint, Depth + 1)); return setRange(Mul, SignHint, ConservativeResult.intersectWith(X, RangeType)); } case scUDivExpr: { const SCEVUDivExpr *UDiv = cast(S); ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint, Depth + 1); ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint, Depth + 1); return setRange(UDiv, SignHint, ConservativeResult.intersectWith(X.udiv(Y), RangeType)); } case scAddRecExpr: { const SCEVAddRecExpr *AddRec = cast(S); // If there's no unsigned wrap, the value will never be less than its // initial value. if (AddRec->hasNoUnsignedWrap()) { APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart()); if (!UnsignedMinValue.isZero()) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType); } // If there's no signed wrap, and all the operands except initial value have // the same sign or zero, the value won't ever be: // 1: smaller than initial value if operands are non negative, // 2: bigger than initial value if operands are non positive. // For both cases, value can not cross signed min/max boundary. if (AddRec->hasNoSignedWrap()) { bool AllNonNeg = true; bool AllNonPos = true; for (unsigned i = 1, e = AddRec->getNumOperands(); i != e; ++i) { if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false; if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false; } if (AllNonNeg) ConservativeResult = ConservativeResult.intersectWith( ConstantRange::getNonEmpty(getSignedRangeMin(AddRec->getStart()), APInt::getSignedMinValue(BitWidth)), RangeType); else if (AllNonPos) ConservativeResult = ConservativeResult.intersectWith( ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth), getSignedRangeMax(AddRec->getStart()) + 1), RangeType); } // TODO: non-affine addrec if (AddRec->isAffine()) { const SCEV *MaxBEScev = getConstantMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa(MaxBEScev)) { APInt MaxBECount = cast(MaxBEScev)->getAPInt(); // Adjust MaxBECount to the same bitwidth as AddRec. We can truncate if // MaxBECount's active bits are all <= AddRec's bit width. if (MaxBECount.getBitWidth() > BitWidth && MaxBECount.getActiveBits() <= BitWidth) MaxBECount = MaxBECount.trunc(BitWidth); else if (MaxBECount.getBitWidth() < BitWidth) MaxBECount = MaxBECount.zext(BitWidth); if (MaxBECount.getBitWidth() == BitWidth) { auto RangeFromAffine = getRangeForAffineAR( AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount); ConservativeResult = ConservativeResult.intersectWith(RangeFromAffine, RangeType); auto RangeFromFactoring = getRangeViaFactoring( AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount); ConservativeResult = ConservativeResult.intersectWith(RangeFromFactoring, RangeType); } } // Now try symbolic BE count and more powerful methods. 
if (UseExpensiveRangeSharpening) { const SCEV *SymbolicMaxBECount = getSymbolicMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa(SymbolicMaxBECount) && getTypeSizeInBits(MaxBEScev->getType()) <= BitWidth && AddRec->hasNoSelfWrap()) { auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR( AddRec, SymbolicMaxBECount, BitWidth, SignHint); ConservativeResult = ConservativeResult.intersectWith(RangeFromAffineNew, RangeType); } } } return setRange(AddRec, SignHint, std::move(ConservativeResult)); } case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: case scSequentialUMinExpr: { Intrinsic::ID ID; switch (S->getSCEVType()) { case scUMaxExpr: ID = Intrinsic::umax; break; case scSMaxExpr: ID = Intrinsic::smax; break; case scUMinExpr: case scSequentialUMinExpr: ID = Intrinsic::umin; break; case scSMinExpr: ID = Intrinsic::smin; break; default: llvm_unreachable("Unknown SCEVMinMaxExpr/SCEVSequentialMinMaxExpr."); } const auto *NAry = cast(S); ConstantRange X = getRangeRef(NAry->getOperand(0), SignHint, Depth + 1); for (unsigned i = 1, e = NAry->getNumOperands(); i != e; ++i) X = X.intrinsic( ID, {X, getRangeRef(NAry->getOperand(i), SignHint, Depth + 1)}); return setRange(S, SignHint, ConservativeResult.intersectWith(X, RangeType)); } case scUnknown: { const SCEVUnknown *U = cast(S); Value *V = U->getValue(); // Check if the IR explicitly contains !range metadata. std::optional MDRange = GetRangeFromMetadata(V); if (MDRange) ConservativeResult = ConservativeResult.intersectWith(*MDRange, RangeType); // Use facts about recurrences in the underlying IR. Note that add // recurrences are AddRecExprs and thus don't hit this path. This // primarily handles shift recurrences. auto CR = getRangeForUnknownRecurrence(U); ConservativeResult = ConservativeResult.intersectWith(CR); // See if ValueTracking can give us a useful range. const DataLayout &DL = getDataLayout(); KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, &DT); if (Known.getBitWidth() != BitWidth) Known = Known.zextOrTrunc(BitWidth); // ValueTracking may be able to compute a tighter result for the number of // sign bits than for the value of those sign bits. unsigned NS = ComputeNumSignBits(V, DL, 0, &AC, nullptr, &DT); if (U->getType()->isPointerTy()) { // If the pointer size is larger than the index size type, this can cause // NS to be larger than BitWidth. So compensate for this. unsigned ptrSize = DL.getPointerTypeSizeInBits(U->getType()); int ptrIdxDiff = ptrSize - BitWidth; if (ptrIdxDiff > 0 && ptrSize > BitWidth && NS > (unsigned)ptrIdxDiff) NS -= ptrIdxDiff; } if (NS > 1) { // If we know any of the sign bits, we know all of the sign bits. if (!Known.Zero.getHiBits(NS).isZero()) Known.Zero.setHighBits(NS); if (!Known.One.getHiBits(NS).isZero()) Known.One.setHighBits(NS); } if (Known.getMinValue() != Known.getMaxValue() + 1) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1), RangeType); if (NS > 1) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1), RangeType); if (U->getType()->isPointerTy() && SignHint == HINT_RANGE_UNSIGNED) { // Strengthen the range if the underlying IR value is a // global/alloca/heap allocation using the size of the object. 
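    // For example (illustrative): for a 64-byte global such as
    //   @buf = global [64 x i8] zeroinitializer, align 16
    // the start address is known non-null and 16-aligned, so the unsigned
    // range below is narrowed to start at the alignment (16) and to exclude
    // addresses within the last 64 bytes of the address space, rather than
    // being the full set.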
ObjectSizeOpts Opts; Opts.RoundToAlign = false; Opts.NullIsUnknownSize = true; uint64_t ObjSize; if ((isa(V) || isa(V) || isAllocationFn(V, &TLI)) && getObjectSize(V, ObjSize, DL, &TLI, Opts) && ObjSize > 1) { // The highest address the object can start is ObjSize bytes before the // end (unsigned max value). If this value is not a multiple of the // alignment, the last possible start value is the next lowest multiple // of the alignment. Note: The computations below cannot overflow, // because if they would there's no possible start address for the // object. APInt MaxVal = APInt::getMaxValue(BitWidth) - APInt(BitWidth, ObjSize); uint64_t Align = U->getValue()->getPointerAlignment(DL).value(); uint64_t Rem = MaxVal.urem(Align); MaxVal -= APInt(BitWidth, Rem); APInt MinVal = APInt::getZero(BitWidth); if (llvm::isKnownNonZero(V, DL)) MinVal = Align; ConservativeResult = ConservativeResult.intersectWith( ConstantRange::getNonEmpty(MinVal, MaxVal + 1), RangeType); } } // A range of Phi is a subset of union of all ranges of its input. if (PHINode *Phi = dyn_cast(V)) { // Make sure that we do not run over cycled Phis. if (PendingPhiRanges.insert(Phi).second) { ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false); for (const auto &Op : Phi->operands()) { auto OpRange = getRangeRef(getSCEV(Op), SignHint, Depth + 1); RangeFromOps = RangeFromOps.unionWith(OpRange); // No point to continue if we already have a full set. if (RangeFromOps.isFullSet()) break; } ConservativeResult = ConservativeResult.intersectWith(RangeFromOps, RangeType); bool Erased = PendingPhiRanges.erase(Phi); assert(Erased && "Failed to erase Phi properly?"); (void)Erased; } } // vscale can't be equal to zero if (const auto *II = dyn_cast(V)) if (II->getIntrinsicID() == Intrinsic::vscale) { ConstantRange Disallowed = APInt::getZero(BitWidth); ConservativeResult = ConservativeResult.difference(Disallowed); } return setRange(U, SignHint, std::move(ConservativeResult)); } case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } return setRange(S, SignHint, std::move(ConservativeResult)); } // Given a StartRange, Step and MaxBECount for an expression compute a range of // values that the expression can take. Initially, the expression has a value // from StartRange and then is changed by Step up to MaxBECount times. Signed // argument defines if we treat Step as signed or unsigned. static ConstantRange getRangeForAffineARHelper(APInt Step, const ConstantRange &StartRange, const APInt &MaxBECount, bool Signed) { unsigned BitWidth = Step.getBitWidth(); assert(BitWidth == StartRange.getBitWidth() && BitWidth == MaxBECount.getBitWidth() && "mismatched bit widths"); // If either Step or MaxBECount is 0, then the expression won't change, and we // just need to return the initial range. if (Step == 0 || MaxBECount == 0) return StartRange; // If we don't know anything about the initial value (i.e. StartRange is // FullRange), then we don't know anything about the final range either. // Return FullRange. if (StartRange.isFullSet()) return ConstantRange::getFull(BitWidth); // If Step is signed and negative, then we use its absolute value, but we also // note that we're moving in the opposite direction. bool Descending = Signed && Step.isNegative(); if (Signed) // This is correct even for INT_SMIN. Let's look at i8 to illustrate this: // abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128. // This equations hold true due to the well-defined wrap-around behavior of // APInt. 
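  // For instance, a signed step of -3 with MaxBECount = 10 is treated as
  // Descending with Step = 3: the lower bound of StartRange is moved down by
  // Offset = 30 while its upper bound is kept, and if the moved bound wraps
  // back into StartRange the full set is returned instead.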
Step = Step.abs(); // Check if Offset is more than full span of BitWidth. If it is, the // expression is guaranteed to overflow. if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount)) return ConstantRange::getFull(BitWidth); // Offset is by how much the expression can change. Checks above guarantee no // overflow here. APInt Offset = Step * MaxBECount; // Minimum value of the final range will match the minimal value of StartRange // if the expression is increasing and will be decreased by Offset otherwise. // Maximum value of the final range will match the maximal value of StartRange // if the expression is decreasing and will be increased by Offset otherwise. APInt StartLower = StartRange.getLower(); APInt StartUpper = StartRange.getUpper() - 1; APInt MovedBoundary = Descending ? (StartLower - std::move(Offset)) : (StartUpper + std::move(Offset)); // It's possible that the new minimum/maximum value will fall into the initial // range (due to wrap around). This means that the expression can take any // value in this bitwidth, and we have to return full range. if (StartRange.contains(MovedBoundary)) return ConstantRange::getFull(BitWidth); APInt NewLower = Descending ? std::move(MovedBoundary) : std::move(StartLower); APInt NewUpper = Descending ? std::move(StartUpper) : std::move(MovedBoundary); NewUpper += 1; // No overflow detected, return [StartLower, StartUpper + Offset + 1) range. return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper)); } ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, const SCEV *Step, const APInt &MaxBECount) { assert(getTypeSizeInBits(Start->getType()) == getTypeSizeInBits(Step->getType()) && getTypeSizeInBits(Start->getType()) == MaxBECount.getBitWidth() && "mismatched bit widths"); // First, consider step signed. ConstantRange StartSRange = getSignedRange(Start); ConstantRange StepSRange = getSignedRange(Step); // If Step can be both positive and negative, we need to find ranges for the // maximum absolute step values in both directions and union them. ConstantRange SR = getRangeForAffineARHelper( StepSRange.getSignedMin(), StartSRange, MaxBECount, /* Signed = */ true); SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(), StartSRange, MaxBECount, /* Signed = */ true)); // Next, consider step unsigned. ConstantRange UR = getRangeForAffineARHelper( getUnsignedRangeMax(Step), getUnsignedRange(Start), MaxBECount, /* Signed = */ false); // Finally, intersect signed and unsigned ranges. return SR.intersectWith(UR, ConstantRange::Smallest); } ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR( const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth, ScalarEvolution::RangeSignHint SignHint) { assert(AddRec->isAffine() && "Non-affine AddRecs are not suppored!\n"); assert(AddRec->hasNoSelfWrap() && "This only works for non-self-wrapping AddRecs!"); const bool IsSigned = SignHint == HINT_RANGE_SIGNED; const SCEV *Step = AddRec->getStepRecurrence(*this); // Only deal with constant step to save compile time. if (!isa(Step)) return ConstantRange::getFull(BitWidth); // Let's make sure that we can prove that we do not self-wrap during // MaxBECount iterations. We need this because MaxBECount is a maximum // iteration count estimate, and we might infer nw from some exit for which we // do not know max exit count (or any other side reasoning). // TODO: Turn into assert at some point. 
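  // For example (illustrative): for an i8 AddRec {%start,+,2}<nw>, RangeWidth
  // is 255 and StepAbs is 2, so MaxItersWithoutWrap is 127; the sharpening is
  // only attempted when MaxBECount is provably u<= 127, and then the range can
  // be narrowed to the span between the start and end values.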
if (getTypeSizeInBits(MaxBECount->getType()) > getTypeSizeInBits(AddRec->getType())) return ConstantRange::getFull(BitWidth); MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType()); const SCEV *RangeWidth = getMinusOne(AddRec->getType()); const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step)); const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs); if (!isKnownPredicateViaConstantRanges(ICmpInst::ICMP_ULE, MaxBECount, MaxItersWithoutWrap)) return ConstantRange::getFull(BitWidth); ICmpInst::Predicate LEPred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; ICmpInst::Predicate GEPred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); // We know that there is no self-wrap. Let's take Start and End values and // look at all intermediate values V1, V2, ..., Vn that IndVar takes during // the iteration. They either lie inside the range [Min(Start, End), // Max(Start, End)] or outside it: // // Case 1: RangeMin ... Start V1 ... VN End ... RangeMax; // Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax; // // No self wrap flag guarantees that the intermediate values cannot be BOTH // outside and inside the range [Min(Start, End), Max(Start, End)]. Using that // knowledge, let's try to prove that we are dealing with Case 1. It is so if // Start <= End and step is positive, or Start >= End and step is negative. const SCEV *Start = applyLoopGuards(AddRec->getStart(), AddRec->getLoop()); ConstantRange StartRange = getRangeRef(Start, SignHint); ConstantRange EndRange = getRangeRef(End, SignHint); ConstantRange RangeBetween = StartRange.unionWith(EndRange); // If they already cover full iteration space, we will know nothing useful // even if we prove what we want to prove. if (RangeBetween.isFullSet()) return RangeBetween; // Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax). bool IsWrappedSet = IsSigned ? RangeBetween.isSignWrappedSet() : RangeBetween.isWrappedSet(); if (IsWrappedSet) return ConstantRange::getFull(BitWidth); if (isKnownPositive(Step) && isKnownPredicateViaConstantRanges(LEPred, Start, End)) return RangeBetween; if (isKnownNegative(Step) && isKnownPredicateViaConstantRanges(GEPred, Start, End)) return RangeBetween; return ConstantRange::getFull(BitWidth); } ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, const SCEV *Step, const APInt &MaxBECount) { // RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q}) // == RangeOf({A,+,P}) union RangeOf({B,+,Q}) unsigned BitWidth = MaxBECount.getBitWidth(); assert(getTypeSizeInBits(Start->getType()) == BitWidth && getTypeSizeInBits(Step->getType()) == BitWidth && "mismatched bit widths"); struct SelectPattern { Value *Condition = nullptr; APInt TrueValue; APInt FalseValue; explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth, const SCEV *S) { std::optional CastOp; APInt Offset(BitWidth, 0); assert(SE.getTypeSizeInBits(S->getType()) == BitWidth && "Should be!"); // Peel off a constant offset: if (auto *SA = dyn_cast(S)) { // In the future we could consider being smarter here and handle // {Start+Step,+,Step} too. 
if (SA->getNumOperands() != 2 || !isa(SA->getOperand(0))) return; Offset = cast(SA->getOperand(0))->getAPInt(); S = SA->getOperand(1); } // Peel off a cast operation if (auto *SCast = dyn_cast(S)) { CastOp = SCast->getSCEVType(); S = SCast->getOperand(); } using namespace llvm::PatternMatch; auto *SU = dyn_cast(S); const APInt *TrueVal, *FalseVal; if (!SU || !match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal), m_APInt(FalseVal)))) { Condition = nullptr; return; } TrueValue = *TrueVal; FalseValue = *FalseVal; // Re-apply the cast we peeled off earlier if (CastOp) switch (*CastOp) { default: llvm_unreachable("Unknown SCEV cast type!"); case scTruncate: TrueValue = TrueValue.trunc(BitWidth); FalseValue = FalseValue.trunc(BitWidth); break; case scZeroExtend: TrueValue = TrueValue.zext(BitWidth); FalseValue = FalseValue.zext(BitWidth); break; case scSignExtend: TrueValue = TrueValue.sext(BitWidth); FalseValue = FalseValue.sext(BitWidth); break; } // Re-apply the constant offset we peeled off earlier TrueValue += Offset; FalseValue += Offset; } bool isRecognized() { return Condition != nullptr; } }; SelectPattern StartPattern(*this, BitWidth, Start); if (!StartPattern.isRecognized()) return ConstantRange::getFull(BitWidth); SelectPattern StepPattern(*this, BitWidth, Step); if (!StepPattern.isRecognized()) return ConstantRange::getFull(BitWidth); if (StartPattern.Condition != StepPattern.Condition) { // We don't handle this case today; but we could, by considering four // possibilities below instead of two. I'm not sure if there are cases where // that will help over what getRange already does, though. return ConstantRange::getFull(BitWidth); } // NB! Calling ScalarEvolution::getConstant is fine, but we should not try to // construct arbitrary general SCEV expressions here. This function is called // from deep in the call stack, and calling getSCEV (on a sext instruction, // say) can end up caching a suboptimal value. // FIXME: without the explicit `this` receiver below, MSVC errors out with // C2352 and C2512 (otherwise it isn't needed). const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue); const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue); const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue); const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue); ConstantRange TrueRange = this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount); ConstantRange FalseRange = this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount); return TrueRange.unionWith(FalseRange); } SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { if (isa(V)) return SCEV::FlagAnyWrap; const BinaryOperator *BinOp = cast(V); // Return early if there are no flags to propagate to the SCEV. SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; if (BinOp->hasNoUnsignedWrap()) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); if (BinOp->hasNoSignedWrap()) Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); if (Flags == SCEV::FlagAnyWrap) return SCEV::FlagAnyWrap; return isSCEVExprNeverPoison(BinOp) ? 
Flags : SCEV::FlagAnyWrap; } const Instruction * ScalarEvolution::getNonTrivialDefiningScopeBound(const SCEV *S) { if (auto *AddRec = dyn_cast(S)) return &*AddRec->getLoop()->getHeader()->begin(); if (auto *U = dyn_cast(S)) if (auto *I = dyn_cast(U->getValue())) return I; return nullptr; } const Instruction * ScalarEvolution::getDefiningScopeBound(ArrayRef Ops, bool &Precise) { Precise = true; // Do a bounded search of the def relation of the requested SCEVs. SmallSet Visited; SmallVector Worklist; auto pushOp = [&](const SCEV *S) { if (!Visited.insert(S).second) return; // Threshold of 30 here is arbitrary. if (Visited.size() > 30) { Precise = false; return; } Worklist.push_back(S); }; for (const auto *S : Ops) pushOp(S); const Instruction *Bound = nullptr; while (!Worklist.empty()) { auto *S = Worklist.pop_back_val(); if (auto *DefI = getNonTrivialDefiningScopeBound(S)) { if (!Bound || DT.dominates(Bound, DefI)) Bound = DefI; } else { for (const auto *Op : S->operands()) pushOp(Op); } } return Bound ? Bound : &*F.getEntryBlock().begin(); } const Instruction * ScalarEvolution::getDefiningScopeBound(ArrayRef Ops) { bool Discard; return getDefiningScopeBound(Ops, Discard); } bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A, const Instruction *B) { if (A->getParent() == B->getParent() && isGuaranteedToTransferExecutionToSuccessor(A->getIterator(), B->getIterator())) return true; auto *BLoop = LI.getLoopFor(B->getParent()); if (BLoop && BLoop->getHeader() == B->getParent() && BLoop->getLoopPreheader() == A->getParent() && isGuaranteedToTransferExecutionToSuccessor(A->getIterator(), A->getParent()->end()) && isGuaranteedToTransferExecutionToSuccessor(B->getParent()->begin(), B->getIterator())) return true; return false; } bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { // Only proceed if we can prove that I does not yield poison. if (!programUndefinedIfPoison(I)) return false; // At this point we know that if I is executed, then it does not wrap // according to at least one of NSW or NUW. If I is not executed, then we do // not know if the calculation that I represents would wrap. Multiple // instructions can map to the same SCEV. If we apply NSW or NUW from I to // the SCEV, we must guarantee no wrapping for that SCEV also when it is // derived from other instructions that map to the same SCEV. We cannot make // that guarantee for cases where I is not executed. So we need to find a // upper bound on the defining scope for the SCEV, and prove that I is // executed every time we enter that scope. When the bounding scope is a // loop (the common case), this is equivalent to proving I executes on every // iteration of that loop. SmallVector SCEVOps; for (const Use &Op : I->operands()) { // I could be an extractvalue from a call to an overflow intrinsic. // TODO: We can do better here in some cases. if (isSCEVable(Op->getType())) SCEVOps.push_back(getSCEV(Op)); } auto *DefI = getDefiningScopeBound(SCEVOps); return isGuaranteedToTransferExecutionTo(DefI, I); } bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { // If we know that \c I can never be poison period, then that's enough. if (isSCEVExprNeverPoison(I)) return true; // If the loop only has one exit, then we know that, if the loop is entered, // any instruction dominating that exit will be executed. If any such // instruction would result in UB, the addrec cannot be poison. 
// // This is basically the same reasoning as in isSCEVExprNeverPoison(), but // also handles uses outside the loop header (they just need to dominate the // single exit). auto *ExitingBB = L->getExitingBlock(); if (!ExitingBB || !loopHasNoAbnormalExits(L)) return false; SmallPtrSet KnownPoison; SmallVector Worklist; // We start by assuming \c I, the post-inc add recurrence, is poison. Only // things that are known to be poison under that assumption go on the // Worklist. KnownPoison.insert(I); Worklist.push_back(I); while (!Worklist.empty()) { const Instruction *Poison = Worklist.pop_back_val(); for (const Use &U : Poison->uses()) { const Instruction *PoisonUser = cast(U.getUser()); if (mustTriggerUB(PoisonUser, KnownPoison) && DT.dominates(PoisonUser->getParent(), ExitingBB)) return true; if (propagatesPoison(U) && L->contains(PoisonUser)) if (KnownPoison.insert(PoisonUser).second) Worklist.push_back(PoisonUser); } } return false; } ScalarEvolution::LoopProperties ScalarEvolution::getLoopProperties(const Loop *L) { using LoopProperties = ScalarEvolution::LoopProperties; auto Itr = LoopPropertiesCache.find(L); if (Itr == LoopPropertiesCache.end()) { auto HasSideEffects = [](Instruction *I) { if (auto *SI = dyn_cast(I)) return !SI->isSimple(); return I->mayThrow() || I->mayWriteToMemory(); }; LoopProperties LP = {/* HasNoAbnormalExits */ true, /*HasNoSideEffects*/ true}; for (auto *BB : L->getBlocks()) for (auto &I : *BB) { if (!isGuaranteedToTransferExecutionToSuccessor(&I)) LP.HasNoAbnormalExits = false; if (HasSideEffects(&I)) LP.HasNoSideEffects = false; if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects) break; // We're already as pessimistic as we can get. } auto InsertPair = LoopPropertiesCache.insert({L, LP}); assert(InsertPair.second && "We just checked!"); Itr = InsertPair.first; } return Itr->second; } bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) { // A mustprogress loop without side effects must be finite. // TODO: The check used here is very conservative. It's only *specific* // side effects which are well defined in infinite loops. return isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L)); } const SCEV *ScalarEvolution::createSCEVIter(Value *V) { // Worklist item with a Value and a bool indicating whether all operands have // been visited already. using PointerTy = PointerIntPair; SmallVector Stack; Stack.emplace_back(V, true); Stack.emplace_back(V, false); while (!Stack.empty()) { auto E = Stack.pop_back_val(); Value *CurV = E.getPointer(); if (getExistingSCEV(CurV)) continue; SmallVector Ops; const SCEV *CreatedSCEV = nullptr; // If all operands have been visited already, create the SCEV. if (E.getInt()) { CreatedSCEV = createSCEV(CurV); } else { // Otherwise get the operands we need to create SCEV's for before creating // the SCEV for CurV. If the SCEV for CurV can be constructed trivially, // just use it. CreatedSCEV = getOperandsToCreate(CurV, Ops); } if (CreatedSCEV) { insertValueToMap(CurV, CreatedSCEV); } else { // Queue CurV for SCEV creation, followed by its's operands which need to // be constructed first. Stack.emplace_back(CurV, true); for (Value *Op : Ops) Stack.emplace_back(Op, false); } } return getExistingSCEV(V); } const SCEV * ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl &Ops) { if (!isSCEVable(V->getType())) return getUnknown(V); if (Instruction *I = dyn_cast(V)) { // Don't attempt to analyze instructions in blocks that aren't // reachable. 
Such instructions don't matter, and they aren't required // to obey basic rules for definitions dominating uses which this // analysis depends on. if (!DT.isReachableFromEntry(I->getParent())) return getUnknown(PoisonValue::get(V->getType())); } else if (ConstantInt *CI = dyn_cast(V)) return getConstant(CI); else if (isa(V)) return getUnknown(V); else if (!isa(V)) return getUnknown(V); Operator *U = cast(V); if (auto BO = MatchBinaryOp(U, getDataLayout(), AC, DT, dyn_cast(V))) { bool IsConstArg = isa(BO->RHS); switch (BO->Opcode) { case Instruction::Add: case Instruction::Mul: { // For additions and multiplications, traverse add/mul chains for which we // can potentially create a single SCEV, to reduce the number of // get{Add,Mul}Expr calls. do { if (BO->Op) { if (BO->Op != V && getExistingSCEV(BO->Op)) { Ops.push_back(BO->Op); break; } } Ops.push_back(BO->RHS); auto NewBO = MatchBinaryOp(BO->LHS, getDataLayout(), AC, DT, dyn_cast(V)); if (!NewBO || (BO->Opcode == Instruction::Add && (NewBO->Opcode != Instruction::Add && NewBO->Opcode != Instruction::Sub)) || (BO->Opcode == Instruction::Mul && NewBO->Opcode != Instruction::Mul)) { Ops.push_back(BO->LHS); break; } // CreateSCEV calls getNoWrapFlagsFromUB, which under certain conditions // requires a SCEV for the LHS. if (BO->Op && (BO->IsNSW || BO->IsNUW)) { auto *I = dyn_cast(BO->Op); if (I && programUndefinedIfPoison(I)) { Ops.push_back(BO->LHS); break; } } BO = NewBO; } while (true); return nullptr; } case Instruction::Sub: case Instruction::UDiv: case Instruction::URem: break; case Instruction::AShr: case Instruction::Shl: case Instruction::Xor: if (!IsConstArg) return nullptr; break; case Instruction::And: case Instruction::Or: if (!IsConstArg && !BO->LHS->getType()->isIntegerTy(1)) return nullptr; break; case Instruction::LShr: return getUnknown(V); default: llvm_unreachable("Unhandled binop"); break; } Ops.push_back(BO->LHS); Ops.push_back(BO->RHS); return nullptr; } switch (U->getOpcode()) { case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::PtrToInt: Ops.push_back(U->getOperand(0)); return nullptr; case Instruction::BitCast: if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) { Ops.push_back(U->getOperand(0)); return nullptr; } return getUnknown(V); case Instruction::SDiv: case Instruction::SRem: Ops.push_back(U->getOperand(0)); Ops.push_back(U->getOperand(1)); return nullptr; case Instruction::GetElementPtr: assert(cast(U)->getSourceElementType()->isSized() && "GEP source element type must be sized"); for (Value *Index : U->operands()) Ops.push_back(Index); return nullptr; case Instruction::IntToPtr: return getUnknown(V); case Instruction::PHI: // Keep constructing SCEVs' for phis recursively for now. return nullptr; case Instruction::Select: { // Check if U is a select that can be simplified to a SCEVUnknown. 
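      // For example (hypothetical IR names): an i32 select such as
      //   %s = select i1 (icmp eq i32 %x, 7), i32 %a, i32 %b
      // is treated as an opaque SCEVUnknown, since the eq/ne special case only
      // applies when the compared-against constant is zero; i1-typed selects
      // and constant conditions still have their operands queued below.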
auto CanSimplifyToUnknown = [this, U]() { if (U->getType()->isIntegerTy(1) || isa(U->getOperand(0))) return false; auto *ICI = dyn_cast(U->getOperand(0)); if (!ICI) return false; Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); if (ICI->getPredicate() == CmpInst::ICMP_EQ || ICI->getPredicate() == CmpInst::ICMP_NE) { if (!(isa(RHS) && cast(RHS)->isZero())) return true; } else if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(U->getType())) return true; return false; }; if (CanSimplifyToUnknown()) return getUnknown(U); for (Value *Inc : U->operands()) Ops.push_back(Inc); return nullptr; break; } case Instruction::Call: case Instruction::Invoke: if (Value *RV = cast(U)->getReturnedArgOperand()) { Ops.push_back(RV); return nullptr; } if (auto *II = dyn_cast(U)) { switch (II->getIntrinsicID()) { case Intrinsic::abs: Ops.push_back(II->getArgOperand(0)); return nullptr; case Intrinsic::umax: case Intrinsic::umin: case Intrinsic::smax: case Intrinsic::smin: case Intrinsic::usub_sat: case Intrinsic::uadd_sat: Ops.push_back(II->getArgOperand(0)); Ops.push_back(II->getArgOperand(1)); return nullptr; case Intrinsic::start_loop_iterations: case Intrinsic::annotation: case Intrinsic::ptr_annotation: Ops.push_back(II->getArgOperand(0)); return nullptr; default: break; } } break; } return nullptr; } const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) return getUnknown(V); if (Instruction *I = dyn_cast(V)) { // Don't attempt to analyze instructions in blocks that aren't // reachable. Such instructions don't matter, and they aren't required // to obey basic rules for definitions dominating uses which this // analysis depends on. if (!DT.isReachableFromEntry(I->getParent())) return getUnknown(PoisonValue::get(V->getType())); } else if (ConstantInt *CI = dyn_cast(V)) return getConstant(CI); else if (isa(V)) return getUnknown(V); else if (!isa(V)) return getUnknown(V); const SCEV *LHS; const SCEV *RHS; Operator *U = cast(V); if (auto BO = MatchBinaryOp(U, getDataLayout(), AC, DT, dyn_cast(V))) { switch (BO->Opcode) { case Instruction::Add: { // The simple thing to do would be to just call getSCEV on both operands // and call getAddExpr with the result. However if we're looking at a // bunch of things all added together, this can be quite inefficient, // because it leads to N-1 getAddExpr calls for N ultimate operands. // Instead, gather up all the operands and make a single getAddExpr call. // LLVM IR canonical form means we need only traverse the left operands. SmallVector AddOps; do { if (BO->Op) { if (auto *OpSCEV = getExistingSCEV(BO->Op)) { AddOps.push_back(OpSCEV); break; } // If a NUW or NSW flag can be applied to the SCEV for this // addition, then compute the SCEV for this addition by itself // with a separate call to getAddExpr. We need to do that // instead of pushing the operands of the addition onto AddOps, // since the flags are only known to apply to this particular // addition - they may not apply to other additions that can be // formed with operands from AddOps. 
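          // For example, in (%a +nsw %b) + %c the nsw flag covers only the
          // inner add; flattening everything into a single getAddExpr call
          // would wrongly extend it to the outer add, so the flagged addition
          // is folded into one operand by itself.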
const SCEV *RHS = getSCEV(BO->RHS); SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); if (Flags != SCEV::FlagAnyWrap) { const SCEV *LHS = getSCEV(BO->LHS); if (BO->Opcode == Instruction::Sub) AddOps.push_back(getMinusSCEV(LHS, RHS, Flags)); else AddOps.push_back(getAddExpr(LHS, RHS, Flags)); break; } } if (BO->Opcode == Instruction::Sub) AddOps.push_back(getNegativeSCEV(getSCEV(BO->RHS))); else AddOps.push_back(getSCEV(BO->RHS)); auto NewBO = MatchBinaryOp(BO->LHS, getDataLayout(), AC, DT, dyn_cast(V)); if (!NewBO || (NewBO->Opcode != Instruction::Add && NewBO->Opcode != Instruction::Sub)) { AddOps.push_back(getSCEV(BO->LHS)); break; } BO = NewBO; } while (true); return getAddExpr(AddOps); } case Instruction::Mul: { SmallVector MulOps; do { if (BO->Op) { if (auto *OpSCEV = getExistingSCEV(BO->Op)) { MulOps.push_back(OpSCEV); break; } SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); if (Flags != SCEV::FlagAnyWrap) { LHS = getSCEV(BO->LHS); RHS = getSCEV(BO->RHS); MulOps.push_back(getMulExpr(LHS, RHS, Flags)); break; } } MulOps.push_back(getSCEV(BO->RHS)); auto NewBO = MatchBinaryOp(BO->LHS, getDataLayout(), AC, DT, dyn_cast(V)); if (!NewBO || NewBO->Opcode != Instruction::Mul) { MulOps.push_back(getSCEV(BO->LHS)); break; } BO = NewBO; } while (true); return getMulExpr(MulOps); } case Instruction::UDiv: LHS = getSCEV(BO->LHS); RHS = getSCEV(BO->RHS); return getUDivExpr(LHS, RHS); case Instruction::URem: LHS = getSCEV(BO->LHS); RHS = getSCEV(BO->RHS); return getURemExpr(LHS, RHS); case Instruction::Sub: { SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; if (BO->Op) Flags = getNoWrapFlagsFromUB(BO->Op); LHS = getSCEV(BO->LHS); RHS = getSCEV(BO->RHS); return getMinusSCEV(LHS, RHS, Flags); } case Instruction::And: // For an expression like x&255 that merely masks off the high bits, // use zext(trunc(x)) as the SCEV expression. if (ConstantInt *CI = dyn_cast(BO->RHS)) { if (CI->isZero()) return getSCEV(BO->RHS); if (CI->isMinusOne()) return getSCEV(BO->LHS); const APInt &A = CI->getValue(); // Instcombine's ShrinkDemandedConstant may strip bits out of // constants, obscuring what would otherwise be a low-bits mask. // Use computeKnownBits to compute what ShrinkDemandedConstant // knew about to reconstruct a low-bits mask value. unsigned LZ = A.countl_zero(); unsigned TZ = A.countr_zero(); unsigned BitWidth = A.getBitWidth(); KnownBits Known(BitWidth); computeKnownBits(BO->LHS, Known, getDataLayout(), 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) { const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ)); const SCEV *LHS = getSCEV(BO->LHS); const SCEV *ShiftedLHS = nullptr; if (auto *LHSMul = dyn_cast(LHS)) { if (auto *OpC = dyn_cast(LHSMul->getOperand(0))) { // For an expression like (x * 8) & 8, simplify the multiply. 
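                // Illustrative walk-through for i32 (x * 8) & 8: A = 8, so
                // TZ = 3 and LZ = 28; MulZeros = 3, GCD = 3, DivAmt = 1, the
                // multiply collapses back to x, and the result below is
                //   (zext i1 (trunc i32 x to i1) to i32) * 8
                // which is exactly (x & 1) * 8.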
unsigned MulZeros = OpC->getAPInt().countr_zero(); unsigned GCD = std::min(MulZeros, TZ); APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD); SmallVector MulOps; MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD))); append_range(MulOps, LHSMul->operands().drop_front()); auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags()); ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt)); } } if (!ShiftedLHS) ShiftedLHS = getUDivExpr(LHS, MulCount); return getMulExpr( getZeroExtendExpr( getTruncateExpr(ShiftedLHS, IntegerType::get(getContext(), BitWidth - LZ - TZ)), BO->LHS->getType()), MulCount); } } // Binary `and` is a bit-wise `umin`. if (BO->LHS->getType()->isIntegerTy(1)) { LHS = getSCEV(BO->LHS); RHS = getSCEV(BO->RHS); return getUMinExpr(LHS, RHS); } break; case Instruction::Or: // Binary `or` is a bit-wise `umax`. if (BO->LHS->getType()->isIntegerTy(1)) { LHS = getSCEV(BO->LHS); RHS = getSCEV(BO->RHS); return getUMaxExpr(LHS, RHS); } break; case Instruction::Xor: if (ConstantInt *CI = dyn_cast(BO->RHS)) { // If the RHS of xor is -1, then this is a not operation. if (CI->isMinusOne()) return getNotSCEV(getSCEV(BO->LHS)); // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. // This is a variant of the check for xor with -1, and it handles // the case where instcombine has trimmed non-demanded bits out // of an xor with -1. if (auto *LBO = dyn_cast(BO->LHS)) if (ConstantInt *LCI = dyn_cast(LBO->getOperand(1))) if (LBO->getOpcode() == Instruction::And && LCI->getValue() == CI->getValue()) if (const SCEVZeroExtendExpr *Z = dyn_cast(getSCEV(BO->LHS))) { Type *UTy = BO->LHS->getType(); const SCEV *Z0 = Z->getOperand(); Type *Z0Ty = Z0->getType(); unsigned Z0TySize = getTypeSizeInBits(Z0Ty); // If C is a low-bits mask, the zero extend is serving to // mask off the high bits. Complement the operand and // re-apply the zext. if (CI->getValue().isMask(Z0TySize)) return getZeroExtendExpr(getNotSCEV(Z0), UTy); // If C is a single bit, it may be in the sign-bit position // before the zero-extend. In this case, represent the xor // using an add, which is equivalent, and re-apply the zext. APInt Trunc = CI->getValue().trunc(Z0TySize); if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && Trunc.isSignMask()) return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), UTy); } } break; case Instruction::Shl: // Turn shift left of a constant amount into a multiply. if (ConstantInt *SA = dyn_cast(BO->RHS)) { uint32_t BitWidth = cast(SA->getType())->getBitWidth(); // If the shift count is not less than the bitwidth, the result of // the shift is undefined. Don't try to analyze it, because the // resolution chosen here may differ from the resolution chosen in // other parts of the compiler. if (SA->getValue().uge(BitWidth)) break; // We can safely preserve the nuw flag in all cases. It's also safe to // turn a nuw nsw shl into a nuw nsw mul. However, nsw in isolation // requires special handling. It can be preserved as long as we're not // left shifting by bitwidth - 1. 
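        // Illustrative counterexample for why nsw alone is dropped when
        // shifting by bitwidth - 1: (shl nsw i8 -1, 7) is a well-defined
        // -128, but the equivalent multiply by the constant 0x80 (-128 as
        // i8) would sign-wrap, so FlagNSW cannot be transferred to the mul.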
auto Flags = SCEV::FlagAnyWrap; if (BO->Op) { auto MulFlags = getNoWrapFlagsFromUB(BO->Op); if ((MulFlags & SCEV::FlagNSW) && ((MulFlags & SCEV::FlagNUW) || SA->getValue().ult(BitWidth - 1))) Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNSW); if (MulFlags & SCEV::FlagNUW) Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNUW); } ConstantInt *X = ConstantInt::get( getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); return getMulExpr(getSCEV(BO->LHS), getConstant(X), Flags); } break; case Instruction::AShr: // AShr X, C, where C is a constant. ConstantInt *CI = dyn_cast(BO->RHS); if (!CI) break; Type *OuterTy = BO->LHS->getType(); uint64_t BitWidth = getTypeSizeInBits(OuterTy); // If the shift count is not less than the bitwidth, the result of // the shift is undefined. Don't try to analyze it, because the // resolution chosen here may differ from the resolution chosen in // other parts of the compiler. if (CI->getValue().uge(BitWidth)) break; if (CI->isZero()) return getSCEV(BO->LHS); // shift by zero --> noop uint64_t AShrAmt = CI->getZExtValue(); Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt); Operator *L = dyn_cast(BO->LHS); const SCEV *AddTruncateExpr = nullptr; ConstantInt *ShlAmtCI = nullptr; const SCEV *AddConstant = nullptr; if (L && L->getOpcode() == Instruction::Add) { // X = Shl A, n // Y = Add X, c // Z = AShr Y, m // n, c and m are constants. Operator *LShift = dyn_cast(L->getOperand(0)); ConstantInt *AddOperandCI = dyn_cast(L->getOperand(1)); if (LShift && LShift->getOpcode() == Instruction::Shl) { if (AddOperandCI) { const SCEV *ShlOp0SCEV = getSCEV(LShift->getOperand(0)); ShlAmtCI = dyn_cast(LShift->getOperand(1)); // since we truncate to TruncTy, the AddConstant should be of the // same type, so create a new Constant with type same as TruncTy. // Also, the Add constant should be shifted right by AShr amount. APInt AddOperand = AddOperandCI->getValue().ashr(AShrAmt); AddConstant = getConstant(AddOperand.trunc(BitWidth - AShrAmt)); // we model the expression as sext(add(trunc(A), c << n)), since the // sext(trunc) part is already handled below, we create a // AddExpr(TruncExp) which will be used later. AddTruncateExpr = getTruncateExpr(ShlOp0SCEV, TruncTy); } } } else if (L && L->getOpcode() == Instruction::Shl) { // X = Shl A, n // Y = AShr X, m // Both n and m are constant. const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0)); ShlAmtCI = dyn_cast(L->getOperand(1)); AddTruncateExpr = getTruncateExpr(ShlOp0SCEV, TruncTy); } if (AddTruncateExpr && ShlAmtCI) { // We can merge the two given cases into a single SCEV statement, // incase n = m, the mul expression will be 2^0, so it gets resolved to // a simpler case. The following code handles the two cases: // // 1) For a two-shift sext-inreg, i.e. n = m, // use sext(trunc(x)) as the SCEV expression. // // 2) When n > m, use sext(mul(trunc(x), 2^(n-m)))) as the SCEV // expression. We already checked that ShlAmt < BitWidth, so // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as // ShlAmt - AShrAmt < Amt. 
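      // Illustrative: with i32 operands, X = shl A, 24 followed by
      // Y = ashr X, 16 gives AShrAmt = 16, TruncTy = i16 and a multiplier of
      // 1 << 8, so Y is modeled as sext(((trunc A to i16) * 256)) to i32.
      // When the two shift amounts are equal the multiplier is 1 and this
      // degenerates to the plain sext(trunc(A)) form.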
const APInt &ShlAmt = ShlAmtCI->getValue(); if (ShlAmt.ult(BitWidth) && ShlAmt.uge(AShrAmt)) { APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, ShlAmtCI->getZExtValue() - AShrAmt); const SCEV *CompositeExpr = getMulExpr(AddTruncateExpr, getConstant(Mul)); if (L->getOpcode() != Instruction::Shl) CompositeExpr = getAddExpr(CompositeExpr, AddConstant); return getSignExtendExpr(CompositeExpr, OuterTy); } } break; } } switch (U->getOpcode()) { case Instruction::Trunc: return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::ZExt: return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::SExt: if (auto BO = MatchBinaryOp(U->getOperand(0), getDataLayout(), AC, DT, dyn_cast(V))) { // The NSW flag of a subtract does not always survive the conversion to // A + (-1)*B. By pushing sign extension onto its operands we are much // more likely to preserve NSW and allow later AddRec optimisations. // // NOTE: This is effectively duplicating this logic from getSignExtend: // sext((A + B + ...)) --> (sext(A) + sext(B) + ...) // but by that point the NSW information has potentially been lost. if (BO->Opcode == Instruction::Sub && BO->IsNSW) { Type *Ty = U->getType(); auto *V1 = getSignExtendExpr(getSCEV(BO->LHS), Ty); auto *V2 = getSignExtendExpr(getSCEV(BO->RHS), Ty); return getMinusSCEV(V1, V2, SCEV::FlagNSW); } } return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); case Instruction::BitCast: // BitCasts are no-op casts so we just eliminate the cast. if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) return getSCEV(U->getOperand(0)); break; case Instruction::PtrToInt: { // Pointer to integer cast is straight-forward, so do model it. const SCEV *Op = getSCEV(U->getOperand(0)); Type *DstIntTy = U->getType(); // But only if effective SCEV (integer) type is wide enough to represent // all possible pointer values. const SCEV *IntOp = getPtrToIntExpr(Op, DstIntTy); if (isa(IntOp)) return getUnknown(V); return IntOp; } case Instruction::IntToPtr: // Just don't deal with inttoptr casts. return getUnknown(V); case Instruction::SDiv: // If both operands are non-negative, this is just an udiv. if (isKnownNonNegative(getSCEV(U->getOperand(0))) && isKnownNonNegative(getSCEV(U->getOperand(1)))) return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); break; case Instruction::SRem: // If both operands are non-negative, this is just an urem. 
if (isKnownNonNegative(getSCEV(U->getOperand(0))) && isKnownNonNegative(getSCEV(U->getOperand(1)))) return getURemExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); break; case Instruction::GetElementPtr: return createNodeForGEP(cast(U)); case Instruction::PHI: return createNodeForPHI(cast(U)); case Instruction::Select: return createNodeForSelectOrPHI(U, U->getOperand(0), U->getOperand(1), U->getOperand(2)); case Instruction::Call: case Instruction::Invoke: if (Value *RV = cast(U)->getReturnedArgOperand()) return getSCEV(RV); if (auto *II = dyn_cast(U)) { switch (II->getIntrinsicID()) { case Intrinsic::abs: return getAbsExpr( getSCEV(II->getArgOperand(0)), /*IsNSW=*/cast(II->getArgOperand(1))->isOne()); case Intrinsic::umax: LHS = getSCEV(II->getArgOperand(0)); RHS = getSCEV(II->getArgOperand(1)); return getUMaxExpr(LHS, RHS); case Intrinsic::umin: LHS = getSCEV(II->getArgOperand(0)); RHS = getSCEV(II->getArgOperand(1)); return getUMinExpr(LHS, RHS); case Intrinsic::smax: LHS = getSCEV(II->getArgOperand(0)); RHS = getSCEV(II->getArgOperand(1)); return getSMaxExpr(LHS, RHS); case Intrinsic::smin: LHS = getSCEV(II->getArgOperand(0)); RHS = getSCEV(II->getArgOperand(1)); return getSMinExpr(LHS, RHS); case Intrinsic::usub_sat: { const SCEV *X = getSCEV(II->getArgOperand(0)); const SCEV *Y = getSCEV(II->getArgOperand(1)); const SCEV *ClampedY = getUMinExpr(X, Y); return getMinusSCEV(X, ClampedY, SCEV::FlagNUW); } case Intrinsic::uadd_sat: { const SCEV *X = getSCEV(II->getArgOperand(0)); const SCEV *Y = getSCEV(II->getArgOperand(1)); const SCEV *ClampedX = getUMinExpr(X, getNotSCEV(Y)); return getAddExpr(ClampedX, Y, SCEV::FlagNUW); } case Intrinsic::start_loop_iterations: case Intrinsic::annotation: case Intrinsic::ptr_annotation: // A start_loop_iterations or llvm.annotation or llvm.prt.annotation is // just eqivalent to the first operand for SCEV purposes. return getSCEV(II->getArgOperand(0)); case Intrinsic::vscale: return getVScale(II->getType()); default: break; } } break; } return getUnknown(V); } //===----------------------------------------------------------------------===// // Iteration Count Computation Code // const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount) { if (isa(ExitCount)) return getCouldNotCompute(); auto *ExitCountType = ExitCount->getType(); assert(ExitCountType->isIntegerTy()); auto *EvalTy = Type::getIntNTy(ExitCountType->getContext(), 1 + ExitCountType->getScalarSizeInBits()); return getTripCountFromExitCount(ExitCount, EvalTy, nullptr); } const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount, Type *EvalTy, const Loop *L) { if (isa(ExitCount)) return getCouldNotCompute(); unsigned ExitCountSize = getTypeSizeInBits(ExitCount->getType()); unsigned EvalSize = EvalTy->getPrimitiveSizeInBits(); auto CanAddOneWithoutOverflow = [&]() { ConstantRange ExitCountRange = getRangeRef(ExitCount, RangeSignHint::HINT_RANGE_UNSIGNED); if (!ExitCountRange.contains(APInt::getMaxValue(ExitCountSize))) return true; return L && isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, getMinusOne(ExitCount->getType())); }; // If we need to zero extend the backedge count, check if we can add one to // it prior to zero extending without overflow. Provided this is safe, it // allows better simplification of the +1. if (EvalSize > ExitCountSize && CanAddOneWithoutOverflow()) return getZeroExtendExpr( getAddExpr(ExitCount, getOne(ExitCount->getType())), EvalTy); // Get the total trip count from the count by adding 1. This may wrap. 
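  // (Illustrative: an i32 exit count of UINT32_MAX evaluated in an i32
  // EvalTy yields a trip count of 0 here, which is why the add-then-zext
  // form above is used whenever the +1 is provably overflow-free in the
  // narrow type.)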
  return getAddExpr(getTruncateOrZeroExtend(ExitCount, EvalTy), getOne(EvalTy));
}

static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
  if (!ExitCount)
    return 0;

  ConstantInt *ExitConst = ExitCount->getValue();

  // Guard against huge trip counts.
  if (ExitConst->getValue().getActiveBits() > 32)
    return 0;

  // In case of integer overflow, this returns 0, which is correct.
  return ((unsigned)ExitConst->getZExtValue()) + 1;
}

unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
  auto *ExitCount = dyn_cast<SCEVConstant>(getBackedgeTakenCount(L, Exact));
  return getConstantTripCount(ExitCount);
}

unsigned
ScalarEvolution::getSmallConstantTripCount(const Loop *L,
                                           const BasicBlock *ExitingBlock) {
  assert(ExitingBlock && "Must pass a non-null exiting block!");
  assert(L->isLoopExiting(ExitingBlock) &&
         "Exiting block must actually branch out of the loop!");
  const SCEVConstant *ExitCount =
      dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
  return getConstantTripCount(ExitCount);
}

unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
  const auto *MaxExitCount =
      dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
  return getConstantTripCount(MaxExitCount);
}

unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  std::optional<unsigned> Res;
  for (auto *ExitingBB : ExitingBlocks) {
    unsigned Multiple = getSmallConstantTripMultiple(L, ExitingBB);
    if (!Res)
      Res = Multiple;
    Res = (unsigned)std::gcd(*Res, Multiple);
  }
  return Res.value_or(1);
}

unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
                                                       const SCEV *ExitCount) {
  if (ExitCount == getCouldNotCompute())
    return 1;

  // Get the trip count.
  const SCEV *TCExpr = getTripCountFromExitCount(applyLoopGuards(ExitCount, L));

  APInt Multiple = getNonZeroConstantMultiple(TCExpr);
  // If a trip multiple is huge (>=2^32), the trip count is still divisible by
  // the greatest power of 2 divisor less than 2^32.
  return Multiple.getActiveBits() > 32
             ? 1U << std::min((unsigned)31, Multiple.countTrailingZeros())
             : (unsigned)Multiple.zextOrTrunc(32).getZExtValue();
}

/// Returns the largest constant divisor of the trip count of this loop as a
/// normal unsigned value, if possible. This means that the actual trip count
/// is always a multiple of the returned value (don't forget the trip count
/// could very well be zero as well!).
///
/// Returns 1 if the trip count is unknown or not guaranteed to be the
/// multiple of a constant (which is also the case if the trip count is simply
/// constant, use getSmallConstantTripCount for that case). It will also
/// return 1 if the trip count is very large (>= 2^32).
///
/// As explained in the comments for getSmallConstantTripCount, this assumes
/// that control exits the loop via ExitingBlock.
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L, const BasicBlock *ExitingBlock) { assert(ExitingBlock && "Must pass a non-null exiting block!"); assert(L->isLoopExiting(ExitingBlock) && "Exiting block must actually branch out of the loop!"); const SCEV *ExitCount = getExitCount(L, ExitingBlock); return getSmallConstantTripMultiple(L, ExitCount); } const SCEV *ScalarEvolution::getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind) { switch (Kind) { case Exact: return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); case SymbolicMaximum: return getBackedgeTakenInfo(L).getSymbolicMax(ExitingBlock, this); case ConstantMaximum: return getBackedgeTakenInfo(L).getConstantMax(ExitingBlock, this); }; llvm_unreachable("Invalid ExitCountKind!"); } const SCEV * ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, SmallVector &Preds) { return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds); } const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L, ExitCountKind Kind) { switch (Kind) { case Exact: return getBackedgeTakenInfo(L).getExact(L, this); case ConstantMaximum: return getBackedgeTakenInfo(L).getConstantMax(this); case SymbolicMaximum: return getBackedgeTakenInfo(L).getSymbolicMax(L, this); }; llvm_unreachable("Invalid ExitCountKind!"); } bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { return getBackedgeTakenInfo(L).isConstantMaxOrZero(this); } /// Push PHI nodes in the header of the given loop onto the given Worklist. static void PushLoopPHIs(const Loop *L, SmallVectorImpl &Worklist, SmallPtrSetImpl &Visited) { BasicBlock *Header = L->getHeader(); // Push all Loop-header PHIs onto the Worklist stack. for (PHINode &PN : Header->phis()) if (Visited.insert(&PN).second) Worklist.push_back(&PN); } const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { auto &BTI = getBackedgeTakenInfo(L); if (BTI.hasFullInfo()) return BTI; auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); if (!Pair.second) return Pair.first->second; BackedgeTakenInfo Result = computeBackedgeTakenCount(L, /*AllowPredicates=*/true); return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result); } ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert an invalid entry for this loop. If the insertion // succeeds, proceed to actually compute a backedge-taken count and // update the value. The temporary CouldNotCompute value tells SCEV // code elsewhere that it shouldn't attempt to request a new // backedge-taken count, which could result in infinite recursion. std::pair::iterator, bool> Pair = BackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); if (!Pair.second) return Pair.first->second; // computeBackedgeTakenCount may allocate memory for its result. Inserting it // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result // must be cleared in this scope. BackedgeTakenInfo Result = computeBackedgeTakenCount(L); // Now that we know more about the trip count for this loop, forget any // existing SCEV values for PHI nodes in this loop since they are only // conservative estimates made without the benefit of trip count // information. This invalidation is not necessary for correctness, and is // only done to produce more precise results. if (Result.hasAnyInfo()) { // Invalidate any expression using an addrec in this loop. 
SmallVector ToForget; auto LoopUsersIt = LoopUsers.find(L); if (LoopUsersIt != LoopUsers.end()) append_range(ToForget, LoopUsersIt->second); forgetMemoizedResults(ToForget); // Invalidate constant-evolved loop header phis. for (PHINode &PN : L->getHeader()->phis()) ConstantEvolutionLoopExitValue.erase(&PN); } // Re-lookup the insert position, since the call to // computeBackedgeTakenCount above could result in a // recusive call to getBackedgeTakenInfo (on a different // loop), which would invalidate the iterator computed // earlier. return BackedgeTakenCounts.find(L)->second = std::move(Result); } void ScalarEvolution::forgetAllLoops() { // This method is intended to forget all info about loops. It should // invalidate caches as if the following happened: // - The trip counts of all loops have changed arbitrarily // - Every llvm::Value has been updated in place to produce a different // result. BackedgeTakenCounts.clear(); PredicatedBackedgeTakenCounts.clear(); BECountUsers.clear(); LoopPropertiesCache.clear(); ConstantEvolutionLoopExitValue.clear(); ValueExprMap.clear(); ValuesAtScopes.clear(); ValuesAtScopesUsers.clear(); LoopDispositions.clear(); BlockDispositions.clear(); UnsignedRanges.clear(); SignedRanges.clear(); ExprValueMap.clear(); HasRecMap.clear(); ConstantMultipleCache.clear(); PredicatedSCEVRewrites.clear(); FoldCache.clear(); FoldCacheUser.clear(); } void ScalarEvolution::visitAndClearUsers( SmallVectorImpl &Worklist, SmallPtrSetImpl &Visited, SmallVectorImpl &ToForget) { while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); if (!isSCEVable(I->getType())) continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { eraseValueFromMap(It->first); ToForget.push_back(It->second); if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); } PushDefUseChildren(I, Worklist, Visited); } } void ScalarEvolution::forgetLoop(const Loop *L) { SmallVector LoopWorklist(1, L); SmallVector Worklist; SmallPtrSet Visited; SmallVector ToForget; // Iterate over all the loops and sub-loops to drop SCEV information. while (!LoopWorklist.empty()) { auto *CurrL = LoopWorklist.pop_back_val(); // Drop any stored trip count value. forgetBackedgeTakenCounts(CurrL, /* Predicated */ false); forgetBackedgeTakenCounts(CurrL, /* Predicated */ true); // Drop information about predicated SCEV rewrites for this loop. for (auto I = PredicatedSCEVRewrites.begin(); I != PredicatedSCEVRewrites.end();) { std::pair Entry = I->first; if (Entry.second == CurrL) PredicatedSCEVRewrites.erase(I++); else ++I; } auto LoopUsersItr = LoopUsers.find(CurrL); if (LoopUsersItr != LoopUsers.end()) { ToForget.insert(ToForget.end(), LoopUsersItr->second.begin(), LoopUsersItr->second.end()); } // Drop information about expressions based on loop-header PHIs. PushLoopPHIs(CurrL, Worklist, Visited); visitAndClearUsers(Worklist, Visited, ToForget); LoopPropertiesCache.erase(CurrL); // Forget all contained loops too, to avoid dangling entries in the // ValuesAtScopes map. LoopWorklist.append(CurrL->begin(), CurrL->end()); } forgetMemoizedResults(ToForget); } void ScalarEvolution::forgetTopmostLoop(const Loop *L) { forgetLoop(L->getOutermostLoop()); } void ScalarEvolution::forgetValue(Value *V) { Instruction *I = dyn_cast(V); if (!I) return; // Drop information about expressions based on loop-header PHIs. 
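  // Seed a def-use worklist with I and transitively erase the cached SCEVs
  // of every SCEVable user, then drop any results memoized for them.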
SmallVector Worklist; SmallPtrSet Visited; SmallVector ToForget; Worklist.push_back(I); Visited.insert(I); visitAndClearUsers(Worklist, Visited, ToForget); forgetMemoizedResults(ToForget); } void ScalarEvolution::forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V) { if (!isSCEVable(V->getType())) return; // If SCEV looked through a trivial LCSSA phi node, we might have SCEV's // directly using a SCEVUnknown/SCEVAddRec defined in the loop. After an // extra predecessor is added, this is no longer valid. Find all Unknowns and // AddRecs defined in the loop and invalidate any SCEV's making use of them. if (const SCEV *S = getExistingSCEV(V)) { struct InvalidationRootCollector { Loop *L; SmallVector Roots; InvalidationRootCollector(Loop *L) : L(L) {} bool follow(const SCEV *S) { if (auto *SU = dyn_cast(S)) { if (auto *I = dyn_cast(SU->getValue())) if (L->contains(I)) Roots.push_back(S); } else if (auto *AddRec = dyn_cast(S)) { if (L->contains(AddRec->getLoop())) Roots.push_back(S); } return true; } bool isDone() const { return false; } }; InvalidationRootCollector C(L); visitAll(S, C); forgetMemoizedResults(C.Roots); } // Also perform the normal invalidation. forgetValue(V); } void ScalarEvolution::forgetLoopDispositions() { LoopDispositions.clear(); } void ScalarEvolution::forgetBlockAndLoopDispositions(Value *V) { // Unless a specific value is passed to invalidation, completely clear both // caches. if (!V) { BlockDispositions.clear(); LoopDispositions.clear(); return; } if (!isSCEVable(V->getType())) return; const SCEV *S = getExistingSCEV(V); if (!S) return; // Invalidate the block and loop dispositions cached for S. Dispositions of // S's users may change if S's disposition changes (i.e. a user may change to // loop-invariant, if S changes to loop invariant), so also invalidate // dispositions of S's users recursively. SmallVector Worklist = {S}; SmallPtrSet Seen = {S}; while (!Worklist.empty()) { const SCEV *Curr = Worklist.pop_back_val(); bool LoopDispoRemoved = LoopDispositions.erase(Curr); bool BlockDispoRemoved = BlockDispositions.erase(Curr); if (!LoopDispoRemoved && !BlockDispoRemoved) continue; auto Users = SCEVUsers.find(Curr); if (Users != SCEVUsers.end()) for (const auto *User : Users->second) if (Seen.insert(User).second) Worklist.push_back(User); } } /// Get the exact loop backedge taken count considering all loop exits. A /// computable result can only be returned for loops with all exiting blocks /// dominating the latch. howFarToZero assumes that the limit of each loop test /// is never skipped. This is a valid assumption as long as the loop exits via /// that test. For precise results, it is the caller's responsibility to specify /// the relevant loop exiting block using getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, SmallVector *Preds) const { // If any exits were not computable, the loop is not computable. if (!isComplete() || ExitNotTaken.empty()) return SE->getCouldNotCompute(); const BasicBlock *Latch = L->getLoopLatch(); // All exiting blocks we have collected must dominate the only backedge. if (!Latch) return SE->getCouldNotCompute(); // All exiting blocks we have gathered dominate loop's latch, so exact trip // count is simply a minimum out of all these calculated exit counts. 
SmallVector Ops; for (const auto &ENT : ExitNotTaken) { const SCEV *BECount = ENT.ExactNotTaken; assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!"); assert(SE->DT.dominates(ENT.ExitingBlock, Latch) && "We should only have known counts for exiting blocks that dominate " "latch!"); Ops.push_back(BECount); if (Preds) for (const auto *P : ENT.Predicates) Preds->push_back(P); assert((Preds || ENT.hasAlwaysTruePredicate()) && "Predicate should be always true!"); } // If an earlier exit exits on the first iteration (exit count zero), then // a later poison exit count should not propagate into the result. This are // exactly the semantics provided by umin_seq. return SE->getUMinFromMismatchedTypes(Ops, /* Sequential */ true); } /// Get the exact not taken count for this loop exit. const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (const auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.ExactNotTaken; return SE->getCouldNotCompute(); } const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax( const BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (const auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.ConstantMaxNotTaken; return SE->getCouldNotCompute(); } const SCEV *ScalarEvolution::BackedgeTakenInfo::getSymbolicMax( const BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (const auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.SymbolicMaxNotTaken; return SE->getCouldNotCompute(); } /// getConstantMax - Get the constant max backedge taken count for the loop. const SCEV * ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const { auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { return !ENT.hasAlwaysTruePredicate(); }; if (!getConstantMax() || any_of(ExitNotTaken, PredicateNotAlwaysTrue)) return SE->getCouldNotCompute(); assert((isa(getConstantMax()) || isa(getConstantMax())) && "No point in having a non-constant max backedge taken count!"); return getConstantMax(); } const SCEV * ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(const Loop *L, ScalarEvolution *SE) { if (!SymbolicMax) SymbolicMax = SE->computeSymbolicMaxBackedgeTakenCount(L); return SymbolicMax; } bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero( ScalarEvolution *SE) const { auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { return !ENT.hasAlwaysTruePredicate(); }; return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue); } ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) : ExitLimit(E, E, E, false, std::nullopt) {} ScalarEvolution::ExitLimit::ExitLimit( const SCEV *E, const SCEV *ConstantMaxNotTaken, const SCEV *SymbolicMaxNotTaken, bool MaxOrZero, ArrayRef *> PredSetList) : ExactNotTaken(E), ConstantMaxNotTaken(ConstantMaxNotTaken), SymbolicMaxNotTaken(SymbolicMaxNotTaken), MaxOrZero(MaxOrZero) { // If we prove the max count is zero, so is the symbolic bound. This happens // in practice due to differences in a) how context sensitive we've chosen // to be and b) how we reason about bounds implied by UB. 
if (ConstantMaxNotTaken->isZero()) { this->ExactNotTaken = E = ConstantMaxNotTaken; this->SymbolicMaxNotTaken = SymbolicMaxNotTaken = ConstantMaxNotTaken; } assert((isa(ExactNotTaken) || !isa(ConstantMaxNotTaken)) && "Exact is not allowed to be less precise than Constant Max"); assert((isa(ExactNotTaken) || !isa(SymbolicMaxNotTaken)) && "Exact is not allowed to be less precise than Symbolic Max"); assert((isa(SymbolicMaxNotTaken) || !isa(ConstantMaxNotTaken)) && "Symbolic Max is not allowed to be less precise than Constant Max"); assert((isa(ConstantMaxNotTaken) || isa(ConstantMaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); for (const auto *PredSet : PredSetList) for (const auto *P : *PredSet) addPredicate(P); assert((isa(E) || !E->getType()->isPointerTy()) && "Backedge count should be int"); assert((isa(ConstantMaxNotTaken) || !ConstantMaxNotTaken->getType()->isPointerTy()) && "Max backedge count should be int"); } ScalarEvolution::ExitLimit::ExitLimit( const SCEV *E, const SCEV *ConstantMaxNotTaken, const SCEV *SymbolicMaxNotTaken, bool MaxOrZero, const SmallPtrSetImpl &PredSet) : ExitLimit(E, ConstantMaxNotTaken, SymbolicMaxNotTaken, MaxOrZero, { &PredSet }) {} /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( ArrayRef ExitCounts, bool IsComplete, const SCEV *ConstantMax, bool MaxOrZero) : ConstantMax(ConstantMax), IsComplete(IsComplete), MaxOrZero(MaxOrZero) { using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; ExitNotTaken.reserve(ExitCounts.size()); std::transform(ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken), [&](const EdgeExitInfo &EEI) { BasicBlock *ExitBB = EEI.first; const ExitLimit &EL = EEI.second; return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.ConstantMaxNotTaken, EL.SymbolicMaxNotTaken, EL.Predicates); }); assert((isa(ConstantMax) || isa(ConstantMax)) && "No point in having a non-constant max backedge taken count!"); } /// Compute the number of times the backedge of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo ScalarEvolution::computeBackedgeTakenCount(const Loop *L, bool AllowPredicates) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; SmallVector ExitCounts; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. const SCEV *MustExitMaxBECount = nullptr; const SCEV *MayExitMaxBECount = nullptr; bool MustExitMaxOrZero = false; // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts // and compute maxBECount. // Do a union of all the predicates here. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; // We canonicalize untaken exits to br (constant), ignore them so that // proving an exit untaken doesn't negatively impact our ability to reason // about the loop as whole. if (auto *BI = dyn_cast(ExitBB->getTerminator())) if (auto *CI = dyn_cast(BI->getCondition())) { bool ExitIfTrue = !L->contains(BI->getSuccessor(0)); if (ExitIfTrue == CI->isZero()) continue; } ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates); assert((AllowPredicates || EL.Predicates.empty()) && "Predicated exit limit when predicates are not allowed!"); // 1. For each exit that can be computed, add an entry to ExitCounts. 
    // CouldComputeBECount is true only if all exits can be computed.
    if (EL.ExactNotTaken != getCouldNotCompute())
      ++NumExitCountsComputed;
    else
      // We couldn't compute an exact value for this exit, so
      // we won't be able to compute an exact value for the loop.
      CouldComputeBECount = false;
    // Remember exit count if either exact or symbolic is known. Because
    // Exact always implies symbolic, only check symbolic.
    if (EL.SymbolicMaxNotTaken != getCouldNotCompute())
      ExitCounts.emplace_back(ExitBB, EL);
    else {
      assert(EL.ExactNotTaken == getCouldNotCompute() &&
             "Exact is known but symbolic isn't?");
      ++NumExitCountsNotComputed;
    }

    // 2. Derive the loop's MaxBECount from each exit's max number of
    // non-exiting iterations. Partition the loop exits into two kinds:
    // LoopMustExits and LoopMayExits.
    //
    // If the exit dominates the loop latch, it is a LoopMustExit otherwise it
    // is a LoopMayExit. If any computable LoopMustExit is found, then
    // MaxBECount is the minimum EL.ConstantMaxNotTaken of computable
    // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum
    // EL.ConstantMaxNotTaken, where CouldNotCompute is considered greater
    // than any computable EL.ConstantMaxNotTaken.
    if (EL.ConstantMaxNotTaken != getCouldNotCompute() && Latch &&
        DT.dominates(ExitBB, Latch)) {
      if (!MustExitMaxBECount) {
        MustExitMaxBECount = EL.ConstantMaxNotTaken;
        MustExitMaxOrZero = EL.MaxOrZero;
      } else {
        MustExitMaxBECount = getUMinFromMismatchedTypes(MustExitMaxBECount,
                                                        EL.ConstantMaxNotTaken);
      }
    } else if (MayExitMaxBECount != getCouldNotCompute()) {
      if (!MayExitMaxBECount || EL.ConstantMaxNotTaken == getCouldNotCompute())
        MayExitMaxBECount = EL.ConstantMaxNotTaken;
      else {
        MayExitMaxBECount = getUMaxFromMismatchedTypes(MayExitMaxBECount,
                                                       EL.ConstantMaxNotTaken);
      }
    }
  }
  const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
    (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
  // The loop backedge will be taken the maximum or zero times if there's
  // a single exit that must be taken the maximum or zero times.
  bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);

  // Remember which SCEVs are used in exit limits for invalidation purposes.
  // We only care about non-constant SCEVs here, so we can ignore
  // EL.ConstantMaxNotTaken and MaxBECount, which must be SCEVConstant.
  for (const auto &Pair : ExitCounts) {
    if (!isa<SCEVConstant>(Pair.second.ExactNotTaken))
      BECountUsers[Pair.second.ExactNotTaken].insert({L, AllowPredicates});
    if (!isa<SCEVConstant>(Pair.second.SymbolicMaxNotTaken))
      BECountUsers[Pair.second.SymbolicMaxNotTaken].insert(
          {L, AllowPredicates});
  }
  return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
                           MaxBECount, MaxOrZero);
}

ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
                                  bool AllowPredicates) {
  assert(L->contains(ExitingBlock) && "Exit count for non-loop block?");
  // If our exiting block does not dominate the latch, then its connection
  // with loop's exit limit may be far from trivial.
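  // (For example, an exit placed under an if inside the loop body may be
  // skipped entirely on some iterations, so its local exit count tells us
  // nothing definite about the backedge-taken count of the loop.)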
const BasicBlock *Latch = L->getLoopLatch(); if (!Latch || !DT.dominates(ExitingBlock, Latch)) return getCouldNotCompute(); bool IsOnlyExit = (L->getExitingBlock() != nullptr); Instruction *Term = ExitingBlock->getTerminator(); if (BranchInst *BI = dyn_cast(Term)) { assert(BI->isConditional() && "If unconditional, it can't be in loop!"); bool ExitIfTrue = !L->contains(BI->getSuccessor(0)); assert(ExitIfTrue == L->contains(BI->getSuccessor(1)) && "It should have one successor in loop and one exit block!"); // Proceed to the next level to examine the exit condition expression. return computeExitLimitFromCond(L, BI->getCondition(), ExitIfTrue, /*ControlsOnlyExit=*/IsOnlyExit, AllowPredicates); } if (SwitchInst *SI = dyn_cast(Term)) { // For switch, make sure that there is a single exit from the loop. BasicBlock *Exit = nullptr; for (auto *SBB : successors(ExitingBlock)) if (!L->contains(SBB)) { if (Exit) // Multiple exit successors. return getCouldNotCompute(); Exit = SBB; } assert(Exit && "Exiting block must have at least one exit"); return computeExitLimitFromSingleExitSwitch( L, SI, Exit, /*ControlsOnlyExit=*/IsOnlyExit); } return getCouldNotCompute(); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond( const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates) { ScalarEvolution::ExitLimitCacheTy Cache(L, ExitIfTrue, AllowPredicates); return computeExitLimitFromCondCached(Cache, L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates); } std::optional ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates) { (void)this->L; (void)this->ExitIfTrue; (void)this->AllowPredicates; assert(this->L == L && this->ExitIfTrue == ExitIfTrue && this->AllowPredicates == AllowPredicates && "Variance in assumed invariant key components!"); auto Itr = TripCountMap.find({ExitCond, ControlsOnlyExit}); if (Itr == TripCountMap.end()) return std::nullopt; return Itr->second; } void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates, const ExitLimit &EL) { assert(this->L == L && this->ExitIfTrue == ExitIfTrue && this->AllowPredicates == AllowPredicates && "Variance in assumed invariant key components!"); auto InsertResult = TripCountMap.insert({{ExitCond, ControlsOnlyExit}, EL}); assert(InsertResult.second && "Expected successful insertion!"); (void)InsertResult; (void)ExitIfTrue; } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached( ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates) { if (auto MaybeEL = Cache.find(L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates)) return *MaybeEL; ExitLimit EL = computeExitLimitFromCondImpl( Cache, L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates); Cache.insert(L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates, EL); return EL; } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates) { // Handle BinOp conditions (And, Or). if (auto LimitFromBinOp = computeExitLimitFromCondFromBinOp( Cache, L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates)) return *LimitFromBinOp; // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. 
if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) { ExitLimit EL = computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsOnlyExit); if (EL.hasFullInfo() || !AllowPredicates) return EL; // Try again, but use SCEV predicates this time. return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsOnlyExit, /*AllowPredicates=*/true); } // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to // preserve the CFG and is temporarily leaving constant conditions // in place. if (ConstantInt *CI = dyn_cast(ExitCond)) { if (ExitIfTrue == !CI->getZExtValue()) // The backedge is always taken. return getCouldNotCompute(); // The backedge is never taken. return getZero(CI->getType()); } // If we're exiting based on the overflow flag of an x.with.overflow intrinsic // with a constant step, we can form an equivalent icmp predicate and figure // out how many iterations will be taken before we exit. const WithOverflowInst *WO; const APInt *C; if (match(ExitCond, m_ExtractValue<1>(m_WithOverflowInst(WO))) && match(WO->getRHS(), m_APInt(C))) { ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C, WO->getNoWrapKind()); CmpInst::Predicate Pred; APInt NewRHSC, Offset; NWR.getEquivalentICmp(Pred, NewRHSC, Offset); if (!ExitIfTrue) Pred = ICmpInst::getInversePredicate(Pred); auto *LHS = getSCEV(WO->getLHS()); if (Offset != 0) LHS = getAddExpr(LHS, getConstant(Offset)); auto EL = computeExitLimitFromICmp(L, Pred, LHS, getConstant(NewRHSC), ControlsOnlyExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; } // If it's not an integer or pointer comparison then compute it the hard way. return computeExitCountExhaustively(L, ExitCond, ExitIfTrue); } std::optional ScalarEvolution::computeExitLimitFromCondFromBinOp( ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates) { // Check if the controlling expression for this loop is an And or Or. Value *Op0, *Op1; bool IsAnd = false; if (match(ExitCond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) IsAnd = true; else if (match(ExitCond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) IsAnd = false; else return std::nullopt; // EitherMayExit is true in these two cases: // br (and Op0 Op1), loop, exit // br (or Op0 Op1), exit, loop bool EitherMayExit = IsAnd ^ ExitIfTrue; ExitLimit EL0 = computeExitLimitFromCondCached( Cache, L, Op0, ExitIfTrue, ControlsOnlyExit && !EitherMayExit, AllowPredicates); ExitLimit EL1 = computeExitLimitFromCondCached( Cache, L, Op1, ExitIfTrue, ControlsOnlyExit && !EitherMayExit, AllowPredicates); // Be robust against unsimplified IR for the form "op i1 X, NeutralElement" const Constant *NeutralElement = ConstantInt::get(ExitCond->getType(), IsAnd); if (isa(Op1)) return Op1 == NeutralElement ? EL0 : EL1; if (isa(Op0)) return Op0 == NeutralElement ? EL1 : EL0; const SCEV *BECount = getCouldNotCompute(); const SCEV *ConstantMaxBECount = getCouldNotCompute(); const SCEV *SymbolicMaxBECount = getCouldNotCompute(); if (EitherMayExit) { bool UseSequentialUMin = !isa(ExitCond); // Both conditions must be same for the loop to continue executing. // Choose the less conservative count. 
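    // Illustrative: for "br (and C0, C1), loop, exit" the loop keeps running
    // only while both conditions hold, so it exits as soon as either fails
    // and the backedge-taken count is the umin of the two individual counts
    // computed below (a sequential umin when the condition is a
    // short-circuiting select rather than a plain 'and').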
if (EL0.ExactNotTaken != getCouldNotCompute() && EL1.ExactNotTaken != getCouldNotCompute()) { BECount = getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken, UseSequentialUMin); } if (EL0.ConstantMaxNotTaken == getCouldNotCompute()) ConstantMaxBECount = EL1.ConstantMaxNotTaken; else if (EL1.ConstantMaxNotTaken == getCouldNotCompute()) ConstantMaxBECount = EL0.ConstantMaxNotTaken; else ConstantMaxBECount = getUMinFromMismatchedTypes(EL0.ConstantMaxNotTaken, EL1.ConstantMaxNotTaken); if (EL0.SymbolicMaxNotTaken == getCouldNotCompute()) SymbolicMaxBECount = EL1.SymbolicMaxNotTaken; else if (EL1.SymbolicMaxNotTaken == getCouldNotCompute()) SymbolicMaxBECount = EL0.SymbolicMaxNotTaken; else SymbolicMaxBECount = getUMinFromMismatchedTypes( EL0.SymbolicMaxNotTaken, EL1.SymbolicMaxNotTaken, UseSequentialUMin); } else { // Both conditions must be same at the same time for the loop to exit. // For now, be conservative. if (EL0.ExactNotTaken == EL1.ExactNotTaken) BECount = EL0.ExactNotTaken; } // There are cases (e.g. PR26207) where computeExitLimitFromCond is able // to be more aggressive when computing BECount than when computing // ConstantMaxBECount. In these cases it is possible for EL0.ExactNotTaken // and // EL1.ExactNotTaken to match, but for EL0.ConstantMaxNotTaken and // EL1.ConstantMaxNotTaken to not. if (isa(ConstantMaxBECount) && !isa(BECount)) ConstantMaxBECount = getConstant(getUnsignedRangeMax(BECount)); if (isa(SymbolicMaxBECount)) SymbolicMaxBECount = isa(BECount) ? ConstantMaxBECount : BECount; return ExitLimit(BECount, ConstantMaxBECount, SymbolicMaxBECount, false, { &EL0.Predicates, &EL1.Predicates }); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp( const Loop *L, ICmpInst *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit, bool AllowPredicates) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Pred; if (!ExitIfTrue) Pred = ExitCond->getPredicate(); else Pred = ExitCond->getInversePredicate(); const ICmpInst::Predicate OriginalPred = Pred; const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ControlsOnlyExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; auto *ExhaustiveCount = computeExitCountExhaustively(L, ExitCond, ExitIfTrue); if (!isa(ExhaustiveCount)) return ExhaustiveCount; return computeShiftCompareExitLimit(ExitCond->getOperand(0), ExitCond->getOperand(1), L, OriginalPred); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp( const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, bool ControlsOnlyExit, bool AllowPredicates) { // Try to evaluate any dependencies out of the loop. LHS = getSCEVAtScope(LHS, L); RHS = getSCEVAtScope(RHS, L); // At this point, we would like to compute how many iterations of the // loop the predicate will return true for these inputs. if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) { // If there is a loop-invariant, force it into the RHS. std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); } bool ControllingFiniteLoop = ControlsOnlyExit && loopHasNoAbnormalExits(L) && loopIsFiniteByAssumption(L); // Simplify the operands before analyzing them. (void)SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0); // If we have a comparison of a chrec against a constant, try to use value // ranges to answer this query. 
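  // (Illustrative: for an affine AddRec such as {0,+,1}<L> compared against
  // a constant, the number of iterations on which the predicate holds can
  // often be read straight off the recurrence's constant range.)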
if (const SCEVConstant *RHSC = dyn_cast(RHS)) if (const SCEVAddRecExpr *AddRec = dyn_cast(LHS)) if (AddRec->getLoop() == L) { // Form the constant range. ConstantRange CompRange = ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt()); const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa(Ret)) return Ret; } // If this loop must exit based on this condition (or execute undefined // behaviour), and we can prove the test sequence produced must repeat // the same values on self-wrap of the IV, then we can infer that IV // doesn't self wrap because if it did, we'd have an infinite (undefined) // loop. if (ControllingFiniteLoop && isLoopInvariant(RHS, L)) { // TODO: We can peel off any functions which are invertible *in L*. Loop // invariant terms are effectively constants for our purposes here. auto *InnerLHS = LHS; if (auto *ZExt = dyn_cast(LHS)) InnerLHS = ZExt->getOperand(); if (const SCEVAddRecExpr *AR = dyn_cast(InnerLHS)) { auto *StrideC = dyn_cast(AR->getStepRecurrence(*this)); if (!AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() && StrideC && StrideC->getAPInt().isPowerOf2()) { auto Flags = AR->getNoWrapFlags(); Flags = setFlags(Flags, SCEV::FlagNW); SmallVector Operands{AR->operands()}; Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); setNoWrapFlags(const_cast(AR), Flags); } } } switch (Pred) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) if (LHS->getType()->isPointerTy()) { LHS = getLosslessPtrToIntExpr(LHS); if (isa(LHS)) return LHS; } if (RHS->getType()->isPointerTy()) { RHS = getLosslessPtrToIntExpr(RHS); if (isa(RHS)) return RHS; } ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsOnlyExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_EQ: { // while (X == Y) // Convert to: while (X-Y == 0) if (LHS->getType()->isPointerTy()) { LHS = getLosslessPtrToIntExpr(LHS); if (isa(LHS)) return LHS; } if (RHS->getType()->isPointerTy()) { RHS = getLosslessPtrToIntExpr(RHS); if (isa(RHS)) return RHS; } ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_ULE: // Since the loop is finite, an invariant RHS cannot include the boundary // value, otherwise it would loop forever. if (!EnableFiniteLoopControl || !ControllingFiniteLoop || !isLoopInvariant(RHS, L)) break; RHS = getAddExpr(getOne(RHS->getType()), RHS); [[fallthrough]]; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: { // while (X < Y) bool IsSigned = ICmpInst::isSigned(Pred); ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsOnlyExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGE: case ICmpInst::ICMP_UGE: // Since the loop is finite, an invariant RHS cannot include the boundary // value, otherwise it would loop forever. 
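    // Rewriting X >=u Y as X >u Y - 1 (and likewise for the signed case)
    // lets the UGT/SGT handling below compute the exit count.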
if (!EnableFiniteLoopControl || !ControllingFiniteLoop || !isLoopInvariant(RHS, L)) break; RHS = getAddExpr(getMinusOne(RHS->getType()), RHS); [[fallthrough]]; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: { // while (X > Y) bool IsSigned = ICmpInst::isSigned(Pred); ExitLimit EL = howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsOnlyExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } default: break; } return getCouldNotCompute(); } ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, BasicBlock *ExitingBlock, bool ControlsOnlyExit) { assert(!L->contains(ExitingBlock) && "Not an exiting block!"); // Give up if the exit is the default dest of a switch. if (Switch->getDefaultDest() == ExitingBlock) return getCouldNotCompute(); assert(L->contains(Switch->getDefaultDest()) && "Default case must not exit the loop!"); const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L); const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock)); // while (X != Y) --> while (X-Y != 0) ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsOnlyExit); if (EL.hasAnyInfo()) return EL; return getCouldNotCompute(); } static ConstantInt * EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, ScalarEvolution &SE) { const SCEV *InVal = SE.getConstant(C); const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); assert(isa(Val) && "Evaluation of SCEV at constant didn't fold correctly?"); return cast(Val)->getValue(); } ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) { ConstantInt *RHS = dyn_cast(RHSV); if (!RHS) return getCouldNotCompute(); const BasicBlock *Latch = L->getLoopLatch(); if (!Latch) return getCouldNotCompute(); const BasicBlock *Predecessor = L->getLoopPredecessor(); if (!Predecessor) return getCouldNotCompute(); // Return true if V is of the form "LHS `shift_op` ". // Return LHS in OutLHS and shift_opt in OutOpCode. auto MatchPositiveShift = [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) { using namespace PatternMatch; ConstantInt *ShiftAmt; if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) OutOpCode = Instruction::LShr; else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) OutOpCode = Instruction::AShr; else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) OutOpCode = Instruction::Shl; else return false; return ShiftAmt->getValue().isStrictlyPositive(); }; // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in // // loop: // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] // %iv.shifted = lshr i32 %iv, // // Return true on a successful match. Return the corresponding PHI node (%iv // above) in PNOut and the opcode of the shift operation in OpCodeOut. auto MatchShiftRecurrence = [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) { std::optional PostShiftOpCode; { Instruction::BinaryOps OpC; Value *V; // If we encounter a shift instruction, "peel off" the shift operation, // and remember that we did so. Later when we inspect %iv's backedge // value, we will make sure that the backedge value uses the same // operation. // // Note: the peeled shift operation does not have to be the same // instruction as the one feeding into the PHI's backedge value. 
      // We only really care about it being the same *kind* of shift
      // instruction -- that's all that is required for our later inferences
      // to hold.
      if (MatchPositiveShift(LHS, V, OpC)) {
        PostShiftOpCode = OpC;
        LHS = V;
      }
    }

    PNOut = dyn_cast<PHINode>(LHS);
    if (!PNOut || PNOut->getParent() != L->getHeader())
      return false;

    Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
    Value *OpLHS;

    return
        // The backedge value for the PHI node must be a shift by a positive
        // amount
        MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&

        // of the PHI node itself
        OpLHS == PNOut &&

        // and the kind of shift should match the kind of shift we peeled
        // off, if any.
        (!PostShiftOpCode || *PostShiftOpCode == OpCodeOut);
  };

  PHINode *PN;
  Instruction::BinaryOps OpCode;
  if (!MatchShiftRecurrence(LHS, PN, OpCode))
    return getCouldNotCompute();

  const DataLayout &DL = getDataLayout();

  // The key rationale for this optimization is that for some kinds of shift
  // recurrences, the value of the recurrence "stabilizes" to either 0 or -1
  // within a finite number of iterations. If the condition guarding the
  // backedge (in the sense that the backedge is taken if the condition is
  // true) is false for the value the shift recurrence stabilizes to, then we
  // know that the backedge is taken only a finite number of times.
  ConstantInt *StableValue = nullptr;
  switch (OpCode) {
  default:
    llvm_unreachable("Impossible case!");

  case Instruction::AShr: {
    // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
    // bitwidth(K) iterations.
    Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
    KnownBits Known = computeKnownBits(FirstValue, DL, 0, &AC,
                                       Predecessor->getTerminator(), &DT);
    auto *Ty = cast<IntegerType>(RHS->getType());
    if (Known.isNonNegative())
      StableValue = ConstantInt::get(Ty, 0);
    else if (Known.isNegative())
      StableValue = ConstantInt::get(Ty, -1, true);
    else
      return getCouldNotCompute();
    break;
  }
  case Instruction::LShr:
  case Instruction::Shl:
    // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
    // stabilize to 0 in at most bitwidth(K) iterations.
    StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
    break;
  }

  auto *Result =
      ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
  assert(Result->getType()->isIntegerTy(1) &&
         "Otherwise cannot be an operand to a branch instruction");

  if (Result->isZeroValue()) {
    unsigned BitWidth = getTypeSizeInBits(RHS->getType());
    const SCEV *UpperBound =
        getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
    return ExitLimit(getCouldNotCompute(), UpperBound, UpperBound, false);
  }

  return getCouldNotCompute();
}

/// Return true if we can constant fold an instruction of the specified type,
/// assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
  if (isa<BinaryOperator>(I) || isa<CmpInst>(I) || isa<SelectInst>(I) ||
      isa<CastInst>(I) || isa<GetElementPtrInst>(I) || isa<LoadInst>(I) ||
      isa<ExtractValueInst>(I))
    return true;

  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (const Function *F = CI->getCalledFunction())
      return canConstantFoldCallTo(CI, F);
  return false;
}

/// Determine whether this instruction can constant evolve within this loop
/// assuming its operands can all constant evolve.
static bool canConstantEvolve(Instruction *I, const Loop *L) {
  // An instruction outside of the loop can't be derived from a loop PHI.
  if (!L->contains(I))
    return false;

  if (isa<PHINode>(I)) {
    // We don't currently keep track of the control flow needed to evaluate
    // PHIs, so we cannot handle PHIs inside of loops.
    return L->getHeader() == I->getParent();
  }

  // If we won't be able to constant fold this expression even if the operands
  // are constants, bail early.
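  // (CanConstantFold above accepts the common foldable instruction kinds
  // plus calls whose callee canConstantFoldCallTo recognizes; everything
  // else stops the constant-evolution walk here.)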
return CanConstantFold(I); } /// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by /// recursing through each instruction operand until reaching a loop header phi. static PHINode * getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, DenseMap &PHIMap, unsigned Depth) { if (Depth > MaxConstantEvolvingDepth) return nullptr; // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. PHINode *PHI = nullptr; for (Value *Op : UseInst->operands()) { if (isa(Op)) continue; Instruction *OpInst = dyn_cast(Op); if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr; PHINode *P = dyn_cast(OpInst); if (!P) // If this operand is already visited, reuse the prior result. // We may have P != PHI if this is the deepest point at which the // inconsistent paths meet. P = PHIMap.lookup(OpInst); if (!P) { // Recurse and memoize the results, whether a phi is found or not. // This recursive call invalidates pointers into PHIMap. P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1); PHIMap[OpInst] = P; } if (!P) return nullptr; // Not evolving from PHI if (PHI && PHI != P) return nullptr; // Evolving from multiple different PHIs. PHI = P; } // This is a expression evolving from a constant PHI! return PHI; } /// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node /// in the loop that V is derived from. We allow arbitrary operations along the /// way, but the operands of an operation must either be constants or a value /// derived from a constant PHI. If this expression does not fit with these /// constraints, return null. static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { Instruction *I = dyn_cast(V); if (!I || !canConstantEvolve(I, L)) return nullptr; if (PHINode *PN = dyn_cast(I)) return PN; // Record non-constant instructions contained by the loop. DenseMap PHIMap; return getConstantEvolvingPHIOperands(I, L, PHIMap, 0); } /// EvaluateExpression - Given an expression that passes the /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node /// in the loop has the value PHIVal. If we can't fold this expression for some /// reason, return null. static Constant *EvaluateExpression(Value *V, const Loop *L, DenseMap &Vals, const DataLayout &DL, const TargetLibraryInfo *TLI) { // Convenient constant check, but redundant for recursive calls. if (Constant *C = dyn_cast(V)) return C; Instruction *I = dyn_cast(V); if (!I) return nullptr; if (Constant *C = Vals.lookup(I)) return C; // An instruction inside the loop depends on a value outside the loop that we // weren't given a mapping for, or a value such as a call inside the loop. if (!canConstantEvolve(I, L)) return nullptr; // An unmapped PHI can be due to a branch or another loop inside this loop, // or due to this not being the initial iteration through a loop where we // couldn't compute the evolution of this particular PHI last time. 
if (isa(I)) return nullptr; std::vector Operands(I->getNumOperands()); for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Instruction *Operand = dyn_cast(I->getOperand(i)); if (!Operand) { Operands[i] = dyn_cast(I->getOperand(i)); if (!Operands[i]) return nullptr; continue; } Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI); Vals[Operand] = C; if (!C) return nullptr; Operands[i] = C; } return ConstantFoldInstOperands(I, Operands, DL, TLI); } // If every incoming value to PN except the one for BB is a specific Constant, // return that, else return nullptr. static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) { Constant *IncomingVal = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (PN->getIncomingBlock(i) == BB) continue; auto *CurrentVal = dyn_cast(PN->getIncomingValue(i)); if (!CurrentVal) return nullptr; if (IncomingVal != CurrentVal) { if (IncomingVal) return nullptr; IncomingVal = CurrentVal; } } return IncomingVal; } /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is /// in the header of its containing loop, we know the loop executes a /// constant number of times, and the PHI node is just a recurrence /// involving constants, fold it. Constant * ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, const APInt &BEs, const Loop *L) { auto I = ConstantEvolutionLoopExitValue.find(PN); if (I != ConstantEvolutionLoopExitValue.end()) return I->second; if (BEs.ugt(MaxBruteForceIterations)) return ConstantEvolutionLoopExitValue[PN] = nullptr; // Not going to evaluate it. Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; DenseMap CurrentIterVals; BasicBlock *Header = L->getHeader(); assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); BasicBlock *Latch = L->getLoopLatch(); if (!Latch) return nullptr; for (PHINode &PHI : Header->phis()) { if (auto *StartCST = getOtherIncomingValue(&PHI, Latch)) CurrentIterVals[&PHI] = StartCST; } if (!CurrentIterVals.count(PN)) return RetVal = nullptr; Value *BEValue = PN->getIncomingValueForBlock(Latch); // Execute the loop symbolically to determine the exit value. assert(BEs.getActiveBits() < CHAR_BIT * sizeof(unsigned) && "BEs is <= MaxBruteForceIterations which is an 'unsigned'!"); unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; const DataLayout &DL = getDataLayout(); for (; ; ++IterationNum) { if (IterationNum == NumIterations) return RetVal = CurrentIterVals[PN]; // Got exit value! // Compute the value of the PHIs for the next iteration. // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap NextIterVals; Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); if (!NextPHI) return nullptr; // Couldn't evaluate! NextIterVals[PN] = NextPHI; bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; // Also evaluate the other PHI nodes. However, we don't get to stop if we // cease to be able to evaluate one of them or if they stop evolving, // because that doesn't necessarily prevent us from computing PN. SmallVector, 8> PHIsToCompute; for (const auto &I : CurrentIterVals) { PHINode *PHI = dyn_cast(I.first); if (!PHI || PHI == PN || PHI->getParent() != Header) continue; PHIsToCompute.emplace_back(PHI, I.second); } // We use two distinct loops because EvaluateExpression may invalidate any // iterators into CurrentIterVals. 
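      // (For illustration: with header PHIs i starting at 0 and stepping by 1,
      //  and j starting at 5 and doubling across the backedge, CurrentIterVals
      //  advances {i:0, j:5} -> {i:1, j:10} -> {i:2, j:20} -> ... until
      //  IterationNum reaches NumIterations or the values stop changing.)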
for (const auto &I : PHIsToCompute) { PHINode *PHI = I.first; Constant *&NextPHI = NextIterVals[PHI]; if (!NextPHI) { // Not already computed. Value *BEValue = PHI->getIncomingValueForBlock(Latch); NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); } if (NextPHI != I.second) StoppedEvolving = false; } // If all entries in CurrentIterVals == NextIterVals then we can stop // iterating, the loop can't continue to change. if (StoppedEvolving) return RetVal = CurrentIterVals[PN]; CurrentIterVals.swap(NextIterVals); } } const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); if (!PN) return getCouldNotCompute(); // If the loop is canonicalized, the PHI will have exactly two entries. // That's the only form we support here. if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); DenseMap CurrentIterVals; BasicBlock *Header = L->getHeader(); assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Should follow from NumIncomingValues == 2!"); for (PHINode &PHI : Header->phis()) { if (auto *StartCST = getOtherIncomingValue(&PHI, Latch)) CurrentIterVals[&PHI] = StartCST; } if (!CurrentIterVals.count(PN)) return getCouldNotCompute(); // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of // "ExitWhen". unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. const DataLayout &DL = getDataLayout(); for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ auto *CondVal = dyn_cast_or_null( EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); if (CondVal->getValue() == uint64_t(ExitWhen)) { ++NumBruteForceTripCountsComputed; return getConstant(Type::getInt32Ty(getContext()), IterationNum); } // Update all the PHI nodes for the next iteration. DenseMap NextIterVals; // Create a list of which PHIs we need to compute. We want to do this before // calling EvaluateExpression on them because that may invalidate iterators // into CurrentIterVals. SmallVector PHIsToCompute; for (const auto &I : CurrentIterVals) { PHINode *PHI = dyn_cast(I.first); if (!PHI || PHI->getParent() != Header) continue; PHIsToCompute.push_back(PHI); } for (PHINode *PHI : PHIsToCompute) { Constant *&NextPHI = NextIterVals[PHI]; if (NextPHI) continue; // Already computed! Value *BEValue = PHI->getIncomingValueForBlock(Latch); NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); } CurrentIterVals.swap(NextIterVals); } // Too many iterations were needed to evaluate. return getCouldNotCompute(); } const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { SmallVector, 2> &Values = ValuesAtScopes[V]; // Check to see if we've folded this expression at this loop before. for (auto &LS : Values) if (LS.first == L) return LS.second ? LS.second : V; Values.emplace_back(L, nullptr); // Otherwise compute it. const SCEV *C = computeSCEVAtScope(V, L); for (auto &LS : reverse(ValuesAtScopes[V])) if (LS.first == L) { LS.second = C; if (!isa(C)) ValuesAtScopesUsers[C].push_back({L, V}); break; } return C; } /// This builds up a Constant using the ConstantExpr interface. That way, we /// will return Constants for objects which aren't represented by a /// SCEVConstant, because SCEVConstant is restricted to ConstantInt. 
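/// For example, a SCEVUnknown wrapping the address of a global variable, or a
/// ptrtoint of such an address, can still be turned into a Constant here.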
/// Returns NULL if the SCEV isn't representable as a Constant. static Constant *BuildConstantFromSCEV(const SCEV *V) { switch (V->getSCEVType()) { case scCouldNotCompute: case scAddRecExpr: case scVScale: return nullptr; case scConstant: return cast(V)->getValue(); case scUnknown: return dyn_cast(cast(V)->getValue()); case scPtrToInt: { const SCEVPtrToIntExpr *P2I = cast(V); if (Constant *CastOp = BuildConstantFromSCEV(P2I->getOperand())) return ConstantExpr::getPtrToInt(CastOp, P2I->getType()); return nullptr; } case scTruncate: { const SCEVTruncateExpr *ST = cast(V); if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand())) return ConstantExpr::getTrunc(CastOp, ST->getType()); return nullptr; } case scAddExpr: { const SCEVAddExpr *SA = cast(V); Constant *C = nullptr; for (const SCEV *Op : SA->operands()) { Constant *OpC = BuildConstantFromSCEV(Op); if (!OpC) return nullptr; if (!C) { C = OpC; continue; } assert(!C->getType()->isPointerTy() && "Can only have one pointer, and it must be last"); if (OpC->getType()->isPointerTy()) { // The offsets have been converted to bytes. We can add bytes using // an i8 GEP. C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()), OpC, C); } else { C = ConstantExpr::getAdd(C, OpC); } } return C; } case scMulExpr: case scSignExtend: case scZeroExtend: case scUDivExpr: case scSMaxExpr: case scUMaxExpr: case scSMinExpr: case scUMinExpr: case scSequentialUMinExpr: return nullptr; } llvm_unreachable("Unknown SCEV kind!"); } const SCEV * ScalarEvolution::getWithOperands(const SCEV *S, SmallVectorImpl &NewOps) { switch (S->getSCEVType()) { case scTruncate: case scZeroExtend: case scSignExtend: case scPtrToInt: return getCastExpr(S->getSCEVType(), NewOps[0], S->getType()); case scAddRecExpr: { auto *AddRec = cast(S); return getAddRecExpr(NewOps, AddRec->getLoop(), AddRec->getNoWrapFlags()); } case scAddExpr: return getAddExpr(NewOps, cast(S)->getNoWrapFlags()); case scMulExpr: return getMulExpr(NewOps, cast(S)->getNoWrapFlags()); case scUDivExpr: return getUDivExpr(NewOps[0], NewOps[1]); case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: return getMinMaxExpr(S->getSCEVType(), NewOps); case scSequentialUMinExpr: return getSequentialMinMaxExpr(S->getSCEVType(), NewOps); case scConstant: case scVScale: case scUnknown: return S; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { switch (V->getSCEVType()) { case scConstant: case scVScale: return V; case scAddRecExpr: { // If this is a loop recurrence for a loop that does not contain L, then we // are dealing with the final value computed by the loop. const SCEVAddRecExpr *AddRec = cast(V); // First, attempt to evaluate each operand. // Avoid performing the look-up in the common case where the specified // expression has no loop-variant portions. for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L); if (OpAtScope == AddRec->getOperand(i)) continue; // Okay, at least one of these operands is loop variant but might be // foldable. Build a new instance of the folded commutative expression. 
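      // (For instance, when the start of this addrec is itself an addrec of an
      //  enclosing loop, evaluating at a scope outside that enclosing loop
      //  folds the start down to the enclosing loop's exit value.)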
SmallVector NewOps; NewOps.reserve(AddRec->getNumOperands()); append_range(NewOps, AddRec->operands().take_front(i)); NewOps.push_back(OpAtScope); for (++i; i != e; ++i) NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); const SCEV *FoldedRec = getAddRecExpr( NewOps, AddRec->getLoop(), AddRec->getNoWrapFlags(SCEV::FlagNW)); AddRec = dyn_cast(FoldedRec); // The addrec may be folded to a nonrecurrence, for example, if the // induction variable is multiplied by zero after constant folding. Go // ahead and return the folded value. if (!AddRec) return FoldedRec; break; } // If the scope is outside the addrec's loop, evaluate it by using the // loop exit value of the addrec. if (!AddRec->getLoop()->contains(L)) { // To evaluate this recurrence, we need to know how many times the AddRec // loop iterates. Compute this now. const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); if (BackedgeTakenCount == getCouldNotCompute()) return AddRec; // Then, evaluate the AddRec. return AddRec->evaluateAtIteration(BackedgeTakenCount, *this); } return AddRec; } case scTruncate: case scZeroExtend: case scSignExtend: case scPtrToInt: case scAddExpr: case scMulExpr: case scUDivExpr: case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: case scSequentialUMinExpr: { ArrayRef Ops = V->operands(); // Avoid performing the look-up in the common case where the specified // expression has no loop-variant portions. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { const SCEV *OpAtScope = getSCEVAtScope(Ops[i], L); if (OpAtScope != Ops[i]) { // Okay, at least one of these operands is loop variant but might be // foldable. Build a new instance of the folded commutative expression. SmallVector NewOps; NewOps.reserve(Ops.size()); append_range(NewOps, Ops.take_front(i)); NewOps.push_back(OpAtScope); for (++i; i != e; ++i) { OpAtScope = getSCEVAtScope(Ops[i], L); NewOps.push_back(OpAtScope); } return getWithOperands(V, NewOps); } } // If we got here, all operands are loop invariant. return V; } case scUnknown: { // If this instruction is evolved from a constant-evolving PHI, compute the // exit value from the loop without using SCEVs. const SCEVUnknown *SU = cast(V); Instruction *I = dyn_cast(SU->getValue()); if (!I) return V; // This is some other type of SCEVUnknown, just return it. if (PHINode *PN = dyn_cast(I)) { const Loop *CurrLoop = this->LI[I->getParent()]; // Looking for loop exit value. if (CurrLoop && CurrLoop->getParentLoop() == L && PN->getParent() == CurrLoop->getHeader()) { // Okay, there is no closed form solution for the PHI node. Check // to see if the loop that contains it has a known backedge-taken // count. If so, we may be able to force computation of the exit // value. const SCEV *BackedgeTakenCount = getBackedgeTakenCount(CurrLoop); // This trivial case can show up in some degenerate cases where // the incoming IR has not yet been fully simplified. if (BackedgeTakenCount->isZero()) { Value *InitValue = nullptr; bool MultipleInitValues = false; for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { if (!CurrLoop->contains(PN->getIncomingBlock(i))) { if (!InitValue) InitValue = PN->getIncomingValue(i); else if (InitValue != PN->getIncomingValue(i)) { MultipleInitValues = true; break; } } } if (!MultipleInitValues && InitValue) return getSCEV(InitValue); } // Do we have a loop invariant value flowing around the backedge // for a loop which must execute the backedge? 
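            // (e.g. a PHI of the form
            //    %p = phi i32 [ %init, %preheader ], [ %inv, %latch ]
            //  where %inv is defined outside the loop: once the backedge is
            //  known to execute at least once, the PHI's exit value is just
            //  %inv.)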
if (!isa(BackedgeTakenCount) && isKnownNonZero(BackedgeTakenCount) && PN->getNumIncomingValues() == 2) { unsigned InLoopPred = CurrLoop->contains(PN->getIncomingBlock(0)) ? 0 : 1; Value *BackedgeVal = PN->getIncomingValue(InLoopPred); if (CurrLoop->isLoopInvariant(BackedgeVal)) return getSCEV(BackedgeVal); } if (auto *BTCC = dyn_cast(BackedgeTakenCount)) { // Okay, we know how many times the containing loop executes. If // this is a constant evolving PHI node, get the final value at // the specified iteration number. Constant *RV = getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), CurrLoop); if (RV) return getSCEV(RV); } } } // Okay, this is an expression that we cannot symbolically evaluate // into a SCEV. Check to see if it's possible to symbolically evaluate // the arguments into constants, and if so, try to constant propagate the // result. This is particularly useful for computing loop exit values. if (!CanConstantFold(I)) return V; // This is some other type of SCEVUnknown, just return it. SmallVector Operands; Operands.reserve(I->getNumOperands()); bool MadeImprovement = false; for (Value *Op : I->operands()) { if (Constant *C = dyn_cast(Op)) { Operands.push_back(C); continue; } // If any of the operands is non-constant and if they are // non-integer and non-pointer, don't even try to analyze them // with scev techniques. if (!isSCEVable(Op->getType())) return V; const SCEV *OrigV = getSCEV(Op); const SCEV *OpV = getSCEVAtScope(OrigV, L); MadeImprovement |= OrigV != OpV; Constant *C = BuildConstantFromSCEV(OpV); if (!C) return V; assert(C->getType() == Op->getType() && "Type mismatch"); Operands.push_back(C); } // Check to see if getSCEVAtScope actually made an improvement. if (!MadeImprovement) return V; // This is some other type of SCEVUnknown, just return it. Constant *C = nullptr; const DataLayout &DL = getDataLayout(); C = ConstantFoldInstOperands(I, Operands, DL, &TLI); if (!C) return V; return getSCEV(C); } case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV type!"); } const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { return getSCEVAtScope(getSCEV(V), L); } const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const { if (const SCEVZeroExtendExpr *ZExt = dyn_cast(S)) return stripInjectiveFunctions(ZExt->getOperand()); if (const SCEVSignExtendExpr *SExt = dyn_cast(S)) return stripInjectiveFunctions(SExt->getOperand()); return S; } /// Finds the minimum unsigned root of the following equation: /// /// A * X = B (mod N) /// /// where N = 2^BW and BW is the common bit width of A and B. The signedness of /// A and B isn't important. /// /// If the equation does not have a solution, SCEVCouldNotCompute is returned. static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B, ScalarEvolution &SE) { uint32_t BW = A.getBitWidth(); assert(BW == SE.getTypeSizeInBits(B->getType())); assert(A != 0 && "A must be non-zero."); // 1. D = gcd(A, N) // // The gcd of A and N may have only one prime factor: 2. The number of // trailing zeros in A is its multiplicity uint32_t Mult2 = A.countr_zero(); // D = 2^Mult2 // 2. Check if B is divisible by D. // // B is divisible by D if and only if the multiplicity of prime factor 2 for B // is not less than multiplicity of this prime factor for D. if (SE.getMinTrailingZeros(B) < Mult2) return SE.getCouldNotCompute(); // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic // modulo (N / D). 
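  //
  // Running the whole scheme on a small example (purely illustrative):
  // A = 6, B = 4, BW = 4, so N = 16. Step 1 gives Mult2 = 1 and D = 2; step 2
  // passes because B has two trailing zeros; step 3 finds the inverse of
  // A / D = 3 modulo N / D = 8, which is 3 (3 * 3 == 9 == 1 (mod 8)). The
  // answer computed below is then (3 * 4 mod 16) / 2 == 6, and indeed
  // 6 * 6 == 36 == 4 (mod 16).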
// // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent // (N / D) in general. The inverse itself always fits into BW bits, though, // so we immediately truncate it. APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D APInt Mod(BW + 1, 0); Mod.setBit(BW - Mult2); // Mod = N / D APInt I = AD.multiplicativeInverse(Mod).trunc(BW); // 4. Compute the minimum unsigned root of the equation: // I * (B / D) mod (N / D) // To simplify the computation, we factor out the divide by D: // (I * B mod N) / D const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2)); return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D); } /// For a given quadratic addrec, generate coefficients of the corresponding /// quadratic equation, multiplied by a common value to ensure that they are /// integers. /// The returned value is a tuple { A, B, C, M, BitWidth }, where /// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C /// were multiplied by, and BitWidth is the bit width of the original addrec /// coefficients. /// This function returns std::nullopt if the addrec coefficients are not /// compile- time constants. static std::optional> GetQuadraticEquation(const SCEVAddRecExpr *AddRec) { assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); const SCEVConstant *LC = dyn_cast(AddRec->getOperand(0)); const SCEVConstant *MC = dyn_cast(AddRec->getOperand(1)); const SCEVConstant *NC = dyn_cast(AddRec->getOperand(2)); LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: " << *AddRec << '\n'); // We currently can only solve this if the coefficients are constants. if (!LC || !MC || !NC) { LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n"); return std::nullopt; } APInt L = LC->getAPInt(); APInt M = MC->getAPInt(); APInt N = NC->getAPInt(); assert(!N.isZero() && "This is not a quadratic addrec"); unsigned BitWidth = LC->getAPInt().getBitWidth(); unsigned NewWidth = BitWidth + 1; LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: " << BitWidth << '\n'); // The sign-extension (as opposed to a zero-extension) here matches the // extension used in SolveQuadraticEquationWrap (with the same motivation). N = N.sext(NewWidth); M = M.sext(NewWidth); L = L.sext(NewWidth); // The increments are M, M+N, M+2N, ..., so the accumulated values are // L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is, // L+M, L+2M+N, L+3M+3N, ... // After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N. // // The equation Acc = 0 is then // L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0. // In a quadratic form it becomes: // N n^2 + (2M-N) n + 2L = 0. APInt A = N; APInt B = 2 * M - A; APInt C = 2 * L; APInt T = APInt(NewWidth, 2); LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B << "x + " << C << ", coeff bw: " << NewWidth << ", multiplied by " << T << '\n'); return std::make_tuple(A, B, C, T, BitWidth); } /// Helper function to compare optional APInts: /// (a) if X and Y both exist, return min(X, Y), /// (b) if neither X nor Y exist, return std::nullopt, /// (c) if exactly one of X and Y exists, return that value. static std::optional MinOptional(std::optional X, std::optional Y) { if (X && Y) { unsigned W = std::max(X->getBitWidth(), Y->getBitWidth()); APInt XW = X->sext(W); APInt YW = Y->sext(W); return XW.slt(YW) ? *X : *Y; } if (!X && !Y) return std::nullopt; return X ? *X : *Y; } /// Helper function to truncate an optional APInt to a given BitWidth. 
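/// (E.g. a 33-bit solution holding the value 5 for an addrec with 32-bit
/// coefficients comes back as a 32-bit 5, while a value that really needs all
/// 33 bits is returned unchanged.)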
/// When solving addrec-related equations, it is preferable to return a value /// that has the same bit width as the original addrec's coefficients. If the /// solution fits in the original bit width, truncate it (except for i1). /// Returning a value of a different bit width may inhibit some optimizations. /// /// In general, a solution to a quadratic equation generated from an addrec /// may require BW+1 bits, where BW is the bit width of the addrec's /// coefficients. The reason is that the coefficients of the quadratic /// equation are BW+1 bits wide (to avoid truncation when converting from /// the addrec to the equation). static std::optional TruncIfPossible(std::optional X, unsigned BitWidth) { if (!X) return std::nullopt; unsigned W = X->getBitWidth(); if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth)) return X->trunc(BitWidth); return X; } /// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n /// iterations. The values L, M, N are assumed to be signed, and they /// should all have the same bit widths. /// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW, /// where BW is the bit width of the addrec's coefficients. /// If the calculated value is a BW-bit integer (for BW > 1), it will be /// returned as such, otherwise the bit width of the returned value may /// be greater than BW. /// /// This function returns std::nullopt if /// (a) the addrec coefficients are not constant, or /// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases /// like x^2 = 5, no integer solutions exist, in other cases an integer /// solution may exist, but SolveQuadraticEquationWrap may fail to find it. static std::optional SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { APInt A, B, C, M; unsigned BitWidth; auto T = GetQuadraticEquation(AddRec); if (!T) return std::nullopt; std::tie(A, B, C, M, BitWidth) = *T; LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n"); std::optional X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth + 1); if (!X) return std::nullopt; ConstantInt *CX = ConstantInt::get(SE.getContext(), *X); ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE); if (!V->isZero()) return std::nullopt; return TruncIfPossible(X, BitWidth); } /// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n /// iterations. The values M, N are assumed to be signed, and they /// should all have the same bit widths. /// Find the least n such that c(n) does not belong to the given range, /// while c(n-1) does. /// /// This function returns std::nullopt if /// (a) the addrec coefficients are not constant, or /// (b) SolveQuadraticEquationWrap was unable to find a solution for the /// bounds of the range. static std::optional SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec, const ConstantRange &Range, ScalarEvolution &SE) { assert(AddRec->getOperand(0)->isZero() && "Starting value of addrec should be 0"); LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range " << Range << ", addrec " << *AddRec << '\n'); // This case is handled in getNumIterationsInRange. Here we can assume that // we start in the range. assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) && "Addrec's initial value should be in range"); APInt A, B, C, M; unsigned BitWidth; auto T = GetQuadraticEquation(AddRec); if (!T) return std::nullopt; // Be careful about the return value: there can be two reasons for not // returning an actual number. 
First, if no solutions to the equations // were found, and second, if the solutions don't leave the given range. // The first case means that the actual solution is "unknown", the second // means that it's known, but not valid. If the solution is unknown, we // cannot make any conclusions. // Return a pair: the optional solution and a flag indicating if the // solution was found. auto SolveForBoundary = [&](APInt Bound) -> std::pair, bool> { // Solve for signed overflow and unsigned overflow, pick the lower // solution. LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary " << Bound << " (before multiplying by " << M << ")\n"); Bound *= M; // The quadratic equation multiplier. std::optional SO; if (BitWidth > 1) { LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for " "signed overflow\n"); SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth); } LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for " "unsigned overflow\n"); std::optional UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth + 1); auto LeavesRange = [&] (const APInt &X) { ConstantInt *C0 = ConstantInt::get(SE.getContext(), X); ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE); if (Range.contains(V0->getValue())) return false; // X should be at least 1, so X-1 is non-negative. ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1); ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE); if (Range.contains(V1->getValue())) return true; return false; }; // If SolveQuadraticEquationWrap returns std::nullopt, it means that there // can be a solution, but the function failed to find it. We cannot treat it // as "no solution". if (!SO || !UO) return {std::nullopt, false}; // Check the smaller value first to see if it leaves the range. // At this point, both SO and UO must have values. std::optional Min = MinOptional(SO, UO); if (LeavesRange(*Min)) return { Min, true }; std::optional Max = Min == SO ? UO : SO; if (LeavesRange(*Max)) return { Max, true }; // Solutions were found, but were eliminated, hence the "true". return {std::nullopt, true}; }; std::tie(A, B, C, M, BitWidth) = *T; // Lower bound is inclusive, subtract 1 to represent the exiting value. APInt Lower = Range.getLower().sext(A.getBitWidth()) - 1; APInt Upper = Range.getUpper().sext(A.getBitWidth()); auto SL = SolveForBoundary(Lower); auto SU = SolveForBoundary(Upper); // If any of the solutions was unknown, no meaninigful conclusions can // be made. if (!SL.second || !SU.second) return std::nullopt; // Claim: The correct solution is not some value between Min and Max. // // Justification: Assuming that Min and Max are different values, one of // them is when the first signed overflow happens, the other is when the // first unsigned overflow happens. Crossing the range boundary is only // possible via an overflow (treating 0 as a special case of it, modeling // an overflow as crossing k*2^W for some k). // // The interesting case here is when Min was eliminated as an invalid // solution, but Max was not. The argument is that if there was another // overflow between Min and Max, it would also have been eliminated if // it was considered. // // For a given boundary, it is possible to have two overflows of the same // type (signed/unsigned) without having the other type in between: this // can happen when the vertex of the parabola is between the iterations // corresponding to the overflows. This is only possible when the two // overflows cross k*2^W for the same k. 
In such case, if the second one // left the range (and was the first one to do so), the first overflow // would have to enter the range, which would mean that either we had left // the range before or that we started outside of it. Both of these cases // are contradictions. // // Claim: In the case where SolveForBoundary returns std::nullopt, the correct // solution is not some value between the Max for this boundary and the // Min of the other boundary. // // Justification: Assume that we had such Max_A and Min_B corresponding // to range boundaries A and B and such that Max_A < Min_B. If there was // a solution between Max_A and Min_B, it would have to be caused by an // overflow corresponding to either A or B. It cannot correspond to B, // since Min_B is the first occurrence of such an overflow. If it // corresponded to A, it would have to be either a signed or an unsigned // overflow that is larger than both eliminated overflows for A. But // between the eliminated overflows and this overflow, the values would // cover the entire value space, thus crossing the other boundary, which // is a contradiction. return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth); } ScalarEvolution::ExitLimit ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsOnlyExit, bool AllowPredicates) { // This is only used for loops with a "x != y" exit test. The exit condition // is now expressed as a single expression, V = x-y. So the exit test is // effectively V != 0. We know and take advantage of the fact that this // expression only being used in a comparison by zero context. SmallPtrSet Predicates; // If the value is a constant if (const SCEVConstant *C = dyn_cast(V)) { // If the value is already zero, the branch will execute zero times. if (C->getValue()->isZero()) return C; return getCouldNotCompute(); // Otherwise it will loop infinitely. } const SCEVAddRecExpr *AddRec = dyn_cast(stripInjectiveFunctions(V)); if (!AddRec && AllowPredicates) // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the // algorithm below. AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates); if (!AddRec || AddRec->getLoop() != L) return getCouldNotCompute(); // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of // the quadratic equation to solve it. if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { // We can only use this value if the chrec ends up with an exact zero // value at this index. When solving for "X*X != 5", for example, we // should not accept a root of 2. if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) { const auto *R = cast(getConstant(*S)); return ExitLimit(R, R, R, false, Predicates); } return getCouldNotCompute(); } // Otherwise we can only handle this if it is affine. if (!AddRec->isAffine()) return getCouldNotCompute(); // If this is an affine expression, the execution count of this branch is // the minimum unsigned root of the following equation: // // Start + Step*N = 0 (mod 2^BW) // // equivalent to: // // Step*N = -Start (mod 2^BW) // // where BW is the common bit width of Start and Step. // Get the initial value for the loop. const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); // For now we handle only constant steps. // // TODO: Handle a nonconstant Step given AddRec. 
If the // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast(Step); if (!StepC || StepC->getValue()->isZero()) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): // N = -Start/Step (as unsigned) // For negative steps (counting down to zero): // N = Start/-Step // First compute the unsigned distance from zero in the direction of Step. bool CountDown = StepC->getAPInt().isNegative(); const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start); // Handle unitary steps, which cannot wraparound. // 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) { APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L)); MaxBECount = APIntOps::umin(MaxBECount, getUnsignedRangeMax(Distance)); // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, // we end up with a loop whose backedge-taken count is n - 1. Detect this // case, and see if we can improve the bound. // // Explicitly handling this here is necessary because getUnsignedRange // isn't context-sensitive; it doesn't know that we only care about the // range inside the loop. const SCEV *Zero = getZero(Distance->getType()); const SCEV *One = getOne(Distance->getType()); const SCEV *DistancePlusOne = getAddExpr(Distance, One); if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) { // If Distance + 1 doesn't overflow, we can compute the maximum distance // as "unsigned_max(Distance + 1) - 1". ConstantRange CR = getUnsignedRange(DistancePlusOne); MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1); } return ExitLimit(Distance, getConstant(MaxBECount), Distance, false, Predicates); } // If the condition controls loop exit (the loop exits only if the expression // is true) and the addition is no-wrap we can use unsigned divide to // compute the backedge count. In this case, the step may not divide the // distance, but we don't care because if the condition is "missed" the loop // will have undefined behavior due to wrapping. if (ControlsOnlyExit && AddRec->hasNoSelfWrap() && loopHasNoAbnormalExits(AddRec->getLoop())) { const SCEV *Exact = getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); const SCEV *ConstantMax = getCouldNotCompute(); if (Exact != getCouldNotCompute()) { APInt MaxInt = getUnsignedRangeMax(applyLoopGuards(Exact, L)); ConstantMax = getConstant(APIntOps::umin(MaxInt, getUnsignedRangeMax(Exact))); } const SCEV *SymbolicMax = isa(Exact) ? ConstantMax : Exact; return ExitLimit(Exact, ConstantMax, SymbolicMax, false, Predicates); } // Solve the general equation. const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(), getNegativeSCEV(Start), *this); const SCEV *M = E; if (E != getCouldNotCompute()) { APInt MaxWithGuards = getUnsignedRangeMax(applyLoopGuards(E, L)); M = getConstant(APIntOps::umin(MaxWithGuards, getUnsignedRangeMax(E))); } auto *S = isa(E) ? M : E; return ExitLimit(E, M, S, false, Predicates); } ScalarEvolution::ExitLimit ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) { // Loops that look like: while (X == 0) are very strange indeed. We don't // handle them yet except for the trivial case. This could be expanded in the // future as needed. 
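  // (e.g. 'while (7 == 0)' exits before ever taking the backedge, which is the
  //  trivial case handled below.)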
// If the value is a constant, check to see if it is known to be non-zero // already. If so, the backedge will execute zero times. if (const SCEVConstant *C = dyn_cast(V)) { if (!C->getValue()->isZero()) return getZero(C->getType()); return getCouldNotCompute(); // Otherwise it will loop infinitely. } // We could implement others, but I really doubt anyone writes loops like // this, and if they did, they would already be constant folded. return getCouldNotCompute(); } std::pair ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) const { // If the block has a unique predecessor, then there is no path from the // predecessor to the block that does not go through the direct edge // from the predecessor to the block. if (const BasicBlock *Pred = BB->getSinglePredecessor()) return {Pred, BB}; // A loop's header is defined to be a block that dominates the loop. // If the header has a unique predecessor outside the loop, it must be // a block that has exactly one successor that can reach the loop. if (const Loop *L = LI.getLoopFor(BB)) return {L->getLoopPredecessor(), L->getHeader()}; return {nullptr, nullptr}; } /// SCEV structural equivalence is usually sufficient for testing whether two /// expressions are equal, however for the purposes of looking for a condition /// guarding a loop, it can be useful to be a little more general, since a /// front-end may have replicated the controlling expression. static bool HasSameValue(const SCEV *A, const SCEV *B) { // Quick check to see if they are the same SCEV. if (A == B) return true; auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) { // Not all instructions that are "identical" compute the same value. For // instance, two distinct alloca instructions allocating the same type are // identical and do not read memory; but compute distinct values. return A->isIdenticalTo(B) && (isa(A) || isa(A)); }; // Otherwise, if they're both SCEVUnknown, it's possible that they hold // two different instructions with the same value. Check for this case. if (const SCEVUnknown *AU = dyn_cast(A)) if (const SCEVUnknown *BU = dyn_cast(B)) if (const Instruction *AI = dyn_cast(AU->getValue())) if (const Instruction *BI = dyn_cast(BU->getValue())) if (ComputesEqualValues(AI, BI)) return true; // Otherwise assume they may have a different value. return false; } bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS, const SCEV *&RHS, unsigned Depth) { bool Changed = false; // Simplifies ICMP to trivial true or false by turning it into '0 == 0' or // '0 != 0'. auto TrivialCase = [&](bool TriviallyTrue) { LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); Pred = TriviallyTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; return true; }; // If we hit the max recursion limit bail out. if (Depth >= 3) return false; // Canonicalize a constant to the right side. if (const SCEVConstant *LHSC = dyn_cast(LHS)) { // Check for both operands constant. if (const SCEVConstant *RHSC = dyn_cast(RHS)) { if (ConstantExpr::getICmp(Pred, LHSC->getValue(), RHSC->getValue())->isNullValue()) return TrivialCase(false); return TrivialCase(true); } // Otherwise swap the operands to put the constant on the right. std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); Changed = true; } // If we're comparing an addrec with a value which is loop-invariant in the // addrec's loop, put the addrec on the left. Also make a dominance check, // as both operands could be addrecs loop-invariant in each other's loop. 
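  // (e.g. '%n <s {0,+,1}<L>', with %n invariant in L and available at L's
  //  header, is rewritten below as '{0,+,1}<L> >s %n'.)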
if (const SCEVAddRecExpr *AR = dyn_cast(RHS)) { const Loop *L = AR->getLoop(); if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) { std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); Changed = true; } } // If there's a constant operand, canonicalize comparisons with boundary // cases, and canonicalize *-or-equal comparisons to regular comparisons. if (const SCEVConstant *RC = dyn_cast(RHS)) { const APInt &RA = RC->getAPInt(); bool SimplifiedByConstantRange = false; if (!ICmpInst::isEquality(Pred)) { ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA); if (ExactCR.isFullSet()) return TrivialCase(true); if (ExactCR.isEmptySet()) return TrivialCase(false); APInt NewRHS; CmpInst::Predicate NewPred; if (ExactCR.getEquivalentICmp(NewPred, NewRHS) && ICmpInst::isEquality(NewPred)) { // We were able to convert an inequality to an equality. Pred = NewPred; RHS = getConstant(NewRHS); Changed = SimplifiedByConstantRange = true; } } if (!SimplifiedByConstantRange) { switch (Pred) { default: break; case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_NE: // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b. if (!RA) if (const SCEVAddExpr *AE = dyn_cast(LHS)) if (const SCEVMulExpr *ME = dyn_cast(AE->getOperand(0))) if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 && ME->getOperand(0)->isAllOnesValue()) { RHS = AE->getOperand(1); LHS = ME->getOperand(1); Changed = true; } break; // The "Should have been caught earlier!" messages refer to the fact // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above // should have fired on the corresponding cases, and canonicalized the // check to trivial case. case ICmpInst::ICMP_UGE: assert(!RA.isMinValue() && "Should have been caught earlier!"); Pred = ICmpInst::ICMP_UGT; RHS = getConstant(RA - 1); Changed = true; break; case ICmpInst::ICMP_ULE: assert(!RA.isMaxValue() && "Should have been caught earlier!"); Pred = ICmpInst::ICMP_ULT; RHS = getConstant(RA + 1); Changed = true; break; case ICmpInst::ICMP_SGE: assert(!RA.isMinSignedValue() && "Should have been caught earlier!"); Pred = ICmpInst::ICMP_SGT; RHS = getConstant(RA - 1); Changed = true; break; case ICmpInst::ICMP_SLE: assert(!RA.isMaxSignedValue() && "Should have been caught earlier!"); Pred = ICmpInst::ICMP_SLT; RHS = getConstant(RA + 1); Changed = true; break; } } } // Check for obvious equality. if (HasSameValue(LHS, RHS)) { if (ICmpInst::isTrueWhenEqual(Pred)) return TrivialCase(true); if (ICmpInst::isFalseWhenEqual(Pred)) return TrivialCase(false); } // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by // adding or subtracting 1 from one of the operands. 
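  // (e.g. 'X s<= Y' becomes 'X s< Y + 1' when Y is known not to be SINT_MAX;
  //  failing that, it becomes 'X + (-1) s< Y' when X is known not to be
  //  SINT_MIN.)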
switch (Pred) { case ICmpInst::ICMP_SLE: if (!getSignedRangeMax(RHS).isMaxSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; Changed = true; } else if (!getSignedRangeMin(LHS).isMinSignedValue()) { LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; Changed = true; } break; case ICmpInst::ICMP_SGE: if (!getSignedRangeMin(RHS).isMinSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; Changed = true; } else if (!getSignedRangeMax(LHS).isMaxSignedValue()) { LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; Changed = true; } break; case ICmpInst::ICMP_ULE: if (!getUnsignedRangeMax(RHS).isMaxValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, SCEV::FlagNUW); Pred = ICmpInst::ICMP_ULT; Changed = true; } else if (!getUnsignedRangeMin(LHS).isMinValue()) { LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS); Pred = ICmpInst::ICMP_ULT; Changed = true; } break; case ICmpInst::ICMP_UGE: if (!getUnsignedRangeMin(RHS).isMinValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS); Pred = ICmpInst::ICMP_UGT; Changed = true; } else if (!getUnsignedRangeMax(LHS).isMaxValue()) { LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, SCEV::FlagNUW); Pred = ICmpInst::ICMP_UGT; Changed = true; } break; default: break; } // TODO: More simplifications are possible here. // Recursively simplify until we either hit a recursion limit or nothing // changes. if (Changed) return SimplifyICmpOperands(Pred, LHS, RHS, Depth + 1); return Changed; } bool ScalarEvolution::isKnownNegative(const SCEV *S) { return getSignedRangeMax(S).isNegative(); } bool ScalarEvolution::isKnownPositive(const SCEV *S) { return getSignedRangeMin(S).isStrictlyPositive(); } bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { return !getSignedRangeMin(S).isNegative(); } bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { return !getSignedRangeMax(S).isStrictlyPositive(); } bool ScalarEvolution::isKnownNonZero(const SCEV *S) { // Query push down for cases where the unsigned range is // less than sufficient. if (const auto *SExt = dyn_cast(S)) return isKnownNonZero(SExt->getOperand(0)); return getUnsignedRangeMin(S) != 0; } std::pair ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) { // Compute SCEV on entry of loop L. const SCEV *Start = SCEVInitRewriter::rewrite(S, L, *this); if (Start == getCouldNotCompute()) return { Start, Start }; // Compute post increment SCEV for loop L. const SCEV *PostInc = SCEVPostIncRewriter::rewrite(S, L, *this); assert(PostInc != getCouldNotCompute() && "Unexpected could not compute"); return { Start, PostInc }; } bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // First collect all loops. SmallPtrSet LoopsUsed; getUsedLoops(LHS, LoopsUsed); getUsedLoops(RHS, LoopsUsed); if (LoopsUsed.empty()) return false; // Domination relationship must be a linear order on collected loops. 
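  // Given that linear order, MDL below ends up being the innermost of the used
  // loops, i.e. the one whose header is dominated by the headers of all the
  // others.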
#ifndef NDEBUG for (const auto *L1 : LoopsUsed) for (const auto *L2 : LoopsUsed) assert((DT.dominates(L1->getHeader(), L2->getHeader()) || DT.dominates(L2->getHeader(), L1->getHeader())) && "Domination relationship is not a linear order"); #endif const Loop *MDL = *std::max_element(LoopsUsed.begin(), LoopsUsed.end(), [&](const Loop *L1, const Loop *L2) { return DT.properlyDominates(L1->getHeader(), L2->getHeader()); }); // Get init and post increment value for LHS. auto SplitLHS = SplitIntoInitAndPostInc(MDL, LHS); // if LHS contains unknown non-invariant SCEV then bail out. if (SplitLHS.first == getCouldNotCompute()) return false; assert (SplitLHS.second != getCouldNotCompute() && "Unexpected CNC"); // Get init and post increment value for RHS. auto SplitRHS = SplitIntoInitAndPostInc(MDL, RHS); // if RHS contains unknown non-invariant SCEV then bail out. if (SplitRHS.first == getCouldNotCompute()) return false; assert (SplitRHS.second != getCouldNotCompute() && "Unexpected CNC"); // It is possible that init SCEV contains an invariant load but it does // not dominate MDL and is not available at MDL loop entry, so we should // check it here. if (!isAvailableAtLoopEntry(SplitLHS.first, MDL) || !isAvailableAtLoopEntry(SplitRHS.first, MDL)) return false; // It seems backedge guard check is faster than entry one so in some cases // it can speed up whole estimation by short circuit return isLoopBackedgeGuardedByCond(MDL, Pred, SplitLHS.second, SplitRHS.second) && isLoopEntryGuardedByCond(MDL, Pred, SplitLHS.first, SplitRHS.first); } bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // Canonicalize the inputs first. (void)SimplifyICmpOperands(Pred, LHS, RHS); if (isKnownViaInduction(Pred, LHS, RHS)) return true; if (isKnownPredicateViaSplitting(Pred, LHS, RHS)) return true; // Otherwise see what can be done with some simple reasoning. return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS); } std::optional ScalarEvolution::evaluatePredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { if (isKnownPredicate(Pred, LHS, RHS)) return true; if (isKnownPredicate(ICmpInst::getInversePredicate(Pred), LHS, RHS)) return false; return std::nullopt; } bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI) { // TODO: Analyze guards and assumes from Context's block. 
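  // (e.g. when CtxI sits in a block that is only reached through a branch on
  //  '%x u< %n', the block-entry check below can prove 'x u< n' at CtxI even
  //  if the fact is not known globally.)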
return isKnownPredicate(Pred, LHS, RHS) || isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS); } std::optional ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI) { std::optional KnownWithoutContext = evaluatePredicate(Pred, LHS, RHS); if (KnownWithoutContext) return KnownWithoutContext; if (isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS)) return true; if (isBasicBlockEntryGuardedByCond(CtxI->getParent(), ICmpInst::getInversePredicate(Pred), LHS, RHS)) return false; return std::nullopt; } bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred, const SCEVAddRecExpr *LHS, const SCEV *RHS) { const Loop *L = LHS->getLoop(); return isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS) && isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS); } std::optional ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred) { auto Result = getMonotonicPredicateTypeImpl(LHS, Pred); #ifndef NDEBUG // Verify an invariant: inverting the predicate should turn a monotonically // increasing change to a monotonically decreasing one, and vice versa. if (Result) { auto ResultSwapped = getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred)); assert(*ResultSwapped != *Result && "monotonicity should flip as we flip the predicate"); } #endif return Result; } std::optional ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred) { // A zero step value for LHS means the induction variable is essentially a // loop invariant value. We don't really depend on the predicate actually // flipping from false to true (for increasing predicates, and the other way // around for decreasing predicates), all we care about is that *if* the // predicate changes then it only changes from false to true. // // A zero step value in itself is not very useful, but there may be places // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be // as general as possible. // Only handle LE/LT/GE/GT predicates. if (!ICmpInst::isRelational(Pred)) return std::nullopt; bool IsGreater = ICmpInst::isGE(Pred) || ICmpInst::isGT(Pred); assert((IsGreater || ICmpInst::isLE(Pred) || ICmpInst::isLT(Pred)) && "Should be greater or less!"); // Check that AR does not wrap. if (ICmpInst::isUnsigned(Pred)) { if (!LHS->hasNoUnsignedWrap()) return std::nullopt; return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; } assert(ICmpInst::isSigned(Pred) && "Relational predicate is either signed or unsigned!"); if (!LHS->hasNoSignedWrap()) return std::nullopt; const SCEV *Step = LHS->getStepRecurrence(*this); if (isKnownNonNegative(Step)) return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; if (isKnownNonPositive(Step)) return !IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; return std::nullopt; } std::optional ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI) { // If there is a loop-invariant, force it into the RHS, otherwise bail out. 
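  // (e.g. a query '%n >u {0,+,1}<L>' is rewritten below to '{0,+,1}<L> <u %n'
  //  so that the loop-invariant operand %n ends up on the RHS.)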
if (!isLoopInvariant(RHS, L)) { if (!isLoopInvariant(LHS, L)) return std::nullopt; std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); } const SCEVAddRecExpr *ArLHS = dyn_cast(LHS); if (!ArLHS || ArLHS->getLoop() != L) return std::nullopt; auto MonotonicType = getMonotonicPredicateType(ArLHS, Pred); if (!MonotonicType) return std::nullopt; // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to // true as the loop iterates, and the backedge is control dependent on // "ArLHS `Pred` RHS" == true then we can reason as follows: // // * if the predicate was false in the first iteration then the predicate // is never evaluated again, since the loop exits without taking the // backedge. // * if the predicate was true in the first iteration then it will // continue to be true for all future iterations since it is // monotonically increasing. // // For both the above possibilities, we can replace the loop varying // predicate with its value on the first iteration of the loop (which is // loop invariant). // // A similar reasoning applies for a monotonically decreasing predicate, by // replacing true with false and false with true in the above two bullets. bool Increasing = *MonotonicType == ScalarEvolution::MonotonicallyIncreasing; auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred); if (isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) return ScalarEvolution::LoopInvariantPredicate(Pred, ArLHS->getStart(), RHS); if (!CtxI) return std::nullopt; // Try to prove via context. // TODO: Support other cases. switch (Pred) { default: break; case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_ULT: { assert(ArLHS->hasNoUnsignedWrap() && "Is a requirement of monotonicity!"); // Given preconditions // (1) ArLHS does not cross the border of positive and negative parts of // range because of: // - Positive step; (TODO: lift this limitation) // - nuw - does not cross zero boundary; // - nsw - does not cross SINT_MAX boundary; // (2) ArLHS =s 0 // we can replace the loop variant ArLHS ArLHS Start(ArLHS) >=s 0. // We can strengthen this to Start(ArLHS) hasNoSignedWrap() && ArLHS->isAffine() && isKnownPositive(ArLHS->getStepRecurrence(*this)) && isKnownNonNegative(RHS) && isKnownPredicateAt(SignFlippedPred, ArLHS, RHS, CtxI)) return ScalarEvolution::LoopInvariantPredicate(Pred, ArLHS->getStart(), RHS); } } return std::nullopt; } std::optional ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter) { if (auto LIP = getLoopInvariantExitCondDuringFirstIterationsImpl( Pred, LHS, RHS, L, CtxI, MaxIter)) return LIP; if (auto *UMin = dyn_cast(MaxIter)) // Number of iterations expressed as UMIN isn't always great for expressing // the value on the last iteration. If the straightforward approach didn't // work, try the following trick: if the a predicate is invariant for X, it // is also invariant for umin(X, ...). So try to find something that works // among subexpressions of MaxIter expressed as umin. 
for (auto *Op : UMin->operands()) if (auto LIP = getLoopInvariantExitCondDuringFirstIterationsImpl( Pred, LHS, RHS, L, CtxI, Op)) return LIP; return std::nullopt; } std::optional ScalarEvolution::getLoopInvariantExitCondDuringFirstIterationsImpl( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, const Instruction *CtxI, const SCEV *MaxIter) { // Try to prove the following set of facts: // - The predicate is monotonic in the iteration space. // - If the check does not fail on the 1st iteration: // - No overflow will happen during first MaxIter iterations; // - It will not fail on the MaxIter'th iteration. // If the check does fail on the 1st iteration, we leave the loop and no // other checks matter. // If there is a loop-invariant, force it into the RHS, otherwise bail out. if (!isLoopInvariant(RHS, L)) { if (!isLoopInvariant(LHS, L)) return std::nullopt; std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); } auto *AR = dyn_cast(LHS); if (!AR || AR->getLoop() != L) return std::nullopt; // The predicate must be relational (i.e. <, <=, >=, >). if (!ICmpInst::isRelational(Pred)) return std::nullopt; // TODO: Support steps other than +/- 1. const SCEV *Step = AR->getStepRecurrence(*this); auto *One = getOne(Step->getType()); auto *MinusOne = getNegativeSCEV(One); if (Step != One && Step != MinusOne) return std::nullopt; // Type mismatch here means that MaxIter is potentially larger than max // unsigned value in start type, which mean we cannot prove no wrap for the // indvar. if (AR->getType() != MaxIter->getType()) return std::nullopt; // Value of IV on suggested last iteration. const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this); // Does it still meet the requirement? if (!isLoopBackedgeGuardedByCond(L, Pred, Last, RHS)) return std::nullopt; // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does // not exceed max unsigned value of this type), this effectively proves // that there is no wrap during the iteration. To prove that there is no // signed/unsigned wrap, we need to check that // Start <= Last for step = 1 or Start >= Last for step = -1. ICmpInst::Predicate NoOverflowPred = CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; if (Step == MinusOne) NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred); const SCEV *Start = AR->getStart(); if (!isKnownPredicateAt(NoOverflowPred, Start, Last, CtxI)) return std::nullopt; // Everything is fine. return ScalarEvolution::LoopInvariantPredicate(Pred, Start, RHS); } bool ScalarEvolution::isKnownPredicateViaConstantRanges( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { if (HasSameValue(LHS, RHS)) return ICmpInst::isTrueWhenEqual(Pred); // This code is split out from isKnownPredicate because it is called from // within isLoopEntryGuardedByCond. auto CheckRanges = [&](const ConstantRange &RangeLHS, const ConstantRange &RangeRHS) { return RangeLHS.icmp(Pred, RangeRHS); }; // The check at the top of the function catches the case where the values are // known to be equal. 
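  // Hence for ICMP_EQ we simply answer "not known" below instead of consulting
  // the ranges.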
if (Pred == CmpInst::ICMP_EQ) return false; if (Pred == CmpInst::ICMP_NE) { auto SL = getSignedRange(LHS); auto SR = getSignedRange(RHS); if (CheckRanges(SL, SR)) return true; auto UL = getUnsignedRange(LHS); auto UR = getUnsignedRange(RHS); if (CheckRanges(UL, UR)) return true; auto *Diff = getMinusSCEV(LHS, RHS); return !isa(Diff) && isKnownNonZero(Diff); } if (CmpInst::isSigned(Pred)) { auto SL = getSignedRange(LHS); auto SR = getSignedRange(RHS); return CheckRanges(SL, SR); } auto UL = getUnsignedRange(LHS); auto UR = getUnsignedRange(RHS); return CheckRanges(UL, UR); } bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // Match X to (A + C1) and Y to (A + C2), where // C1 and C2 are constant integers. If either X or Y are not add expressions, // consider them as X + 0 and Y + 0 respectively. C1 and C2 are returned via // OutC1 and OutC2. auto MatchBinaryAddToConst = [this](const SCEV *X, const SCEV *Y, APInt &OutC1, APInt &OutC2, SCEV::NoWrapFlags ExpectedFlags) { const SCEV *XNonConstOp, *XConstOp; const SCEV *YNonConstOp, *YConstOp; SCEV::NoWrapFlags XFlagsPresent; SCEV::NoWrapFlags YFlagsPresent; if (!splitBinaryAdd(X, XConstOp, XNonConstOp, XFlagsPresent)) { XConstOp = getZero(X->getType()); XNonConstOp = X; XFlagsPresent = ExpectedFlags; } if (!isa(XConstOp) || (XFlagsPresent & ExpectedFlags) != ExpectedFlags) return false; if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent)) { YConstOp = getZero(Y->getType()); YNonConstOp = Y; YFlagsPresent = ExpectedFlags; } if (!isa(YConstOp) || (YFlagsPresent & ExpectedFlags) != ExpectedFlags) return false; if (YNonConstOp != XNonConstOp) return false; OutC1 = cast(XConstOp)->getAPInt(); OutC2 = cast(YConstOp)->getAPInt(); return true; }; APInt C1; APInt C2; switch (Pred) { default: break; case ICmpInst::ICMP_SGE: std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_SLE: // (X + C1) s<= (X + C2) if C1 s<= C2. if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNSW) && C1.sle(C2)) return true; break; case ICmpInst::ICMP_SGT: std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_SLT: // (X + C1) s< (X + C2) if C1 s< C2. if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNSW) && C1.slt(C2)) return true; break; case ICmpInst::ICMP_UGE: std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_ULE: // (X + C1) u<= (X + C2) for C1 u<= C2. if (MatchBinaryAddToConst(RHS, LHS, C2, C1, SCEV::FlagNUW) && C1.ule(C2)) return true; break; case ICmpInst::ICMP_UGT: std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_ULT: // (X + C1) u< (X + C2) if C1 u< C2. if (MatchBinaryAddToConst(RHS, LHS, C2, C1, SCEV::FlagNUW) && C1.ult(C2)) return true; break; } return false; } bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate) return false; // Allowing arbitrary number of activations of isKnownPredicateViaSplitting on // the stack can result in exponential time complexity. SaveAndRestore Restore(ProvingSplitPredicate, true); // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L // // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use // isKnownPredicate. isKnownPredicate is more powerful, but also more // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the // interesting cases seen in practice. We can consider "upgrading" L >= 0 to // use isKnownPredicate later if needed. 
return isKnownNonNegative(RHS) && isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) && isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS); } bool ScalarEvolution::isImpliedViaGuard(const BasicBlock *BB, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // No need to even try if we know the module has no guards. if (!HasGuards) return false; return any_of(*BB, [&](const Instruction &I) { using namespace llvm::PatternMatch; Value *Condition; return match(&I, m_Intrinsic( m_Value(Condition))) && isImpliedCond(Pred, LHS, RHS, Condition, false); }); } /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is /// protected by a conditional between LHS and RHS. This is used to /// to eliminate casts. bool ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // Interpret a null as meaning no loop, where there is obviously no guard // (interprocedural conditions notwithstanding). Do not bother about // unreachable loops. if (!L || !DT.isReachableFromEntry(L->getHeader())) return true; if (VerifyIR) assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) && "This cannot be done on broken IR!"); if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS)) return true; BasicBlock *Latch = L->getLoopLatch(); if (!Latch) return false; BranchInst *LoopContinuePredicate = dyn_cast(Latch->getTerminator()); if (LoopContinuePredicate && LoopContinuePredicate->isConditional() && isImpliedCond(Pred, LHS, RHS, LoopContinuePredicate->getCondition(), LoopContinuePredicate->getSuccessor(0) != L->getHeader())) return true; // We don't want more than one activation of the following loops on the stack // -- that can lead to O(n!) time complexity. if (WalkingBEDominatingConds) return false; SaveAndRestore ClearOnExit(WalkingBEDominatingConds, true); // See if we can exploit a trip count to prove the predicate. const auto &BETakenInfo = getBackedgeTakenInfo(L); const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this); if (LatchBECount != getCouldNotCompute()) { // We know that Latch branches back to the loop header exactly // LatchBECount times. This means the backdege condition at Latch is // equivalent to "{0,+,1} u< LatchBECount". Type *Ty = LatchBECount->getType(); auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW); const SCEV *LoopCounter = getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags); if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter, LatchBECount)) return true; } // Check conditions due to any @llvm.assume intrinsics. for (auto &AssumeVH : AC.assumptions()) { if (!AssumeVH) continue; auto *CI = cast(AssumeVH); if (!DT.dominates(CI, Latch->getTerminator())) continue; if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) return true; } if (isImpliedViaGuard(Latch, Pred, LHS, RHS)) return true; for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()]; DTN != HeaderDTN; DTN = DTN->getIDom()) { assert(DTN && "should reach the loop header before reaching the root!"); BasicBlock *BB = DTN->getBlock(); if (isImpliedViaGuard(BB, Pred, LHS, RHS)) return true; BasicBlock *PBB = BB->getSinglePredecessor(); if (!PBB) continue; BranchInst *ContinuePredicate = dyn_cast(PBB->getTerminator()); if (!ContinuePredicate || !ContinuePredicate->isConditional()) continue; Value *Condition = ContinuePredicate->getCondition(); // If we have an edge `E` within the loop body that dominates the only // latch, the condition guarding `E` also guards the backedge. 
This // reasoning works only for loops with a single latch. BasicBlockEdge DominatingEdge(PBB, BB); if (DominatingEdge.isSingleEdge()) { // We're constructively (and conservatively) enumerating edges within the // loop body that dominate the latch. The dominator tree better agree // with us on this: assert(DT.dominates(DominatingEdge, Latch) && "should be!"); if (isImpliedCond(Pred, LHS, RHS, Condition, BB != ContinuePredicate->getSuccessor(0))) return true; } } return false; } bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // Do not bother proving facts for unreachable code. if (!DT.isReachableFromEntry(BB)) return true; if (VerifyIR) assert(!verifyFunction(*BB->getParent(), &dbgs()) && "This cannot be done on broken IR!"); // If we cannot prove strict comparison (e.g. a > b), maybe we can prove // the facts (a >= b && a != b) separately. A typical situation is when the // non-strict comparison is known from ranges and non-equality is known from // dominating predicates. If we are proving strict comparison, we always try // to prove non-equality and non-strict comparison separately. auto NonStrictPredicate = ICmpInst::getNonStrictPredicate(Pred); const bool ProvingStrictComparison = (Pred != NonStrictPredicate); bool ProvedNonStrictComparison = false; bool ProvedNonEquality = false; auto SplitAndProve = [&](std::function Fn) -> bool { if (!ProvedNonStrictComparison) ProvedNonStrictComparison = Fn(NonStrictPredicate); if (!ProvedNonEquality) ProvedNonEquality = Fn(ICmpInst::ICMP_NE); if (ProvedNonStrictComparison && ProvedNonEquality) return true; return false; }; if (ProvingStrictComparison) { auto ProofFn = [&](ICmpInst::Predicate P) { return isKnownViaNonRecursiveReasoning(P, LHS, RHS); }; if (SplitAndProve(ProofFn)) return true; } // Try to prove (Pred, LHS, RHS) using isImpliedCond. auto ProveViaCond = [&](const Value *Condition, bool Inverse) { const Instruction *CtxI = &BB->front(); if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, CtxI)) return true; if (ProvingStrictComparison) { auto ProofFn = [&](ICmpInst::Predicate P) { return isImpliedCond(P, LHS, RHS, Condition, Inverse, CtxI); }; if (SplitAndProve(ProofFn)) return true; } return false; }; // Starting at the block's predecessor, climb up the predecessor chain, as long // as there are predecessors that can be found that have unique successors // leading to the original block. const Loop *ContainingLoop = LI.getLoopFor(BB); const BasicBlock *PredBB; if (ContainingLoop && ContainingLoop->getHeader() == BB) PredBB = ContainingLoop->getLoopPredecessor(); else PredBB = BB->getSinglePredecessor(); for (std::pair Pair(PredBB, BB); Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { const BranchInst *BlockEntryPredicate = dyn_cast(Pair.first->getTerminator()); if (!BlockEntryPredicate || BlockEntryPredicate->isUnconditional()) continue; if (ProveViaCond(BlockEntryPredicate->getCondition(), BlockEntryPredicate->getSuccessor(0) != Pair.second)) return true; } // Check conditions due to any @llvm.assume intrinsics. for (auto &AssumeVH : AC.assumptions()) { if (!AssumeVH) continue; auto *CI = cast(AssumeVH); if (!DT.dominates(CI, BB)) continue; if (ProveViaCond(CI->getArgOperand(0), false)) return true; } // Check conditions due to any @llvm.experimental.guard intrinsics. 
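  // A dominating guard takes the form (illustrative IR):
  //   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
  // and its %cond can be fed to ProveViaCond just like a branch condition.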
auto *GuardDecl = F.getParent()->getFunction( Intrinsic::getName(Intrinsic::experimental_guard)); if (GuardDecl) for (const auto *GU : GuardDecl->users()) if (const auto *Guard = dyn_cast(GU)) if (Guard->getFunction() == BB->getParent() && DT.dominates(Guard, BB)) if (ProveViaCond(Guard->getArgOperand(0), false)) return true; return false; } bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // Interpret a null as meaning no loop, where there is obviously no guard // (interprocedural conditions notwithstanding). if (!L) return false; // Both LHS and RHS must be available at loop entry. assert(isAvailableAtLoopEntry(LHS, L) && "LHS is not available at Loop Entry"); assert(isAvailableAtLoopEntry(RHS, L) && "RHS is not available at Loop Entry"); if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS)) return true; return isBasicBlockEntryGuardedByCond(L->getHeader(), Pred, LHS, RHS); } bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Value *FoundCondValue, bool Inverse, const Instruction *CtxI) { // False conditions implies anything. Do not bother analyzing it further. if (FoundCondValue == ConstantInt::getBool(FoundCondValue->getContext(), Inverse)) return true; if (!PendingLoopPredicates.insert(FoundCondValue).second) return false; auto ClearOnExit = make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); }); // Recursively handle And and Or conditions. const Value *Op0, *Op1; if (match(FoundCondValue, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { if (!Inverse) return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, CtxI) || isImpliedCond(Pred, LHS, RHS, Op1, Inverse, CtxI); } else if (match(FoundCondValue, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) { if (Inverse) return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, CtxI) || isImpliedCond(Pred, LHS, RHS, Op1, Inverse, CtxI); } const ICmpInst *ICI = dyn_cast(FoundCondValue); if (!ICI) return false; // Now that we found a conditional branch that dominates the loop or controls // the loop latch. Check to see if it is the comparison we are looking for. ICmpInst::Predicate FoundPred; if (Inverse) FoundPred = ICI->getInversePredicate(); else FoundPred = ICI->getPredicate(); const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, CtxI); } bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *CtxI) { // Balance the types. if (getTypeSizeInBits(LHS->getType()) < getTypeSizeInBits(FoundLHS->getType())) { // For unsigned and equality predicates, try to prove that both found // operands fit into narrow unsigned range. If so, try to prove facts in // narrow types. 
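  // For example (hypothetical widths): if LHS/RHS are i32 while
  // FoundLHS/FoundRHS are i64, and both found operands are known to be
  // u<= 0xFFFFFFFF, truncating them to i32 loses no information and the
  // implication can be checked entirely in i32.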
if (!CmpInst::isSigned(FoundPred) && !FoundLHS->getType()->isPointerTy() && !FoundRHS->getType()->isPointerTy()) { auto *NarrowType = LHS->getType(); auto *WideType = FoundLHS->getType(); auto BitWidth = getTypeSizeInBits(NarrowType); const SCEV *MaxValue = getZeroExtendExpr( getConstant(APInt::getMaxValue(BitWidth)), WideType); if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) && isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) { const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType); const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType); if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS, TruncFoundRHS, CtxI)) return true; } } if (LHS->getType()->isPointerTy() || RHS->getType()->isPointerTy()) return false; if (CmpInst::isSigned(Pred)) { LHS = getSignExtendExpr(LHS, FoundLHS->getType()); RHS = getSignExtendExpr(RHS, FoundLHS->getType()); } else { LHS = getZeroExtendExpr(LHS, FoundLHS->getType()); RHS = getZeroExtendExpr(RHS, FoundLHS->getType()); } } else if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(FoundLHS->getType())) { if (FoundLHS->getType()->isPointerTy() || FoundRHS->getType()->isPointerTy()) return false; if (CmpInst::isSigned(FoundPred)) { FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType()); } else { FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType()); FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType()); } } return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, CtxI); } bool ScalarEvolution::isImpliedCondBalancedTypes( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *CtxI) { assert(getTypeSizeInBits(LHS->getType()) == getTypeSizeInBits(FoundLHS->getType()) && "Types should be balanced!"); // Canonicalize the query to match the way instcombine will have // canonicalized the comparison. if (SimplifyICmpOperands(Pred, LHS, RHS)) if (LHS == RHS) return CmpInst::isTrueWhenEqual(Pred); if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS)) if (FoundLHS == FoundRHS) return CmpInst::isFalseWhenEqual(FoundPred); // Check to see if we can make the LHS or RHS match. if (LHS == FoundRHS || RHS == FoundLHS) { if (isa(RHS)) { std::swap(FoundLHS, FoundRHS); FoundPred = ICmpInst::getSwappedPredicate(FoundPred); } else { std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); } } // Check whether the found predicate is the same as the desired predicate. if (FoundPred == Pred) return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI); // Check whether swapping the found predicate makes it the same as the // desired predicate. if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { // We can write the implication // 0. LHS Pred RHS <- FoundLHS SwapPred FoundRHS // using one of the following ways: // 1. LHS Pred RHS <- FoundRHS Pred FoundLHS // 2. RHS SwapPred LHS <- FoundLHS SwapPred FoundRHS // 3. LHS Pred RHS <- ~FoundLHS Pred ~FoundRHS // 4. ~LHS SwapPred ~RHS <- FoundLHS SwapPred FoundRHS // Forms 1. and 2. require swapping the operands of one condition. Don't // do this if it would break canonical constant/addrec ordering. 
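  // For example: proving "LHS s< RHS" from a found "FoundLHS s> FoundRHS" can
  // proceed either as form 1, "LHS s< RHS <- FoundRHS s< FoundLHS", or as
  // form 2, "RHS s> LHS <- FoundLHS s> FoundRHS"; each swaps the operands of
  // exactly one side of the implication.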
if (!isa(RHS) && !isa(LHS)) return isImpliedCondOperands(FoundPred, RHS, LHS, FoundLHS, FoundRHS, CtxI); if (!isa(FoundRHS) && !isa(FoundLHS)) return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, CtxI); // There's no clear preference between forms 3. and 4., try both. Avoid // forming getNotSCEV of pointer values as the resulting subtract is // not legal. if (!LHS->getType()->isPointerTy() && !RHS->getType()->isPointerTy() && isImpliedCondOperands(FoundPred, getNotSCEV(LHS), getNotSCEV(RHS), FoundLHS, FoundRHS, CtxI)) return true; if (!FoundLHS->getType()->isPointerTy() && !FoundRHS->getType()->isPointerTy() && isImpliedCondOperands(Pred, LHS, RHS, getNotSCEV(FoundLHS), getNotSCEV(FoundRHS), CtxI)) return true; return false; } auto IsSignFlippedPredicate = [](CmpInst::Predicate P1, CmpInst::Predicate P2) { assert(P1 != P2 && "Handled earlier!"); return CmpInst::isRelational(P2) && P1 == CmpInst::getFlippedSignednessPredicate(P2); }; if (IsSignFlippedPredicate(Pred, FoundPred)) { // Unsigned comparison is the same as signed comparison when both the // operands are non-negative or negative. if ((isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) || (isKnownNegative(FoundLHS) && isKnownNegative(FoundRHS))) return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI); // Create local copies that we can freely swap and canonicalize our // conditions to "le/lt". ICmpInst::Predicate CanonicalPred = Pred, CanonicalFoundPred = FoundPred; const SCEV *CanonicalLHS = LHS, *CanonicalRHS = RHS, *CanonicalFoundLHS = FoundLHS, *CanonicalFoundRHS = FoundRHS; if (ICmpInst::isGT(CanonicalPred) || ICmpInst::isGE(CanonicalPred)) { CanonicalPred = ICmpInst::getSwappedPredicate(CanonicalPred); CanonicalFoundPred = ICmpInst::getSwappedPredicate(CanonicalFoundPred); std::swap(CanonicalLHS, CanonicalRHS); std::swap(CanonicalFoundLHS, CanonicalFoundRHS); } assert((ICmpInst::isLT(CanonicalPred) || ICmpInst::isLE(CanonicalPred)) && "Must be!"); assert((ICmpInst::isLT(CanonicalFoundPred) || ICmpInst::isLE(CanonicalFoundPred)) && "Must be!"); if (ICmpInst::isSigned(CanonicalPred) && isKnownNonNegative(CanonicalRHS)) // Use implication: // x =s 0 --> x x (FoundLHS) || isa(FoundRHS))) { const SCEVConstant *C = nullptr; const SCEV *V = nullptr; if (isa(FoundLHS)) { C = cast(FoundLHS); V = FoundRHS; } else { C = cast(FoundRHS); V = FoundLHS; } // The guarding predicate tells us that C != V. If the known range // of V is [C, t), we can sharpen the range to [C + 1, t). The // range we consider has to correspond to same signedness as the // predicate we're interested in folding. APInt Min = ICmpInst::isSigned(Pred) ? getSignedRangeMin(V) : getUnsignedRangeMin(V); if (Min == C->getAPInt()) { // Given (V >= Min && V != Min) we conclude V >= (Min + 1). // This is true even if (Min + 1) wraps around -- in case of // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)). APInt SharperMin = Min + 1; switch (Pred) { case ICmpInst::ICMP_SGE: case ICmpInst::ICMP_UGE: // We know V `Pred` SharperMin. If this implies LHS `Pred` // RHS, we're done. if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin), CtxI)) return true; [[fallthrough]]; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: // We know from the range information that (V `Pred` Min || // V == Min). We know from the guarding condition that !(V // == Min). This gives us // // V `Pred` Min || V == Min && !(V == Min) // => V `Pred` Min // // If V `Pred` Min implies LHS `Pred` RHS, we're done. 
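        // For example (hypothetical range): if the relevant minimum of V is 5
        // and the guard establishes V != 5, then V `Pred` 5 holds outright
        // (equivalently V >= 6), which is what the check below relies on.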
if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), CtxI)) return true; break; // `LHS < RHS` and `LHS <= RHS` are handled in the same way as `RHS > LHS` and `RHS >= LHS` respectively. case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_ULE: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, LHS, V, getConstant(SharperMin), CtxI)) return true; [[fallthrough]]; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, LHS, V, getConstant(Min), CtxI)) return true; break; default: // No change break; } } } // Check whether the actual condition is beyond sufficient. if (FoundPred == ICmpInst::ICMP_EQ) if (ICmpInst::isTrueWhenEqual(Pred)) if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI)) return true; if (Pred == ICmpInst::ICMP_NE) if (!ICmpInst::isTrueWhenEqual(FoundPred)) if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, CtxI)) return true; if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS)) return true; // Otherwise assume the worst. return false; } bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, SCEV::NoWrapFlags &Flags) { const auto *AE = dyn_cast(Expr); if (!AE || AE->getNumOperands() != 2) return false; L = AE->getOperand(0); R = AE->getOperand(1); Flags = AE->getNoWrapFlags(); return true; } std::optional ScalarEvolution::computeConstantDifference(const SCEV *More, const SCEV *Less) { // We avoid subtracting expressions here because this function is usually // fairly deep in the call stack (i.e. is called many times). // X - X = 0. if (More == Less) return APInt(getTypeSizeInBits(More->getType()), 0); if (isa(Less) && isa(More)) { const auto *LAR = cast(Less); const auto *MAR = cast(More); if (LAR->getLoop() != MAR->getLoop()) return std::nullopt; // We look at affine expressions only; not for correctness but to keep // getStepRecurrence cheap. if (!LAR->isAffine() || !MAR->isAffine()) return std::nullopt; if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this)) return std::nullopt; Less = LAR->getStart(); More = MAR->getStart(); // fall through } if (isa(Less) && isa(More)) { const auto &M = cast(More)->getAPInt(); const auto &L = cast(Less)->getAPInt(); return M - L; } SCEV::NoWrapFlags Flags; const SCEV *LLess = nullptr, *RLess = nullptr; const SCEV *LMore = nullptr, *RMore = nullptr; const SCEVConstant *C1 = nullptr, *C2 = nullptr; // Compare (X + C1) vs X. if (splitBinaryAdd(Less, LLess, RLess, Flags)) if ((C1 = dyn_cast(LLess))) if (RLess == More) return -(C1->getAPInt()); // Compare X vs (X + C2). if (splitBinaryAdd(More, LMore, RMore, Flags)) if ((C2 = dyn_cast(LMore))) if (RMore == Less) return C2->getAPInt(); // Compare (X + C1) vs (X + C2). if (C1 && C2 && RLess == RMore) return C2->getAPInt() - C1->getAPInt(); return std::nullopt; } bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *CtxI) { // Try to recognize the following pattern: // // FoundRHS = ... // ... // loop: // FoundLHS = {Start,+,W} // context_bb: // Basic block from the same loop // known(Pred, FoundLHS, FoundRHS) // // If some predicate is known in the context of a loop, it is also known on // each iteration of this loop, including the first iteration. Therefore, in // this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to // prove the original pred using this fact. 
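  // Concretely (hypothetical loop): if "{%start,+,1}<%loop> u< %len" is known
  // at a block inside the loop that dominates the latch, the fact already held
  // on the first iteration, when the recurrence equals %start, so
  // "%start u< %len" follows.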
if (!CtxI) return false; const BasicBlock *ContextBB = CtxI->getParent(); // Make sure AR varies in the context block. if (auto *AR = dyn_cast(FoundLHS)) { const Loop *L = AR->getLoop(); // Make sure that context belongs to the loop and executes on 1st iteration // (if it ever executes at all). if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch())) return false; if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop())) return false; return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS); } if (auto *AR = dyn_cast(FoundRHS)) { const Loop *L = AR->getLoop(); // Make sure that context belongs to the loop and executes on 1st iteration // (if it ever executes at all). if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch())) return false; if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop())) return false; return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart()); } return false; } bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS) { if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT) return false; const auto *AddRecLHS = dyn_cast(LHS); if (!AddRecLHS) return false; const auto *AddRecFoundLHS = dyn_cast(FoundLHS); if (!AddRecFoundLHS) return false; // We'd like to let SCEV reason about control dependencies, so we constrain // both the inequalities to be about add recurrences on the same loop. This // way we can use isLoopEntryGuardedByCond later. const Loop *L = AddRecFoundLHS->getLoop(); if (L != AddRecLHS->getLoop()) return false; // FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1) // // FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C) // ... (2) // // Informal proof for (2), assuming (1) [*]: // // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**] // // Then // // FoundLHS s< FoundRHS s< INT_MIN - C // <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ] // <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ] // <=> (FoundLHS + INT_MIN + C + INT_MIN) s< // (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ] // <=> FoundLHS + C s< FoundRHS + C // // [*]: (1) can be proved by ruling out overflow. // // [**]: This can be proved by analyzing all the four possibilities: // (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and // (A s>= 0, B s>= 0). // // Note: // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C" // will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS // = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS // s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS + // C)". std::optional LDiff = computeConstantDifference(LHS, FoundLHS); std::optional RDiff = computeConstantDifference(RHS, FoundRHS); if (!LDiff || !RDiff || *LDiff != *RDiff) return false; if (LDiff->isMinValue()) return true; APInt FoundRHSLimit; if (Pred == CmpInst::ICMP_ULT) { FoundRHSLimit = -(*RDiff); } else { assert(Pred == CmpInst::ICMP_SLT && "Checked above!"); FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff; } // Try to prove (1) or (2), as needed. 
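  // Worked instance of (1) in i8 (hypothetical constants): with C = 56 we have
  // -C = 200 (as unsigned), so FoundLHS u< FoundRHS u< 200 keeps both sums
  // below 256, and therefore (FoundLHS + 56) u< (FoundRHS + 56) holds without
  // wrapping.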
return isAvailableAtLoopEntry(FoundRHS, L) && isLoopEntryGuardedByCond(L, Pred, FoundRHS, getConstant(FoundRHSLimit)); } bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, unsigned Depth) { const PHINode *LPhi = nullptr, *RPhi = nullptr; auto ClearOnExit = make_scope_exit([&]() { if (LPhi) { bool Erased = PendingMerges.erase(LPhi); assert(Erased && "Failed to erase LPhi!"); (void)Erased; } if (RPhi) { bool Erased = PendingMerges.erase(RPhi); assert(Erased && "Failed to erase RPhi!"); (void)Erased; } }); // Find respective Phis and check that they are not being pending. if (const SCEVUnknown *LU = dyn_cast(LHS)) if (auto *Phi = dyn_cast(LU->getValue())) { if (!PendingMerges.insert(Phi).second) return false; LPhi = Phi; } if (const SCEVUnknown *RU = dyn_cast(RHS)) if (auto *Phi = dyn_cast(RU->getValue())) { // If we detect a loop of Phi nodes being processed by this method, for // example: // // %a = phi i32 [ %some1, %preheader ], [ %b, %latch ] // %b = phi i32 [ %some2, %preheader ], [ %a, %latch ] // // we don't want to deal with a case that complex, so return conservative // answer false. if (!PendingMerges.insert(Phi).second) return false; RPhi = Phi; } // If none of LHS, RHS is a Phi, nothing to do here. if (!LPhi && !RPhi) return false; // If there is a SCEVUnknown Phi we are interested in, make it left. if (!LPhi) { std::swap(LHS, RHS); std::swap(FoundLHS, FoundRHS); std::swap(LPhi, RPhi); Pred = ICmpInst::getSwappedPredicate(Pred); } assert(LPhi && "LPhi should definitely be a SCEVUnknown Phi!"); const BasicBlock *LBB = LPhi->getParent(); const SCEVAddRecExpr *RAR = dyn_cast(RHS); auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) { return isKnownViaNonRecursiveReasoning(Pred, S1, S2) || isImpliedCondOperandsViaRanges(Pred, S1, S2, Pred, FoundLHS, FoundRHS) || isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth); }; if (RPhi && RPhi->getParent() == LBB) { // Case one: RHS is also a SCEVUnknown Phi from the same basic block. // If we compare two Phis from the same block, and for each entry block // the predicate is true for incoming values from this block, then the // predicate is also true for the Phis. for (const BasicBlock *IncBB : predecessors(LBB)) { const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB)); if (!ProvedEasily(L, R)) return false; } } else if (RAR && RAR->getLoop()->getHeader() == LBB) { // Case two: RHS is also a Phi from the same basic block, and it is an // AddRec. It means that there is a loop which has both AddRec and Unknown // PHIs, for it we can compare incoming values of AddRec from above the loop // and latch with their respective incoming values of LPhi. // TODO: Generalize to handle loops with many inputs in a header. 
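  // For example (illustrative IR), with RAR being the SCEV of %iv and LPhi
  // being %u:
  //
  //   header:
  //     %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
  //     %u  = phi i32 [ %a, %preheader ], [ %b, %latch ]
  //
  // we compare %a against RAR's start and %b against RAR's post-increment
  // expression.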
if (LPhi->getNumIncomingValues() != 2) return false; auto *RLoop = RAR->getLoop(); auto *Predecessor = RLoop->getLoopPredecessor(); assert(Predecessor && "Loop with AddRec with no predecessor?"); const SCEV *L1 = getSCEV(LPhi->getIncomingValueForBlock(Predecessor)); if (!ProvedEasily(L1, RAR->getStart())) return false; auto *Latch = RLoop->getLoopLatch(); assert(Latch && "Loop with AddRec with no latch?"); const SCEV *L2 = getSCEV(LPhi->getIncomingValueForBlock(Latch)); if (!ProvedEasily(L2, RAR->getPostIncExpr(*this))) return false; } else { // In all other cases go over inputs of LHS and compare each of them to RHS, // the predicate is true for (LHS, RHS) if it is true for all such pairs. // At this point RHS is either a non-Phi, or it is a Phi from some block // different from LBB. for (const BasicBlock *IncBB : predecessors(LBB)) { // Check that RHS is available in this block. if (!dominates(RHS, IncBB)) return false; const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); // Make sure L does not refer to a value from a potentially previous // iteration of a loop. if (!properlyDominates(L, LBB)) return false; if (!ProvedEasily(L, RHS)) return false; } } return true; } bool ScalarEvolution::isImpliedCondOperandsViaShift(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS) { // We want to imply LHS < RHS from LHS < (RHS >> shiftvalue). First, make // sure that we are dealing with same LHS. if (RHS == FoundRHS) { std::swap(LHS, RHS); std::swap(FoundLHS, FoundRHS); Pred = ICmpInst::getSwappedPredicate(Pred); } if (LHS != FoundLHS) return false; auto *SUFoundRHS = dyn_cast(FoundRHS); if (!SUFoundRHS) return false; Value *Shiftee, *ShiftValue; using namespace PatternMatch; if (match(SUFoundRHS->getValue(), m_LShr(m_Value(Shiftee), m_Value(ShiftValue)))) { auto *ShifteeS = getSCEV(Shiftee); // Prove one of the following: // LHS > shiftvalue) && shiftee <=u RHS ---> LHS > shiftvalue) && shiftee <=u RHS ---> LHS <=u RHS // LHS > shiftvalue) && shiftee <=s RHS && shiftee >=s 0 // ---> LHS > shiftvalue) && shiftee <=s RHS && shiftee >=s 0 // ---> LHS <=s RHS if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) return isKnownPredicate(ICmpInst::ICMP_ULE, ShifteeS, RHS); if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) if (isKnownNonNegative(ShifteeS)) return isKnownPredicate(ICmpInst::ICMP_SLE, ShifteeS, RHS); } return false; } bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *CtxI) { if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, Pred, FoundLHS, FoundRHS)) return true; if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; if (isImpliedCondOperandsViaShift(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI)) return true; return isImpliedCondOperandsHelper(Pred, LHS, RHS, FoundLHS, FoundRHS); } /// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values? 
template static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr, const SCEV *Candidate) { const MinMaxExprType *MinMaxExpr = dyn_cast(MaybeMinMaxExpr); if (!MinMaxExpr) return false; return is_contained(MinMaxExpr->operands(), Candidate); } static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // If both sides are affine addrecs for the same loop, with equal // steps, and we know the recurrences don't wrap, then we only // need to check the predicate on the starting values. if (!ICmpInst::isRelational(Pred)) return false; const SCEVAddRecExpr *LAR = dyn_cast(LHS); if (!LAR) return false; const SCEVAddRecExpr *RAR = dyn_cast(RHS); if (!RAR) return false; if (LAR->getLoop() != RAR->getLoop()) return false; if (!LAR->isAffine() || !RAR->isAffine()) return false; if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE)) return false; SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ? SCEV::FlagNSW : SCEV::FlagNUW; if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW)) return false; return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart()); } /// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max /// expression? static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { switch (Pred) { default: return false; case ICmpInst::ICMP_SGE: std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_SLE: return // min(A, ...) <= A IsMinMaxConsistingOf(LHS, RHS) || // A <= max(A, ...) IsMinMaxConsistingOf(RHS, LHS); case ICmpInst::ICMP_UGE: std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_ULE: return // min(A, ...) <= A // FIXME: what about umin_seq? IsMinMaxConsistingOf(LHS, RHS) || // A <= max(A, ...) IsMinMaxConsistingOf(RHS, LHS); } llvm_unreachable("covered switch fell through?!"); } bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, unsigned Depth) { assert(getTypeSizeInBits(LHS->getType()) == getTypeSizeInBits(RHS->getType()) && "LHS and RHS have different sizes?"); assert(getTypeSizeInBits(FoundLHS->getType()) == getTypeSizeInBits(FoundRHS->getType()) && "FoundLHS and FoundRHS have different sizes?"); // We want to avoid hurting the compile time with analysis of too big trees. if (Depth > MaxSCEVOperationsImplicationDepth) return false; // We only want to work with GT comparison so far. if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) { Pred = CmpInst::getSwappedPredicate(Pred); std::swap(LHS, RHS); std::swap(FoundLHS, FoundRHS); } // For unsigned, try to reduce it to corresponding signed comparison. if (Pred == ICmpInst::ICMP_UGT) // We can replace unsigned predicate with its signed counterpart if all // involved values are non-negative. // TODO: We could have better support for unsigned. if (isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) { // Knowing that both FoundLHS and FoundRHS are non-negative, and knowing // FoundLHS >u FoundRHS, we also know that FoundLHS >s FoundRHS. Let us // use this fact to prove that LHS and RHS are non-negative. 
const SCEV *MinusOne = getMinusOne(LHS->getType()); if (isImpliedCondOperands(ICmpInst::ICMP_SGT, LHS, MinusOne, FoundLHS, FoundRHS) && isImpliedCondOperands(ICmpInst::ICMP_SGT, RHS, MinusOne, FoundLHS, FoundRHS)) Pred = ICmpInst::ICMP_SGT; } if (Pred != ICmpInst::ICMP_SGT) return false; auto GetOpFromSExt = [&](const SCEV *S) { if (auto *Ext = dyn_cast(S)) return Ext->getOperand(); // TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off // the constant in some cases. return S; }; // Acquire values from extensions. auto *OrigLHS = LHS; auto *OrigFoundLHS = FoundLHS; LHS = GetOpFromSExt(LHS); FoundLHS = GetOpFromSExt(FoundLHS); // Is the SGT predicate can be proved trivially or using the found context. auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) { return isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGT, S1, S2) || isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS, FoundRHS, Depth + 1); }; if (auto *LHSAddExpr = dyn_cast(LHS)) { // We want to avoid creation of any new non-constant SCEV. Since we are // going to compare the operands to RHS, we should be certain that we don't // need any size extensions for this. So let's decline all cases when the // sizes of types of LHS and RHS do not match. // TODO: Maybe try to get RHS from sext to catch more cases? if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType())) return false; // Should not overflow. if (!LHSAddExpr->hasNoSignedWrap()) return false; auto *LL = LHSAddExpr->getOperand(0); auto *LR = LHSAddExpr->getOperand(1); auto *MinusOne = getMinusOne(RHS->getType()); // Checks that S1 >= 0 && S2 > RHS, trivially or using the found context. auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) { return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS); }; // Try to prove the following rule: // (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS). // (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS). if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL)) return true; } else if (auto *LHSUnknownExpr = dyn_cast(LHS)) { Value *LL, *LR; // FIXME: Once we have SDiv implemented, we can get rid of this matching. using namespace llvm::PatternMatch; if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) { // Rules for division. // We are going to perform some comparisons with Denominator and its // derivative expressions. In general case, creating a SCEV for it may // lead to a complex analysis of the entire graph, and in particular it // can request trip count recalculation for the same loop. This would // cache as SCEVCouldNotCompute to avoid the infinite recursion. To avoid // this, we only want to create SCEVs that are constants in this section. // So we bail if Denominator is not a constant. if (!isa(LR)) return false; auto *Denominator = cast(getSCEV(LR)); // We want to make sure that LHS = FoundLHS / Denominator. If it is so, // then a SCEV for the numerator already exists and matches with FoundLHS. auto *Numerator = getExistingSCEV(LL); if (!Numerator || Numerator->getType() != FoundLHS->getType()) return false; // Make sure that the numerator matches with FoundLHS and the denominator // is positive. if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator)) return false; auto *DTy = Denominator->getType(); auto *FRHSTy = FoundRHS->getType(); if (DTy->isPointerTy() != FRHSTy->isPointerTy()) // One of types is a pointer and another one is not. 
We cannot extend // them properly to a wider type, so let us just reject this case. // TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help // to avoid this check. return false; // Given that: // FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0. auto *WTy = getWiderType(DTy, FRHSTy); auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy); auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy); // Try to prove the following rule: // (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS). // For example, given that FoundLHS > 2. It means that FoundLHS is at // least 3. If we divide it by Denominator < 4, we will have at least 1. auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2)); if (isKnownNonPositive(RHS) && IsSGTViaContext(FoundRHSExt, DenomMinusTwo)) return true; // Try to prove the following rule: // (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS). // For example, given that FoundLHS > -3. Then FoundLHS is at least -2. // If we divide it by Denominator > 2, then: // 1. If FoundLHS is negative, then the result is 0. // 2. If FoundLHS is non-negative, then the result is non-negative. // Anyways, the result is non-negative. auto *MinusOne = getMinusOne(WTy); auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt); if (isKnownNegative(RHS) && IsSGTViaContext(FoundRHSExt, NegDenomMinusOne)) return true; } } // If our expression contained SCEVUnknown Phis, and we split it down and now // need to prove something for them, try to prove the predicate for every // possible incoming values of those Phis. if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS, Depth + 1)) return true; return false; } static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // zext x u<= sext x, sext x s<= zext x switch (Pred) { case ICmpInst::ICMP_SGE: std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_SLE: { // If operand >=s 0 then ZExt == SExt. If operand (LHS); const SCEVZeroExtendExpr *ZExt = dyn_cast(RHS); if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) return true; break; } case ICmpInst::ICMP_UGE: std::swap(LHS, RHS); [[fallthrough]]; case ICmpInst::ICMP_ULE: { // If operand >=s 0 then ZExt == SExt. 
If operand (LHS); const SCEVSignExtendExpr *SExt = dyn_cast(RHS); if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) return true; break; } default: break; }; return false; } bool ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { return isKnownPredicateExtendIdiom(Pred, LHS, RHS) || isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || isKnownPredicateViaNoOverflow(Pred, LHS, RHS); } bool ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS) { switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_NE: if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) && isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) && isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) && isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) && isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS)) return true; break; } // Maybe it can be proved via operations? if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; return false; } bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS) { if (!isa(RHS) || !isa(FoundRHS)) // The restriction on `FoundRHS` be lifted easily -- it exists only to // reduce the compile time impact of this optimization. return false; std::optional Addend = computeConstantDifference(LHS, FoundLHS); if (!Addend) return false; const APInt &ConstFoundRHS = cast(FoundRHS)->getAPInt(); // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the // antecedent "`FoundLHS` `FoundPred` `FoundRHS`". ConstantRange FoundLHSRange = ConstantRange::makeExactICmpRegion(FoundPred, ConstFoundRHS); // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`: ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend)); // We can also compute the range of values for `LHS` that satisfy the // consequent, "`LHS` `Pred` `RHS`": const APInt &ConstRHS = cast(RHS)->getAPInt(); // The antecedent implies the consequent if every value of `LHS` that // satisfies the antecedent also satisfies the consequent. 
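  // For example (hypothetical constants): from "FoundLHS u< 10",
  // makeExactICmpRegion gives FoundLHSRange = [0, 10); if LHS = FoundLHS + 5,
  // then LHSRange = [5, 15), and since every value in [5, 15) is u< 20,
  // "FoundLHS u< 10" implies "LHS u< 20".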
  return LHSRange.icmp(Pred, ConstRHS);
}

bool ScalarEvolution::canIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
                                        bool IsSigned) {
  assert(isKnownPositive(Stride) && "Positive stride expected!");

  unsigned BitWidth = getTypeSizeInBits(RHS->getType());
  const SCEV *One = getOne(Stride->getType());

  if (IsSigned) {
    APInt MaxRHS = getSignedRangeMax(RHS);
    APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
    APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));

    // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
    return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS);
  }

  APInt MaxRHS = getUnsignedRangeMax(RHS);
  APInt MaxValue = APInt::getMaxValue(BitWidth);
  APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));

  // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
  return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS);
}

bool ScalarEvolution::canIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
                                        bool IsSigned) {
  unsigned BitWidth = getTypeSizeInBits(RHS->getType());
  const SCEV *One = getOne(Stride->getType());

  if (IsSigned) {
    APInt MinRHS = getSignedRangeMin(RHS);
    APInt MinValue = APInt::getSignedMinValue(BitWidth);
    APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));

    // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
    return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS);
  }

  APInt MinRHS = getUnsignedRangeMin(RHS);
  APInt MinValue = APInt::getMinValue(BitWidth);
  APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));

  // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
  return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS);
}

const SCEV *ScalarEvolution::getUDivCeilSCEV(const SCEV *N, const SCEV *D) {
  // umin(N, 1) + floor((N - umin(N, 1)) / D)
  // This is equivalent to "1 + floor((N - 1) / D)" for N != 0. The umin
  // expression fixes the case of N=0.
  const SCEV *MinNOne = getUMinExpr(N, getOne(N->getType()));
  const SCEV *NMinusOne = getMinusSCEV(N, MinNOne);
  return getAddExpr(MinNOne, getUDivExpr(NMinusOne, D));
}

const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
                                                    const SCEV *Stride,
                                                    const SCEV *End,
                                                    unsigned BitWidth,
                                                    bool IsSigned) {
  // The logic in this function assumes we can represent a positive stride.
  // If we can't, the backedge-taken count must be zero.
  if (IsSigned && BitWidth == 1)
    return getZero(Stride->getType());

  // The code below has only been closely audited for negative strides in the
  // unsigned comparison case. It may be correct for signed comparison, but
  // that needs to be established.
  if (IsSigned && isKnownNegative(Stride))
    return getCouldNotCompute();

  // Calculate the maximum backedge count based on the range of values
  // permitted by Start, End, and Stride.
  APInt MinStart =
      IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start);

  APInt MinStride =
      IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride);

  // We assume either the stride is positive, or the backedge-taken count
  // is zero. So force StrideForMaxBECount to be at least one.
  APInt One(BitWidth, 1);
  APInt StrideForMaxBECount = IsSigned ? APIntOps::smax(One, MinStride)
                                       : APIntOps::umax(One, MinStride);

  APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth)
                            : APInt::getMaxValue(BitWidth);
  APInt Limit = MaxValue - (StrideForMaxBECount - 1);

  // Although End can be a MAX expression we estimate MaxEnd considering only
  // the case End = RHS of the loop termination condition.
This is safe because // in the other case (End - Start) is zero, leading to a zero maximum backedge // taken count. APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit) : APIntOps::umin(getUnsignedRangeMax(End), Limit); // MaxBECount = ceil((max(MaxEnd, MinStart) - MinStart) / Stride) MaxEnd = IsSigned ? APIntOps::smax(MaxEnd, MinStart) : APIntOps::umax(MaxEnd, MinStart); return getUDivCeilSCEV(getConstant(MaxEnd - MinStart) /* Delta */, getConstant(StrideForMaxBECount) /* Step */); } ScalarEvolution::ExitLimit ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsOnlyExit, bool AllowPredicates) { SmallPtrSet Predicates; const SCEVAddRecExpr *IV = dyn_cast(LHS); bool PredicatedIV = false; auto canAssumeNoSelfWrap = [&](const SCEVAddRecExpr *AR) { // Can we prove this loop *must* be UB if overflow of IV occurs? // Reasoning goes as follows: // * Suppose the IV did self wrap. // * If Stride evenly divides the iteration space, then once wrap // occurs, the loop must revisit the same values. // * We know that RHS is invariant, and that none of those values // caused this exit to be taken previously. Thus, this exit is // dynamically dead. // * If this is the sole exit, then a dead exit implies the loop // must be infinite if there are no abnormal exits. // * If the loop were infinite, then it must either not be mustprogress // or have side effects. Otherwise, it must be UB. // * It can't (by assumption), be UB so we have contradicted our // premise and can conclude the IV did not in fact self-wrap. if (!isLoopInvariant(RHS, L)) return false; auto *StrideC = dyn_cast(AR->getStepRecurrence(*this)); if (!StrideC || !StrideC->getAPInt().isPowerOf2()) return false; if (!ControlsOnlyExit || !loopHasNoAbnormalExits(L)) return false; return loopIsFiniteByAssumption(L); }; if (!IV) { if (auto *ZExt = dyn_cast(LHS)) { const SCEVAddRecExpr *AR = dyn_cast(ZExt->getOperand()); if (AR && AR->getLoop() == L && AR->isAffine()) { auto canProveNUW = [&]() { // We can use the comparison to infer no-wrap flags only if it fully // controls the loop exit. if (!ControlsOnlyExit) return false; if (!isLoopInvariant(RHS, L)) return false; if (!isKnownNonZero(AR->getStepRecurrence(*this))) // We need the sequence defined by AR to strictly increase in the // unsigned integer domain for the logic below to hold. return false; const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType()); const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType()); // If RHS <=u Limit, then there must exist a value V in the sequence // defined by AR (e.g. {Start,+,Step}) such that V >u RHS, and // V <=u UINT_MAX. Thus, we must exit the loop before unsigned // overflow occurs. This limit also implies that a signed comparison // (in the wide bitwidth) is equivalent to an unsigned comparison as // the high bits on both sides must be zero. APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this)); APInt Limit = APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1); Limit = Limit.zext(OuterBitWidth); return getUnsignedRangeMax(applyLoopGuards(RHS, L)).ule(Limit); }; auto Flags = AR->getNoWrapFlags(); if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW()) Flags = setFlags(Flags, SCEV::FlagNUW); setNoWrapFlags(const_cast(AR), Flags); if (AR->hasNoUnsignedWrap()) { // Emulate what getZeroExtendExpr would have done during construction // if we'd been able to infer the fact just above at that time. 
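          // That is, rewrite zext({Start,+,Step}) as
          // {zext(Start),+,zext(Step)} in the wider type, which is valid
          // now that the recurrence is known not to wrap unsigned.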
const SCEV *Step = AR->getStepRecurrence(*this); Type *Ty = ZExt->getType(); auto *S = getAddRecExpr( getExtendAddRecStart(AR, Ty, this, 0), getZeroExtendExpr(Step, Ty, 0), L, AR->getNoWrapFlags()); IV = dyn_cast(S); } } } } if (!IV && AllowPredicates) { // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the // algorithm below. IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); PredicatedIV = true; } // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); // A precondition of this method is that the condition being analyzed // reaches an exiting branch which dominates the latch. Given that, we can // assume that an increment which violates the nowrap specification and // produces poison must cause undefined behavior when the resulting poison // value is branched upon and thus we can conclude that the backedge is // taken no more often than would be required to produce that poison value. // Note that a well defined loop can exit on the iteration which violates // the nowrap specification if there is another exit (either explicit or // implicit/exceptional) which causes the loop to execute before the // exiting instruction we're analyzing would trigger UB. auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW; bool NoWrap = ControlsOnlyExit && IV->getNoWrapFlags(WrapType); ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; const SCEV *Stride = IV->getStepRecurrence(*this); bool PositiveStride = isKnownPositive(Stride); // Avoid negative or zero stride values. if (!PositiveStride) { // We can compute the correct backedge taken count for loops with unknown // strides if we can prove that the loop is not an infinite loop with side // effects. Here's the loop structure we are trying to handle - // // i = start // do { // A[i] = i; // i += s; // } while (i < end); // // The backedge taken count for such loops is evaluated as - // (max(end, start + stride) - start - 1) /u stride // // The additional preconditions that we need to check to prove correctness // of the above formula is as follows - // // a) IV is either nuw or nsw depending upon signedness (indicated by the // NoWrap flag). // b) the loop is guaranteed to be finite (e.g. is mustprogress and has // no side effects within the loop) // c) loop has a single static exit (with no abnormal exits) // // Precondition a) implies that if the stride is negative, this is a single // trip loop. The backedge taken count formula reduces to zero in this case. // // Precondition b) and c) combine to imply that if rhs is invariant in L, // then a zero stride means the backedge can't be taken without executing // undefined behavior. // // The positive stride case is the same as isKnownPositive(Stride) returning // true (original behavior of the function). // if (PredicatedIV || !NoWrap || !loopIsFiniteByAssumption(L) || !loopHasNoAbnormalExits(L)) return getCouldNotCompute(); if (!isKnownNonZero(Stride)) { // If we have a step of zero, and RHS isn't invariant in L, we don't know // if it might eventually be greater than start and if so, on which // iteration. We can't even produce a useful upper bound. if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); // We allow a potentially zero stride, but we need to divide by stride // below. Since the loop can't be infinite and this check must control // the sole exit, we can infer the exit must be taken on the first // iteration (e.g. 
backedge count = 0) if the stride is zero. Given that, // we know the numerator in the divides below must be zero, so we can // pick an arbitrary non-zero value for the denominator (e.g. stride) // and produce the right result. // FIXME: Handle the case where Stride is poison? auto wouldZeroStrideBeUB = [&]() { // Proof by contradiction. Suppose the stride were zero. If we can // prove that the backedge *is* taken on the first iteration, then since // we know this condition controls the sole exit, we must have an // infinite loop. We can't have a (well defined) infinite loop per // check just above. // Note: The (Start - Stride) term is used to get the start' term from // (start' + stride,+,stride). Remember that we only care about the // result of this expression when stride == 0 at runtime. auto *StartIfZero = getMinusSCEV(IV->getStart(), Stride); return isLoopEntryGuardedByCond(L, Cond, StartIfZero, RHS); }; if (!wouldZeroStrideBeUB()) { Stride = getUMaxExpr(Stride, getOne(Stride->getType())); } } } else if (!Stride->isOne() && !NoWrap) { auto isUBOnWrap = [&]() { // From no-self-wrap, we need to then prove no-(un)signed-wrap. This // follows trivially from the fact that every (un)signed-wrapped, but // not self-wrapped value must be LT than the last value before // (un)signed wrap. Since we know that last value didn't exit, nor // will any smaller one. return canAssumeNoSelfWrap(IV); }; // Avoid proven overflow cases: this will ensure that the backedge taken // count will not generate any unsigned overflow. Relaxed no-overflow // conditions exploit NoWrapFlags, allowing to optimize in presence of // undefined behaviors like the case of C language. if (canIVOverflowOnLT(RHS, Stride, IsSigned) && !isUBOnWrap()) return getCouldNotCompute(); } // On all paths just preceeding, we established the following invariant: // IV can be assumed not to overflow up to and including the exiting // iteration. We proved this in one of two ways: // 1) We can show overflow doesn't occur before the exiting iteration // 1a) canIVOverflowOnLT, and b) step of one // 2) We can show that if overflow occurs, the loop must execute UB // before any possible exit. // Note that we have not yet proved RHS invariant (in general). const SCEV *Start = IV->getStart(); // Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond. // If we convert to integers, isLoopEntryGuardedByCond will miss some cases. // Use integer-typed versions for actual computation; we can't subtract // pointers in general. const SCEV *OrigStart = Start; const SCEV *OrigRHS = RHS; if (Start->getType()->isPointerTy()) { Start = getLosslessPtrToIntExpr(Start); if (isa(Start)) return Start; } if (RHS->getType()->isPointerTy()) { RHS = getLosslessPtrToIntExpr(RHS); if (isa(RHS)) return RHS; } // When the RHS is not invariant, we do not know the end bound of the loop and // cannot calculate the ExactBECount needed by ExitLimit. However, we can // calculate the MaxBECount, given the start, stride and max value for the end // bound of the loop (RHS), and the fact that IV does not overflow (which is // checked above). 
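  // For example (hypothetical i8 loop): for an IV {0,+,2} compared u< a
  // varying bound whose unsigned maximum is 100, computeMaxBECountForLT
  // returns ceil((100 - 0) / 2) = 50 even though the exact count is unknown.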
if (!isLoopInvariant(RHS, L)) { const SCEV *MaxBECount = computeMaxBECountForLT( Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, MaxBECount, false /*MaxOrZero*/, Predicates); } // We use the expression (max(End,Start)-Start)/Stride to describe the // backedge count, as if the backedge is taken at least once max(End,Start) // is End and so the result is as above, and if not max(End,Start) is Start // so we get a backedge count of zero. const SCEV *BECount = nullptr; auto *OrigStartMinusStride = getMinusSCEV(OrigStart, Stride); assert(isAvailableAtLoopEntry(OrigStartMinusStride, L) && "Must be!"); assert(isAvailableAtLoopEntry(OrigStart, L) && "Must be!"); assert(isAvailableAtLoopEntry(OrigRHS, L) && "Must be!"); // Can we prove (max(RHS,Start) > Start - Stride? if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart) && isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigRHS)) { // In this case, we can use a refined formula for computing backedge taken // count. The general formula remains: // "End-Start /uceiling Stride" where "End = max(RHS,Start)" // We want to use the alternate formula: // "((End - 1) - (Start - Stride)) /u Stride" // Let's do a quick case analysis to show these are equivalent under // our precondition that max(RHS,Start) > Start - Stride. // * For RHS <= Start, the backedge-taken count must be zero. // "((End - 1) - (Start - Stride)) /u Stride" reduces to // "((Start - 1) - (Start - Stride)) /u Stride" which simplies to // "Stride - 1 /u Stride" which is indeed zero for all non-zero values // of Stride. For 0 stride, we've use umin(1,Stride) above, reducing // this to the stride of 1 case. // * For RHS >= Start, the backedge count must be "RHS-Start /uceil Stride". // "((End - 1) - (Start - Stride)) /u Stride" reduces to // "((RHS - 1) - (Start - Stride)) /u Stride" reassociates to // "((RHS - (Start - Stride) - 1) /u Stride". // Our preconditions trivially imply no overflow in that form. const SCEV *MinusOne = getMinusOne(Stride->getType()); const SCEV *Numerator = getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride)); BECount = getUDivExpr(Numerator, Stride); } const SCEV *BECountIfBackedgeTaken = nullptr; if (!BECount) { auto canProveRHSGreaterThanEqualStart = [&]() { auto CondGE = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; const SCEV *GuardedRHS = applyLoopGuards(OrigRHS, L); const SCEV *GuardedStart = applyLoopGuards(OrigStart, L); if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart) || isKnownPredicate(CondGE, GuardedRHS, GuardedStart)) return true; // (RHS > Start - 1) implies RHS >= Start. // * "RHS >= Start" is trivially equivalent to "RHS > Start - 1" if // "Start - 1" doesn't overflow. // * For signed comparison, if Start - 1 does overflow, it's equal // to INT_MAX, and "RHS >s INT_MAX" is trivially false. // * For unsigned comparison, if Start - 1 does overflow, it's equal // to UINT_MAX, and "RHS >u UINT_MAX" is trivially false. // // FIXME: Should isLoopEntryGuardedByCond do this for us? auto CondGT = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; auto *StartMinusOne = getAddExpr(OrigStart, getMinusOne(OrigStart->getType())); return isLoopEntryGuardedByCond(L, CondGT, OrigRHS, StartMinusOne); }; // If we know that RHS >= Start in the context of loop, then we know that // max(RHS, Start) = RHS at this point. 
const SCEV *End; if (canProveRHSGreaterThanEqualStart()) { End = RHS; } else { // If RHS < Start, the backedge will be taken zero times. So in // general, we can write the backedge-taken count as: // // RHS >= Start ? ceil(RHS - Start) / Stride : 0 // // We convert it to the following to make it more convenient for SCEV: // // ceil(max(RHS, Start) - Start) / Stride End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); // See what would happen if we assume the backedge is taken. This is // used to compute MaxBECount. BECountIfBackedgeTaken = getUDivCeilSCEV(getMinusSCEV(RHS, Start), Stride); } // At this point, we know: // // 1. If IsSigned, Start <=s End; otherwise, Start <=u End // 2. The index variable doesn't overflow. // // Therefore, we know N exists such that // (Start + Stride * N) >= End, and computing "(Start + Stride * N)" // doesn't overflow. // // Using this information, try to prove whether the addition in // "(Start - End) + (Stride - 1)" has unsigned overflow. const SCEV *One = getOne(Stride->getType()); bool MayAddOverflow = [&] { if (auto *StrideC = dyn_cast(Stride)) { if (StrideC->getAPInt().isPowerOf2()) { // Suppose Stride is a power of two, and Start/End are unsigned // integers. Let UMAX be the largest representable unsigned // integer. // // By the preconditions of this function, we know // "(Start + Stride * N) >= End", and this doesn't overflow. // As a formula: // // End <= (Start + Stride * N) <= UMAX // // Subtracting Start from all the terms: // // End - Start <= Stride * N <= UMAX - Start // // Since Start is unsigned, UMAX - Start <= UMAX. Therefore: // // End - Start <= Stride * N <= UMAX // // Stride * N is a multiple of Stride. Therefore, // // End - Start <= Stride * N <= UMAX - (UMAX mod Stride) // // Since Stride is a power of two, UMAX + 1 is divisible by Stride. // Therefore, UMAX mod Stride == Stride - 1. So we can write: // // End - Start <= Stride * N <= UMAX - Stride - 1 // // Dropping the middle term: // // End - Start <= UMAX - Stride - 1 // // Adding Stride - 1 to both sides: // // (End - Start) + (Stride - 1) <= UMAX // // In other words, the addition doesn't have unsigned overflow. // // A similar proof works if we treat Start/End as signed values. // Just rewrite steps before "End - Start <= Stride * N <= UMAX" to // use signed max instead of unsigned max. Note that we're trying // to prove a lack of unsigned overflow in either case. return false; } } if (Start == Stride || Start == getMinusSCEV(Stride, One)) { // If Start is equal to Stride, (End - Start) + (Stride - 1) == End - 1. // If !IsSigned, 0 (BECount)) { ConstantMaxBECount = BECount; } else if (BECountIfBackedgeTaken && isa(BECountIfBackedgeTaken)) { // If we know exactly how many times the backedge will be taken if it's // taken at least once, then the backedge count will either be that or // zero. ConstantMaxBECount = BECountIfBackedgeTaken; MaxOrZero = true; } else { ConstantMaxBECount = computeMaxBECountForLT( Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); } if (isa(ConstantMaxBECount) && !isa(BECount)) ConstantMaxBECount = getConstant(getUnsignedRangeMax(BECount)); const SCEV *SymbolicMaxBECount = isa(BECount) ? 
ConstantMaxBECount : BECount; return ExitLimit(BECount, ConstantMaxBECount, SymbolicMaxBECount, MaxOrZero, Predicates); } ScalarEvolution::ExitLimit ScalarEvolution::howManyGreaterThans( const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsOnlyExit, bool AllowPredicates) { SmallPtrSet Predicates; // We handle only IV > Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); if (!IV && AllowPredicates) // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the // algorithm below. IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); // Avoid weird loops if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW; bool NoWrap = ControlsOnlyExit && IV->getNoWrapFlags(WrapType); ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); // Avoid negative or zero stride values if (!isKnownPositive(Stride)) return getCouldNotCompute(); // Avoid proven overflow cases: this will ensure that the backedge taken count // will not generate any unsigned overflow. Relaxed no-overflow conditions // exploit NoWrapFlags, allowing to optimize in presence of undefined // behaviors like the case of C language. if (!Stride->isOne() && !NoWrap) if (canIVOverflowOnGT(RHS, Stride, IsSigned)) return getCouldNotCompute(); const SCEV *Start = IV->getStart(); const SCEV *End = RHS; if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) { // If we know that Start >= RHS in the context of loop, then we know that // min(RHS, Start) = RHS at this point. if (isLoopEntryGuardedByCond( L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, Start, RHS)) End = RHS; else End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start); } if (Start->getType()->isPointerTy()) { Start = getLosslessPtrToIntExpr(Start); if (isa(Start)) return Start; } if (End->getType()->isPointerTy()) { End = getLosslessPtrToIntExpr(End); if (isa(End)) return End; } // Compute ((Start - End) + (Stride - 1)) / Stride. // FIXME: This can overflow. Holding off on fixing this for now; // howManyGreaterThans will hopefully be gone soon. const SCEV *One = getOne(Stride->getType()); const SCEV *BECount = getUDivExpr( getAddExpr(getMinusSCEV(Start, End), getMinusSCEV(Stride, One)), Stride); APInt MaxStart = IsSigned ? getSignedRangeMax(Start) : getUnsignedRangeMax(Start); APInt MinStride = IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride); unsigned BitWidth = getTypeSizeInBits(LHS->getType()); APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) : APInt::getMinValue(BitWidth) + (MinStride - 1); // Although End can be a MIN expression we estimate MinEnd considering only // the case End = RHS. This is safe because in the other case (Start - End) // is zero, leading to a zero maximum backedge taken count. APInt MinEnd = IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit) : APIntOps::umax(getUnsignedRangeMin(RHS), Limit); const SCEV *ConstantMaxBECount = isa(BECount) ? BECount : getUDivCeilSCEV(getConstant(MaxStart - MinEnd), getConstant(MinStride)); if (isa(ConstantMaxBECount)) ConstantMaxBECount = BECount; const SCEV *SymbolicMaxBECount = isa(BECount) ? 
ConstantMaxBECount : BECount; return ExitLimit(BECount, ConstantMaxBECount, SymbolicMaxBECount, false, Predicates); } const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, ScalarEvolution &SE) const { if (Range.isFullSet()) // Infinite loop. return SE.getCouldNotCompute(); // If the start is a non-zero constant, shift the range to simplify things. if (const SCEVConstant *SC = dyn_cast(getStart())) if (!SC->getValue()->isZero()) { SmallVector Operands(operands()); Operands[0] = SE.getZero(SC->getType()); const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), getNoWrapFlags(FlagNW)); if (const auto *ShiftedAddRec = dyn_cast(Shifted)) return ShiftedAddRec->getNumIterationsInRange( Range.subtract(SC->getAPInt()), SE); // This is strange and shouldn't happen. return SE.getCouldNotCompute(); } // The only time we can solve this is when we have all constant indices. // Otherwise, we cannot determine the overflow conditions. if (any_of(operands(), [](const SCEV *Op) { return !isa(Op); })) return SE.getCouldNotCompute(); // Okay at this point we know that all elements of the chrec are constants and // that the start element is zero. // First check to see if the range contains zero. If not, the first // iteration exits. unsigned BitWidth = SE.getTypeSizeInBits(getType()); if (!Range.contains(APInt(BitWidth, 0))) return SE.getZero(getType()); if (isAffine()) { // If this is an affine expression then we have this situation: // Solve {0,+,A} in Range === Ax in Range // We know that zero is in the range. If A is positive then we know that // the upper value of the range must be the first possible exit value. // If A is negative then the lower of the range is the last possible loop // value. Also note that we already checked for a full range. APInt A = cast(getOperand(1))->getAPInt(); APInt End = A.sge(1) ? (Range.getUpper() - 1) : Range.getLower(); // The exit value should be (End+A)/A. APInt ExitVal = (End + A).udiv(A); ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); // Evaluate at the exit value. If we really did fall out of the valid // range, then we computed our trip count, otherwise wrap around or other // things must have happened. ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE); if (Range.contains(Val->getValue())) return SE.getCouldNotCompute(); // Something strange happened // Ensure that the previous value is in the range. assert(Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) && "Linear scev computation is off in a bad way!"); return SE.getConstant(ExitValue); } if (isQuadratic()) { if (auto S = SolveQuadraticAddRecRange(this, Range, SE)) return SE.getConstant(*S); } return SE.getCouldNotCompute(); } const SCEVAddRecExpr * SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const { assert(getNumOperands() > 1 && "AddRec with zero step?"); // There is a temptation to just call getAddExpr(this, getStepRecurrence(SE)), // but in this case we cannot guarantee that the value returned will be an // AddRec because SCEV does not have a fixed point where it stops // simplification: it is legal to return ({rec1} + {rec2}). For example, it // may happen if we reach arithmetic depth limit while simplifying. So we // construct the returned value explicitly. SmallVector Ops; // If this is {A,+,B,+,C,...,+,N}, then its step is {B,+,C,+,...,+,N}, and // (this + Step) is {A+B,+,B+C,+...,+,N}. 
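  // A minimal compile-time sketch of the identity described above, using a
  // quadratic chrec {A,+,B,+,C} whose value at iteration n is
  // A + B*n + C*n*(n-1)/2: the post-increment chrec {A+B,+,B+C,+,C}
  // evaluated at n matches the original evaluated at n+1. ChrecAt is only a
  // local helper for this sketch and the constants are arbitrary.
  constexpr auto ChrecAt = [](unsigned A, unsigned B, unsigned C, unsigned n) {
    return A + B * n + C * n * (n - 1) / 2;
  };
  static_assert(ChrecAt(3 + 5, 5 + 2, 2, 6) == ChrecAt(3, 5, 2, 7),
                "post-increment chrec at n equals original chrec at n+1");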
for (unsigned i = 0, e = getNumOperands() - 1; i < e; ++i) Ops.push_back(SE.getAddExpr(getOperand(i), getOperand(i + 1))); // We know that the last operand is not a constant zero (otherwise it would // have been popped out earlier). This guarantees us that if the result has // the same last operand, then it will also not be popped out, meaning that // the returned value will be an AddRec. const SCEV *Last = getOperand(getNumOperands() - 1); assert(!Last->isZero() && "Recurrency with zero step?"); Ops.push_back(Last); return cast(SE.getAddRecExpr(Ops, getLoop(), SCEV::FlagAnyWrap)); } // Return true when S contains at least an undef value. bool ScalarEvolution::containsUndefs(const SCEV *S) const { return SCEVExprContains(S, [](const SCEV *S) { if (const auto *SU = dyn_cast(S)) return isa(SU->getValue()); return false; }); } // Return true when S contains a value that is a nullptr. bool ScalarEvolution::containsErasedValue(const SCEV *S) const { return SCEVExprContains(S, [](const SCEV *S) { if (const auto *SU = dyn_cast(S)) return SU->getValue() == nullptr; return false; }); } /// Return the size of an element read or written by Inst. const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { Type *Ty; if (StoreInst *Store = dyn_cast(Inst)) Ty = Store->getValueOperand()->getType(); else if (LoadInst *Load = dyn_cast(Inst)) Ty = Load->getType(); else return nullptr; Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty)); return getSizeOfExpr(ETy, Ty); } //===----------------------------------------------------------------------===// // SCEVCallbackVH Class Implementation //===----------------------------------------------------------------------===// void ScalarEvolution::SCEVCallbackVH::deleted() { assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); if (PHINode *PN = dyn_cast(getValPtr())) SE->ConstantEvolutionLoopExitValue.erase(PN); SE->eraseValueFromMap(getValPtr()); // this now dangles! } void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); // Forget all the expressions associated with users of the old value, // so that future queries will recompute the expressions using the new // value. SE->forgetValue(getValPtr()); // this now dangles! } ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) : CallbackVH(V), SE(se) {} //===----------------------------------------------------------------------===// // ScalarEvolution Class Implementation //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI) : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64) { // To use guards for proving predicates, we need to scan every instruction in // relevant basic blocks, and not just terminators. Doing this is a waste of // time if the IR does not actually contain any calls to // @llvm.experimental.guard, so do a quick check and remember this beforehand. // // This pessimizes the case where a pass that preserves ScalarEvolution wants // to _add_ guards to the module when there weren't any before, and wants // ScalarEvolution to optimize based on those guards. For now we prefer to be // efficient in lieu of being smart in that rather obscure case. 
auto *GuardDecl = F.getParent()->getFunction( Intrinsic::getName(Intrinsic::experimental_guard)); HasGuards = GuardDecl && !GuardDecl->use_empty(); } ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)), ValueExprMap(std::move(Arg.ValueExprMap)), PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), PendingPhiRanges(std::move(Arg.PendingPhiRanges)), PendingMerges(std::move(Arg.PendingMerges)), ConstantMultipleCache(std::move(Arg.ConstantMultipleCache)), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( std::move(Arg.PredicatedBackedgeTakenCounts)), BECountUsers(std::move(Arg.BECountUsers)), ConstantEvolutionLoopExitValue( std::move(Arg.ConstantEvolutionLoopExitValue)), ValuesAtScopes(std::move(Arg.ValuesAtScopes)), ValuesAtScopesUsers(std::move(Arg.ValuesAtScopesUsers)), LoopDispositions(std::move(Arg.LoopDispositions)), LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)), BlockDispositions(std::move(Arg.BlockDispositions)), SCEVUsers(std::move(Arg.SCEVUsers)), UnsignedRanges(std::move(Arg.UnsignedRanges)), SignedRanges(std::move(Arg.SignedRanges)), UniqueSCEVs(std::move(Arg.UniqueSCEVs)), UniquePreds(std::move(Arg.UniquePreds)), SCEVAllocator(std::move(Arg.SCEVAllocator)), LoopUsers(std::move(Arg.LoopUsers)), PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)), FirstUnknown(Arg.FirstUnknown) { Arg.FirstUnknown = nullptr; } ScalarEvolution::~ScalarEvolution() { // Iterate through all the SCEVUnknown instances and call their // destructors, so that they release their references to their values. for (SCEVUnknown *U = FirstUnknown; U;) { SCEVUnknown *Tmp = U; U = U->Next; Tmp->~SCEVUnknown(); } FirstUnknown = nullptr; ExprValueMap.clear(); ValueExprMap.clear(); HasRecMap.clear(); BackedgeTakenCounts.clear(); PredicatedBackedgeTakenCounts.clear(); assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); assert(PendingPhiRanges.empty() && "getRangeRef garbage"); assert(PendingMerges.empty() && "isImpliedViaMerge garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!"); } bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { return !isa(getBackedgeTakenCount(L)); } static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, const Loop *L) { // Print all inner loops first for (Loop *I : *L) PrintLoopInfo(OS, SE, I); OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); if (ExitingBlocks.size() != 1) OS << " "; if (SE->hasLoopInvariantBackedgeTakenCount(L)) OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n"; else OS << "Unpredictable backedge-taken count.\n"; if (ExitingBlocks.size() > 1) for (BasicBlock *ExitingBlock : ExitingBlocks) { OS << " exit count for " << ExitingBlock->getName() << ": " << *SE->getExitCount(L, ExitingBlock) << "\n"; } OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; auto *ConstantBTC = SE->getConstantMaxBackedgeTakenCount(L); if (!isa(ConstantBTC)) { OS << "constant max backedge-taken count is " << *ConstantBTC; if (SE->isBackedgeTakenCountMaxOrZero(L)) OS << ", actual taken count either this or zero."; } else { OS << "Unpredictable constant max backedge-taken count. 
"; } OS << "\n" "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; auto *SymbolicBTC = SE->getSymbolicMaxBackedgeTakenCount(L); if (!isa(SymbolicBTC)) { OS << "symbolic max backedge-taken count is " << *SymbolicBTC; if (SE->isBackedgeTakenCountMaxOrZero(L)) OS << ", actual taken count either this or zero."; } else { OS << "Unpredictable symbolic max backedge-taken count. "; } OS << "\n"; if (ExitingBlocks.size() > 1) for (BasicBlock *ExitingBlock : ExitingBlocks) { OS << " symbolic max exit count for " << ExitingBlock->getName() << ": " << *SE->getExitCount(L, ExitingBlock, ScalarEvolution::SymbolicMaximum) << "\n"; } OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; SmallVector Preds; auto PBT = SE->getPredicatedBackedgeTakenCount(L, Preds); if (!isa(PBT)) { OS << "Predicated backedge-taken count is " << *PBT << "\n"; OS << " Predicates:\n"; for (const auto *P : Preds) P->print(OS, 4); } else { OS << "Unpredictable predicated backedge-taken count.\n"; } if (SE->hasLoopInvariantBackedgeTakenCount(L)) { OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n"; } } namespace llvm { raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::LoopDisposition LD) { switch (LD) { case ScalarEvolution::LoopVariant: OS << "Variant"; break; case ScalarEvolution::LoopInvariant: OS << "Invariant"; break; case ScalarEvolution::LoopComputable: OS << "Computable"; break; } return OS; } raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::BlockDisposition BD) { switch (BD) { case ScalarEvolution::DoesNotDominateBlock: OS << "DoesNotDominate"; break; case ScalarEvolution::DominatesBlock: OS << "Dominates"; break; case ScalarEvolution::ProperlyDominatesBlock: OS << "ProperlyDominates"; break; } return OS; } } void ScalarEvolution::print(raw_ostream &OS) const { // ScalarEvolution's implementation of the print method is to print // out SCEV values of all instructions that are interesting. Doing // this potentially causes it to create new SCEV objects though, // which technically conflicts with the const qualifier. This isn't // observable from outside the class though, so casting away the // const isn't dangerous. 
ScalarEvolution &SE = *const_cast(this); if (ClassifyExpressions) { OS << "Classifying expressions for: "; F.printAsOperand(OS, /*PrintType=*/false); OS << "\n"; for (Instruction &I : instructions(F)) if (isSCEVable(I.getType()) && !isa(I)) { OS << I << '\n'; OS << " --> "; const SCEV *SV = SE.getSCEV(&I); SV->print(OS); if (!isa(SV)) { OS << " U: "; SE.getUnsignedRange(SV).print(OS); OS << " S: "; SE.getSignedRange(SV).print(OS); } const Loop *L = LI.getLoopFor(I.getParent()); const SCEV *AtUse = SE.getSCEVAtScope(SV, L); if (AtUse != SV) { OS << " --> "; AtUse->print(OS); if (!isa(AtUse)) { OS << " U: "; SE.getUnsignedRange(AtUse).print(OS); OS << " S: "; SE.getSignedRange(AtUse).print(OS); } } if (L) { OS << "\t\t" "Exits: "; const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); if (!SE.isLoopInvariant(ExitValue, L)) { OS << "<>"; } else { OS << *ExitValue; } bool First = true; for (const auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { if (First) { OS << "\t\t" "LoopDispositions: { "; First = false; } else { OS << ", "; } Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": " << SE.getLoopDisposition(SV, Iter); } for (const auto *InnerL : depth_first(L)) { if (InnerL == L) continue; if (First) { OS << "\t\t" "LoopDispositions: { "; First = false; } else { OS << ", "; } InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": " << SE.getLoopDisposition(SV, InnerL); } OS << " }"; } OS << "\n"; } } OS << "Determining loop execution counts for: "; F.printAsOperand(OS, /*PrintType=*/false); OS << "\n"; for (Loop *I : LI) PrintLoopInfo(OS, &SE, I); } ScalarEvolution::LoopDisposition ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { auto &Values = LoopDispositions[S]; for (auto &V : Values) { if (V.getPointer() == L) return V.getInt(); } Values.emplace_back(L, LoopVariant); LoopDisposition D = computeLoopDisposition(S, L); auto &Values2 = LoopDispositions[S]; for (auto &V : llvm::reverse(Values2)) { if (V.getPointer() == L) { V.setInt(D); break; } } return D; } ScalarEvolution::LoopDisposition ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { switch (S->getSCEVType()) { case scConstant: case scVScale: return LoopInvariant; case scAddRecExpr: { const SCEVAddRecExpr *AR = cast(S); // If L is the addrec's loop, it's computable. if (AR->getLoop() == L) return LoopComputable; // Add recurrences are never invariant in the function-body (null loop). if (!L) return LoopVariant; // Everything that is not defined at loop entry is variant. if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader())) return LoopVariant; assert(!L->contains(AR->getLoop()) && "Containing loop's header does not" " dominate the contained loop's header?"); // This recurrence is invariant w.r.t. L if AR's loop contains L. if (AR->getLoop()->contains(L)) return LoopInvariant; // This recurrence is variant w.r.t. L if any of its operands // are variant. for (const auto *Op : AR->operands()) if (!isLoopInvariant(Op, L)) return LoopVariant; // Otherwise it's loop-invariant. return LoopInvariant; } case scTruncate: case scZeroExtend: case scSignExtend: case scPtrToInt: case scAddExpr: case scMulExpr: case scUDivExpr: case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: case scSequentialUMinExpr: { bool HasVarying = false; for (const auto *Op : S->operands()) { LoopDisposition D = getLoopDisposition(Op, L); if (D == LoopVariant) return LoopVariant; if (D == LoopComputable) HasVarying = true; } return HasVarying ? 
LoopComputable : LoopInvariant; } case scUnknown: // All non-instruction values are loop invariant. All instructions are loop // invariant if they are not contained in the specified loop. // Instructions are never considered invariant in the function body // (null loop) because they are defined within the "loop". if (auto *I = dyn_cast(cast(S)->getValue())) return (L && !L->contains(I)) ? LoopInvariant : LoopVariant; return LoopInvariant; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { return getLoopDisposition(S, L) == LoopInvariant; } bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { return getLoopDisposition(S, L) == LoopComputable; } ScalarEvolution::BlockDisposition ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { auto &Values = BlockDispositions[S]; for (auto &V : Values) { if (V.getPointer() == BB) return V.getInt(); } Values.emplace_back(BB, DoesNotDominateBlock); BlockDisposition D = computeBlockDisposition(S, BB); auto &Values2 = BlockDispositions[S]; for (auto &V : llvm::reverse(Values2)) { if (V.getPointer() == BB) { V.setInt(D); break; } } return D; } ScalarEvolution::BlockDisposition ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { switch (S->getSCEVType()) { case scConstant: case scVScale: return ProperlyDominatesBlock; case scAddRecExpr: { // This uses a "dominates" query instead of "properly dominates" query // to test for proper dominance too, because the instruction which // produces the addrec's value is a PHI, and a PHI effectively properly // dominates its entire containing block. const SCEVAddRecExpr *AR = cast(S); if (!DT.dominates(AR->getLoop()->getHeader(), BB)) return DoesNotDominateBlock; // Fall through into SCEVNAryExpr handling. [[fallthrough]]; } case scTruncate: case scZeroExtend: case scSignExtend: case scPtrToInt: case scAddExpr: case scMulExpr: case scUDivExpr: case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: case scSequentialUMinExpr: { bool Proper = true; for (const SCEV *NAryOp : S->operands()) { BlockDisposition D = getBlockDisposition(NAryOp, BB); if (D == DoesNotDominateBlock) return DoesNotDominateBlock; if (D == DominatesBlock) Proper = false; } return Proper ? ProperlyDominatesBlock : DominatesBlock; } case scUnknown: if (Instruction *I = dyn_cast(cast(S)->getValue())) { if (I->getParent() == BB) return DominatesBlock; if (DT.properlyDominates(I->getParent(), BB)) return ProperlyDominatesBlock; return DoesNotDominateBlock; } return ProperlyDominatesBlock; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unknown SCEV kind!"); } bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) >= DominatesBlock; } bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) == ProperlyDominatesBlock; } bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; }); } void ScalarEvolution::forgetBackedgeTakenCounts(const Loop *L, bool Predicated) { auto &BECounts = Predicated ? 
PredicatedBackedgeTakenCounts : BackedgeTakenCounts; auto It = BECounts.find(L); if (It != BECounts.end()) { for (const ExitNotTakenInfo &ENT : It->second.ExitNotTaken) { for (const SCEV *S : {ENT.ExactNotTaken, ENT.SymbolicMaxNotTaken}) { if (!isa(S)) { auto UserIt = BECountUsers.find(S); assert(UserIt != BECountUsers.end()); UserIt->second.erase({L, Predicated}); } } } BECounts.erase(It); } } void ScalarEvolution::forgetMemoizedResults(ArrayRef SCEVs) { SmallPtrSet ToForget(SCEVs.begin(), SCEVs.end()); SmallVector Worklist(ToForget.begin(), ToForget.end()); while (!Worklist.empty()) { const SCEV *Curr = Worklist.pop_back_val(); auto Users = SCEVUsers.find(Curr); if (Users != SCEVUsers.end()) for (const auto *User : Users->second) if (ToForget.insert(User).second) Worklist.push_back(User); } for (const auto *S : ToForget) forgetMemoizedResultsImpl(S); for (auto I = PredicatedSCEVRewrites.begin(); I != PredicatedSCEVRewrites.end();) { std::pair Entry = I->first; if (ToForget.count(Entry.first)) PredicatedSCEVRewrites.erase(I++); else ++I; } } void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) { LoopDispositions.erase(S); BlockDispositions.erase(S); UnsignedRanges.erase(S); SignedRanges.erase(S); HasRecMap.erase(S); ConstantMultipleCache.erase(S); if (auto *AR = dyn_cast(S)) { UnsignedWrapViaInductionTried.erase(AR); SignedWrapViaInductionTried.erase(AR); } auto ExprIt = ExprValueMap.find(S); if (ExprIt != ExprValueMap.end()) { for (Value *V : ExprIt->second) { auto ValueIt = ValueExprMap.find_as(V); if (ValueIt != ValueExprMap.end()) ValueExprMap.erase(ValueIt); } ExprValueMap.erase(ExprIt); } auto ScopeIt = ValuesAtScopes.find(S); if (ScopeIt != ValuesAtScopes.end()) { for (const auto &Pair : ScopeIt->second) if (!isa_and_nonnull(Pair.second)) llvm::erase(ValuesAtScopesUsers[Pair.second], std::make_pair(Pair.first, S)); ValuesAtScopes.erase(ScopeIt); } auto ScopeUserIt = ValuesAtScopesUsers.find(S); if (ScopeUserIt != ValuesAtScopesUsers.end()) { for (const auto &Pair : ScopeUserIt->second) llvm::erase(ValuesAtScopes[Pair.second], std::make_pair(Pair.first, S)); ValuesAtScopesUsers.erase(ScopeUserIt); } auto BEUsersIt = BECountUsers.find(S); if (BEUsersIt != BECountUsers.end()) { // Work on a copy, as forgetBackedgeTakenCounts() will modify the original. auto Copy = BEUsersIt->second; for (const auto &Pair : Copy) forgetBackedgeTakenCounts(Pair.getPointer(), Pair.getInt()); BECountUsers.erase(BEUsersIt); } auto FoldUser = FoldCacheUser.find(S); if (FoldUser != FoldCacheUser.end()) for (auto &KV : FoldUser->second) FoldCache.erase(KV); FoldCacheUser.erase(S); } void ScalarEvolution::getUsedLoops(const SCEV *S, SmallPtrSetImpl &LoopsUsed) { struct FindUsedLoops { FindUsedLoops(SmallPtrSetImpl &LoopsUsed) : LoopsUsed(LoopsUsed) {} SmallPtrSetImpl &LoopsUsed; bool follow(const SCEV *S) { if (auto *AR = dyn_cast(S)) LoopsUsed.insert(AR->getLoop()); return true; } bool isDone() const { return false; } }; FindUsedLoops F(LoopsUsed); SCEVTraversal(F).visitAll(S); } void ScalarEvolution::getReachableBlocks( SmallPtrSetImpl &Reachable, Function &F) { SmallVector Worklist; Worklist.push_back(&F.getEntryBlock()); while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); if (!Reachable.insert(BB).second) continue; Value *Cond; BasicBlock *TrueBB, *FalseBB; if (match(BB->getTerminator(), m_Br(m_Value(Cond), m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) { if (auto *C = dyn_cast(Cond)) { Worklist.push_back(C->isOne() ? 
TrueBB : FalseBB); continue; } if (auto *Cmp = dyn_cast(Cond)) { const SCEV *L = getSCEV(Cmp->getOperand(0)); const SCEV *R = getSCEV(Cmp->getOperand(1)); if (isKnownPredicateViaConstantRanges(Cmp->getPredicate(), L, R)) { Worklist.push_back(TrueBB); continue; } if (isKnownPredicateViaConstantRanges(Cmp->getInversePredicate(), L, R)) { Worklist.push_back(FalseBB); continue; } } } append_range(Worklist, successors(BB)); } } void ScalarEvolution::verify() const { ScalarEvolution &SE = *const_cast(this); ScalarEvolution SE2(F, TLI, AC, DT, LI); SmallVector LoopStack(LI.begin(), LI.end()); // Map's SCEV expressions from one ScalarEvolution "universe" to another. struct SCEVMapper : public SCEVRewriteVisitor { SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {} const SCEV *visitConstant(const SCEVConstant *Constant) { return SE.getConstant(Constant->getAPInt()); } const SCEV *visitUnknown(const SCEVUnknown *Expr) { return SE.getUnknown(Expr->getValue()); } const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { return SE.getCouldNotCompute(); } }; SCEVMapper SCM(SE2); SmallPtrSet ReachableBlocks; SE2.getReachableBlocks(ReachableBlocks, F); auto GetDelta = [&](const SCEV *Old, const SCEV *New) -> const SCEV * { if (containsUndefs(Old) || containsUndefs(New)) { // SCEV treats "undef" as an unknown but consistent value (i.e. it does // not propagate undef aggressively). This means we can (and do) fail // verification in cases where a transform makes a value go from "undef" // to "undef+1" (say). The transform is fine, since in both cases the // result is "undef", but SCEV thinks the value increased by 1. return nullptr; } // Unless VerifySCEVStrict is set, we only compare constant deltas. const SCEV *Delta = SE2.getMinusSCEV(Old, New); if (!VerifySCEVStrict && !isa(Delta)) return nullptr; return Delta; }; while (!LoopStack.empty()) { auto *L = LoopStack.pop_back_val(); llvm::append_range(LoopStack, *L); // Only verify BECounts in reachable loops. For an unreachable loop, // any BECount is legal. if (!ReachableBlocks.contains(L->getHeader())) continue; // Only verify cached BECounts. Computing new BECounts may change the // results of subsequent SCEV uses. auto It = BackedgeTakenCounts.find(L); if (It == BackedgeTakenCounts.end()) continue; auto *CurBECount = SCM.visit(It->second.getExact(L, const_cast(this))); auto *NewBECount = SE2.getBackedgeTakenCount(L); if (CurBECount == SE2.getCouldNotCompute() || NewBECount == SE2.getCouldNotCompute()) { // NB! This situation is legal, but is very suspicious -- whatever pass // change the loop to make a trip count go from could not compute to // computable or vice-versa *should have* invalidated SCEV. However, we // choose not to assert here (for now) since we don't want false // positives. continue; } if (SE.getTypeSizeInBits(CurBECount->getType()) > SE.getTypeSizeInBits(NewBECount->getType())) NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType()); else if (SE.getTypeSizeInBits(CurBECount->getType()) < SE.getTypeSizeInBits(NewBECount->getType())) CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); const SCEV *Delta = GetDelta(CurBECount, NewBECount); if (Delta && !Delta->isZero()) { dbgs() << "Trip Count for " << *L << " Changed!\n"; dbgs() << "Old: " << *CurBECount << "\n"; dbgs() << "New: " << *NewBECount << "\n"; dbgs() << "Delta: " << *Delta << "\n"; std::abort(); } } // Collect all valid loops currently in LoopInfo. 
SmallPtrSet ValidLoops; SmallVector Worklist(LI.begin(), LI.end()); while (!Worklist.empty()) { Loop *L = Worklist.pop_back_val(); if (ValidLoops.insert(L).second) Worklist.append(L->begin(), L->end()); } for (const auto &KV : ValueExprMap) { #ifndef NDEBUG // Check for SCEV expressions referencing invalid/deleted loops. if (auto *AR = dyn_cast(KV.second)) { assert(ValidLoops.contains(AR->getLoop()) && "AddRec references invalid loop"); } #endif // Check that the value is also part of the reverse map. auto It = ExprValueMap.find(KV.second); if (It == ExprValueMap.end() || !It->second.contains(KV.first)) { dbgs() << "Value " << *KV.first << " is in ValueExprMap but not in ExprValueMap\n"; std::abort(); } if (auto *I = dyn_cast(&*KV.first)) { if (!ReachableBlocks.contains(I->getParent())) continue; const SCEV *OldSCEV = SCM.visit(KV.second); const SCEV *NewSCEV = SE2.getSCEV(I); const SCEV *Delta = GetDelta(OldSCEV, NewSCEV); if (Delta && !Delta->isZero()) { dbgs() << "SCEV for value " << *I << " changed!\n" << "Old: " << *OldSCEV << "\n" << "New: " << *NewSCEV << "\n" << "Delta: " << *Delta << "\n"; std::abort(); } } } for (const auto &KV : ExprValueMap) { for (Value *V : KV.second) { auto It = ValueExprMap.find_as(V); if (It == ValueExprMap.end()) { dbgs() << "Value " << *V << " is in ExprValueMap but not in ValueExprMap\n"; std::abort(); } if (It->second != KV.first) { dbgs() << "Value " << *V << " mapped to " << *It->second << " rather than " << *KV.first << "\n"; std::abort(); } } } // Verify integrity of SCEV users. for (const auto &S : UniqueSCEVs) { for (const auto *Op : S.operands()) { // We do not store dependencies of constants. if (isa(Op)) continue; auto It = SCEVUsers.find(Op); if (It != SCEVUsers.end() && It->second.count(&S)) continue; dbgs() << "Use of operand " << *Op << " by user " << S << " is not being tracked!\n"; std::abort(); } } // Verify integrity of ValuesAtScopes users. for (const auto &ValueAndVec : ValuesAtScopes) { const SCEV *Value = ValueAndVec.first; for (const auto &LoopAndValueAtScope : ValueAndVec.second) { const Loop *L = LoopAndValueAtScope.first; const SCEV *ValueAtScope = LoopAndValueAtScope.second; if (!isa(ValueAtScope)) { auto It = ValuesAtScopesUsers.find(ValueAtScope); if (It != ValuesAtScopesUsers.end() && is_contained(It->second, std::make_pair(L, Value))) continue; dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: " << *ValueAtScope << " missing in ValuesAtScopesUsers\n"; std::abort(); } } } for (const auto &ValueAtScopeAndVec : ValuesAtScopesUsers) { const SCEV *ValueAtScope = ValueAtScopeAndVec.first; for (const auto &LoopAndValue : ValueAtScopeAndVec.second) { const Loop *L = LoopAndValue.first; const SCEV *Value = LoopAndValue.second; assert(!isa(Value)); auto It = ValuesAtScopes.find(Value); if (It != ValuesAtScopes.end() && is_contained(It->second, std::make_pair(L, ValueAtScope))) continue; dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: " << *ValueAtScope << " missing in ValuesAtScopes\n"; std::abort(); } } // Verify integrity of BECountUsers. auto VerifyBECountUsers = [&](bool Predicated) { auto &BECounts = Predicated ? 
PredicatedBackedgeTakenCounts : BackedgeTakenCounts; for (const auto &LoopAndBEInfo : BECounts) { for (const ExitNotTakenInfo &ENT : LoopAndBEInfo.second.ExitNotTaken) { for (const SCEV *S : {ENT.ExactNotTaken, ENT.SymbolicMaxNotTaken}) { if (!isa(S)) { auto UserIt = BECountUsers.find(S); if (UserIt != BECountUsers.end() && UserIt->second.contains({ LoopAndBEInfo.first, Predicated })) continue; dbgs() << "Value " << *S << " for loop " << *LoopAndBEInfo.first << " missing from BECountUsers\n"; std::abort(); } } } } }; VerifyBECountUsers(/* Predicated */ false); VerifyBECountUsers(/* Predicated */ true); // Verify intergity of loop disposition cache. for (auto &[S, Values] : LoopDispositions) { for (auto [Loop, CachedDisposition] : Values) { const auto RecomputedDisposition = SE2.getLoopDisposition(S, Loop); if (CachedDisposition != RecomputedDisposition) { dbgs() << "Cached disposition of " << *S << " for loop " << *Loop << " is incorrect: cached " << CachedDisposition << ", actual " << RecomputedDisposition << "\n"; std::abort(); } } } // Verify integrity of the block disposition cache. for (auto &[S, Values] : BlockDispositions) { for (auto [BB, CachedDisposition] : Values) { const auto RecomputedDisposition = SE2.getBlockDisposition(S, BB); if (CachedDisposition != RecomputedDisposition) { dbgs() << "Cached disposition of " << *S << " for block %" << BB->getName() << " is incorrect: cached " << CachedDisposition << ", actual " << RecomputedDisposition << "\n"; std::abort(); } } } // Verify FoldCache/FoldCacheUser caches. for (auto [FoldID, Expr] : FoldCache) { auto I = FoldCacheUser.find(Expr); if (I == FoldCacheUser.end()) { dbgs() << "Missing entry in FoldCacheUser for cached expression " << *Expr << "!\n"; std::abort(); } if (!is_contained(I->second, FoldID)) { dbgs() << "Missing FoldID in cached users of " << *Expr << "!\n"; std::abort(); } } for (auto [Expr, IDs] : FoldCacheUser) { for (auto &FoldID : IDs) { auto I = FoldCache.find(FoldID); if (I == FoldCache.end()) { dbgs() << "Missing entry in FoldCache for expression " << *Expr << "!\n"; std::abort(); } if (I->second != Expr) { dbgs() << "Entry in FoldCache doesn't match FoldCacheUser: " << *I->second << " != " << *Expr << "!\n"; std::abort(); } } } // Verify that ConstantMultipleCache computations are correct. We check that // cached multiples and recomputed multiples are multiples of each other to // verify correctness. It is possible that a recomputed multiple is different // from the cached multiple due to strengthened no wrap flags or changes in // KnownBits computations. for (auto [S, Multiple] : ConstantMultipleCache) { APInt RecomputedMultiple = SE2.getConstantMultiple(S); if ((Multiple != 0 && RecomputedMultiple != 0 && Multiple.urem(RecomputedMultiple) != 0 && RecomputedMultiple.urem(Multiple) != 0)) { dbgs() << "Incorrect cached computation in ConstantMultipleCache for " << *S << " : Computed " << RecomputedMultiple << " but cache contains " << Multiple << "!\n"; std::abort(); } } } bool ScalarEvolution::invalidate( Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv) { // Invalidate the ScalarEvolution object whenever it isn't preserved or one // of its dependencies is invalidated. 
  auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
         Inv.invalidate<AssumptionAnalysis>(F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
         Inv.invalidate<LoopAnalysis>(F, PA);
}

AnalysisKey ScalarEvolutionAnalysis::Key;

ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
                                             FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &LI = AM.getResult<LoopAnalysis>(F);
  return ScalarEvolution(F, TLI, AC, DT, LI);
}

PreservedAnalyses
ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) {
  AM.getResult<ScalarEvolutionAnalysis>(F).verify();
  return PreservedAnalyses::all();
}

PreservedAnalyses
ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
  // For compatibility with opt's -analyze feature under legacy pass manager
  // which was not ported to NPM. This keeps tests using
  // update_analyze_test_checks.py working.
  OS << "Printing analysis 'Scalar Evolution Analysis' for function '"
     << F.getName() << "':\n";
  AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
  return PreservedAnalyses::all();
}

INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
                      "Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
                    "Scalar Evolution Analysis", false, true)

char ScalarEvolutionWrapperPass::ID = 0;

ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
  initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
}

bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
  SE.reset(new ScalarEvolution(
      F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
      getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
      getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
  return false;
}

void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }

void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
  SE->print(OS);
}

void ScalarEvolutionWrapperPass::verifyAnalysis() const {
  if (!VerifySCEV)
    return;

  SE->verify();
}

void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequiredTransitive<AssumptionCacheTracker>();
  AU.addRequiredTransitive<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}

const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
                                                        const SCEV *RHS) {
  return getComparePredicate(ICmpInst::ICMP_EQ, LHS, RHS);
}

const SCEVPredicate *
ScalarEvolution::getComparePredicate(const ICmpInst::Predicate Pred,
                                     const SCEV *LHS, const SCEV *RHS) {
  FoldingSetNodeID ID;
  assert(LHS->getType() == RHS->getType() &&
         "Type mismatch between LHS and RHS");
  // Unique this node based on the arguments
  ID.AddInteger(SCEVPredicate::P_Compare);
  ID.AddInteger(Pred);
  ID.AddPointer(LHS);
  ID.AddPointer(RHS);
  void *IP = nullptr;
  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
    return S;
  SCEVComparePredicate *Eq = new (SCEVAllocator)
      SCEVComparePredicate(ID.Intern(SCEVAllocator), Pred, LHS, RHS);
  UniquePreds.InsertNode(Eq, IP);
  return Eq;
}

const SCEVPredicate *ScalarEvolution::getWrapPredicate(
    const SCEVAddRecExpr *AR,
    SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
  FoldingSetNodeID ID;
  // Unique this node based on the arguments
  ID.AddInteger(SCEVPredicate::P_Wrap);
  ID.AddPointer(AR);
  ID.AddInteger(AddedFlags);
  void *IP = nullptr;
  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
    return S;
  auto *OF = new (SCEVAllocator)
SCEVWrapPredicate(ID.Intern(SCEVAllocator), AR, AddedFlags); UniquePreds.InsertNode(OF, IP); return OF; } namespace { class SCEVPredicateRewriter : public SCEVRewriteVisitor { public: /// Rewrites \p S in the context of a loop L and the SCEV predication /// infrastructure. /// /// If \p Pred is non-null, the SCEV expression is rewritten to respect the /// equivalences present in \p Pred. /// /// If \p NewPreds is non-null, rewrite is free to add further predicates to /// \p NewPreds such that the result will be an AddRecExpr. static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE, SmallPtrSetImpl *NewPreds, const SCEVPredicate *Pred) { SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred); return Rewriter.visit(S); } const SCEV *visitUnknown(const SCEVUnknown *Expr) { if (Pred) { if (auto *U = dyn_cast(Pred)) { for (const auto *Pred : U->getPredicates()) if (const auto *IPred = dyn_cast(Pred)) if (IPred->getLHS() == Expr && IPred->getPredicate() == ICmpInst::ICMP_EQ) return IPred->getRHS(); } else if (const auto *IPred = dyn_cast(Pred)) { if (IPred->getLHS() == Expr && IPred->getPredicate() == ICmpInst::ICMP_EQ) return IPred->getRHS(); } } return convertToAddRecWithPreds(Expr); } const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { const SCEV *Operand = visit(Expr->getOperand()); const SCEVAddRecExpr *AR = dyn_cast(Operand); if (AR && AR->getLoop() == L && AR->isAffine()) { // This couldn't be folded because the operand didn't have the nuw // flag. Add the nusw flag as an assumption that we could make. const SCEV *Step = AR->getStepRecurrence(SE); Type *Ty = Expr->getType(); if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW)) return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty), SE.getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } return SE.getZeroExtendExpr(Operand, Expr->getType()); } const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { const SCEV *Operand = visit(Expr->getOperand()); const SCEVAddRecExpr *AR = dyn_cast(Operand); if (AR && AR->getLoop() == L && AR->isAffine()) { // This couldn't be folded because the operand didn't have the nsw // flag. Add the nssw flag as an assumption that we could make. const SCEV *Step = AR->getStepRecurrence(SE); Type *Ty = Expr->getType(); if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW)) return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty), SE.getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } return SE.getSignExtendExpr(Operand, Expr->getType()); } private: explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE, SmallPtrSetImpl *NewPreds, const SCEVPredicate *Pred) : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {} bool addOverflowAssumption(const SCEVPredicate *P) { if (!NewPreds) { // Check if we've already made this assumption. return Pred && Pred->implies(P); } NewPreds->insert(P); return true; } bool addOverflowAssumption(const SCEVAddRecExpr *AR, SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { auto *A = SE.getWrapPredicate(AR, AddedFlags); return addOverflowAssumption(A); } // If \p Expr represents a PHINode, we try to see if it can be represented // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible // to add this predicate as a runtime overflow check, we return the AddRec. // If \p Expr does not meet these conditions (is not a PHI node, or we // couldn't create an AddRec for it, or couldn't add the predicate), we just // return \p Expr. 
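  // A minimal compile-time sketch (plain integer arithmetic, arbitrary
  // sample values) of why visitZeroExtendExpr above may only fold
  // zext({Start,+,Step}) into an addrec of extends under an added no-wrap
  // assumption: for an 8-bit IV {240,+,10}, the value at n = 2 wraps, so the
  // zext of the narrow IV and the widened recurrence disagree.
  static_assert(((240u + 2u * 10u) & 0xFFu) == 4u,
                "narrow IV wrapped at n == 2");
  static_assert(240u + 2u * 10u == 260u, "widened recurrence did not wrap");
  // At n = 1 nothing wraps and the two agree, which is exactly what the
  // added wrap predicate asserts for every iteration actually executed.
  static_assert(((240u + 1u * 10u) & 0xFFu) == 250u, "no wrap at n == 1");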
const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) { if (!isa(Expr->getValue())) return Expr; std::optional< std::pair>> PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr); if (!PredicatedRewrite) return Expr; for (const auto *P : PredicatedRewrite->second){ // Wrap predicates from outer loops are not supported. if (auto *WP = dyn_cast(P)) { if (L != WP->getExpr()->getLoop()) return Expr; } if (!addOverflowAssumption(P)) return Expr; } return PredicatedRewrite->first; } SmallPtrSetImpl *NewPreds; const SCEVPredicate *Pred; const Loop *L; }; } // end anonymous namespace const SCEV * ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L, const SCEVPredicate &Preds) { return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds); } const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates( const SCEV *S, const Loop *L, SmallPtrSetImpl &Preds) { SmallPtrSet TransformPreds; S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr); auto *AddRec = dyn_cast(S); if (!AddRec) return nullptr; // Since the transformation was successful, we can now transfer the SCEV // predicates. for (const auto *P : TransformPreds) Preds.insert(P); return AddRec; } /// SCEV predicates SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID, SCEVPredicateKind Kind) : FastID(ID), Kind(Kind) {} SCEVComparePredicate::SCEVComparePredicate(const FoldingSetNodeIDRef ID, const ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) : SCEVPredicate(ID, P_Compare), Pred(Pred), LHS(LHS), RHS(RHS) { assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match"); assert(LHS != RHS && "LHS and RHS are the same SCEV"); } bool SCEVComparePredicate::implies(const SCEVPredicate *N) const { const auto *Op = dyn_cast(N); if (!Op) return false; if (Pred != ICmpInst::ICMP_EQ) return false; return Op->LHS == LHS && Op->RHS == RHS; } bool SCEVComparePredicate::isAlwaysTrue() const { return false; } void SCEVComparePredicate::print(raw_ostream &OS, unsigned Depth) const { if (Pred == ICmpInst::ICMP_EQ) OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n"; else OS.indent(Depth) << "Compare predicate: " << *LHS << " " << Pred << ") " << *RHS << "\n"; } SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID, const SCEVAddRecExpr *AR, IncrementWrapFlags Flags) : SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {} const SCEVAddRecExpr *SCEVWrapPredicate::getExpr() const { return AR; } bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const { const auto *Op = dyn_cast(N); return Op && Op->AR == AR && setFlags(Flags, Op->Flags) == Flags; } bool SCEVWrapPredicate::isAlwaysTrue() const { SCEV::NoWrapFlags ScevFlags = AR->getNoWrapFlags(); IncrementWrapFlags IFlags = Flags; if (ScalarEvolution::setFlags(ScevFlags, SCEV::FlagNSW) == ScevFlags) IFlags = clearFlags(IFlags, IncrementNSSW); return IFlags == IncrementAnyWrap; } void SCEVWrapPredicate::print(raw_ostream &OS, unsigned Depth) const { OS.indent(Depth) << *getExpr() << " Added Flags: "; if (SCEVWrapPredicate::IncrementNUSW & getFlags()) OS << ""; if (SCEVWrapPredicate::IncrementNSSW & getFlags()) OS << ""; OS << "\n"; } SCEVWrapPredicate::IncrementWrapFlags SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { IncrementWrapFlags ImpliedFlags = IncrementAnyWrap; SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags(); // We can safely transfer the NSW flag as NSSW. 
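  // Both the implies() check above and the test below use the same pattern:
  // setFlags forms the union of two flag bitmasks, so "setFlags(A, B) == A"
  // holds exactly when every flag in B is already present in A. A minimal
  // compile-time sketch with arbitrary bit patterns:
  static_assert((0b101u | 0b100u) == 0b101u, "flag already present");
  static_assert((0b001u | 0b100u) != 0b001u, "flag missing");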
if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags) ImpliedFlags = IncrementNSSW; if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) { // If the increment is positive, the SCEV NUW flag will also imply the // WrapPredicate NUSW flag. if (const auto *Step = dyn_cast(AR->getStepRecurrence(SE))) if (Step->getValue()->getValue().isNonNegative()) ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW); } return ImpliedFlags; } /// Union predicates don't get cached so create a dummy set ID for it. SCEVUnionPredicate::SCEVUnionPredicate(ArrayRef Preds) : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) { for (const auto *P : Preds) add(P); } bool SCEVUnionPredicate::isAlwaysTrue() const { return all_of(Preds, [](const SCEVPredicate *I) { return I->isAlwaysTrue(); }); } bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { if (const auto *Set = dyn_cast(N)) return all_of(Set->Preds, [this](const SCEVPredicate *I) { return this->implies(I); }); return any_of(Preds, [N](const SCEVPredicate *I) { return I->implies(N); }); } void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { for (const auto *Pred : Preds) Pred->print(OS, Depth); } void SCEVUnionPredicate::add(const SCEVPredicate *N) { if (const auto *Set = dyn_cast(N)) { for (const auto *Pred : Set->Preds) add(Pred); return; } Preds.push_back(N); } PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L) : SE(SE), L(L) { SmallVector Empty; Preds = std::make_unique(Empty); } void ScalarEvolution::registerUser(const SCEV *User, ArrayRef Ops) { for (const auto *Op : Ops) // We do not expect that forgetting cached data for SCEVConstants will ever // open any prospects for sharpening or introduce any correctness issues, // so we don't bother storing their dependencies. if (!isa(Op)) SCEVUsers[Op].insert(User); } const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { const SCEV *Expr = SE.getSCEV(V); RewriteEntry &Entry = RewriteMap[Expr]; // If we already have an entry and the version matches, return it. if (Entry.second && Generation == Entry.first) return Entry.second; // We found an entry but it's stale. Rewrite the stale entry // according to the current predicate. if (Entry.second) Expr = Entry.second; const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, *Preds); Entry = {Generation, NewSCEV}; return NewSCEV; } const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { if (!BackedgeCount) { SmallVector Preds; BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, Preds); for (const auto *P : Preds) addPredicate(*P); } return BackedgeCount; } void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { if (Preds->implies(&Pred)) return; auto &OldPreds = Preds->getPredicates(); SmallVector NewPreds(OldPreds.begin(), OldPreds.end()); NewPreds.push_back(&Pred); Preds = std::make_unique(NewPreds); updateGeneration(); } const SCEVPredicate &PredicatedScalarEvolution::getPredicate() const { return *Preds; } void PredicatedScalarEvolution::updateGeneration() { // If the generation number wrapped recompute everything. 
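  // The wrap test below relies on unsigned overflow being well defined:
  // incrementing the all-ones value of the generation counter yields zero.
  // A minimal compile-time sketch, independent of the counter's width:
  static_assert(static_cast<unsigned>(-1) + 1u == 0u,
                "unsigned increment wraps from the maximum back to zero");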
if (++Generation == 0) { for (auto &II : RewriteMap) { const SCEV *Rewritten = II.second.second; II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, *Preds)}; } } } void PredicatedScalarEvolution::setNoOverflow( Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { const SCEV *Expr = getSCEV(V); const auto *AR = cast(Expr); auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE); // Clear the statically implied flags. Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags); addPredicate(*SE.getWrapPredicate(AR, Flags)); auto II = FlagsMap.insert({V, Flags}); if (!II.second) II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second); } bool PredicatedScalarEvolution::hasNoOverflow( Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { const SCEV *Expr = getSCEV(V); const auto *AR = cast(Expr); Flags = SCEVWrapPredicate::clearFlags( Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE)); auto II = FlagsMap.find(V); if (II != FlagsMap.end()) Flags = SCEVWrapPredicate::clearFlags(Flags, II->second); return Flags == SCEVWrapPredicate::IncrementAnyWrap; } const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { const SCEV *Expr = this->getSCEV(V); SmallPtrSet NewPreds; auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds); if (!New) return nullptr; for (const auto *P : NewPreds) addPredicate(*P); RewriteMap[SE.getSCEV(V)] = {Generation, New}; return New; } PredicatedScalarEvolution::PredicatedScalarEvolution( const PredicatedScalarEvolution &Init) : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(std::make_unique(Init.Preds->getPredicates())), Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { for (auto I : Init.FlagsMap) FlagsMap.insert(I); } void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { // For each block. for (auto *BB : L.getBlocks()) for (auto &I : *BB) { if (!SE.isSCEVable(I.getType())) continue; auto *Expr = SE.getSCEV(&I); auto II = RewriteMap.find(Expr); if (II == RewriteMap.end()) continue; // Don't print things that are not interesting. if (II->second.second == Expr) continue; OS.indent(Depth) << "[PSE]" << I << ":\n"; OS.indent(Depth + 2) << *Expr << "\n"; OS.indent(Depth + 2) << "--> " << *II->second.second << "\n"; } } // Match the mathematical pattern A - (A / B) * B, where A and B can be // arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used // for URem with constant power-of-2 second operands. // It's not always easy, as A and B can be folded (imagine A is X / 2, and B is // 4, A / B becomes X / 8). bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS) { // Try to match 'zext (trunc A to iB) to iY', which is used // for URem with constant power-of-2 second operands. Make sure the size of // the operand A matches the size of the whole expressions. if (const auto *ZExt = dyn_cast(Expr)) if (const auto *Trunc = dyn_cast(ZExt->getOperand(0))) { LHS = Trunc->getOperand(); // Bail out if the type of the LHS is larger than the type of the // expression for now. 
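      // The rewrite in this branch rests on the identity
      //   zext(trunc(A) to iB) == A urem 2^B   (for B narrower than A),
      // i.e. truncation keeps exactly the low B bits. A minimal compile-time
      // sketch with B = 8 and an arbitrary 32-bit value:
      static_assert((0x12345678u & 0xFFu) == 0x12345678u % 256u,
                    "keeping the low 8 bits equals urem by 2^8");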
if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(Expr->getType())) return false; if (LHS->getType() != Expr->getType()) LHS = getZeroExtendExpr(LHS, Expr->getType()); RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1) << getTypeSizeInBits(Trunc->getType())); return true; } const auto *Add = dyn_cast(Expr); if (Add == nullptr || Add->getNumOperands() != 2) return false; const SCEV *A = Add->getOperand(1); const auto *Mul = dyn_cast(Add->getOperand(0)); if (Mul == nullptr) return false; const auto MatchURemWithDivisor = [&](const SCEV *B) { // (SomeExpr + (-(SomeExpr / B) * B)). if (Expr == getURemExpr(A, B)) { LHS = A; RHS = B; return true; } return false; }; // (SomeExpr + (-1 * (SomeExpr / B) * B)). if (Mul->getNumOperands() == 3 && isa(Mul->getOperand(0))) return MatchURemWithDivisor(Mul->getOperand(1)) || MatchURemWithDivisor(Mul->getOperand(2)); // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)). if (Mul->getNumOperands() == 2) return MatchURemWithDivisor(Mul->getOperand(1)) || MatchURemWithDivisor(Mul->getOperand(0)) || MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) || MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0))); return false; } const SCEV * ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); // Form an expression for the maximum exit count possible for this loop. We // merge the max and exact information to approximate a version of // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. SmallVector ExitCounts; for (BasicBlock *ExitingBB : ExitingBlocks) { const SCEV *ExitCount = getExitCount(L, ExitingBB, ScalarEvolution::SymbolicMaximum); if (!isa(ExitCount)) { assert(DT.dominates(ExitingBB, L->getLoopLatch()) && "We should only have known counts for exiting blocks that " "dominate latch!"); ExitCounts.push_back(ExitCount); } } if (ExitCounts.empty()) return getCouldNotCompute(); return getUMinFromMismatchedTypes(ExitCounts, /*Sequential*/ true); } /// A rewriter to replace SCEV expressions in Map with the corresponding entry /// in the map. It skips AddRecExpr because we cannot guarantee that the /// replacement is loop invariant in the loop of the AddRec. class SCEVLoopGuardRewriter : public SCEVRewriteVisitor { const DenseMap ⤅ public: SCEVLoopGuardRewriter(ScalarEvolution &SE, DenseMap &M) : SCEVRewriteVisitor(SE), Map(M) {} const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; } const SCEV *visitUnknown(const SCEVUnknown *Expr) { auto I = Map.find(Expr); if (I == Map.end()) return Expr; return I->second; } const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { auto I = Map.find(Expr); if (I == Map.end()) { // If we didn't find the extact ZExt expr in the map, check if there's an // entry for a smaller ZExt we can use instead. 
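      // Re-using a fact about a narrower ZExt is sound because zero-extension
      // composes: extending i8 -> i64 equals extending i8 -> i32 and then
      // i32 -> i64, so whatever is known about the narrower extension also
      // holds for the wider one. A minimal compile-time sketch modelling the
      // extensions with masks (arbitrary sample value):
      static_assert(((0xABu & 0xFFu) & 0xFFFFu) == (0xABu & 0xFFu),
                    "zero-extending in two steps equals one step");
      static_assert((0xABu & 0xFFu) < 0x100u,
                    "a zero-extended i8 value always fits the narrower width");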
Type *Ty = Expr->getType(); const SCEV *Op = Expr->getOperand(0); unsigned Bitwidth = Ty->getScalarSizeInBits() / 2; while (Bitwidth % 8 == 0 && Bitwidth >= 8 && Bitwidth > Op->getType()->getScalarSizeInBits()) { Type *NarrowTy = IntegerType::get(SE.getContext(), Bitwidth); auto *NarrowExt = SE.getZeroExtendExpr(Op, NarrowTy); auto I = Map.find(NarrowExt); if (I != Map.end()) return SE.getZeroExtendExpr(I->second, Ty); Bitwidth = Bitwidth / 2; } return SCEVRewriteVisitor::visitZeroExtendExpr( Expr); } return I->second; } const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { auto I = Map.find(Expr); if (I == Map.end()) return SCEVRewriteVisitor::visitSignExtendExpr( Expr); return I->second; } const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) { auto I = Map.find(Expr); if (I == Map.end()) return SCEVRewriteVisitor::visitUMinExpr(Expr); return I->second; } const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) { auto I = Map.find(Expr); if (I == Map.end()) return SCEVRewriteVisitor::visitSMinExpr(Expr); return I->second; } }; const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { SmallVector ExprsToRewrite; auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS, const SCEV *RHS, DenseMap &RewriteMap) { // WARNING: It is generally unsound to apply any wrap flags to the proposed // replacement SCEV which isn't directly implied by the structure of that // SCEV. In particular, using contextual facts to imply flags is *NOT* // legal. See the scoping rules for flags in the header to understand why. // If LHS is a constant, apply information to the other expression. if (isa(LHS)) { std::swap(LHS, RHS); Predicate = CmpInst::getSwappedPredicate(Predicate); } // Check for a condition of the form (-C1 + X < C2). InstCombine will // create this form when combining two checks of the form (X u< C2 + C1) and // (X >=u C1). auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap, &ExprsToRewrite]() { auto *AddExpr = dyn_cast(LHS); if (!AddExpr || AddExpr->getNumOperands() != 2) return false; auto *C1 = dyn_cast(AddExpr->getOperand(0)); auto *LHSUnknown = dyn_cast(AddExpr->getOperand(1)); auto *C2 = dyn_cast(RHS); if (!C1 || !C2 || !LHSUnknown) return false; auto ExactRegion = ConstantRange::makeExactICmpRegion(Predicate, C2->getAPInt()) .sub(C1->getAPInt()); // Bail out, unless we have a non-wrapping, monotonic range. if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet()) return false; auto I = RewriteMap.find(LHSUnknown); const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown; RewriteMap[LHSUnknown] = getUMaxExpr( getConstant(ExactRegion.getUnsignedMin()), getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax()))); ExprsToRewrite.push_back(LHSUnknown); return true; }; if (MatchRangeCheckIdiom()) return; // Return true if \p Expr is a MinMax SCEV expression with a non-negative // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS // the non-constant operand and in \p LHS the constant operand. 
auto IsMinMaxSCEVWithNonNegativeConstant = [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS, const SCEV *&RHS) { if (auto *MinMax = dyn_cast(Expr)) { if (MinMax->getNumOperands() != 2) return false; if (auto *C = dyn_cast(MinMax->getOperand(0))) { if (C->getAPInt().isNegative()) return false; SCTy = MinMax->getSCEVType(); LHS = MinMax->getOperand(0); RHS = MinMax->getOperand(1); return true; } } return false; }; // Checks whether Expr is a non-negative constant, and Divisor is a positive // constant, and returns their APInt in ExprVal and in DivisorVal. auto GetNonNegExprAndPosDivisor = [&](const SCEV *Expr, const SCEV *Divisor, APInt &ExprVal, APInt &DivisorVal) { auto *ConstExpr = dyn_cast(Expr); auto *ConstDivisor = dyn_cast(Divisor); if (!ConstExpr || !ConstDivisor) return false; ExprVal = ConstExpr->getAPInt(); DivisorVal = ConstDivisor->getAPInt(); return ExprVal.isNonNegative() && !DivisorVal.isNonPositive(); }; // Return a new SCEV that modifies \p Expr to the closest number divides by // \p Divisor and greater or equal than Expr. // For now, only handle constant Expr and Divisor. auto GetNextSCEVDividesByDivisor = [&](const SCEV *Expr, const SCEV *Divisor) { APInt ExprVal; APInt DivisorVal; if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal)) return Expr; APInt Rem = ExprVal.urem(DivisorVal); if (!Rem.isZero()) // return the SCEV: Expr + Divisor - Expr % Divisor return getConstant(ExprVal + DivisorVal - Rem); return Expr; }; // Return a new SCEV that modifies \p Expr to the closest number divides by // \p Divisor and less or equal than Expr. // For now, only handle constant Expr and Divisor. auto GetPreviousSCEVDividesByDivisor = [&](const SCEV *Expr, const SCEV *Divisor) { APInt ExprVal; APInt DivisorVal; if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal)) return Expr; APInt Rem = ExprVal.urem(DivisorVal); // return the SCEV: Expr - Expr % Divisor return getConstant(ExprVal - Rem); }; // Apply divisibilty by \p Divisor on MinMaxExpr with constant values, // recursively. This is done by aligning up/down the constant value to the // Divisor. std::function ApplyDivisibiltyOnMinMaxExpr = [&](const SCEV *MinMaxExpr, const SCEV *Divisor) { const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr; SCEVTypes SCTy; if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS, MinMaxRHS)) return MinMaxExpr; auto IsMin = isa(MinMaxExpr) || isa(MinMaxExpr); assert(isKnownNonNegative(MinMaxLHS) && "Expected non-negative operand!"); auto *DivisibleExpr = IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, Divisor) : GetNextSCEVDividesByDivisor(MinMaxLHS, Divisor); SmallVector Ops = { ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr}; return getMinMaxExpr(SCTy, Ops); }; // If we have LHS == 0, check if LHS is computing a property of some unknown // SCEV %v which we can rewrite %v to express explicitly. const SCEVConstant *RHSC = dyn_cast(RHS); if (Predicate == CmpInst::ICMP_EQ && RHSC && RHSC->getValue()->isNullValue()) { // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to // explicitly express that. const SCEV *URemLHS = nullptr; const SCEV *URemRHS = nullptr; if (matchURem(LHS, URemLHS, URemRHS)) { if (const SCEVUnknown *LHSUnknown = dyn_cast(URemLHS)) { auto I = RewriteMap.find(LHSUnknown); const SCEV *RewrittenLHS = I != RewriteMap.end() ? 
I->second : LHSUnknown; RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS); const auto *Multiple = getMulExpr(getUDivExpr(RewrittenLHS, URemRHS), URemRHS); RewriteMap[LHSUnknown] = Multiple; ExprsToRewrite.push_back(LHSUnknown); return; } } } // Do not apply information for constants or if RHS contains an AddRec. if (isa(LHS) || containsAddRecurrence(RHS)) return; // If RHS is SCEVUnknown, make sure the information is applied to it. if (!isa(LHS) && isa(RHS)) { std::swap(LHS, RHS); Predicate = CmpInst::getSwappedPredicate(Predicate); } // Puts rewrite rule \p From -> \p To into the rewrite map. Also if \p From // and \p FromRewritten are the same (i.e. there has been no rewrite // registered for \p From), then puts this value in the list of rewritten // expressions. auto AddRewrite = [&](const SCEV *From, const SCEV *FromRewritten, const SCEV *To) { if (From == FromRewritten) ExprsToRewrite.push_back(From); RewriteMap[From] = To; }; // Checks whether \p S has already been rewritten. In that case returns the // existing rewrite because we want to chain further rewrites onto the // already rewritten value. Otherwise returns \p S. auto GetMaybeRewritten = [&](const SCEV *S) { auto I = RewriteMap.find(S); return I != RewriteMap.end() ? I->second : S; }; // Check for the SCEV expression (A /u B) * B while B is a constant, inside // \p Expr. The check is done recuresively on \p Expr, which is assumed to // be a composition of Min/Max SCEVs. Return whether the SCEV expression (A // /u B) * B was found, and return the divisor B in \p DividesBy. For // example, if Expr = umin (umax ((A /u 8) * 8, 16), 64), return true since // (A /u 8) * 8 matched the pattern, and return the constant SCEV 8 in \p // DividesBy. std::function HasDivisibiltyInfo = [&](const SCEV *Expr, const SCEV *&DividesBy) { if (auto *Mul = dyn_cast(Expr)) { if (Mul->getNumOperands() != 2) return false; auto *MulLHS = Mul->getOperand(0); auto *MulRHS = Mul->getOperand(1); if (isa(MulLHS)) std::swap(MulLHS, MulRHS); if (auto *Div = dyn_cast(MulLHS)) if (Div->getOperand(1) == MulRHS) { DividesBy = MulRHS; return true; } } if (auto *MinMax = dyn_cast(Expr)) return HasDivisibiltyInfo(MinMax->getOperand(0), DividesBy) || HasDivisibiltyInfo(MinMax->getOperand(1), DividesBy); return false; }; // Return true if Expr known to divide by \p DividesBy. std::function IsKnownToDivideBy = [&](const SCEV *Expr, const SCEV *DividesBy) { if (getURemExpr(Expr, DividesBy)->isZero()) return true; if (auto *MinMax = dyn_cast(Expr)) return IsKnownToDivideBy(MinMax->getOperand(0), DividesBy) && IsKnownToDivideBy(MinMax->getOperand(1), DividesBy); return false; }; const SCEV *RewrittenLHS = GetMaybeRewritten(LHS); const SCEV *DividesBy = nullptr; if (HasDivisibiltyInfo(RewrittenLHS, DividesBy)) // Check that the whole expression is divided by DividesBy DividesBy = IsKnownToDivideBy(RewrittenLHS, DividesBy) ? DividesBy : nullptr; // Collect rewrites for LHS and its transitive operands based on the // condition. // For min/max expressions, also apply the guard to its operands: // 'min(a, b) >= c' -> '(a >= c) and (b >= c)', // 'min(a, b) > c' -> '(a > c) and (b > c)', // 'max(a, b) <= c' -> '(a <= c) and (b <= c)', // 'max(a, b) < c' -> '(a < c) and (b < c)'. // We cannot express strict predicates in SCEV, so instead we replace them // with non-strict ones against plus or minus one of RHS depending on the // predicate. 
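// Illustrative sketch (not part of this patch): the plain-integer facts behind
// the RHS adjustment in the switch below. Strict predicates are turned into
// non-strict ones against RHS -/+ 1, and a known divisor lets RHS be rounded
// down/up to a multiple of it, mirroring GetPreviousSCEVDividesByDivisor and
// GetNextSCEVDividesByDivisor. Helper names are hypothetical; only
// non-negative values are considered, as in the lambdas above.
#include <algorithm>
#include <cassert>
#include <cstdint>
static uint64_t roundDownToMultiple(uint64_t X, uint64_t D) { return X - X % D; }
static uint64_t roundUpToMultiple(uint64_t X, uint64_t D) {
  uint64_t Rem = X % D;
  return Rem ? X + D - Rem : X;
}
static void guardNormalizationSketch() {
  const uint64_t C = 10;
  for (uint64_t X = 0; X < 32; ++X) {
    assert((X < C) == (X <= C - 1)); // x <u c  <=>  x <=u c - 1  (c != 0)
    assert((X > C) == (X >= C + 1)); // x >u c  <=>  x >=u c + 1
  }
  // If x is known to be a multiple of 8, then x <= 21 tightens to x <= 16 and
  // x >= 21 tightens to x >= 24.
  assert(roundDownToMultiple(21, 8) == 16);
  assert(roundUpToMultiple(21, 8) == 24);
  // min(a, b) >= c holds exactly when both a >= c and b >= c hold, which is
  // why the guard is also applied to the operands of min/max expressions.
  const uint64_t A = 7, B = 12;
  assert((std::min(A, B) >= 9) == (A >= 9 && B >= 9));
}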
const SCEV *One = getOne(RHS->getType()); switch (Predicate) { case CmpInst::ICMP_ULT: if (RHS->getType()->isPointerTy()) return; RHS = getUMaxExpr(RHS, One); [[fallthrough]]; case CmpInst::ICMP_SLT: { RHS = getMinusSCEV(RHS, One); RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS; break; } case CmpInst::ICMP_UGT: case CmpInst::ICMP_SGT: RHS = getAddExpr(RHS, One); RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS; break; case CmpInst::ICMP_ULE: case CmpInst::ICMP_SLE: RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS; break; case CmpInst::ICMP_UGE: case CmpInst::ICMP_SGE: RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS; break; default: break; } SmallVector Worklist(1, LHS); SmallPtrSet Visited; auto EnqueueOperands = [&Worklist](const SCEVNAryExpr *S) { append_range(Worklist, S->operands()); }; while (!Worklist.empty()) { const SCEV *From = Worklist.pop_back_val(); if (isa(From)) continue; if (!Visited.insert(From).second) continue; const SCEV *FromRewritten = GetMaybeRewritten(From); const SCEV *To = nullptr; switch (Predicate) { case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: To = getUMinExpr(FromRewritten, RHS); if (auto *UMax = dyn_cast(FromRewritten)) EnqueueOperands(UMax); break; case CmpInst::ICMP_SLT: case CmpInst::ICMP_SLE: To = getSMinExpr(FromRewritten, RHS); if (auto *SMax = dyn_cast(FromRewritten)) EnqueueOperands(SMax); break; case CmpInst::ICMP_UGT: case CmpInst::ICMP_UGE: To = getUMaxExpr(FromRewritten, RHS); if (auto *UMin = dyn_cast(FromRewritten)) EnqueueOperands(UMin); break; case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: To = getSMaxExpr(FromRewritten, RHS); if (auto *SMin = dyn_cast(FromRewritten)) EnqueueOperands(SMin); break; case CmpInst::ICMP_EQ: if (isa(RHS)) To = RHS; break; case CmpInst::ICMP_NE: if (isa(RHS) && cast(RHS)->getValue()->isNullValue()) { const SCEV *OneAlignedUp = DividesBy ? GetNextSCEVDividesByDivisor(One, DividesBy) : One; To = getUMaxExpr(FromRewritten, OneAlignedUp); } break; default: break; } if (To) AddRewrite(From, FromRewritten, To); } }; BasicBlock *Header = L->getHeader(); SmallVector> Terms; // First, collect information from assumptions dominating the loop. for (auto &AssumeVH : AC.assumptions()) { if (!AssumeVH) continue; auto *AssumeI = cast(AssumeVH); if (!DT.dominates(AssumeI, Header)) continue; Terms.emplace_back(AssumeI->getOperand(0), true); } // Second, collect information from llvm.experimental.guards dominating the loop. auto *GuardDecl = F.getParent()->getFunction( Intrinsic::getName(Intrinsic::experimental_guard)); if (GuardDecl) for (const auto *GU : GuardDecl->users()) if (const auto *Guard = dyn_cast(GU)) if (Guard->getFunction() == Header->getParent() && DT.dominates(Guard, Header)) Terms.emplace_back(Guard->getArgOperand(0), true); // Third, collect conditions from dominating branches. Starting at the loop // predecessor, climb up the predecessor chain, as long as there are // predecessors that can be found that have unique successors leading to the // original header. // TODO: share this logic with isLoopEntryGuardedByCond. 
for (std::pair Pair( L->getLoopPredecessor(), Header); Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { const BranchInst *LoopEntryPredicate = dyn_cast(Pair.first->getTerminator()); if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional()) continue; Terms.emplace_back(LoopEntryPredicate->getCondition(), LoopEntryPredicate->getSuccessor(0) == Pair.second); } // Now apply the information from the collected conditions to RewriteMap. // Conditions are processed in reverse order, so the earliest conditions is // processed first. This ensures the SCEVs with the shortest dependency chains // are constructed first. DenseMap RewriteMap; for (auto [Term, EnterIfTrue] : reverse(Terms)) { SmallVector Worklist; SmallPtrSet Visited; Worklist.push_back(Term); while (!Worklist.empty()) { Value *Cond = Worklist.pop_back_val(); if (!Visited.insert(Cond).second) continue; if (auto *Cmp = dyn_cast(Cond)) { auto Predicate = EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate(); const auto *LHS = getSCEV(Cmp->getOperand(0)); const auto *RHS = getSCEV(Cmp->getOperand(1)); CollectCondition(Predicate, LHS, RHS, RewriteMap); continue; } Value *L, *R; if (EnterIfTrue ? match(Cond, m_LogicalAnd(m_Value(L), m_Value(R))) : match(Cond, m_LogicalOr(m_Value(L), m_Value(R)))) { Worklist.push_back(L); Worklist.push_back(R); } } } if (RewriteMap.empty()) return Expr; // Now that all rewrite information is collect, rewrite the collected // expressions with the information in the map. This applies information to // sub-expressions. if (ExprsToRewrite.size() > 1) { for (const SCEV *Expr : ExprsToRewrite) { const SCEV *RewriteTo = RewriteMap[Expr]; RewriteMap.erase(Expr); SCEVLoopGuardRewriter Rewriter(*this, RewriteMap); RewriteMap.insert({Expr, Rewriter.visit(RewriteTo)}); } } SCEVLoopGuardRewriter Rewriter(*this, RewriteMap); return Rewriter.visit(Expr); } diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 26ed74108ec3..18a4223d481e 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1,7795 +1,7806 @@ //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines a pattern matching instruction selector for PowerPC, // converting from a legalized dag to a PPC dag. 
// //===----------------------------------------------------------------------===// #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "ppc-isel" #define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection" STATISTIC(NumSextSetcc, "Number of (sext(setcc)) nodes expanded into GPR sequence."); STATISTIC(NumZextSetcc, "Number of (zext(setcc)) nodes expanded into GPR sequence."); STATISTIC(SignExtensionsAdded, "Number of sign extensions for compare inputs added."); STATISTIC(ZeroExtensionsAdded, "Number of zero extensions for compare inputs added."); STATISTIC(NumLogicOpsOnComparison, "Number of logical ops on i1 values calculated in GPR."); STATISTIC(OmittedForNonExtendUses, "Number of compares not eliminated as they have non-extending uses."); STATISTIC(NumP9Setb, "Number of compares lowered to setb."); // FIXME: Remove this once the bug has been fixed! 
cl::opt ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); static cl::opt UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden); static cl::opt BPermRewriterNoMasking( "ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden); static cl::opt EnableBranchHint( "ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden); static cl::opt EnableTLSOpt( "ppc-tls-opt", cl::init(true), cl::desc("Enable tls optimization peephole"), cl::Hidden); enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64, ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32, ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 }; static cl::opt CmpInGPR( "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), cl::desc("Specify the types of comparisons to emit GPR-only code for."), cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), clEnumValN(ICGPR_NonExtIn, "nonextin", "Only comparisons where inputs don't need [sz]ext."), clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), clEnumValN(ICGPR_ZextI32, "zexti32", "Only i32 comparisons with zext result."), clEnumValN(ICGPR_ZextI64, "zexti64", "Only i64 comparisons with zext result."), clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), clEnumValN(ICGPR_SextI32, "sexti32", "Only i32 comparisons with sext result."), clEnumValN(ICGPR_SextI64, "sexti64", "Only i64 comparisons with sext result."))); namespace { //===--------------------------------------------------------------------===// /// PPCDAGToDAGISel - PPC specific code to select PPC machine /// instructions for SelectionDAG operations. /// class PPCDAGToDAGISel : public SelectionDAGISel { const PPCTargetMachine &TM; const PPCSubtarget *Subtarget = nullptr; const PPCTargetLowering *PPCLowering = nullptr; unsigned GlobalBaseReg = 0; public: static char ID; PPCDAGToDAGISel() = delete; explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel) : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {} bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; Subtarget = &MF.getSubtarget(); PPCLowering = Subtarget->getTargetLowering(); if (Subtarget->hasROPProtect()) { // Create a place on the stack for the ROP Protection Hash. // The ROP Protection Hash will always be 8 bytes and aligned to 8 // bytes. MachineFrameInfo &MFI = MF.getFrameInfo(); PPCFunctionInfo *FI = MF.getInfo(); const int Result = MFI.CreateStackObject(8, Align(8), false); FI->setROPProtectionHashSaveIndex(Result); } SelectionDAGISel::runOnMachineFunction(MF); return true; } void PreprocessISelDAG() override; void PostprocessISelDAG() override; /// getI16Imm - Return a target constant with the specified value, of type /// i16. inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i16); } /// getI32Imm - Return a target constant with the specified value, of type /// i32. 
inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); } /// getI64Imm - Return a target constant with the specified value, of type /// i64. inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i64); } /// getSmallIPtrImm - Return a target constant of pointer type. inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) { return CurDAG->getTargetConstant( Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } /// isRotateAndMask - Returns true if Mask and Shift can be folded into a /// rotate and mask opcode and mask operation. static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME); /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC /// base register. Return the virtual register that holds this value. SDNode *getGlobalBaseReg(); void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0); // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. void Select(SDNode *N) override; bool tryBitfieldInsert(SDNode *N); bool tryBitPermutation(SDNode *N); bool tryIntCompareInGPR(SDNode *N); // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into // an X-Form load instruction with the offset being a relocation coming from // the PPCISD::ADD_TLS. bool tryTLSXFormLoad(LoadSDNode *N); // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into // an X-Form store instruction with the offset being a relocation coming from // the PPCISD::ADD_TLS. bool tryTLSXFormStore(StoreSDNode *N); /// SelectCC - Select a comparison of the specified values with the /// specified condition code, returning the CR# of the expression. SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SDValue Chain = SDValue()); /// SelectAddrImmOffs - Return true if the operand is valid for a preinc /// immediate field. Note that the operand at this point is already the /// result of a prior SelectAddressRegImm call. bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { if (N.getOpcode() == ISD::TargetConstant || N.getOpcode() == ISD::TargetGlobalAddress) { Out = N; return true; } return false; } /// SelectDSForm - Returns true if address N can be represented by the /// addressing mode of DSForm instructions (a base register, plus a signed /// 16-bit displacement that is a multiple of 4. bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, Align(4)) == PPC::AM_DSForm; } /// SelectDQForm - Returns true if address N can be represented by the /// addressing mode of DQForm instructions (a base register, plus a signed /// 16-bit displacement that is a multiple of 16. bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, Align(16)) == PPC::AM_DQForm; } /// SelectDForm - Returns true if address N can be represented by /// the addressing mode of DForm instructions (a base register, plus a /// signed 16-bit immediate. bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, std::nullopt) == PPC::AM_DForm; } /// SelectPCRelForm - Returns true if address N can be represented by /// PC-Relative addressing mode. 
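// Illustrative sketch (not part of this patch): the displacement constraints
// that the D, DS, DQ and prefixed-D form selectors in this class encode,
// written as standalone predicates on a signed 64-bit displacement. Helper
// names below are hypothetical.
#include <cstdint>
static bool isSigned16(int64_t D) { return D >= -32768 && D <= 32767; }
static bool fitsDForm(int64_t D) { return isSigned16(D); }
static bool fitsDSForm(int64_t D) { return isSigned16(D) && D % 4 == 0; }
static bool fitsDQForm(int64_t D) { return isSigned16(D) && D % 16 == 0; }
static bool fitsPrefixedDForm(int64_t D) {
  return D >= -(int64_t(1) << 33) && D <= (int64_t(1) << 33) - 1; // signed 34-bit
}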
bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, std::nullopt) == PPC::AM_PCRel; } /// SelectPDForm - Returns true if address N can be represented by Prefixed /// DForm addressing mode (a base register, plus a signed 34-bit immediate. bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, std::nullopt) == PPC::AM_PrefixDForm; } /// SelectXForm - Returns true if address N can be represented by the /// addressing mode of XForm instructions (an indexed [r+r] operation). bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, std::nullopt) == PPC::AM_XForm; } /// SelectForceXForm - Given the specified address, force it to be /// represented as an indexed [r+r] operation (an XForm instruction). bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) == PPC::AM_XForm; } /// SelectAddrIdx - Given the specified address, check to see if it can be /// represented as an indexed [r+r] operation. /// This is for xform instructions whose associated displacement form is D. /// The last parameter \p 0 means associated D form has no requirment for 16 /// bit signed displacement. /// Returns false if it can be represented by [r+imm], which are preferred. bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, std::nullopt); } /// SelectAddrIdx4 - Given the specified address, check to see if it can be /// represented as an indexed [r+r] operation. /// This is for xform instructions whose associated displacement form is DS. /// The last parameter \p 4 means associated DS form 16 bit signed /// displacement must be a multiple of 4. /// Returns false if it can be represented by [r+imm], which are preferred. bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, Align(4)); } /// SelectAddrIdx16 - Given the specified address, check to see if it can be /// represented as an indexed [r+r] operation. /// This is for xform instructions whose associated displacement form is DQ. /// The last parameter \p 16 means associated DQ form 16 bit signed /// displacement must be a multiple of 16. /// Returns false if it can be represented by [r+imm], which are preferred. bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, Align(16)); } /// SelectAddrIdxOnly - Given the specified address, force it to be /// represented as an indexed [r+r] operation. bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. /// The last parameter \p 0 means D form has no requirment for 16 bit signed /// displacement. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, std::nullopt); } /// SelectAddrImmX4 - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement that is a multiple of /// 4 (last parameter). 
Suitable for use by STD and friends. bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4)); } /// SelectAddrImmX16 - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement that is a multiple of /// 16(last parameter). Suitable for use by STXV and friends. bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(16)); } /// SelectAddrImmX34 - Returns true if the address N can be represented by /// a base register plus a signed 34-bit displacement. Suitable for use by /// PSTXVP and friends. bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG); } // Select an address into a single register. bool SelectAddr(SDValue N, SDValue &Base) { Base = N; return true; } bool SelectAddrPCRel(SDValue N, SDValue &Base) { return PPCLowering->SelectAddressPCRel(N, Base); } /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. The case of adding a (possibly relocatable) constant to a /// register can be improved, but it is wrong to substitute Reg+Reg for /// Reg in an asm, because the load or store opcode would have to change. bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector &OutOps) override { switch(ConstraintID) { default: errs() << "ConstraintID: " << InlineAsm::getMemConstraintName(ConstraintID) << "\n"; llvm_unreachable("Unexpected asm memory constraint"); case InlineAsm::ConstraintCode::es: case InlineAsm::ConstraintCode::m: case InlineAsm::ConstraintCode::o: case InlineAsm::ConstraintCode::Q: case InlineAsm::ConstraintCode::Z: case InlineAsm::ConstraintCode::Zy: // We need to make sure that this one operand does not end up in r0 // (because we might end up lowering this as 0(%op)). const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); SDLoc dl(Op); SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); SDValue NewOp = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, Op.getValueType(), Op, RC), 0); OutOps.push_back(NewOp); return false; } return true; } // Include the pieces autogenerated from the target description. #include "PPCGenDAGISel.inc" private: bool trySETCC(SDNode *N); bool tryFoldSWTestBRCC(SDNode *N); bool trySelectLoopCountIntrinsic(SDNode *N); bool tryAsSingleRLDICL(SDNode *N); bool tryAsSingleRLDCL(SDNode *N); bool tryAsSingleRLDICR(SDNode *N); bool tryAsSingleRLWINM(SDNode *N); bool tryAsSingleRLWINM8(SDNode *N); bool tryAsSingleRLWIMI(SDNode *N); bool tryAsPairOfRLDICL(SDNode *N); bool tryAsSingleRLDIMI(SDNode *N); void PeepholePPC64(); void PeepholePPC64ZExt(); void PeepholeCROps(); SDValue combineToCMPB(SDNode *N); void foldBoolExts(SDValue &Res, SDNode *&N); bool AllUsersSelectZero(SDNode *N); void SwapAllSelectUsers(SDNode *N); bool isOffsetMultipleOf(SDNode *N, unsigned Val) const; void transferMemOperands(SDNode *N, SDNode *Result); }; } // end anonymous namespace char PPCDAGToDAGISel::ID = 0; INITIALIZE_PASS(PPCDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) /// getGlobalBaseReg - Output the instructions required to put the /// base address to use for accessing globals into a register. 
/// SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { if (!GlobalBaseReg) { const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); const Module *M = MF->getFunction().getParent(); DebugLoc dl; if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { if (Subtarget->isTargetELF()) { GlobalBaseReg = PPC::R30; if (!Subtarget->isSecurePlt() && M->getPICLevel() == PICLevel::SmallPIC) { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); MF->getInfo()->setUsesPICBase(true); } else { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::UpdateGBR), GlobalBaseReg) .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); MF->getInfo()->setUsesPICBase(true); } } else { GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); } } else { // We must ensure that this sequence is dominated by the prologue. // FIXME: This is a bit of a big hammer since we don't get the benefits // of shrink-wrapping whenever we emit this instruction. Considering // this is used in any function where we emit a jump table, this may be // a significant limitation. We should consider inserting this in the // block where it is used and then commoning this sequence up if it // appears in multiple places. // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of // MovePCtoLR8. MF->getInfo()->setShrinkWrapDisabled(true); GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); } } return CurDAG->getRegister(GlobalBaseReg, PPCLowering->getPointerTy(CurDAG->getDataLayout())) .getNode(); } // Check if a SDValue has the toc-data attribute. static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) { GlobalAddressSDNode *GA = dyn_cast(Val); if (!GA) return false; const GlobalVariable *GV = dyn_cast_or_null(GA->getGlobal()); if (!GV) return false; if (!GV->hasAttribute("toc-data")) return false; // TODO: These asserts should be updated as more support for the toc data // transformation is added (struct support, etc.). 
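// Illustrative sketch (not part of this patch): the shape of the constraints
// the checks below enforce for a "toc-data" global, written as a standalone
// predicate on hypothetical inputs. A candidate global must not require
// stricter alignment than a TOC entry provides and must fit within a single
// pointer-sized TOC entry.
#include <cstdint>
static bool fitsInTocEntrySketch(uint64_t GlobalSizeInBits,
                                 uint64_t GlobalAlignInBytes,
                                 uint64_t PointerSizeInBytes) {
  return GlobalAlignInBytes <= PointerSizeInBytes &&
         GlobalSizeInBits <= PointerSizeInBytes * 8;
}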
assert( PointerSize >= GV->getAlign().valueOrOne().value() && "GlobalVariables with an alignment requirement stricter than TOC entry " "size not supported by the toc data transformation."); Type *GVType = GV->getValueType(); assert(GVType->isSized() && "A GlobalVariable's size must be known to be " "supported by the toc data transformation."); if (GVType->isVectorTy()) report_fatal_error("A GlobalVariable of Vector type is not currently " "supported by the toc data transformation."); if (GVType->isArrayTy()) report_fatal_error("A GlobalVariable of Array type is not currently " "supported by the toc data transformation."); if (GVType->isStructTy()) report_fatal_error("A GlobalVariable of Struct type is not currently " "supported by the toc data transformation."); assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && "A GlobalVariable with size larger than a TOC entry is not currently " "supported by the toc data transformation."); if (GV->hasLocalLinkage() || GV->hasPrivateLinkage()) report_fatal_error("A GlobalVariable with private or local linkage is not " "currently supported by the toc data transformation."); assert(!GV->hasCommonLinkage() && "Tentative definitions cannot have the mapping class XMC_TD."); return true; } /// isInt32Immediate - This method tests to see if the node is a 32-bit constant /// operand. If so Imm will receive the 32-bit value. static bool isInt32Immediate(SDNode *N, unsigned &Imm) { if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { Imm = N->getAsZExtVal(); return true; } return false; } /// isInt64Immediate - This method tests to see if the node is a 64-bit constant /// operand. If so Imm will receive the 64-bit value. static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { Imm = N->getAsZExtVal(); return true; } return false; } // isInt32Immediate - This method tests to see if a constant operand. // If so Imm will receive the 32 bit value. static bool isInt32Immediate(SDValue N, unsigned &Imm) { return isInt32Immediate(N.getNode(), Imm); } /// isInt64Immediate - This method tests to see if the value is a 64-bit /// constant operand. If so Imm will receive the 64-bit value. static bool isInt64Immediate(SDValue N, uint64_t &Imm) { return isInt64Immediate(N.getNode(), Imm); } static unsigned getBranchHint(unsigned PCC, const FunctionLoweringInfo &FuncInfo, const SDValue &DestMBB) { assert(isa(DestMBB)); if (!FuncInfo.BPI) return PPC::BR_NO_HINT; const BasicBlock *BB = FuncInfo.MBB->getBasicBlock(); const Instruction *BBTerm = BB->getTerminator(); if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; const BasicBlock *TBB = BBTerm->getSuccessor(0); const BasicBlock *FBB = BBTerm->getSuccessor(1); auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB); auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB); // We only want to handle cases which are easy to predict at static time, e.g. // C++ throw statement, that is very likely not taken, or calling never // returned function, e.g. stdlib exit(). So we set Threshold to filter // unwanted cases. // // Below is LLVM branch weight table, we only want to handle case 1, 2 // // Case Taken:Nontaken Example // 1. Unreachable 1048575:1 C++ throw, stdlib exit(), // 2. Invoke-terminating 1:1048575 // 3. Coldblock 4:64 __builtin_expect // 4. Loop Branch 124:4 For loop // 5. 
PH/ZH/FPH 20:12 const uint32_t Threshold = 10000; if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb)) return PPC::BR_NO_HINT; LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() << "::" << BB->getName() << "'\n" << " -> " << TBB->getName() << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"); const BasicBlockSDNode *BBDN = cast(DestMBB); // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities, // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock if (BBDN->getBasicBlock()->getBasicBlock() != TBB) std::swap(TProb, FProb); return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT; } // isOpcWithIntImmediate - This method tests to see if the node is a specific // opcode and that it has a immediate integer right operand. // If so Imm will receive the 32 bit value. static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { return N->getOpcode() == Opc && isInt32Immediate(N->getOperand(1).getNode(), Imm); } void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) { SDLoc dl(SN); int FI = cast(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; if (SN->hasOneUse()) CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, getSmallIPtrImm(Offset, dl)); else ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, getSmallIPtrImm(Offset, dl))); } bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME) { // Don't even go down this path for i64, since different logic will be // necessary for rldicl/rldicr/rldimi. if (N->getValueType(0) != MVT::i32) return false; unsigned Shift = 32; unsigned Indeterminant = ~0; // bit mask marking indeterminant results unsigned Opcode = N->getOpcode(); if (N->getNumOperands() != 2 || !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) return false; if (Opcode == ISD::SHL) { // apply shift left to mask if it comes first if (isShiftMask) Mask = Mask << Shift; // determine which bits are made indeterminant by shift Indeterminant = ~(0xFFFFFFFFu << Shift); } else if (Opcode == ISD::SRL) { // apply shift right to mask if it comes first if (isShiftMask) Mask = Mask >> Shift; // determine which bits are made indeterminant by shift Indeterminant = ~(0xFFFFFFFFu >> Shift); // adjust for the left rotate Shift = 32 - Shift; } else if (Opcode == ISD::ROTL) { Indeterminant = 0; } else { return false; } // if the mask doesn't intersect any Indeterminant bits if (Mask && !(Mask & Indeterminant)) { SH = Shift & 31; // make sure the mask is still a mask (wrap arounds may not be) return isRunOfOnes(Mask, MB, ME); } return false; } // isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS // instruction use the thread pointer. static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG) { assert( Base.getOpcode() == PPCISD::ADD_TLS && "Only expecting the ADD_TLS instruction to acquire the thread pointer!"); const PPCSubtarget &Subtarget = CurDAG->getMachineFunction().getSubtarget(); SDValue ADDTLSOp1 = Base.getOperand(0); unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode(); // Account for when ADD_TLS is used for the initial-exec TLS model on Linux. 
// // Although ADD_TLS does not explicitly use the thread pointer // register when LD_GOT_TPREL_L is one of it's operands, the LD_GOT_TPREL_L // instruction will have a relocation specifier, @got@tprel, that is used to // generate a GOT entry. The linker replaces this entry with an offset for a // for a thread local variable, which will be relative to the thread pointer. if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L) return true; // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR // node is produced instead to represent the aforementioned situation. LoadSDNode *LD = dyn_cast(ADDTLSOp1); if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR) return true; // A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand // to ADD_TLS represents a call to .__get_tpointer to get the thread pointer, // later returning it into R3. if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER) return true; // The ADD_TLS note is explicitly acquiring the thread pointer (X13/R13). RegisterSDNode *AddFirstOpReg = dyn_cast_or_null(ADDTLSOp1.getNode()); if (AddFirstOpReg && AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister()) return true; return false; } // canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS // instruction is present. An ADD_TLS instruction, followed by a D-Form memory // operation, can be optimized to use an X-Form load or store, allowing the // ADD_TLS node to be removed completely. static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base) { // Do not do this transformation at -O0. if (CurDAG->getTarget().getOptLevel() == CodeGenOptLevel::None) return false; // In order to perform this optimization inside tryTLSXForm[Load|Store], // Base is expected to be an ADD_TLS node. if (Base.getOpcode() != PPCISD::ADD_TLS) return false; for (auto *ADDTLSUse : Base.getNode()->uses()) { // The optimization to convert the D-Form load/store into its X-Form // counterpart should only occur if the source value offset of the load/ // store is 0. This also means that The offset should always be undefined. if (LoadSDNode *LD = dyn_cast(ADDTLSUse)) { if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef()) return false; } else if (StoreSDNode *ST = dyn_cast(ADDTLSUse)) { if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef()) return false; } else // Don't optimize if there are ADD_TLS users that aren't load/stores. return false; } if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) return false; // Does the ADD_TLS node of the load/store use the thread pointer? // If the thread pointer is not used as one of the operands of ADD_TLS, // then this optimization is not valid. return isThreadPointerAcquisitionNode(Base, CurDAG); } bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { SDValue Base = ST->getBasePtr(); if (!canOptimizeTLSDFormToXForm(CurDAG, Base)) return false; SDLoc dl(ST); EVT MemVT = ST->getMemoryVT(); EVT RegVT = ST->getValue().getValueType(); unsigned Opcode; switch (MemVT.getSimpleVT().SimpleTy) { default: return false; case MVT::i8: { Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS; break; } case MVT::i16: { Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS; break; } case MVT::i32: { Opcode = (RegVT == MVT::i32) ? 
PPC::STWXTLS_32 : PPC::STWXTLS; break; } case MVT::i64: { Opcode = PPC::STDXTLS; break; } case MVT::f32: { Opcode = PPC::STFSXTLS; break; } case MVT::f64: { Opcode = PPC::STFDXTLS; break; } } SDValue Chain = ST->getChain(); SDVTList VTs = ST->getVTList(); SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1), Chain}; SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); transferMemOperands(ST, MN); ReplaceNode(ST, MN); return true; } bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { SDValue Base = LD->getBasePtr(); if (!canOptimizeTLSDFormToXForm(CurDAG, Base)) return false; SDLoc dl(LD); EVT MemVT = LD->getMemoryVT(); EVT RegVT = LD->getValueType(0); bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; unsigned Opcode; switch (MemVT.getSimpleVT().SimpleTy) { default: return false; case MVT::i8: { Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS; break; } case MVT::i16: { if (RegVT == MVT::i32) Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32; else Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS; break; } case MVT::i32: { if (RegVT == MVT::i32) Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32; else Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS; break; } case MVT::i64: { Opcode = PPC::LDXTLS; break; } case MVT::f32: { Opcode = PPC::LFSXTLS; break; } case MVT::f64: { Opcode = PPC::LFDXTLS; break; } } SDValue Chain = LD->getChain(); SDVTList VTs = LD->getVTList(); SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain}; SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); transferMemOperands(LD, MN); ReplaceNode(LD, MN); return true; } /// Turn an or of two masked values into the rotate left word immediate then /// mask insert (rlwimi) instruction. bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDLoc dl(N); KnownBits LKnown = CurDAG->computeKnownBits(Op0); KnownBits RKnown = CurDAG->computeKnownBits(Op1); unsigned TargetMask = LKnown.Zero.getZExtValue(); unsigned InsertMask = RKnown.Zero.getZExtValue(); if ((TargetMask | InsertMask) == 0xFFFFFFFF) { unsigned Op0Opc = Op0.getOpcode(); unsigned Op1Opc = Op1.getOpcode(); unsigned Value, SH = 0; TargetMask = ~TargetMask; InsertMask = ~InsertMask; // If the LHS has a foldable shift and the RHS does not, then swap it to the // RHS so that we can fold the shift into the insert. if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) { if (Op0.getOperand(0).getOpcode() == ISD::SHL || Op0.getOperand(0).getOpcode() == ISD::SRL) { if (Op1.getOperand(0).getOpcode() != ISD::SHL && Op1.getOperand(0).getOpcode() != ISD::SRL) { std::swap(Op0, Op1); std::swap(Op0Opc, Op1Opc); std::swap(TargetMask, InsertMask); } } } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) { if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL && Op1.getOperand(0).getOpcode() != ISD::SRL) { std::swap(Op0, Op1); std::swap(Op0Opc, Op1Opc); std::swap(TargetMask, InsertMask); } } unsigned MB, ME; if (isRunOfOnes(InsertMask, MB, ME)) { if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && isInt32Immediate(Op1.getOperand(1), Value)) { Op1 = Op1.getOperand(0); SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value; } if (Op1Opc == ISD::AND) { // The AND mask might not be a constant, and we need to make sure that // if we're going to fold the masking with the insert, all bits not // know to be zero in the mask are known to be one. 
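// Illustrative sketch (not part of this patch): a plain-integer model of the
// rotate-left-word-then-mask-insert operation that tryBitfieldInsert selects
// (RLWIMI). Bits selected by the mask come from the rotated source; all other
// bits keep the target value. The mask is passed directly here rather than
// being derived from the MB/ME run-of-ones encoding, and the helper names are
// hypothetical.
#include <cassert>
#include <cstdint>
static uint32_t rotl32(uint32_t V, unsigned SH) {
  SH &= 31;
  return SH ? (V << SH) | (V >> (32 - SH)) : V;
}
static uint32_t rlwimiModel(uint32_t RA, uint32_t RS, unsigned SH,
                            uint32_t Mask) {
  return (rotl32(RS, SH) & Mask) | (RA & ~Mask);
}
static void rlwimiSketch() {
  // Insert the low byte of RS into bits 15..8 of RA.
  assert(rlwimiModel(0xAABBCCDD, 0x000000EE, 8, 0x0000FF00) == 0xAABBEEDD);
}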
KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1)); bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); unsigned SHOpc = Op1.getOperand(0).getOpcode(); if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { // Note that Value must be in range here (less than 32) because // otherwise there would not be any bits set in InsertMask. Op1 = Op1.getOperand(0).getOperand(0); SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; } } SH &= 31; SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); return true; } } return false; } static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { unsigned MaxTruncation = 0; // Cannot use range-based for loop here as we need the actual use (i.e. we // need the operand number corresponding to the use). A range-based for // will unbox the use and provide an SDNode*. for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end(); Use != UseEnd; ++Use) { unsigned Opc = Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode(); switch (Opc) { default: return 0; case ISD::TRUNCATE: if (Use->isMachineOpcode()) return 0; MaxTruncation = std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits()); continue; case ISD::STORE: { if (Use->isMachineOpcode()) return 0; StoreSDNode *STN = cast(*Use); unsigned MemVTSize = STN->getMemoryVT().getSizeInBits(); if (MemVTSize == 64 || Use.getOperandNo() != 0) return 0; MaxTruncation = std::max(MaxTruncation, MemVTSize); continue; } case PPC::STW8: case PPC::STWX8: case PPC::STWU8: case PPC::STWUX8: if (Use.getOperandNo() != 0) return 0; MaxTruncation = std::max(MaxTruncation, 32u); continue; case PPC::STH8: case PPC::STHX8: case PPC::STHU8: case PPC::STHUX8: if (Use.getOperandNo() != 0) return 0; MaxTruncation = std::max(MaxTruncation, 16u); continue; case PPC::STB8: case PPC::STBX8: case PPC::STBU8: case PPC::STBUX8: if (Use.getOperandNo() != 0) return 0; MaxTruncation = std::max(MaxTruncation, 8u); continue; } } return MaxTruncation; } // For any 32 < Num < 64, check if the Imm contains at least Num consecutive // zeros and return the number of bits by the left of these consecutive zeros. static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) { unsigned HiTZ = llvm::countr_zero(Hi_32(Imm)); unsigned LoLZ = llvm::countl_zero(Lo_32(Imm)); if ((HiTZ + LoLZ) >= Num) return (32 + HiTZ); return 0; } // Direct materialization of 64-bit constants by enumerated patterns. static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt) { unsigned TZ = llvm::countr_zero(Imm); unsigned LZ = llvm::countl_zero(Imm); unsigned TO = llvm::countr_one(Imm); unsigned LO = llvm::countl_one(Imm); unsigned Hi32 = Hi_32(Imm); unsigned Lo32 = Lo_32(Imm); SDNode *Result = nullptr; unsigned Shift = 0; auto getI32Imm = [CurDAG, dl](unsigned Imm) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); }; // Following patterns use 1 instructions to materialize the Imm. 
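// Illustrative sketch (not part of this patch): a standalone restatement of
// findContiguousZerosAtLeast above using C++20 <bit>, with one worked value.
// Only a zero run that crosses (or touches) the boundary between the two
// 32-bit halves is counted, which is why the trailing zeros of the high half
// and the leading zeros of the low half are what get added up.
#include <bit>
#include <cassert>
#include <cstdint>
static int contiguousZerosAtLeastSketch(uint64_t Imm, unsigned Num) {
  const uint32_t Hi = static_cast<uint32_t>(Imm >> 32);
  const uint32_t Lo = static_cast<uint32_t>(Imm);
  const unsigned HiTZ = std::countr_zero(Hi); // zeros just above bit 31
  const unsigned LoLZ = std::countl_zero(Lo); // zeros just below bit 32
  return (HiTZ + LoLZ >= Num) ? int(32 + HiTZ) : 0;
}
static void contiguousZerosSketch() {
  // Bits 7..44 of this value are zero: a run of 38 zeros across the boundary.
  const uint64_t Imm = (uint64_t(1) << 45) | 0x7F;
  assert(contiguousZerosAtLeastSketch(Imm, 33) == 45); // 32 + 13 trailing zeros of Hi
  assert(contiguousZerosAtLeastSketch(Imm, 49) == 0);
}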
InstCnt = 1; // 1-1) Patterns : {zeros}{15-bit valve} // {ones}{15-bit valve} if (isInt<16>(Imm)) { SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64); return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); } // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros} // {ones}{15-bit valve}{16 zeros} if (TZ > 15 && (LZ > 32 || LO > 32)) return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm((Imm >> 16) & 0xffff)); // Following patterns use 2 instructions to materialize the Imm. InstCnt = 2; assert(LZ < 64 && "Unexpected leading zeros here."); // Count of ones follwing the leading zeros. unsigned FO = llvm::countl_one(Imm << LZ); // 2-1) Patterns : {zeros}{31-bit value} // {ones}{31-bit value} if (isInt<32>(Imm)) { uint64_t ImmHi16 = (Imm >> 16) & 0xffff; unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Imm & 0xffff)); } // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros} // {zeros}{15-bit value}{zeros} // {zeros}{ones}{15-bit value} // {ones}{15-bit value}{zeros} // We can take advantage of LI's sign-extension semantics to generate leading // ones, and then use RLDIC to mask off the ones in both sides after rotation. if ((LZ + FO + TZ) > 48) { Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm((Imm >> TZ) & 0xffff)); return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), getI32Imm(TZ), getI32Imm(LZ)); } // 2-3) Pattern : {zeros}{15-bit value}{ones} // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value, // therefore we can take advantage of LI's sign-extension semantics, and then // mask them off after rotation. // // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+ // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| // +------------------------+ +------------------------+ // 63 0 63 0 // Imm (Imm >> (48 - LZ) & 0xffff) // +----sext-----|--16-bit--+ +clear-|-----------------+ // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| // +------------------------+ +------------------------+ // 63 0 63 0 // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ if ((LZ + TO) > 48) { // Since the immediates with (LZ > 32) have been handled by previous // patterns, here we have (LZ <= 32) to make sure we will not shift right // the Imm by a negative value. assert(LZ <= 32 && "Unexpected shift value."); Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm((Imm >> (48 - LZ) & 0xffff))); return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), getI32Imm(48 - LZ), getI32Imm(LZ)); } // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones} // {ones}{15-bit value}{ones} // We can take advantage of LI's sign-extension semantics to generate leading // ones, and then use RLDICL to mask off the ones in left sides (if required) // after rotation. 
// // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+ // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb| // +------------------------+ +------------------------+ // 63 0 63 0 // Imm (Imm >> TO) & 0xffff // +----sext-----|--16-bit--+ +LZ|---------------------+ // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111| // +------------------------+ +------------------------+ // 63 0 63 0 // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ if ((LZ + FO + TO) > 48) { Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm((Imm >> TO) & 0xffff)); return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), getI32Imm(TO), getI32Imm(LZ)); } // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value} // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit // value, we can use LI for Lo16 without generating leading ones then add the // Hi16(in Lo32). if (LZ == 32 && ((Lo32 & 0x8000) == 0)) { Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo32 & 0xffff)); return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Lo32 >> 16)); } // 2-6) Patterns : {******}{49 zeros}{******} // {******}{49 ones}{******} // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15 // bits remain on both sides. Rotate right the Imm to construct an int<16> // value, use LI for int<16> value and then use RLDICL without mask to rotate // it back. // // 1) findContiguousZerosAtLeast(Imm, 49) // +------|--zeros-|------+ +---ones--||---15 bit--+ // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb| // +----------------------+ +----------------------+ // 63 0 63 0 // // 2) findContiguousZerosAtLeast(~Imm, 49) // +------|--ones--|------+ +---ones--||---15 bit--+ // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| // +----------------------+ +----------------------+ // 63 0 63 0 if ((Shift = findContiguousZerosAtLeast(Imm, 49)) || (Shift = findContiguousZerosAtLeast(~Imm, 49))) { uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(RotImm & 0xffff)); return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)); } // 2-7) Patterns : High word == Low word // This may require 2 to 3 instructions, depending on whether Lo32 can be // materialized in 1 instruction. if (Hi32 == Lo32) { // Handle the first 32 bits. uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff; uint64_t ImmLo16 = Lo32 & 0xffff; if (isInt<16>(Lo32)) Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16)); else if (!ImmLo16) Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16)); else { InstCnt = 3; Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16)); Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(ImmLo16)); } // Use rldimi to insert the Low word into High word. SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), getI32Imm(0)}; return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); } // Following patterns use 3 instructions to materialize the Imm. 
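// Illustrative sketch (not part of this patch): plain-integer models of the
// LI8 / LIS8 / ORI8 building blocks that the two- and three-instruction
// patterns combine. LI8 and LIS8 sign-extend a 16-bit immediate (LIS8 shifts
// it left by 16 first); ORI8 ors in a zero-extended 16-bit immediate. The
// lowercase helper names below are hypothetical stand-ins for those
// instructions.
#include <cassert>
#include <cstdint>
static uint64_t li8(uint16_t Imm) {
  return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int16_t>(Imm)));
}
static uint64_t lis8(uint16_t Imm) {
  return static_cast<uint64_t>(
      static_cast<int64_t>(static_cast<int32_t>(uint32_t(Imm) << 16)));
}
static uint64_t ori8(uint64_t Reg, uint16_t Imm) { return Reg | Imm; }
static void liOriSketch() {
  // LI8 alone covers pattern 1-1: a sign-extended 16-bit value.
  assert(li8(0xFFF0) == 0xFFFFFFFFFFFFFFF0ULL); // -16
  // Pattern 2-1: any value whose upper 33 bits are copies of bit 31 can be
  // built as LIS8 of the high 16 bits followed by ORI8 of the low 16 bits.
  const uint64_t Imm = 0xFFFFFFFF80001234ULL;
  assert(ori8(lis8((Imm >> 16) & 0xFFFF), Imm & 0xFFFF) == Imm);
}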
InstCnt = 3; // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros} // {zeros}{31-bit value}{zeros} // {zeros}{ones}{31-bit value} // {ones}{31-bit value}{zeros} // We can take advantage of LIS's sign-extension semantics to generate leading // ones, add the remaining bits with ORI, and then use RLDIC to mask off the // ones in both sides after rotation. if ((LZ + FO + TZ) > 32) { uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff; unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm((Imm >> TZ) & 0xffff)); return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), getI32Imm(TZ), getI32Imm(LZ)); } // 3-2) Pattern : {zeros}{31-bit value}{ones} // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits // value, therefore we can take advantage of LIS's sign-extension semantics, // add the remaining bits with ORI, and then mask them off after rotation. // This is similar to Pattern 2-3, please refer to the diagram there. if ((LZ + TO) > 32) { // Since the immediates with (LZ > 32) have been handled by previous // patterns, here we have (LZ <= 32) to make sure we will not shift right // the Imm by a negative value. assert(LZ <= 32 && "Unexpected shift value."); Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm((Imm >> (48 - LZ)) & 0xffff)); Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm((Imm >> (32 - LZ)) & 0xffff)); return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), getI32Imm(32 - LZ), getI32Imm(LZ)); } // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones} // {ones}{31-bit value}{ones} // We can take advantage of LIS's sign-extension semantics to generate leading // ones, add the remaining bits with ORI, and then use RLDICL to mask off the // ones in left sides (if required) after rotation. // This is similar to Pattern 2-4, please refer to the diagram there. if ((LZ + FO + TO) > 32) { Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm((Imm >> (TO + 16)) & 0xffff)); Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm((Imm >> TO) & 0xffff)); return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), getI32Imm(TO), getI32Imm(LZ)); } // 3-4) Patterns : {******}{33 zeros}{******} // {******}{33 ones}{******} // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31 // bits remain on both sides. Rotate right the Imm to construct an int<32> // value, use LIS + ORI for int<32> value and then use RLDICL without mask to // rotate it back. // This is similar to Pattern 2-6, please refer to the diagram there. if ((Shift = findContiguousZerosAtLeast(Imm, 33)) || (Shift = findContiguousZerosAtLeast(~Imm, 33))) { uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); uint64_t ImmHi16 = (RotImm >> 16) & 0xffff; unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(RotImm & 0xffff)); return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)); } InstCnt = 0; return nullptr; } // Try to select instructions to generate a 64 bit immediate using prefix as // well as non prefix instructions. 
The function will return the SDNode // to materialize that constant or it will return nullptr if it does not // find one. The variable InstCnt is set to the number of instructions that // were selected. static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt) { unsigned TZ = llvm::countr_zero(Imm); unsigned LZ = llvm::countl_zero(Imm); unsigned TO = llvm::countr_one(Imm); unsigned FO = llvm::countl_one(LZ == 64 ? 0 : (Imm << LZ)); unsigned Hi32 = Hi_32(Imm); unsigned Lo32 = Lo_32(Imm); auto getI32Imm = [CurDAG, dl](unsigned Imm) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); }; auto getI64Imm = [CurDAG, dl](uint64_t Imm) { return CurDAG->getTargetConstant(Imm, dl, MVT::i64); }; // Following patterns use 1 instruction to materialize Imm. InstCnt = 1; // The pli instruction can materialize up to 34 bits directly. // If a constant fits within 34-bits, emit the pli instruction here directly. if (isInt<34>(Imm)) return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, CurDAG->getTargetConstant(Imm, dl, MVT::i64)); // Require at least two instructions. InstCnt = 2; SDNode *Result = nullptr; // Patterns : {zeros}{ones}{33-bit value}{zeros} // {zeros}{33-bit value}{zeros} // {zeros}{ones}{33-bit value} // {ones}{33-bit value}{zeros} // We can take advantage of PLI's sign-extension semantics to generate leading // ones, and then use RLDIC to mask off the ones on both sides after rotation. if ((LZ + FO + TZ) > 30) { APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff); APInt Extended = SignedInt34.sext(64); Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(*Extended.getRawData())); return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), getI32Imm(TZ), getI32Imm(LZ)); } // Pattern : {zeros}{33-bit value}{ones} // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value, // therefore we can take advantage of PLI's sign-extension semantics, and then // mask them off after rotation. // // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+ // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| // +------------------------+ +------------------------+ // 63 0 63 0 // // +----sext-----|--34-bit--+ +clear-|-----------------+ // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| // +------------------------+ +------------------------+ // 63 0 63 0 if ((LZ + TO) > 30) { APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff); APInt Extended = SignedInt34.sext(64); Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(*Extended.getRawData())); return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), getI32Imm(30 - LZ), getI32Imm(LZ)); } // Patterns : {zeros}{ones}{33-bit value}{ones} // {ones}{33-bit value}{ones} // Similar to LI we can take advantage of PLI's sign-extension semantics to // generate leading ones, and then use RLDICL to mask off the ones in left // sides (if required) after rotation. if ((LZ + FO + TO) > 30) { APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff); APInt Extended = SignedInt34.sext(64); Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(*Extended.getRawData())); return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), getI32Imm(TO), getI32Imm(LZ)); } // Patterns : {******}{31 zeros}{******} // : {******}{31 ones}{******} // If Imm contains 31 consecutive zeros/ones then the remaining bit count // is 33. 
Rotate right the Imm to construct a int<33> value, we can use PLI // for the int<33> value and then use RLDICL without a mask to rotate it back. // // +------|--ones--|------+ +---ones--||---33 bit--+ // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| // +----------------------+ +----------------------+ // 63 0 63 0 for (unsigned Shift = 0; Shift < 63; ++Shift) { uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); if (isInt<34>(RotImm)) { Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm)); return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)); } } // Patterns : High word == Low word // This is basically a splat of a 32 bit immediate. if (Hi32 == Lo32) { Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32)); SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), getI32Imm(0)}; return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); } InstCnt = 3; // Catch-all // This pattern can form any 64 bit immediate in 3 instructions. SDNode *ResultHi = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32)); SDNode *ResultLo = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32)); SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32), getI32Imm(0)}; return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); } static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned *InstCnt = nullptr) { unsigned InstCntDirect = 0; // No more than 3 instructions are used if we can select the i64 immediate // directly. SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect); const PPCSubtarget &Subtarget = CurDAG->getMachineFunction().getSubtarget(); // If we have prefixed instructions and there is a chance we can // materialize the constant with fewer prefixed instructions than // non-prefixed, try that. if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) { unsigned InstCntDirectP = 0; SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP); // Use the prefix case in either of two cases: // 1) We have no result from the non-prefix case to use. // 2) The non-prefix case uses more instructions than the prefix case. // If the prefix and non-prefix cases use the same number of instructions // we will prefer the non-prefix case. if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) { if (InstCnt) *InstCnt = InstCntDirectP; return ResultP; } } if (Result) { if (InstCnt) *InstCnt = InstCntDirect; return Result; } auto getI32Imm = [CurDAG, dl](unsigned Imm) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); }; uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff; uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff; // Try to use 4 instructions to materialize the immediate which is "almost" a // splat of a 32 bit immediate. 
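  // Illustrative example (arbitrary value): for Imm = 0x1234123456781234 the
  // constant differs from a splat of 0x12341234 only in Hi16OfLo32 (0x5678),
  // so the first branch below emits
  //   lis 0x5678; ori 0x1234; rldimi 32, 0  -> 0x5678123456781234
  //   rldimi 48, 0                          -> 0x1234123456781234
  // i.e. it splats the actual low word and then patches the top halfword
  // (Hi16OfHi32), for four instructions in total.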
if (Hi16OfLo32 && Lo16OfLo32) { uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff; uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff; bool IsSelected = false; auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) { SDNode *Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16)); Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Lo16)); SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), getI32Imm(0)}; return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); }; if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) { IsSelected = true; Result = getSplat(Hi16OfLo32, Lo16OfLo32); // Modify Hi16OfHi32. SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48), getI32Imm(0)}; Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) { IsSelected = true; Result = getSplat(Hi16OfHi32, Lo16OfHi32); // Modify Lo16OfLo32. SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16), getI32Imm(16), getI32Imm(31)}; Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops); } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) { IsSelected = true; Result = getSplat(Hi16OfHi32, Lo16OfHi32); // Modify Hi16OfLo32. SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16), getI32Imm(0), getI32Imm(15)}; Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops); } if (IsSelected == true) { if (InstCnt) *InstCnt = 4; return Result; } } // Handle the upper 32 bit value. Result = selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect); // Add in the last bits as required. if (Hi16OfLo32) { Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Hi16OfLo32)); ++InstCntDirect; } if (Lo16OfLo32) { Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Lo16OfLo32)); ++InstCntDirect; } if (InstCnt) *InstCnt = InstCntDirect; return Result; } // Select a 64-bit constant. static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { SDLoc dl(N); // Get 64 bit value. int64_t Imm = N->getAsZExtVal(); if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { uint64_t SextImm = SignExtend64(Imm, MinSize); SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); if (isInt<16>(SextImm)) return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); } return selectI64Imm(CurDAG, dl, Imm); } namespace { class BitPermutationSelector { struct ValueBit { SDValue V; // The bit number in the value, using a convention where bit 0 is the // lowest-order bit. unsigned Idx; // ConstZero means a bit we need to mask off. // Variable is a bit comes from an input variable. // VariableKnownToBeZero is also a bit comes from an input variable, // but it is known to be already zero. So we do not need to mask them. 
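  // For instance, the bits of a zero-extending load above the loaded width are
  // VariableKnownToBeZero (they come from the load but are already zero),
  // whereas bits cleared by an AND mask become ConstZero; see the ISD::LOAD
  // and ISD::AND cases in getValueBits below.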
enum Kind { ConstZero, Variable, VariableKnownToBeZero } K; ValueBit(SDValue V, unsigned I, Kind K = Variable) : V(V), Idx(I), K(K) {} ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {} bool isZero() const { return K == ConstZero || K == VariableKnownToBeZero; } bool hasValue() const { return K == Variable || K == VariableKnownToBeZero; } SDValue getValue() const { assert(hasValue() && "Cannot get the value of a constant bit"); return V; } unsigned getValueBitIndex() const { assert(hasValue() && "Cannot get the value bit index of a constant bit"); return Idx; } }; // A bit group has the same underlying value and the same rotate factor. struct BitGroup { SDValue V; unsigned RLAmt; unsigned StartIdx, EndIdx; // This rotation amount assumes that the lower 32 bits of the quantity are // replicated in the high 32 bits by the rotation operator (which is done // by rlwinm and friends in 64-bit mode). bool Repl32; // Did converting to Repl32 == true change the rotation factor? If it did, // it decreased it by 32. bool Repl32CR; // Was this group coalesced after setting Repl32 to true? bool Repl32Coalesced; BitGroup(SDValue V, unsigned R, unsigned S, unsigned E) : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false), Repl32Coalesced(false) { LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R << " [" << S << ", " << E << "]\n"); } }; // Information on each (Value, RLAmt) pair (like the number of groups // associated with each) used to choose the lowering method. struct ValueRotInfo { SDValue V; unsigned RLAmt = std::numeric_limits::max(); unsigned NumGroups = 0; unsigned FirstGroupStartIdx = std::numeric_limits::max(); bool Repl32 = false; ValueRotInfo() = default; // For sorting (in reverse order) by NumGroups, and then by // FirstGroupStartIdx. bool operator < (const ValueRotInfo &Other) const { // We need to sort so that the non-Repl32 come first because, when we're // doing masking, the Repl32 bit groups might be subsumed into the 64-bit // masking operation. if (Repl32 < Other.Repl32) return true; else if (Repl32 > Other.Repl32) return false; else if (NumGroups > Other.NumGroups) return true; else if (NumGroups < Other.NumGroups) return false; else if (RLAmt == 0 && Other.RLAmt != 0) return true; else if (RLAmt != 0 && Other.RLAmt == 0) return false; else if (FirstGroupStartIdx < Other.FirstGroupStartIdx) return true; return false; } }; using ValueBitsMemoizedValue = std::pair>; using ValueBitsMemoizer = DenseMap>; ValueBitsMemoizer Memoizer; // Return a pair of bool and a SmallVector pointer to a memoization entry. // The bool is true if something interesting was deduced, otherwise if we're // providing only a generic representation of V (or something else likewise // uninteresting for instruction selection) through the SmallVector. std::pair *> getValueBits(SDValue V, unsigned NumBits) { auto &ValueEntry = Memoizer[V]; if (ValueEntry) return std::make_pair(ValueEntry->first, &ValueEntry->second); ValueEntry.reset(new ValueBitsMemoizedValue()); bool &Interesting = ValueEntry->first; SmallVector &Bits = ValueEntry->second; Bits.resize(NumBits); switch (V.getOpcode()) { default: break; case ISD::ROTL: if (isa(V.getOperand(1))) { - unsigned RotAmt = V.getConstantOperandVal(1); + assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!"); + unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; for (unsigned i = 0; i < NumBits; ++i) Bits[i] = LHSBits[i < RotAmt ? 
i + (NumBits - RotAmt) : i - RotAmt]; return std::make_pair(Interesting = true, &Bits); } break; case ISD::SHL: case PPCISD::SHL: if (isa(V.getOperand(1))) { - unsigned ShiftAmt = V.getConstantOperandVal(1); + // sld takes 7 bits, slw takes 6. + unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; - for (unsigned i = ShiftAmt; i < NumBits; ++i) - Bits[i] = LHSBits[i - ShiftAmt]; - - for (unsigned i = 0; i < ShiftAmt; ++i) - Bits[i] = ValueBit(ValueBit::ConstZero); + if (ShiftAmt >= NumBits) { + for (unsigned i = 0; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } else { + for (unsigned i = ShiftAmt; i < NumBits; ++i) + Bits[i] = LHSBits[i - ShiftAmt]; + for (unsigned i = 0; i < ShiftAmt; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } return std::make_pair(Interesting = true, &Bits); } break; case ISD::SRL: case PPCISD::SRL: if (isa(V.getOperand(1))) { - unsigned ShiftAmt = V.getConstantOperandVal(1); + // srd takes lowest 7 bits, srw takes 6. + unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; - for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) - Bits[i] = LHSBits[i + ShiftAmt]; - - for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) - Bits[i] = ValueBit(ValueBit::ConstZero); + if (ShiftAmt >= NumBits) { + for (unsigned i = 0; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } else { + for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) + Bits[i] = LHSBits[i + ShiftAmt]; + for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } return std::make_pair(Interesting = true, &Bits); } break; case ISD::AND: if (isa(V.getOperand(1))) { uint64_t Mask = V.getConstantOperandVal(1); const SmallVector *LHSBits; // Mark this as interesting, only if the LHS was also interesting. This // prevents the overall procedure from matching a single immediate 'and' // (which is non-optimal because such an and might be folded with other // things if we don't select it here). std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); for (unsigned i = 0; i < NumBits; ++i) if (((Mask >> i) & 1) == 1) Bits[i] = (*LHSBits)[i]; else { // AND instruction masks this bit. If the input is already zero, // we have nothing to do here. Otherwise, make the bit ConstZero. if ((*LHSBits)[i].isZero()) Bits[i] = (*LHSBits)[i]; else Bits[i] = ValueBit(ValueBit::ConstZero); } return std::make_pair(Interesting, &Bits); } break; case ISD::OR: { const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second; bool AllDisjoint = true; SDValue LastVal = SDValue(); unsigned LastIdx = 0; for (unsigned i = 0; i < NumBits; ++i) { if (LHSBits[i].isZero() && RHSBits[i].isZero()) { // If both inputs are known to be zero and one is ConstZero and // another is VariableKnownToBeZero, we can select whichever // we like. To minimize the number of bit groups, we select // VariableKnownToBeZero if this bit is the next bit of the same // input variable from the previous bit. Otherwise, we select // ConstZero. 
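  // For example, if bit i-1 was taken from variable X at bit index k and X's
  // bit k+1 is also known to be zero here, keeping X's VariableKnownToBeZero
  // bit lets bits i-1 and i stay in one bit group; an unrelated known-zero
  // bit is recorded as ConstZero instead.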
if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal && LHSBits[i].getValueBitIndex() == LastIdx + 1) Bits[i] = LHSBits[i]; else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal && RHSBits[i].getValueBitIndex() == LastIdx + 1) Bits[i] = RHSBits[i]; else Bits[i] = ValueBit(ValueBit::ConstZero); } else if (LHSBits[i].isZero()) Bits[i] = RHSBits[i]; else if (RHSBits[i].isZero()) Bits[i] = LHSBits[i]; else { AllDisjoint = false; break; } // We remember the value and bit index of this bit. if (Bits[i].hasValue()) { LastVal = Bits[i].getValue(); LastIdx = Bits[i].getValueBitIndex(); } else { if (LastVal) LastVal = SDValue(); LastIdx = 0; } } if (!AllDisjoint) break; return std::make_pair(Interesting = true, &Bits); } case ISD::ZERO_EXTEND: { // We support only the case with zero extension from i32 to i64 so far. if (V.getValueType() != MVT::i64 || V.getOperand(0).getValueType() != MVT::i32) break; const SmallVector *LHSBits; const unsigned NumOperandBits = 32; std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumOperandBits); for (unsigned i = 0; i < NumOperandBits; ++i) Bits[i] = (*LHSBits)[i]; for (unsigned i = NumOperandBits; i < NumBits; ++i) Bits[i] = ValueBit(ValueBit::ConstZero); return std::make_pair(Interesting, &Bits); } case ISD::TRUNCATE: { EVT FromType = V.getOperand(0).getValueType(); EVT ToType = V.getValueType(); // We support only the case with truncate from i64 to i32. if (FromType != MVT::i64 || ToType != MVT::i32) break; const unsigned NumAllBits = FromType.getSizeInBits(); SmallVector *InBits; std::tie(Interesting, InBits) = getValueBits(V.getOperand(0), NumAllBits); const unsigned NumValidBits = ToType.getSizeInBits(); // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value. // So, we cannot include this truncate. bool UseUpper32bit = false; for (unsigned i = 0; i < NumValidBits; ++i) if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) { UseUpper32bit = true; break; } if (UseUpper32bit) break; for (unsigned i = 0; i < NumValidBits; ++i) Bits[i] = (*InBits)[i]; return std::make_pair(Interesting, &Bits); } case ISD::AssertZext: { // For AssertZext, we look through the operand and // mark the bits known to be zero. const SmallVector *LHSBits; std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); EVT FromType = cast(V.getOperand(1))->getVT(); const unsigned NumValidBits = FromType.getSizeInBits(); for (unsigned i = 0; i < NumValidBits; ++i) Bits[i] = (*LHSBits)[i]; // These bits are known to be zero but the AssertZext may be from a value // that already has some constant zero bits (i.e. from a masking and). for (unsigned i = NumValidBits; i < NumBits; ++i) Bits[i] = (*LHSBits)[i].hasValue() ? ValueBit((*LHSBits)[i].getValue(), (*LHSBits)[i].getValueBitIndex(), ValueBit::VariableKnownToBeZero) : ValueBit(ValueBit::ConstZero); return std::make_pair(Interesting, &Bits); } case ISD::LOAD: LoadSDNode *LD = cast(V); if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) { EVT VT = LD->getMemoryVT(); const unsigned NumValidBits = VT.getSizeInBits(); for (unsigned i = 0; i < NumValidBits; ++i) Bits[i] = ValueBit(V, i); // These bits are known to be zero. for (unsigned i = NumValidBits; i < NumBits; ++i) Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero); // Zero-extending load itself cannot be optimized. So, it is not // interesting by itself though it gives useful information. 
return std::make_pair(Interesting = false, &Bits); } break; } for (unsigned i = 0; i < NumBits; ++i) Bits[i] = ValueBit(V, i); return std::make_pair(Interesting = false, &Bits); } // For each value (except the constant ones), compute the left-rotate amount // to get it from its original to final position. void computeRotationAmounts() { NeedMask = false; RLAmt.resize(Bits.size()); for (unsigned i = 0; i < Bits.size(); ++i) if (Bits[i].hasValue()) { unsigned VBI = Bits[i].getValueBitIndex(); if (i >= VBI) RLAmt[i] = i - VBI; else RLAmt[i] = Bits.size() - (VBI - i); } else if (Bits[i].isZero()) { NeedMask = true; RLAmt[i] = UINT32_MAX; } else { llvm_unreachable("Unknown value bit type"); } } // Collect groups of consecutive bits with the same underlying value and // rotation factor. If we're doing late masking, we ignore zeros, otherwise // they break up groups. void collectBitGroups(bool LateMask) { BitGroups.clear(); unsigned LastRLAmt = RLAmt[0]; SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue(); unsigned LastGroupStartIdx = 0; bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); for (unsigned i = 1; i < Bits.size(); ++i) { unsigned ThisRLAmt = RLAmt[i]; SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); if (LateMask && !ThisValue) { ThisValue = LastValue; ThisRLAmt = LastRLAmt; // If we're doing late masking, then the first bit group always starts // at zero (even if the first bits were zero). if (BitGroups.empty()) LastGroupStartIdx = 0; } // If this bit is known to be zero and the current group is a bit group // of zeros, we do not need to terminate the current bit group even the // Value or RLAmt does not match here. Instead, we terminate this group // when the first non-zero bit appears later. if (IsGroupOfZeros && Bits[i].isZero()) continue; // If this bit has the same underlying value and the same rotate factor as // the last one, then they're part of the same group. if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) // We cannot continue the current group if this bits is not known to // be zero in a bit group of zeros. if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero())) continue; if (LastValue.getNode()) BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, i-1)); LastRLAmt = ThisRLAmt; LastValue = ThisValue; LastGroupStartIdx = i; IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); } if (LastValue.getNode()) BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, Bits.size()-1)); if (BitGroups.empty()) return; // We might be able to combine the first and last groups. if (BitGroups.size() > 1) { // If the first and last groups are the same, then remove the first group // in favor of the last group, making the ending index of the last group // equal to the ending index of the to-be-removed first group. if (BitGroups[0].StartIdx == 0 && BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 && BitGroups[0].V == BitGroups[BitGroups.size()-1].V && BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) { LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n"); BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx; BitGroups.erase(BitGroups.begin()); } } } // Take all (SDValue, RLAmt) pairs and sort them by the number of groups // associated with each. If the number of groups are same, we prefer a group // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate // instruction. If there is a degeneracy, pick the one that occurs // first (in the final value). 
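  // Concretely, given ValueRotInfo::operator< above, ValueRotsVec ends up
  // ordered with non-Repl32 entries first, then by descending NumGroups,
  // then entries with RLAmt == 0, then by ascending FirstGroupStartIdx.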
void collectValueRotInfo() { ValueRots.clear(); for (auto &BG : BitGroups) { unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0); ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)]; VRI.V = BG.V; VRI.RLAmt = BG.RLAmt; VRI.Repl32 = BG.Repl32; VRI.NumGroups += 1; VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx); } // Now that we've collected the various ValueRotInfo instances, we need to // sort them. ValueRotsVec.clear(); for (auto &I : ValueRots) { ValueRotsVec.push_back(I.second); } llvm::sort(ValueRotsVec); } // In 64-bit mode, rlwinm and friends have a rotation operator that // replicates the low-order 32 bits into the high-order 32-bits. The mask // indices of these instructions can only be in the lower 32 bits, so they // can only represent some 64-bit bit groups. However, when they can be used, // the 32-bit replication can be used to represent, as a single bit group, // otherwise separate bit groups. We'll convert to replicated-32-bit bit // groups when possible. Returns true if any of the bit groups were // converted. void assignRepl32BitGroups() { // If we have bits like this: // // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24 // Groups: | RLAmt = 8 | RLAmt = 40 | // // But, making use of a 32-bit operation that replicates the low-order 32 // bits into the high-order 32 bits, this can be one bit group with a RLAmt // of 8. auto IsAllLow32 = [this](BitGroup & BG) { if (BG.StartIdx <= BG.EndIdx) { for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) { if (!Bits[i].hasValue()) continue; if (Bits[i].getValueBitIndex() >= 32) return false; } } else { for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) { if (!Bits[i].hasValue()) continue; if (Bits[i].getValueBitIndex() >= 32) return false; } for (unsigned i = 0; i <= BG.EndIdx; ++i) { if (!Bits[i].hasValue()) continue; if (Bits[i].getValueBitIndex() >= 32) return false; } } return true; }; for (auto &BG : BitGroups) { // If this bit group has RLAmt of 0 and will not be merged with // another bit group, we don't benefit from Repl32. We don't mark // such group to give more freedom for later instruction selection. if (BG.RLAmt == 0) { auto PotentiallyMerged = [this](BitGroup & BG) { for (auto &BG2 : BitGroups) if (&BG != &BG2 && BG.V == BG2.V && (BG2.RLAmt == 0 || BG2.RLAmt == 32)) return true; return false; }; if (!PotentiallyMerged(BG)) continue; } if (BG.StartIdx < 32 && BG.EndIdx < 32) { if (IsAllLow32(BG)) { if (BG.RLAmt >= 32) { BG.RLAmt -= 32; BG.Repl32CR = true; } BG.Repl32 = true; LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for " << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n"); } } } // Now walk through the bit groups, consolidating where possible. for (auto I = BitGroups.begin(); I != BitGroups.end();) { // We might want to remove this bit group by merging it with the previous // group (which might be the ending group). auto IP = (I == BitGroups.begin()) ? 
std::prev(BitGroups.end()) : std::prev(I); if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt && I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) { LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I->RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP->StartIdx << ", " << IP->EndIdx << "]\n"); IP->EndIdx = I->EndIdx; IP->Repl32CR = IP->Repl32CR || I->Repl32CR; IP->Repl32Coalesced = true; I = BitGroups.erase(I); continue; } else { // There is a special case worth handling: If there is a single group // covering the entire upper 32 bits, and it can be merged with both // the next and previous groups (which might be the same group), then // do so. If it is the same group (so there will be only one group in // total), then we need to reverse the order of the range so that it // covers the entire 64 bits. if (I->StartIdx == 32 && I->EndIdx == 63) { assert(std::next(I) == BitGroups.end() && "bit group ends at index 63 but there is another?"); auto IN = BitGroups.begin(); if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && IsAllLow32(*I)) { LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I->RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"); if (IP == IN) { // There is only one other group; change it to cover the whole // range (backward, so that it can still be Repl32 but cover the // whole 64-bit range). IP->StartIdx = 31; IP->EndIdx = 30; IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32; IP->Repl32Coalesced = true; I = BitGroups.erase(I); } else { // There are two separate groups, one before this group and one // after us (at the beginning). We're going to remove this group, // but also the group at the very beginning. IP->EndIdx = IN->EndIdx; IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32; IP->Repl32Coalesced = true; I = BitGroups.erase(I); BitGroups.erase(BitGroups.begin()); } // This must be the last group in the vector (and we might have // just invalidated the iterator above), so break here. break; } } } ++I; } } SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); } uint64_t getZerosMask() { uint64_t Mask = 0; for (unsigned i = 0; i < Bits.size(); ++i) { if (Bits[i].hasValue()) continue; Mask |= (UINT64_C(1) << i); } return ~Mask; } // This method extends an input value to 64 bit if input is 32-bit integer. // While selecting instructions in BitPermutationSelector in 64-bit mode, // an input value can be a 32-bit integer if a ZERO_EXTEND node is included. // In such case, we extend it to 64 bit to be consistent with other values. 
SDValue ExtendToInt64(SDValue V, const SDLoc &dl) { if (V.getValueSizeInBits() == 64) return V; assert(V.getValueSizeInBits() == 32); SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, ImDef, V, SubRegIdx), 0); return ExtVal; } SDValue TruncateToInt32(SDValue V, const SDLoc &dl) { if (V.getValueSizeInBits() == 32) return V; assert(V.getValueSizeInBits() == 64); SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, V, SubRegIdx), 0); return SubVal; } // Depending on the number of groups for a particular value, it might be // better to rotate, mask explicitly (using andi/andis), and then or the // result. Select this part of the result first. void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { if (BPermRewriterNoMasking) return; for (ValueRotInfo &VRI : ValueRotsVec) { unsigned Mask = 0; for (unsigned i = 0; i < Bits.size(); ++i) { if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V) continue; if (RLAmt[i] != VRI.RLAmt) continue; Mask |= (1u << i); } // Compute the masks for andi/andis that would be necessary. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; assert((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask for value bit groups"); bool NeedsRotate = VRI.RLAmt != 0; // We're trying to minimize the number of instructions. If we have one // group, using one of andi/andis can break even. If we have three // groups, we can use both andi and andis and break even (to use both // andi and andis we also need to or the results together). We need four // groups if we also need to rotate. To use andi/andis we need to do more // than break even because rotate-and-mask instructions tend to be easier // to schedule. // FIXME: We've biased here against using andi/andis, which is right for // POWER cores, but not optimal everywhere. For example, on the A2, // andi/andis have single-cycle latency whereas the rotate-and-mask // instructions take two cycles, and it would be better to bias toward // andi/andis in break-even cases. 
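  // Illustrative count: with a non-zero rotation amount, both halfword masks
  // non-zero, and an existing partial result in Res, the masking route below
  // costs 1 (rlwinm) + 1 (andi.) + 1 (andis.) + 1 (or of the two masked
  // values) + 1 (or into Res) = 5 instructions, which is then compared against
  // the VRI.NumGroups rotate-and-mask/insert instructions it would replace.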
unsigned NumAndInsts = (unsigned) NeedsRotate + (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0) + (unsigned) (bool) Res; LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << ":" << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << VRI.NumGroups << "\n"); if (NumAndInsts >= VRI.NumGroups) continue; LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n"); if (InstCnt) *InstCnt += NumAndInsts; SDValue VRot; if (VRI.RLAmt) { SDValue Ops[] = { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), getI32Imm(31, dl) }; VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { VRot = TruncateToInt32(VRI.V, dl); } SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, VRot, getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot, getI32Imm(ANDISMask, dl)), 0); SDValue TotalVal; if (!ANDIVal) TotalVal = ANDISVal; else if (!ANDISVal) TotalVal = ANDIVal; else TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, ANDIVal, ANDISVal), 0); if (!Res) Res = TotalVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, Res, TotalVal), 0); // Now, remove all groups with this underlying value and rotation // factor. eraseMatchingBitGroups([VRI](const BitGroup &BG) { return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; }); } } // Instruction selection for the 32-bit case. SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) { SDLoc dl(N); SDValue Res; if (InstCnt) *InstCnt = 0; // Take care of cases that should use andi/andis first. SelectAndParts32(dl, Res, InstCnt); // If we've not yet selected a 'starting' instruction, and we have no zeros // to fill in, select the (Value, RLAmt) with the highest priority (largest // number of groups), and start with this rotated value. if ((!NeedMask || LateMask) && !Res) { ValueRotInfo &VRI = ValueRotsVec[0]; if (VRI.RLAmt) { if (InstCnt) *InstCnt += 1; SDValue Ops[] = { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), getI32Imm(31, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { Res = TruncateToInt32(VRI.V, dl); } // Now, remove all groups with this underlying value and rotation factor. eraseMatchingBitGroups([VRI](const BitGroup &BG) { return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; }); } if (InstCnt) *InstCnt += BitGroups.size(); // Insert the other groups (one at a time). 
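  // Note on the RLWINM/RLWIMI operands built below: for a group covering
  // low-order bits [StartIdx, EndIdx], MB = 32 - EndIdx - 1 and
  // ME = 32 - StartIdx - 1 in the instructions' big-endian bit numbering;
  // e.g. a group over bits 4..11 becomes a mask with MB = 20, ME = 27.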
for (auto &BG : BitGroups) { if (!Res) { SDValue Ops[] = { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), getI32Imm(Bits.size() - BG.EndIdx - 1, dl), getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { SDValue Ops[] = { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), getI32Imm(Bits.size() - BG.EndIdx - 1, dl), getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); } } if (LateMask) { unsigned Mask = (unsigned) getZerosMask(); unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; assert((ANDIMask != 0 || ANDISMask != 0) && "No set bits in zeros mask?"); if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, Res, getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res, getI32Imm(ANDISMask, dl)), 0); if (!ANDIVal) Res = ANDISVal; else if (!ANDISVal) Res = ANDIVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, ANDIVal, ANDISVal), 0); } return Res.getNode(); } unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32, unsigned MaskStart, unsigned MaskEnd, bool IsIns) { // In the notation used by the instructions, 'start' and 'end' are reversed // because bits are counted from high to low order. unsigned InstMaskStart = 64 - MaskEnd - 1, InstMaskEnd = 64 - MaskStart - 1; if (Repl32) return 1; if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) || InstMaskEnd == 63 - RLAmt) return 1; return 2; } // For 64-bit values, not all combinations of rotates and masks are // available. Produce one if it is available. SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt, bool Repl32, unsigned MaskStart, unsigned MaskEnd, unsigned *InstCnt = nullptr) { // In the notation used by the instructions, 'start' and 'end' are reversed // because bits are counted from high to low order. unsigned InstMaskStart = 64 - MaskEnd - 1, InstMaskEnd = 64 - MaskStart - 1; if (InstCnt) *InstCnt += 1; if (Repl32) { // This rotation amount assumes that the lower 32 bits of the quantity // are replicated in the high 32 bits by the rotation operator (which is // done by rlwinm and friends). assert(InstMaskStart >= 32 && "Mask cannot start out of range"); assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); SDValue Ops[] = { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63) { SDValue Ops[] = { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); } if (InstMaskStart == 0) { SDValue Ops[] = { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63 - RLAmt) { SDValue Ops[] = { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); } // We cannot do this with a single instruction, so we'll use two. 
The // problem is that we're not free to choose both a rotation amount and mask // start and end independently. We can choose an arbitrary mask start and // end, but then the rotation amount is fixed. Rotation, however, can be // inverted, and so by applying an "inverse" rotation first, we can get the // desired result. if (InstCnt) *InstCnt += 1; // The rotation mask for the second instruction must be MaskStart. unsigned RLAmt2 = MaskStart; // The first instruction must rotate V so that the overall rotation amount // is RLAmt. unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; if (RLAmt1) V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd); } // For 64-bit values, not all combinations of rotates and masks are // available. Produce a rotate-mask-and-insert if one is available. SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl, unsigned RLAmt, bool Repl32, unsigned MaskStart, unsigned MaskEnd, unsigned *InstCnt = nullptr) { // In the notation used by the instructions, 'start' and 'end' are reversed // because bits are counted from high to low order. unsigned InstMaskStart = 64 - MaskEnd - 1, InstMaskEnd = 64 - MaskStart - 1; if (InstCnt) *InstCnt += 1; if (Repl32) { // This rotation amount assumes that the lower 32 bits of the quantity // are replicated in the high 32 bits by the rotation operator (which is // done by rlwinm and friends). assert(InstMaskStart >= 32 && "Mask cannot start out of range"); assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); SDValue Ops[] = { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63 - RLAmt) { SDValue Ops[] = { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0); } // We cannot do this with a single instruction, so we'll use two. The // problem is that we're not free to choose both a rotation amount and mask // start and end independently. We can choose an arbitrary mask start and // end, but then the rotation amount is fixed. Rotation, however, can be // inverted, and so by applying an "inverse" rotation first, we can get the // desired result. if (InstCnt) *InstCnt += 1; // The rotation mask for the second instruction must be MaskStart. unsigned RLAmt2 = MaskStart; // The first instruction must rotate V so that the overall rotation amount // is RLAmt. unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; if (RLAmt1) V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd); } void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { if (BPermRewriterNoMasking) return; // The idea here is the same as in the 32-bit version, but with additional // complications from the fact that Repl32 might be true. Because we // aggressively convert bit groups to Repl32 form (which, for small // rotation factors, involves no other change), and then coalesce, it might // be the case that a single 64-bit masking operation could handle both // some Repl32 groups and some non-Repl32 groups. If converting to Repl32 // form allowed coalescing, then we must use a 32-bit rotaton in order to // completely capture the new combined bit group. 
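  // For each candidate (V, RLAmt) pair, the loop below first accumulates in
  // Mask every result bit covered by a matching bit group (wrap-around groups
  // contribute both their head and tail ranges), then decides between 32-bit
  // andi./andis. masking and a materialized 64-bit mask plus AND8 by comparing
  // instruction counts against the rotate-based lowering, with the biases
  // described in the comments that follow.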
for (ValueRotInfo &VRI : ValueRotsVec) { uint64_t Mask = 0; // We need to add to the mask all bits from the associated bit groups. // If Repl32 is false, we need to add bits from bit groups that have // Repl32 true, but are trivially convertable to Repl32 false. Such a // group is trivially convertable if it overlaps only with the lower 32 // bits, and the group has not been coalesced. auto MatchingBG = [VRI](const BitGroup &BG) { if (VRI.V != BG.V) return false; unsigned EffRLAmt = BG.RLAmt; if (!VRI.Repl32 && BG.Repl32) { if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx && !BG.Repl32Coalesced) { if (BG.Repl32CR) EffRLAmt += 32; } else { return false; } } else if (VRI.Repl32 != BG.Repl32) { return false; } return VRI.RLAmt == EffRLAmt; }; for (auto &BG : BitGroups) { if (!MatchingBG(BG)) continue; if (BG.StartIdx <= BG.EndIdx) { for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) Mask |= (UINT64_C(1) << i); } else { for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) Mask |= (UINT64_C(1) << i); for (unsigned i = 0; i <= BG.EndIdx; ++i) Mask |= (UINT64_C(1) << i); } } // We can use the 32-bit andi/andis technique if the mask does not // require any higher-order bits. This can save an instruction compared // to always using the general 64-bit technique. bool Use32BitInsts = isUInt<32>(Mask); // Compute the masks for andi/andis that would be necessary. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = (Mask >> 16) & UINT16_MAX; bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)); unsigned NumAndInsts = (unsigned) NeedsRotate + (unsigned) (bool) Res; unsigned NumOfSelectInsts = 0; selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts); assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant."); if (Use32BitInsts) NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); else NumAndInsts += NumOfSelectInsts + /* and */ 1; unsigned NumRLInsts = 0; bool FirstBG = true; bool MoreBG = false; for (auto &BG : BitGroups) { if (!MatchingBG(BG)) { MoreBG = true; continue; } NumRLInsts += SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, !FirstBG); FirstBG = false; } LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << NumRLInsts << "\n"); // When we'd use andi/andis, we bias toward using the rotates (andi only // has a record form, and is cracked on POWER cores). However, when using // general 64-bit constant formation, bias toward the constant form, // because that exposes more opportunities for CSE. if (NumAndInsts > NumRLInsts) continue; // When merging multiple bit groups, instruction or is used. // But when rotate is used, rldimi can inert the rotated value into any // register, so instruction or can be avoided. if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) continue; LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n"); if (InstCnt) *InstCnt += NumAndInsts; SDValue VRot; // We actually need to generate a rotation if we have a non-zero rotation // factor or, in the Repl32 case, if we care about any of the // higher-order replicated bits. In the latter case, we generate a mask // backward so that it actually includes the entire 64 bits. if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask))) VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 
30 : 63); else VRot = VRI.V; SDValue TotalVal; if (Use32BitInsts) { assert((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value"); SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, ExtendToInt64(VRot, dl), getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, ExtendToInt64(VRot, dl), getI32Imm(ANDISMask, dl)), 0); if (!ANDIVal) TotalVal = ANDISVal; else if (!ANDISVal) TotalVal = ANDIVal; else TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, ExtendToInt64(ANDIVal, dl), ANDISVal), 0); } else { TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); TotalVal = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, ExtendToInt64(VRot, dl), TotalVal), 0); } if (!Res) Res = TotalVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, ExtendToInt64(Res, dl), TotalVal), 0); // Now, remove all groups with this underlying value and rotation // factor. eraseMatchingBitGroups(MatchingBG); } } // Instruction selection for the 64-bit case. SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) { SDLoc dl(N); SDValue Res; if (InstCnt) *InstCnt = 0; // Take care of cases that should use andi/andis first. SelectAndParts64(dl, Res, InstCnt); // If we've not yet selected a 'starting' instruction, and we have no zeros // to fill in, select the (Value, RLAmt) with the highest priority (largest // number of groups), and start with this rotated value. if ((!NeedMask || LateMask) && !Res) { // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 // groups will come first, and so the VRI representing the largest number // of groups might not be first (it might be the first Repl32 groups). unsigned MaxGroupsIdx = 0; if (!ValueRotsVec[0].Repl32) { for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i) if (ValueRotsVec[i].Repl32) { if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups) MaxGroupsIdx = i; break; } } ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx]; bool NeedsRotate = false; if (VRI.RLAmt) { NeedsRotate = true; } else if (VRI.Repl32) { for (auto &BG : BitGroups) { if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt || BG.Repl32 != VRI.Repl32) continue; // We don't need a rotate if the bit group is confined to the lower // 32 bits. if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx) continue; NeedsRotate = true; break; } } if (NeedsRotate) Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63, InstCnt); else Res = VRI.V; // Now, remove all groups with this underlying value and rotation factor. if (Res) eraseMatchingBitGroups([VRI](const BitGroup &BG) { return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt && BG.Repl32 == VRI.Repl32; }); } // Because 64-bit rotates are more flexible than inserts, we might have a // preference regarding which one we do first (to save one instruction). if (!Res) for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) { if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, false) < SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, true)) { if (I != BitGroups.begin()) { BitGroup BG = *I; BitGroups.erase(I); BitGroups.insert(BitGroups.begin(), BG); } break; } } // Insert the other groups (one at a time). 
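  // The reordering above moves to the front a group that is cheaper as the
  // leading (non-insert) operation: per SelectRotMask64Count, a group whose
  // mask reaches bit 0 or bit 63 needs only a single rldicl/rldicr there,
  // while the same group expressed as an rldimi insert may take two
  // instructions.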
for (auto &BG : BitGroups) { if (!Res) Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, InstCnt); else Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, InstCnt); } if (LateMask) { uint64_t Mask = getZerosMask(); // We can use the 32-bit andi/andis technique if the mask does not // require any higher-order bits. This can save an instruction compared // to always using the general 64-bit technique. bool Use32BitInsts = isUInt<32>(Mask); // Compute the masks for andi/andis that would be necessary. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = (Mask >> 16) & UINT16_MAX; if (Use32BitInsts) { assert((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value"); if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0); if (!ANDIVal) Res = ANDISVal; else if (!ANDISVal) Res = ANDIVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, ExtendToInt64(ANDIVal, dl), ANDISVal), 0); } else { unsigned NumOfSelectInsts = 0; SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0); Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, ExtendToInt64(Res, dl), MaskVal), 0); if (InstCnt) *InstCnt += NumOfSelectInsts + /* and */ 1; } } return Res.getNode(); } SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) { // Fill in BitGroups. collectBitGroups(LateMask); if (BitGroups.empty()) return nullptr; // For 64-bit values, figure out when we can use 32-bit instructions. if (Bits.size() == 64) assignRepl32BitGroups(); // Fill in ValueRotsVec. collectValueRotInfo(); if (Bits.size() == 32) { return Select32(N, LateMask, InstCnt); } else { assert(Bits.size() == 64 && "Not 64 bits here?"); return Select64(N, LateMask, InstCnt); } return nullptr; } void eraseMatchingBitGroups(function_ref F) { erase_if(BitGroups, F); } SmallVector Bits; bool NeedMask = false; SmallVector RLAmt; SmallVector BitGroups; DenseMap, ValueRotInfo> ValueRots; SmallVector ValueRotsVec; SelectionDAG *CurDAG = nullptr; public: BitPermutationSelector(SelectionDAG *DAG) : CurDAG(DAG) {} // Here we try to match complex bit permutations into a set of // rotate-and-shift/shift/and/or instructions, using a set of heuristics // known to produce optimal code for common cases (like i32 byte swapping). SDNode *Select(SDNode *N) { Memoizer.clear(); auto Result = getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits()); if (!Result.first) return nullptr; Bits = std::move(*Result.second); LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction" " selection for: "); LLVM_DEBUG(N->dump(CurDAG)); // Fill it RLAmt and set NeedMask. computeRotationAmounts(); if (!NeedMask) return Select(N, false); // We currently have two techniques for handling results with zeros: early // masking (the default) and late masking. Late masking is sometimes more // efficient, but because the structure of the bit groups is different, it // is hard to tell without generating both and comparing the results. 
With // late masking, we ignore zeros in the resulting value when inserting each // set of bit groups, and then mask in the zeros at the end. With early // masking, we only insert the non-zero parts of the result at every step. unsigned InstCnt = 0, InstCntLateMask = 0; LLVM_DEBUG(dbgs() << "\tEarly masking:\n"); SDNode *RN = Select(N, false, &InstCnt); LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n"); LLVM_DEBUG(dbgs() << "\tLate masking:\n"); SDNode *RNLM = Select(N, true, &InstCntLateMask); LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask << " instructions\n"); if (InstCnt <= InstCntLateMask) { LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n"); return RN; } LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n"); return RNLM; } }; class IntegerCompareEliminator { SelectionDAG *CurDAG; PPCDAGToDAGISel *S; // Conversion type for interpreting results of a 32-bit instruction as // a 64-bit value or vice versa. enum ExtOrTruncConversion { Ext, Trunc }; // Modifiers to guide how an ISD::SETCC node's result is to be computed // in a GPR. // ZExtOrig - use the original condition code, zero-extend value // ZExtInvert - invert the condition code, zero-extend value // SExtOrig - use the original condition code, sign-extend value // SExtInvert - invert the condition code, sign-extend value enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; // Comparisons against zero to emit GPR code sequences for. Each of these // sequences may need to be emitted for two or more equivalent patterns. // For example (a >= 0) == (a > -1). The direction of the comparison () // matters as well as the extension type: sext (-1/0), zext (1/0). // GEZExt - (zext (LHS >= 0)) // GESExt - (sext (LHS >= 0)) // LEZExt - (zext (LHS <= 0)) // LESExt - (sext (LHS <= 0)) enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt }; SDNode *tryEXTEND(SDNode *N); SDNode *tryLogicOpOfCompares(SDNode *N); SDValue computeLogicOpInGPR(SDValue LogicOp); SDValue signExtendInputIfNeeded(SDValue Input); SDValue zeroExtendInputIfNeeded(SDValue Input); SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, ZeroCompare CmpTy); SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); public: IntegerCompareEliminator(SelectionDAG *DAG, PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) { assert(CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && "Only expecting to use this on 64 bit targets."); } SDNode *Select(SDNode *N) { if (CmpInGPR == ICGPR_None) return nullptr; switch (N->getOpcode()) { default: break; case ISD::ZERO_EXTEND: if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 || CmpInGPR == ICGPR_SextI64) return nullptr; [[fallthrough]]; case ISD::SIGN_EXTEND: if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_ZextI64) return nullptr; return tryEXTEND(N); case ISD::AND: case ISD::OR: case ISD::XOR: return tryLogicOpOfCompares(N); } return nullptr; } }; // The obvious case for wanting to keep the value in a GPR. 
Namely, the // result of the comparison is actually needed in a GPR. SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) { assert((N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"); SDValue WideRes; // If we are zero-extending the result of a logical operation on i1 // values, we can keep the values in GPRs. if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) && N->getOperand(0).getValueType() == MVT::i1 && N->getOpcode() == ISD::ZERO_EXTEND) WideRes = computeLogicOpInGPR(N->getOperand(0)); else if (N->getOperand(0).getOpcode() != ISD::SETCC) return nullptr; else WideRes = getSETCCInGPR(N->getOperand(0), N->getOpcode() == ISD::SIGN_EXTEND ? SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); if (!WideRes) return nullptr; SDLoc dl(N); bool Input32Bit = WideRes.getValueType() == MVT::i32; bool Output32Bit = N->getValueType(0) == MVT::i32; NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; SDValue ConvOp = WideRes; if (Input32Bit != Output32Bit) ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext : ExtOrTruncConversion::Trunc); return ConvOp.getNode(); } // Attempt to perform logical operations on the results of comparisons while // keeping the values in GPRs. Without doing so, these would end up being // lowered to CR-logical operations which suffer from significant latency and // low ILP. SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { if (N->getValueType(0) != MVT::i1) return nullptr; assert(ISD::isBitwiseLogicOp(N->getOpcode()) && "Expected a logic operation on setcc results."); SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); if (!LoweredLogical) return nullptr; SDLoc dl(N); bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); SDValue LHS = LoweredLogical.getOperand(0); SDValue RHS = LoweredLogical.getOperand(1); SDValue WideOp; SDValue OpToConvToRecForm; // Look through any 32-bit to 64-bit implicit extend nodes to find the // opcode that is input to the XORI. if (IsBitwiseNegate && LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); else if (IsBitwiseNegate) // If the input to the XORI isn't an extension, that's what we're after. OpToConvToRecForm = LoweredLogical.getOperand(0); else // If this is not an XORI, it is a reg-reg logical op and we can convert // it to record-form. OpToConvToRecForm = LoweredLogical; // Get the record-form version of the node we're looking to use to get the // CR result from. uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); // Convert the right node to record-form. This is either the logical we're // looking at or it is the input node to the negation (if we're looking at // a bitwise negation). if (NewOpc != -1 && IsBitwiseNegate) { // The input to the XORI has a record-form. Use it. assert(LoweredLogical.getConstantOperandVal(1) == 1 && "Expected a PPC::XORI8 only for bitwise negation."); // Emit the record-form instruction. 
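  // Record-form instructions (the "_rec" variants, i.e. the Rc=1 forms such
  // as and./or./xor.) implicitly compare their result with zero and set CR0
  // accordingly. That is what makes this profitable: the CR bit we need (GT
  // for the plain logical ops, EQ for the negated case) is produced as a
  // by-product of the operation itself and is simply extracted from CR0
  // below, without a separate compare instruction.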
std::vector Ops; for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) Ops.push_back(OpToConvToRecForm.getOperand(i)); WideOp = SDValue(CurDAG->getMachineNode(NewOpc, dl, OpToConvToRecForm.getValueType(), MVT::Glue, Ops), 0); } else { assert((NewOpc != -1 || !IsBitwiseNegate) && "No record form available for AND8/OR8/XOR8?"); WideOp = SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc, dl, MVT::i64, MVT::Glue, LHS, RHS), 0); } // Select this node to a single bit from CR0 set by the record-form node // just created. For bitwise negation, use the EQ bit which is the equivalent // of negating the result (i.e. it is a bit set when the result of the // operation is zero). SDValue SRIdxVal = CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); SDValue CRBit = SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, CR0Reg, SRIdxVal, WideOp.getValue(1)), 0); return CRBit.getNode(); } // Lower a logical operation on i1 values into a GPR sequence if possible. // The result can be kept in a GPR if requested. // Three types of inputs can be handled: // - SETCC // - TRUNCATE // - Logical operation (AND/OR/XOR) // There is also a special case that is handled (namely a complement operation // achieved with xor %a, -1). SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) { assert(ISD::isBitwiseLogicOp(LogicOp.getOpcode()) && "Can only handle logic operations here."); assert(LogicOp.getValueType() == MVT::i1 && "Can only handle logic operations on i1 values here."); SDLoc dl(LogicOp); SDValue LHS, RHS; // Special case: xor %a, -1 bool IsBitwiseNegation = isBitwiseNot(LogicOp); // Produces a GPR sequence for each operand of the binary logic operation. // For SETCC, it produces the respective comparison, for TRUNCATE it truncates // the value in a GPR and for logic operations, it will recursively produce // a GPR sequence for the operation. auto getLogicOperand = [&] (SDValue Operand) -> SDValue { unsigned OperandOpcode = Operand.getOpcode(); if (OperandOpcode == ISD::SETCC) return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); else if (OperandOpcode == ISD::TRUNCATE) { SDValue InputOp = Operand.getOperand(0); EVT InVT = InputOp.getValueType(); return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : PPC::RLDICL, dl, InVT, InputOp, S->getI64Imm(0, dl), S->getI64Imm(63, dl)), 0); } else if (ISD::isBitwiseLogicOp(OperandOpcode)) return computeLogicOpInGPR(Operand); return SDValue(); }; LHS = getLogicOperand(LogicOp.getOperand(0)); RHS = getLogicOperand(LogicOp.getOperand(1)); // If a GPR sequence can't be produced for the LHS we can't proceed. // Not producing a GPR sequence for the RHS is only a problem if this isn't // a bitwise negation operation. if (!LHS || (!RHS && !IsBitwiseNegation)) return SDValue(); NumLogicOpsOnComparison++; // We will use the inputs as 64-bit values. 
if (LHS.getValueType() == MVT::i32) LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); unsigned NewOpc; switch (LogicOp.getOpcode()) { default: llvm_unreachable("Unknown logic operation."); case ISD::AND: NewOpc = PPC::AND8; break; case ISD::OR: NewOpc = PPC::OR8; break; case ISD::XOR: NewOpc = PPC::XOR8; break; } if (IsBitwiseNegation) { RHS = S->getI64Imm(1, dl); NewOpc = PPC::XORI8; } return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); } /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. /// Otherwise just reinterpret it as a 64-bit value. /// Useful when emitting comparison code for 32-bit values without using /// the compare instruction (which only considers the lower 32-bits). SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) { assert(Input.getValueType() == MVT::i32 && "Can only sign-extend 32-bit values here."); unsigned Opc = Input.getOpcode(); // The value was sign extended and then truncated to 32-bits. No need to // sign extend it again. if (Opc == ISD::TRUNCATE && (Input.getOperand(0).getOpcode() == ISD::AssertSext || Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); LoadSDNode *InputLoad = dyn_cast(Input); // The input is a sign-extending load. All ppc sign-extending loads // sign-extend to the full 64-bits. if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); ConstantSDNode *InputConst = dyn_cast(Input); // We don't sign-extend constants. if (InputConst) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); SDLoc dl(Input); SignExtensionsAdded++; return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl, MVT::i64, Input), 0); } /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. /// Otherwise just reinterpret it as a 64-bit value. /// Useful when emitting comparison code for 32-bit values without using /// the compare instruction (which only considers the lower 32-bits). SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) { assert(Input.getValueType() == MVT::i32 && "Can only zero-extend 32-bit values here."); unsigned Opc = Input.getOpcode(); // The only condition under which we can omit the actual extend instruction: // - The value is a positive constant // - The value comes from a load that isn't a sign-extending load // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext. bool IsTruncateOfZExt = Opc == ISD::TRUNCATE && (Input.getOperand(0).getOpcode() == ISD::AssertZext || Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND); if (IsTruncateOfZExt) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); ConstantSDNode *InputConst = dyn_cast(Input); if (InputConst && InputConst->getSExtValue() >= 0) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); LoadSDNode *InputLoad = dyn_cast(Input); // The input is a load that doesn't sign-extend (it will be zero-extended). if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); // None of the above, need to zero-extend. SDLoc dl(Input); ZeroExtensionsAdded++; return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input, S->getI64Imm(0, dl), S->getI64Imm(32, dl)), 0); } // Handle a 32-bit value in a 64-bit register and vice-versa. 
These are of // course not actual zero/sign extensions that will generate machine code, // they're just a way to reinterpret a 32 bit value in a register as a // 64 bit value and vice-versa. SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv) { SDLoc dl(NatWidthRes); // For reinterpreting 32-bit values as 64 bit values, we generate // INSERT_SUBREG IMPLICIT_DEF:i64, , TargetConstant:i32<1> if (Conv == ExtOrTruncConversion::Ext) { SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, ImDef, NatWidthRes, SubRegIdx), 0); } assert(Conv == ExtOrTruncConversion::Trunc && "Unknown convertion between 32 and 64 bit values."); // For reinterpreting 64-bit values as 32-bit values, we just need to // EXTRACT_SUBREG (i.e. extract the low word). SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, NatWidthRes, SubRegIdx), 0); } // Produce a GPR sequence for compound comparisons (<=, >=) against zero. // Handle both zero-extensions and sign-extensions. SDValue IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, ZeroCompare CmpTy) { EVT InVT = LHS.getValueType(); bool Is32Bit = InVT == MVT::i32; SDValue ToExtend; // Produce the value that needs to be either zero or sign extended. switch (CmpTy) { case ZeroCompare::GEZExt: case ZeroCompare::GESExt: ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8, dl, InVT, LHS, LHS), 0); break; case ZeroCompare::LEZExt: case ZeroCompare::LESExt: { if (Is32Bit) { // Upper 32 bits cannot be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); SDValue Neg = SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); ToExtend = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Neg, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); } else { SDValue Addi = SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, S->getI64Imm(~0ULL, dl)), 0); ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, Addi, LHS), 0); } break; } } // For 64-bit sequences, the extensions are the same for the GE/LE cases. if (!Is32Bit && (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt)) return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, ToExtend, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); if (!Is32Bit && (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt)) return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend, S->getI64Imm(63, dl)), 0); assert(Is32Bit && "Should have handled the 32-bit sequences above."); // For 32-bit sequences, the extensions differ between GE/LE cases. 
switch (CmpTy) { case ZeroCompare::GEZExt: { SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl), S->getI32Imm(31, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); } case ZeroCompare::GESExt: return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend, S->getI32Imm(31, dl)), 0); case ZeroCompare::LEZExt: return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend, S->getI32Imm(1, dl)), 0); case ZeroCompare::LESExt: return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend, S->getI32Imm(-1, dl)), 0); } // The above case covers all the enumerators so it can't have a default clause // to avoid compiler warnings. llvm_unreachable("Unknown zero-comparison type."); } /// Produces a zero-extended result of comparing two 32-bit values according to /// the passed condition code. SDValue IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl) { if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext) return SDValue(); bool IsRHSZero = RHSValue == 0; bool IsRHSOne = RHSValue == 1; bool IsRHSNegOne = RHSValue == -1LL; switch (CC) { default: return SDValue(); case ISD::SETEQ: { // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) SDValue Xor = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); SDValue Clz = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); } case ISD::SETNE: { // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) SDValue Xor = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); SDValue Clz = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; SDValue Shift = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, S->getI32Imm(1, dl)), 0); } case ISD::SETGE: { // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1) // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31) if(IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) // by swapping inputs and falling through. std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isZero(); [[fallthrough]]; } case ISD::SETLE: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1) // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1) if(IsRHSZero) { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); } // The upper 32-bits of the register can't be undefined for this sequence. 
LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue Sub = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); SDValue Shift = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, Shift, S->getI32Imm(1, dl)), 0); } case ISD::SETGT: { // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63) // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31) // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63) // Handle SETLT -1 (which is equivalent to SETGE 0). if (IsRHSNegOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); if (IsRHSZero) { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue Neg = SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0); } // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as // (%b < %a) by swapping inputs and falling through. std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isZero(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; [[fallthrough]]; } case ISD::SETLT: { // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63) // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1) // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31) // Handle SETLT 1 (which is equivalent to SETLE 0). if (IsRHSOne) { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); } if (IsRHSZero) { SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl), S->getI32Imm(31, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); } if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue SUBFNode = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SUBFNode, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); } case ISD::SETUGE: // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1) // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1) std::swap(LHS, RHS); [[fallthrough]]; case ISD::SETULE: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = zeroExtendInputIfNeeded(LHS); RHS = zeroExtendInputIfNeeded(RHS); SDValue Subtract = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); SDValue SrdiNode = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode, S->getI32Imm(1, dl)), 0); } case ISD::SETUGT: // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) std::swap(LHS, RHS); [[fallthrough]]; case ISD::SETULT: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. 
LHS = zeroExtendInputIfNeeded(LHS); RHS = zeroExtendInputIfNeeded(RHS); SDValue Subtract = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); } } } /// Produces a sign-extended result of comparing two 32-bit values according to /// the passed condition code. SDValue IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl) { if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext) return SDValue(); bool IsRHSZero = RHSValue == 0; bool IsRHSOne = RHSValue == 1; bool IsRHSNegOne = RHSValue == -1LL; switch (CC) { default: return SDValue(); case ISD::SETEQ: { // (sext (setcc %a, %b, seteq)) -> // (ashr (shl (ctlz (xor %a, %b)), 58), 63) // (sext (setcc %a, 0, seteq)) -> // (ashr (shl (ctlz %a), 58), 63) SDValue CountInput = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); SDValue Cntlzw = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; SDValue Slwi = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0); return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0); } case ISD::SETNE: { // Bitwise xor the operands, count leading zeros, shift right by 5 bits and // flip the bit, finally take 2's complement. // (sext (setcc %a, %b, setne)) -> // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) // Same as above, but the first xor is not needed. // (sext (setcc %a, 0, setne)) -> // (neg (xor (lshr (ctlz %a), 5), 1)) SDValue Xor = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); SDValue Clz = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; SDValue Shift = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); SDValue Xori = SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, S->getI32Imm(1, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); } case ISD::SETGE: { // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1) // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) // by swapping inputs and falling through. std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isZero(); [[fallthrough]]; } case ISD::SETLE: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1) // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); // The upper 32-bits of the register can't be undefined for this sequence. 
LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue SUBFNode = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue, LHS, RHS), 0); SDValue Srdi = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SUBFNode, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi, S->getI32Imm(-1, dl)), 0); } case ISD::SETGT: { // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63) // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31) // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63) if (IsRHSNegOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); if (IsRHSZero) { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue Neg = SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg, S->getI64Imm(63, dl)), 0); } // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as // (%b < %a) by swapping inputs and falling through. std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isZero(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; [[fallthrough]]; } case ISD::SETLT: { // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63) // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1) // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31) if (IsRHSOne) { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); } if (IsRHSZero) return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS, S->getI32Imm(31, dl)), 0); if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue SUBFNode = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, SUBFNode, S->getI64Imm(63, dl)), 0); } case ISD::SETUGE: // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1) // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1) std::swap(LHS, RHS); [[fallthrough]]; case ISD::SETULE: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = zeroExtendInputIfNeeded(LHS); RHS = zeroExtendInputIfNeeded(RHS); SDValue Subtract = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); SDValue Shift = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, S->getI32Imm(1, dl), S->getI32Imm(63,dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift, S->getI32Imm(-1, dl)), 0); } case ISD::SETUGT: // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63) // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63) std::swap(LHS, RHS); [[fallthrough]]; case ISD::SETULT: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. 
LHS = zeroExtendInputIfNeeded(LHS); RHS = zeroExtendInputIfNeeded(RHS); SDValue Subtract = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Subtract, S->getI64Imm(63, dl)), 0); } } } /// Produces a zero-extended result of comparing two 64-bit values according to /// the passed condition code. SDValue IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl) { if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext) return SDValue(); bool IsRHSZero = RHSValue == 0; bool IsRHSOne = RHSValue == 1; bool IsRHSNegOne = RHSValue == -1LL; switch (CC) { default: return SDValue(); case ISD::SETEQ: { // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) SDValue Xor = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); SDValue Clz = SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, S->getI64Imm(58, dl), S->getI64Imm(63, dl)), 0); } case ISD::SETNE: { // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA) // {addcz.reg, addcz.CA} = (addcarry %a, -1) // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA) SDValue Xor = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); SDValue AC = SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, Xor, S->getI32Imm(~0U, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, Xor, AC.getValue(1)), 0); } case ISD::SETGE: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setge)) -> // (adde (lshr %b, 63), (ashr %a, 63), subc.CA) // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isZero(); [[fallthrough]]; } case ISD::SETLE: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setge)) -> // (adde (lshr %a, 63), (ashr %b, 63), subc.CA) // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); SDValue ShiftL = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); SDValue ShiftR = SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, S->getI64Imm(63, dl)), 0); SDValue SubtractCarry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, LHS, RHS), 1); return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, ShiftR, ShiftL, SubtractCarry), 0); } case ISD::SETGT: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setgt)) -> // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63) if (IsRHSNegOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); if (IsRHSZero) { SDValue Addi = SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, S->getI64Imm(~0ULL, dl)), 0); SDValue Nor = SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor, 
S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); } std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isZero(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; [[fallthrough]]; } case ISD::SETLT: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setlt)) -> // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63) if (IsRHSOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); if (IsRHSZero) return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); SDValue SRADINode = SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, S->getI64Imm(63, dl)), 0); SDValue SRDINode = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, RHS, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); SDValue SUBFC8Carry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, RHS, LHS), 1); SDValue ADDE8Node = SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, SRDINode, SRADINode, SUBFC8Carry), 0); return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ADDE8Node, S->getI64Imm(1, dl)), 0); } case ISD::SETUGE: // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) std::swap(LHS, RHS); [[fallthrough]]; case ISD::SETULE: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) SDValue SUBFC8Carry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, LHS, RHS), 1); SDValue SUBFE8Node = SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS, LHS, SUBFC8Carry), 0); return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, SUBFE8Node, S->getI64Imm(1, dl)), 0); } case ISD::SETUGT: // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) std::swap(LHS, RHS); [[fallthrough]]; case ISD::SETULT: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) SDValue SubtractCarry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, RHS, LHS), 1); SDValue ExtSub = SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, LHS, LHS, SubtractCarry), 0); return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, ExtSub), 0); } } } /// Produces a sign-extended result of comparing two 64-bit values according to /// the passed condition code. SDValue IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl) { if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext) return SDValue(); bool IsRHSZero = RHSValue == 0; bool IsRHSOne = RHSValue == 1; bool IsRHSNegOne = RHSValue == -1LL; switch (CC) { default: return SDValue(); case ISD::SETEQ: { // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) // {addcz.reg, addcz.CA} = (addcarry %a, -1) // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) SDValue AddInput = IsRHSZero ? 
LHS : SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); SDValue Addic = SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, AddInput, S->getI32Imm(~0U, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, Addic, Addic.getValue(1)), 0); } case ISD::SETNE: { // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) SDValue Xor = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); SDValue SC = SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, Xor, S->getI32Imm(0, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, SC, SC.getValue(1)), 0); } case ISD::SETGE: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setge)) -> // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA)) // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63)) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isZero(); [[fallthrough]]; } case ISD::SETLE: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setge)) -> // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA)) // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); SDValue ShiftR = SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, S->getI64Imm(63, dl)), 0); SDValue ShiftL = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); SDValue SubtractCarry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, LHS, RHS), 1); SDValue Adde = SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, ShiftR, ShiftL, SubtractCarry), 0); return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0); } case ISD::SETGT: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setgt)) -> // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63) if (IsRHSNegOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); if (IsRHSZero) { SDValue Add = SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, S->getI64Imm(-1, dl)), 0); SDValue Nor = SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor, S->getI64Imm(63, dl)), 0); } std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isZero(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; [[fallthrough]]; } case ISD::SETLT: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setlt)) -> // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63) if (IsRHSOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); if (IsRHSZero) { return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, S->getI64Imm(63, dl)), 0); } SDValue SRADINode = SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, S->getI64Imm(63, dl)), 0); SDValue SRDINode = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, RHS, S->getI64Imm(1, dl), S->getI64Imm(63, 
dl)), 0); SDValue SUBFC8Carry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, RHS, LHS), 1); SDValue ADDE8Node = SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, SRDINode, SRADINode, SUBFC8Carry), 0); SDValue XORI8Node = SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ADDE8Node, S->getI64Imm(1, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, XORI8Node), 0); } case ISD::SETUGE: // {subc.reg, subc.CA} = (subcarry %a, %b) // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA) std::swap(LHS, RHS); [[fallthrough]]; case ISD::SETULE: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA) SDValue SubtractCarry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, LHS, RHS), 1); SDValue ExtSub = SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS, LHS, SubtractCarry), 0); return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, ExtSub, ExtSub), 0); } case ISD::SETUGT: // {subc.reg, subc.CA} = (subcarry %b, %a) // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA) std::swap(LHS, RHS); [[fallthrough]]; case ISD::SETULT: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA) SDValue SubCarry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, RHS, LHS), 1); return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, LHS, LHS, SubCarry), 0); } } } /// Do all uses of this SDValue need the result in a GPR? /// This is meant to be used on values that have type i1 since /// it is somewhat meaningless to ask if values of other types /// should be kept in GPR's. static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { assert(Compare.getOpcode() == ISD::SETCC && "An ISD::SETCC node required here."); // For values that have a single use, the caller should obviously already have // checked if that use is an extending use. We check the other uses here. if (Compare.hasOneUse()) return true; // We want the value in a GPR if it is being extended, used for a select, or // used in logical operations. for (auto *CompareUse : Compare.getNode()->uses()) if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && CompareUse->getOpcode() != ISD::ZERO_EXTEND && CompareUse->getOpcode() != ISD::SELECT && !ISD::isBitwiseLogicOp(CompareUse->getOpcode())) { OmittedForNonExtendUses++; return false; } return true; } /// Returns an equivalent of a SETCC node but with the result the same width as /// the inputs. This can also be used for SELECT_CC if either the true or false /// values is a power of two while the other is zero. SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts) { assert((Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."); // Don't convert this comparison to a GPR sequence because there are uses // of the i1 result (i.e. uses that require the result in the CR). if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) return SDValue(); SDValue LHS = Compare.getOperand(0); SDValue RHS = Compare.getOperand(1); // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 
4 : 2; ISD::CondCode CC = cast(Compare.getOperand(CCOpNum))->get(); EVT InputVT = LHS.getValueType(); if (InputVT != MVT::i32 && InputVT != MVT::i64) return SDValue(); if (ConvOpts == SetccInGPROpts::ZExtInvert || ConvOpts == SetccInGPROpts::SExtInvert) CC = ISD::getSetCCInverse(CC, InputVT); bool Inputs32Bit = InputVT == MVT::i32; SDLoc dl(Compare); ConstantSDNode *RHSConst = dyn_cast(RHS); int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || ConvOpts == SetccInGPROpts::SExtInvert; if (IsSext && Inputs32Bit) return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); else if (Inputs32Bit) return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); else if (IsSext) return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); } } // end anonymous namespace bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return false; // This optimization will emit code that assumes 64-bit registers // so we don't want to run it in 32-bit mode. Also don't run it // on functions that are not to be optimized. if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64()) return false; // For POWER10, it is more profitable to use the set boolean extension // instructions rather than the integer compare elimination codegen. // Users can override this via the command line option, `--ppc-gpr-icmps`. if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1()) return false; switch (N->getOpcode()) { default: break; case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::AND: case ISD::OR: case ISD::XOR: { IntegerCompareEliminator ICmpElim(CurDAG, this); if (SDNode *New = ICmpElim.Select(N)) { ReplaceNode(N, New); return true; } } } return false; } bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return false; if (!UseBitPermRewriter) return false; switch (N->getOpcode()) { default: break; case ISD::SRL: // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that // uses the BRH instruction. if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 && N->getOperand(0).getOpcode() == ISD::BSWAP) { auto &OpRight = N->getOperand(1); ConstantSDNode *SRLConst = dyn_cast(OpRight); if (SRLConst && SRLConst->getSExtValue() == 16) return false; } [[fallthrough]]; case ISD::ROTL: case ISD::SHL: case ISD::AND: case ISD::OR: { BitPermutationSelector BPS(CurDAG); if (SDNode *New = BPS.Select(N)) { ReplaceNode(N, New); return true; } return false; } } return false; } /// SelectCC - Select a comparison of the specified values with the specified /// condition code, returning the CR# of the expression. SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SDValue Chain) { // Always select the LHS. unsigned Opc; if (LHS.getValueType() == MVT::i32) { unsigned Imm; if (CC == ISD::SETEQ || CC == ISD::SETNE) { if (isInt32Immediate(RHS, Imm)) { // SETEQ/SETNE comparison with 16-bit immediate, fold it. if (isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF, dl)), 0); // If this is a 16-bit signed immediate, fold it. 
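  // cmpwi sign-extends its 16-bit immediate, so for an equality test any
  // constant whose 32-bit value fits in the signed 16-bit range can be
  // compared directly; larger constants fall through to the xoris/cmplwi
  // trick below.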
if (isInt<16>((int)Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF, dl)), 0); // For non-equality comparisons, the default code would materialize the // constant, then compare against it, like this: // lis r2, 4660 // ori r2, r2, 22136 // cmpw cr0, r3, r2 // Since we are just comparing for equality, we can emit this instead: // xoris r0,r3,0x1234 // cmplwi cr0,r0,0x5678 // beq cr0,L6 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS, getI32Imm(Imm >> 16, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor, getI32Imm(Imm & 0xFFFF, dl)), 0); } Opc = PPC::CMPLW; } else if (ISD::isUnsignedIntSetCC(CC)) { if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLW; } else { int16_t SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, getI32Imm((int)SImm & 0xFFFF, dl)), 0); Opc = PPC::CMPW; } } else if (LHS.getValueType() == MVT::i64) { uint64_t Imm; if (CC == ISD::SETEQ || CC == ISD::SETNE) { if (isInt64Immediate(RHS.getNode(), Imm)) { // SETEQ/SETNE comparison with 16-bit immediate, fold it. if (isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, getI32Imm(Imm & 0xFFFF, dl)), 0); // If this is a 16-bit signed immediate, fold it. if (isInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, getI32Imm(Imm & 0xFFFF, dl)), 0); // For non-equality comparisons, the default code would materialize the // constant, then compare against it, like this: // lis r2, 4660 // ori r2, r2, 22136 // cmpd cr0, r3, r2 // Since we are just comparing for equality, we can emit this instead: // xoris r0,r3,0x1234 // cmpldi cr0,r0,0x5678 // beq cr0,L6 if (isUInt<32>(Imm)) { SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, getI64Imm(Imm >> 16, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, getI64Imm(Imm & 0xFFFF, dl)), 0); } } Opc = PPC::CMPLD; } else if (ISD::isUnsignedIntSetCC(CC)) { if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, getI64Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLD; } else { int16_t SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, getI64Imm(SImm & 0xFFFF, dl)), 0); Opc = PPC::CMPD; } } else if (LHS.getValueType() == MVT::f32) { if (Subtarget->hasSPE()) { switch (CC) { default: case ISD::SETEQ: case ISD::SETNE: Opc = PPC::EFSCMPEQ; break; case ISD::SETLT: case ISD::SETGE: case ISD::SETOLT: case ISD::SETOGE: case ISD::SETULT: case ISD::SETUGE: Opc = PPC::EFSCMPLT; break; case ISD::SETGT: case ISD::SETLE: case ISD::SETOGT: case ISD::SETOLE: case ISD::SETUGT: case ISD::SETULE: Opc = PPC::EFSCMPGT; break; } } else Opc = PPC::FCMPUS; } else if (LHS.getValueType() == MVT::f64) { if (Subtarget->hasSPE()) { switch (CC) { default: case ISD::SETEQ: case ISD::SETNE: Opc = PPC::EFDCMPEQ; break; case ISD::SETLT: case ISD::SETGE: case ISD::SETOLT: case ISD::SETOGE: case ISD::SETULT: case ISD::SETUGE: Opc = PPC::EFDCMPLT; break; case ISD::SETGT: case ISD::SETLE: case ISD::SETOGT: case ISD::SETOLE: case ISD::SETUGT: case ISD::SETULE: Opc = PPC::EFDCMPGT; break; } } else Opc = Subtarget->hasVSX() ? 
PPC::XSCMPUDP : PPC::FCMPUD; } else { assert(LHS.getValueType() == MVT::f128 && "Unknown vt!"); assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector"); Opc = PPC::XSCMPUQP; } if (Chain) return SDValue( CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain), 0); else return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, const PPCSubtarget *Subtarget) { // For SPE instructions, the result is in GT bit of the CR bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint(); switch (CC) { case ISD::SETUEQ: case ISD::SETONE: case ISD::SETOLE: case ISD::SETOGE: llvm_unreachable("Should be lowered by legalize!"); default: llvm_unreachable("Unknown condition!"); case ISD::SETOEQ: case ISD::SETEQ: return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ; case ISD::SETUNE: case ISD::SETNE: return UseSPE ? PPC::PRED_LE : PPC::PRED_NE; case ISD::SETOLT: case ISD::SETLT: return UseSPE ? PPC::PRED_GT : PPC::PRED_LT; case ISD::SETULE: case ISD::SETLE: return PPC::PRED_LE; case ISD::SETOGT: case ISD::SETGT: return PPC::PRED_GT; case ISD::SETUGE: case ISD::SETGE: return UseSPE ? PPC::PRED_LE : PPC::PRED_GE; case ISD::SETO: return PPC::PRED_NU; case ISD::SETUO: return PPC::PRED_UN; // These two are invalid for floating point. Assume we have int. case ISD::SETULT: return PPC::PRED_LT; case ISD::SETUGT: return PPC::PRED_GT; } } /// getCRIdxForSetCC - Return the index of the condition register field /// associated with the SetCC condition, and whether or not the field is /// treated as inverted. That is, lt = 0; ge = 0 inverted. static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { Invert = false; switch (CC) { default: llvm_unreachable("Unknown condition!"); case ISD::SETOLT: case ISD::SETLT: return 0; // Bit #0 = SETOLT case ISD::SETOGT: case ISD::SETGT: return 1; // Bit #1 = SETOGT case ISD::SETOEQ: case ISD::SETEQ: return 2; // Bit #2 = SETOEQ case ISD::SETUO: return 3; // Bit #3 = SETUO case ISD::SETUGE: case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE case ISD::SETULE: case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE case ISD::SETUNE: case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO case ISD::SETUEQ: case ISD::SETOGE: case ISD::SETOLE: case ISD::SETONE: llvm_unreachable("Invalid branch code: should be expanded by legalize"); // These are invalid for floating point. Assume integer. case ISD::SETULT: return 0; case ISD::SETUGT: return 1; } } // getVCmpInst: return the vector compare instruction for the specified // vector type and condition code. Since this is for altivec specific code, // only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128, // and v4f32). static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate) { Swap = false; Negate = false; if (VecVT.isFloatingPoint()) { /* Handle some cases by swapping input operands. */ switch (CC) { case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; default: break; } /* Handle some cases by negating the result. 
*/ switch (CC) { case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; default: break; } /* We have instructions implementing the remaining cases. */ switch (CC) { case ISD::SETEQ: case ISD::SETOEQ: if (VecVT == MVT::v4f32) return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; else if (VecVT == MVT::v2f64) return PPC::XVCMPEQDP; break; case ISD::SETGT: case ISD::SETOGT: if (VecVT == MVT::v4f32) return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; else if (VecVT == MVT::v2f64) return PPC::XVCMPGTDP; break; case ISD::SETGE: case ISD::SETOGE: if (VecVT == MVT::v4f32) return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; else if (VecVT == MVT::v2f64) return PPC::XVCMPGEDP; break; default: break; } llvm_unreachable("Invalid floating-point vector compare condition"); } else { /* Handle some cases by swapping input operands. */ switch (CC) { case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; default: break; } /* Handle some cases by negating the result. */ switch (CC) { case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; default: break; } /* We have instructions implementing the remaining cases. */ switch (CC) { case ISD::SETEQ: case ISD::SETUEQ: if (VecVT == MVT::v16i8) return PPC::VCMPEQUB; else if (VecVT == MVT::v8i16) return PPC::VCMPEQUH; else if (VecVT == MVT::v4i32) return PPC::VCMPEQUW; else if (VecVT == MVT::v2i64) return PPC::VCMPEQUD; else if (VecVT == MVT::v1i128) return PPC::VCMPEQUQ; break; case ISD::SETGT: if (VecVT == MVT::v16i8) return PPC::VCMPGTSB; else if (VecVT == MVT::v8i16) return PPC::VCMPGTSH; else if (VecVT == MVT::v4i32) return PPC::VCMPGTSW; else if (VecVT == MVT::v2i64) return PPC::VCMPGTSD; else if (VecVT == MVT::v1i128) return PPC::VCMPGTSQ; break; case ISD::SETUGT: if (VecVT == MVT::v16i8) return PPC::VCMPGTUB; else if (VecVT == MVT::v8i16) return PPC::VCMPGTUH; else if (VecVT == MVT::v4i32) return PPC::VCMPGTUW; else if (VecVT == MVT::v2i64) return PPC::VCMPGTUD; else if (VecVT == MVT::v1i128) return PPC::VCMPGTUQ; break; default: break; } llvm_unreachable("Invalid integer vector compare condition"); } } bool PPCDAGToDAGISel::trySETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; bool IsStrict = N->isStrictFPOpcode(); ISD::CondCode CC = cast(N->getOperand(IsStrict ? 3 : 2))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); SDValue LHS = N->getOperand(IsStrict ? 1 : 0); SDValue RHS = N->getOperand(IsStrict ? 2 : 1); if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. 
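  // For example, (seteq %a, 0) is selected as a count-leading-zeros plus a
  // shift (register names illustrative):
  //   cntlzw r0, r3              ; 32 iff r3 == 0, otherwise <= 31
  //   rlwinm r3, r0, 27, 5, 31   ; srwi by 5, leaving 1 iff r3 was 0
  // The remaining cases below use similar carry and sign-bit tricks so that
  // no compare or branch is needed.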
// setcc op, 0 if (Imm == 0) { SDValue Op = LHS; switch (CC) { default: break; case ISD::SETEQ: { Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } case ISD::SETNE: { if (isPPC64) break; SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U, dl)), 0); CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); return true; } case ISD::SETLT: { SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } case ISD::SETGT: { SDValue T = SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } } } else if (Imm == ~0U) { // setcc op, -1 SDValue Op = LHS; switch (CC) { default: break; case ISD::SETEQ: if (isPPC64) break; Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(1, dl)), 0); CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(CurDAG->getMachineNode(PPC::LI, dl, MVT::i32, getI32Imm(0, dl)), 0), Op.getValue(1)); return true; case ISD::SETNE: { if (isPPC64) break; Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U, dl)); CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, SDValue(AD, 1)); return true; } case ISD::SETLT: { SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, getI32Imm(1, dl)), 0); SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, Op), 0); SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } case ISD::SETGT: { SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); return true; } } } } // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. if (!IsStrict && LHS.getValueType().isVector()) { if (Subtarget->hasSPE()) return false; EVT VecVT = LHS.getValueType(); bool Swap, Negate; unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate); if (Swap) std::swap(LHS, RHS); EVT ResVT = VecVT.changeVectorElementTypeToInteger(); if (Negate) { SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, ResVT, VCmp, VCmp); return true; } CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); return true; } if (Subtarget->useCRBits()) return false; bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain); if (IsStrict) CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1)); SDValue IntCR; // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that // The correct compare instruction is already set by SelectCC() if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { Idx = 1; } // Force the ccreg into CR7. 
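  // The CR field holding the result is copied into CR7 and then moved to a
  // GPR with mfocrf, which deposits the four CR7 bits (lt, gt, eq, so) in
  // the low nibble of the destination. The rlwinm below rotates the
  // requested bit (selected by Idx) into the least-significant position and
  // masks off everything else; when the predicate is inverted, a final xori
  // flips that bit.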
SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); SDValue InGlue; // Null incoming flag value. CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, InGlue).getValue(1); IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, CCReg), 0); SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; if (!Inv) { CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } // Get the specified bit. SDValue Tmp = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); return true; } /// Does this node represent a load/store node whose address can be represented /// with a register plus an immediate that's a multiple of \p Val: bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { LoadSDNode *LDN = dyn_cast(N); StoreSDNode *STN = dyn_cast(N); MemIntrinsicSDNode *MIN = dyn_cast(N); SDValue AddrOp; if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT)) AddrOp = N->getOperand(1); else if (STN) AddrOp = STN->getOperand(2); // If the address points a frame object or a frame object with an offset, // we need to check the object alignment. short Imm = 0; if (FrameIndexSDNode *FI = dyn_cast( AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) : AddrOp)) { // If op0 is a frame index that is under aligned, we can't do it either, // because it is translated to r31 or r1 + slot + offset. We won't know the // slot number until the stack frame is finalized. const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value(); if ((SlotAlign % Val) != 0) return false; // If we have an offset, we need further check on the offset. if (AddrOp.getOpcode() != ISD::ADD) return true; } if (AddrOp.getOpcode() == ISD::ADD) return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); // If the address comes from the outside, the offset will be zero. return AddrOp.getOpcode() == ISD::CopyFromReg; } void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { // Transfer memoperands. MachineMemOperand *MemOp = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(Result), {MemOp}); } static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, bool &NeedSwapOps, bool &IsUnCmp) { assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here."); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue TrueRes = N->getOperand(2); SDValue FalseRes = N->getOperand(3); ConstantSDNode *TrueConst = dyn_cast(TrueRes); if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 && N->getSimpleValueType(0) != MVT::i32)) return false; // We are looking for any of: // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq) // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq) int64_t TrueResVal = TrueConst->getSExtValue(); if ((TrueResVal < -1 || TrueResVal > 1) || (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) || (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) || (TrueResVal == 0 && (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) return false; SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC ? 
FalseRes : FalseRes.getOperand(0); bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC; if (SetOrSelCC.getOpcode() != ISD::SETCC && SetOrSelCC.getOpcode() != ISD::SELECT_CC) return false; // Without this setb optimization, the outer SELECT_CC will be manually // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass // transforms pseudo instruction to isel instruction. When there are more than // one use for result like zext/sext, with current optimization we only see // isel is replaced by setb but can't see any significant gain. Since // setb has longer latency than original isel, we should avoid this. Another // point is that setb requires comparison always kept, it can break the // opportunity to get the comparison away if we have in future. if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) return false; SDValue InnerLHS = SetOrSelCC.getOperand(0); SDValue InnerRHS = SetOrSelCC.getOperand(1); ISD::CondCode InnerCC = cast(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get(); // If the inner comparison is a select_cc, make sure the true/false values are // 1/-1 and canonicalize it if needed. if (InnerIsSel) { ConstantSDNode *SelCCTrueConst = dyn_cast(SetOrSelCC.getOperand(2)); ConstantSDNode *SelCCFalseConst = dyn_cast(SetOrSelCC.getOperand(3)); if (!SelCCTrueConst || !SelCCFalseConst) return false; int64_t SelCCTVal = SelCCTrueConst->getSExtValue(); int64_t SelCCFVal = SelCCFalseConst->getSExtValue(); // The values must be -1/1 (requiring a swap) or 1/-1. if (SelCCTVal == -1 && SelCCFVal == 1) { std::swap(InnerLHS, InnerRHS); } else if (SelCCTVal != 1 || SelCCFVal != -1) return false; } // Canonicalize unsigned case if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) { IsUnCmp = true; InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT; } bool InnerSwapped = false; if (LHS == InnerRHS && RHS == InnerLHS) InnerSwapped = true; else if (LHS != InnerLHS || RHS != InnerRHS) return false; switch (CC) { // (select_cc lhs, rhs, 0, \ // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq) case ISD::SETEQ: if (!InnerIsSel) return false; if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT) return false; NeedSwapOps = (InnerCC == ISD::SETGT) ? 
InnerSwapped : !InnerSwapped; break; // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt) // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt) // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt) // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt) // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt) // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt) case ISD::SETULT: if (!IsUnCmp && InnerCC != ISD::SETNE) return false; IsUnCmp = true; [[fallthrough]]; case ISD::SETLT: if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || (InnerCC == ISD::SETLT && InnerSwapped)) NeedSwapOps = (TrueResVal == 1); else return false; break; // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt) // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt) // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt) // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt) // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt) // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt) case ISD::SETUGT: if (!IsUnCmp && InnerCC != ISD::SETNE) return false; IsUnCmp = true; [[fallthrough]]; case ISD::SETGT: if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) || (InnerCC == ISD::SETGT && InnerSwapped)) NeedSwapOps = (TrueResVal == -1); else return false; break; default: return false; } LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: "); LLVM_DEBUG(N->dump()); return true; } // Return true if it's a software square-root/divide operand. static bool isSWTestOp(SDValue N) { if (N.getOpcode() == PPCISD::FTSQRT) return true; if (N.getNumOperands() < 1 || !isa(N.getOperand(0)) || N.getOpcode() != ISD::INTRINSIC_WO_CHAIN) return false; switch (N.getConstantOperandVal(0)) { case Intrinsic::ppc_vsx_xvtdivdp: case Intrinsic::ppc_vsx_xvtdivsp: case Intrinsic::ppc_vsx_xvtsqrtdp: case Intrinsic::ppc_vsx_xvtsqrtsp: return true; } return false; } bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) { assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected."); // We are looking for following patterns, where `truncate to i1` actually has // the same semantic with `and 1`. // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp) // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp) // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp) // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp) // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp) // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp) // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp) // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp) ISD::CondCode CC = cast(N->getOperand(1))->get(); if (CC != ISD::SETEQ && CC != ISD::SETNE) return false; SDValue CmpRHS = N->getOperand(3); if (!isNullConstant(CmpRHS)) return false; SDValue CmpLHS = N->getOperand(2); if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0))) return false; unsigned PCC = 0; bool IsCCNE = CC == ISD::SETNE; if (CmpLHS.getOpcode() == ISD::AND && isa(CmpLHS.getOperand(1))) switch (CmpLHS.getConstantOperandVal(1)) { case 1: PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; break; case 2: PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE; break; case 4: PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE; break; case 8: PCC = IsCCNE ? 
PPC::PRED_LT : PPC::PRED_GE; break; default: return false; } else if (CmpLHS.getOpcode() == ISD::TRUNCATE && CmpLHS.getValueType() == MVT::i1) PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; if (PCC) { SDLoc dl(N); SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4), N->getOperand(0)}; CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); return true; } return false; } bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) { // Sometimes the promoted value of the intrinsic is ANDed by some non-zero // value, for example when crbits is disabled. If so, select the // loop_decrement intrinsics now. ISD::CondCode CC = cast(N->getOperand(1))->get(); SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); if (LHS.getOpcode() != ISD::AND || !isa(LHS.getOperand(1)) || isNullConstant(LHS.getOperand(1))) return false; if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN || LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement) return false; if (!isa(RHS)) return false; assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"); SDValue OldDecrement = LHS.getOperand(0); assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!"); SDLoc DecrementLoc(OldDecrement); SDValue ChainInput = OldDecrement.getOperand(0); SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc) : getI32Imm(1, DecrementLoc)}; unsigned DecrementOpcode = Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop; SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc, MVT::i1, DecrementOps); unsigned Val = RHS->getAsZExtVal(); bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val); unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn; ReplaceUses(LHS.getValue(0), LHS.getOperand(1)); CurDAG->RemoveDeadNode(LHS.getNode()); // Mark the old loop_decrement intrinsic as dead. ReplaceUses(OldDecrement.getValue(1), ChainInput); CurDAG->RemoveDeadNode(OldDecrement.getNode()); SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, ChainInput, N->getOperand(0)); CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0), N->getOperand(4), Chain); return true; } bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); unsigned Imm; if (!isInt32Immediate(N->getOperand(1), Imm)) return false; SDLoc dl(N); SDValue Val = N->getOperand(0); unsigned SH, MB, ME; // If this is an and of a value rotated between 0 and 31 bits and then and'd // with a mask, emit rlwinm if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) { Val = Val.getOperand(0); SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl)}; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } // If this is just a masked value where the input is not handled, and // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) { SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl), getI32Imm(ME, dl)}; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } // AND X, 0 -> 0, not "rlwinm 32". 
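// Note that the existing zero operand is simply reused as the replacement
// value here, so no new constant node has to be created.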
if (Imm == 0) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); return true; } return false; } bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); uint64_t Imm64; if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) return false; unsigned MB, ME; if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) { // MB ME // +----------------------+ // |xxxxxxxxxxx00011111000| // +----------------------+ // 0 32 64 // We can only do it if the MB is larger than 32 and MB <= ME // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even // we didn't rotate it. SDLoc dl(N); SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl), getI64Imm(ME - 32, dl)}; CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); return true; } return false; } bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); uint64_t Imm64; if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) return false; // Do nothing if it is 16-bit imm as the pattern in the .td file handle // it well with "andi.". if (isUInt<16>(Imm64)) return false; SDLoc Loc(N); SDValue Val = N->getOperand(0); // Optimized with two rldicl's as follows: // Add missing bits on left to the mask and check that the mask is a // wrapped run of ones, i.e. // Change pattern |0001111100000011111111| // to |1111111100000011111111|. unsigned NumOfLeadingZeros = llvm::countl_zero(Imm64); if (NumOfLeadingZeros != 0) Imm64 |= maskLeadingOnes(NumOfLeadingZeros); unsigned MB, ME; if (!isRunOfOnes64(Imm64, MB, ME)) return false; // ME MB MB-ME+63 // +----------------------+ +----------------------+ // |1111111100000011111111| -> |0000001111111111111111| // +----------------------+ +----------------------+ // 0 63 0 63 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between. unsigned OnesOnLeft = ME + 1; unsigned ZerosInBetween = (MB - ME + 63) & 63; // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear // on the left the bits that are already zeros in the mask. Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val, getI64Imm(OnesOnLeft, Loc), getI64Imm(ZerosInBetween, Loc)), 0); // MB-ME+63 ME MB // +----------------------+ +----------------------+ // |0000001111111111111111| -> |0001111100000011111111| // +----------------------+ +----------------------+ // 0 63 0 63 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the // left the number of ones we previously added. SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc), getI64Imm(NumOfLeadingZeros, Loc)}; CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); return true; } bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); unsigned Imm; if (!isInt32Immediate(N->getOperand(1), Imm)) return false; SDValue Val = N->getOperand(0); unsigned Imm2; // ISD::OR doesn't get all the bitfield insertion fun. // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a // bitfield insert. if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2)) return false; // The idea here is to check whether this is equivalent to: // (c1 & m) | (x & ~m) // where m is a run-of-ones mask. The logic here is that, for each bit in // c1 and c2: // - if both are 1, then the output will be 1. // - if both are 0, then the output will be 0. 
// - if the bit in c1 is 0, and the bit in c2 is 1, then the output will // come from x. // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will // be 0. // If that last condition is never the case, then we can form m from the // bits that are the same between c1 and c2. unsigned MB, ME; if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) { SDLoc dl(N); SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl), getI32Imm(MB, dl), getI32Imm(ME, dl)}; ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); return true; } return false; } bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); uint64_t Imm64; if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64)) return false; SDValue Val = N->getOperand(0); if (Val.getOpcode() != ISD::ROTL) return false; // Looking to try to avoid a situation like this one: // %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23) // %and1 = and i64 %2, 9223372036854775807 // In this function we are looking to try to match RLDCL. However, the above // DAG would better match RLDICL instead which is not what we are looking // for here. SDValue RotateAmt = Val.getOperand(1); if (RotateAmt.getOpcode() == ISD::Constant) return false; unsigned MB = 64 - llvm::countr_one(Imm64); SDLoc dl(N); SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)}; CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops); return true; } bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); uint64_t Imm64; if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64)) return false; // If this is a 64-bit zero-extension mask, emit rldicl. unsigned MB = 64 - llvm::countr_one(Imm64); unsigned SH = 0; unsigned Imm; SDValue Val = N->getOperand(0); SDLoc dl(N); if (Val.getOpcode() == ISD::ANY_EXTEND) { auto Op0 = Val.getOperand(0); if (Op0.getOpcode() == ISD::SRL && isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { auto ResultType = Val.getNode()->getValueType(0); auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType); SDValue IDVal(ImDef, 0); Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType, IDVal, Op0.getOperand(0), getI32Imm(1, dl)), 0); SH = 64 - Imm; } } // If the operand is a logical right shift, we can fold it into this // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) // for n <= mb. The right shift is really a left rotate followed by a // mask, and this mask is a more-restrictive sub-mask of the mask implied // by the shift. if (Val.getOpcode() == ISD::SRL && isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { assert(Imm < 64 && "Illegal shift amount"); Val = Val.getOperand(0); SH = 64 - Imm; } SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)}; CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); return true; } bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); uint64_t Imm64; if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(~Imm64)) return false; // If this is a negated 64-bit zero-extension mask, // i.e. the immediate is a sequence of ones from most significant side // and all zero for reminder, we should use rldicr. 
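// For example, Imm64 == 0xFFFF000000000000 gives 48 trailing ones in ~Imm64,
// so MB == 63 - 48 == 15 and the rldicr below (SH == 0, ME == 15) keeps only
// the 16 most-significant bits, which is exactly the original mask.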
unsigned MB = 63 - llvm::countr_one(~Imm64); unsigned SH = 0; SDLoc dl(N); SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)}; CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); return true; } bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) { assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected"); uint64_t Imm64; unsigned MB, ME; SDValue N0 = N->getOperand(0); // We won't get fewer instructions if the imm is 32-bit integer. // rldimi requires the imm to have consecutive ones with both sides zero. // Also, make sure the first Op has only one use, otherwise this may increase // register pressure since rldimi is destructive. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse()) return false; unsigned SH = 63 - ME; SDLoc Dl(N); // Use select64Imm for making LI instr instead of directly putting Imm64 SDValue Ops[] = { N->getOperand(0), SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0), getI32Imm(SH, Dl), getI32Imm(MB, Dl)}; CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops); return true; } // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. void PPCDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); return; // Already selected. } // In case any misguided DAG-level optimizations form an ADD with a // TargetConstant operand, crash here instead of miscompiling (by selecting // an r+r add instead of some kind of r+i add). if (N->getOpcode() == ISD::ADD && N->getOperand(1).getOpcode() == ISD::TargetConstant) llvm_unreachable("Invalid ADD with TargetConstant operand"); // Try matching complex bit permutations before doing anything else. if (tryBitPermutation(N)) return; // Try to emit integer compares as GPR-only sequences (i.e. no use of CR). if (tryIntCompareInGPR(N)) return; switch (N->getOpcode()) { default: break; case ISD::Constant: if (N->getValueType(0) == MVT::i64) { ReplaceNode(N, selectI64Imm(CurDAG, N)); return; } break; case ISD::INTRINSIC_VOID: { auto IntrinsicID = N->getConstantOperandVal(1); if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw && IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap) break; unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw || IntrinsicID == Intrinsic::ppc_trapd) ? PPC::TDI : PPC::TWI; SmallVector OpsWithMD; unsigned MDIndex; if (IntrinsicID == Intrinsic::ppc_tdw || IntrinsicID == Intrinsic::ppc_tw) { SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)}; int16_t SImmOperand2; int16_t SImmOperand3; int16_t SImmOperand4; bool isOperand2IntS16Immediate = isIntS16Immediate(N->getOperand(2), SImmOperand2); bool isOperand3IntS16Immediate = isIntS16Immediate(N->getOperand(3), SImmOperand3); // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg + // reg or imm + imm. The imm + imm form will be optimized to either an // unconditional trap or a nop in a later pass. if (isOperand2IntS16Immediate == isOperand3IntS16Immediate) Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW; else if (isOperand3IntS16Immediate) // The 2nd and 3rd operands are reg + imm. Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl); else { // The 2nd and 3rd operands are imm + reg. 
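// Because the comparison operands end up swapped (TWI/TDI take the register
// operand first and the immediate second), the TO condition has to be
// mirrored: the signed lt/gt bits and the unsigned lt/gt bits are exchanged
// pairwise, while the eq bit is left untouched.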
bool isOperand4IntS16Immediate = isIntS16Immediate(N->getOperand(4), SImmOperand4); (void)isOperand4IntS16Immediate; assert(isOperand4IntS16Immediate && "The 4th operand is not an Immediate"); // We need to flip the condition immediate TO. int16_t TO = int(SImmOperand4) & 0x1F; // We swap the first and second bit of TO if they are not same. if ((TO & 0x1) != ((TO & 0x2) >> 1)) TO = (TO & 0x1) ? TO + 1 : TO - 1; // We swap the fourth and fifth bit of TO if they are not same. if ((TO & 0x8) != ((TO & 0x10) >> 1)) TO = (TO & 0x8) ? TO + 8 : TO - 8; Ops[0] = getI32Imm(TO, dl); Ops[1] = N->getOperand(3); Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl); } OpsWithMD = {Ops[0], Ops[1], Ops[2]}; MDIndex = 5; } else { OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)}; MDIndex = 3; } if (N->getNumOperands() > MDIndex) { SDValue MDV = N->getOperand(MDIndex); const MDNode *MD = cast(MDV)->getMD(); assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!"); assert((isa(MD->getOperand(0)) && cast( MD->getOperand(0))->getString().equals("ppc-trap-reason")) && "Unsupported annotation data type!"); for (unsigned i = 1; i < MD->getNumOperands(); i++) { assert(isa(MD->getOperand(i)) && "Invalid data type for annotation ppc-trap-reason!"); OpsWithMD.push_back( getI32Imm(std::stoi(cast( MD->getOperand(i))->getString().str()), dl)); } } OpsWithMD.push_back(N->getOperand(0)); // chain CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD); return; } case ISD::INTRINSIC_WO_CHAIN: { // We emit the PPC::FSELS instruction here because of type conflicts with // the comparison operand. The FSELS instruction is defined to use an 8-byte // comparison like the FSELD version. The fsels intrinsic takes a 4-byte // value for the comparison. When selecting through a .td file, a type // error is raised. Must check this first so we never break on the // !Subtarget->isISA3_1() check. auto IntID = N->getConstantOperandVal(0); if (IntID == Intrinsic::ppc_fsels) { SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)}; CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops); return; } if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) { auto Pred = N->getConstantOperandVal(1); unsigned Opcode = IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec; unsigned SubReg = 0; unsigned ShiftVal = 0; bool Reverse = false; switch (Pred) { case 0: SubReg = PPC::sub_eq; ShiftVal = 1; break; case 1: SubReg = PPC::sub_eq; ShiftVal = 1; Reverse = true; break; case 2: SubReg = PPC::sub_lt; ShiftVal = 3; break; case 3: SubReg = PPC::sub_lt; ShiftVal = 3; Reverse = true; break; case 4: SubReg = PPC::sub_gt; ShiftVal = 2; break; case 5: SubReg = PPC::sub_gt; ShiftVal = 2; Reverse = true; break; case 6: SubReg = PPC::sub_un; break; case 7: SubReg = PPC::sub_un; Reverse = true; break; } EVT VTs[] = {MVT::v16i8, MVT::Glue}; SDValue Ops[] = {N->getOperand(2), N->getOperand(3), CurDAG->getTargetConstant(0, dl, MVT::i32)}; SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0); SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32); // On Power10, we can use SETBC[R]. On prior architectures, we have to use // MFOCRF and shift/negate the value. if (Subtarget->isISA3_1()) { SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32); SDValue CRBit = SDValue( CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, CR6Reg, SubRegIdx, BCDOp.getValue(1)), 0); CurDAG->SelectNodeTo(N, Reverse ? 
PPC::SETBCR : PPC::SETBC, MVT::i32, CRBit); } else { SDValue Move = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg, BCDOp.getValue(1)), 0); SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl), getI32Imm(31, dl), getI32Imm(31, dl)}; if (!Reverse) CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); else { SDValue Shift = SDValue( CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl)); } } return; } if (!Subtarget->isISA3_1()) break; unsigned Opcode = 0; switch (IntID) { default: break; case Intrinsic::ppc_altivec_vstribr_p: Opcode = PPC::VSTRIBR_rec; break; case Intrinsic::ppc_altivec_vstribl_p: Opcode = PPC::VSTRIBL_rec; break; case Intrinsic::ppc_altivec_vstrihr_p: Opcode = PPC::VSTRIHR_rec; break; case Intrinsic::ppc_altivec_vstrihl_p: Opcode = PPC::VSTRIHL_rec; break; } if (!Opcode) break; // Generate the appropriate vector string isolate intrinsic to match. EVT VTs[] = {MVT::v16i8, MVT::Glue}; SDValue VecStrOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0); // Vector string isolate instructions update the EQ bit of CR6. // Generate a SETBC instruction to extract the bit and place it in a GPR. SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32); SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32); SDValue CRBit = SDValue( CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, CR6Reg, SubRegIdx, VecStrOp.getValue(1)), 0); CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit); return; } case ISD::SETCC: case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: if (trySETCC(N)) return; break; // These nodes will be transformed into GETtlsADDR32 node, which // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT case PPCISD::ADDI_TLSLD_L_ADDR: case PPCISD::ADDI_TLSGD_L_ADDR: { const Module *Mod = MF->getFunction().getParent(); if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() || Mod->getPICLevel() == PICLevel::SmallPIC) break; // Attach global base pointer on GETtlsADDR32 node in order to // generate secure plt code for TLS symbols. 
getGlobalBaseReg(); } break; case PPCISD::CALL: { if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || !TM.isPositionIndependent() || !Subtarget->isSecurePlt() || !Subtarget->isTargetELF()) break; SDValue Op = N->getOperand(1); if (GlobalAddressSDNode *GA = dyn_cast(Op)) { if (GA->getTargetFlags() == PPCII::MO_PLT) getGlobalBaseReg(); } else if (ExternalSymbolSDNode *ES = dyn_cast(Op)) { if (ES->getTargetFlags() == PPCII::MO_PLT) getGlobalBaseReg(); } } break; case PPCISD::GlobalBaseReg: ReplaceNode(N, getGlobalBaseReg()); return; case ISD::FrameIndex: selectFrameIndex(N, N); return; case PPCISD::MFOCRF: { SDValue InGlue = N->getOperand(1); ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, N->getOperand(0), InGlue)); return; } case PPCISD::READ_TIME_BASE: ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, MVT::Other, N->getOperand(0))); return; case PPCISD::SRA_ADDZE: { SDValue N0 = N->getOperand(0); SDValue ShiftAmt = CurDAG->getTargetConstant(*cast(N->getOperand(1))-> getConstantIntValue(), dl, N->getValueType(0)); if (N->getValueType(0) == MVT::i64) { SDNode *Op = CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, N0, ShiftAmt); CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), SDValue(Op, 1)); return; } else { assert(N->getValueType(0) == MVT::i32 && "Expecting i64 or i32 in PPCISD::SRA_ADDZE"); SDNode *Op = CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, N0, ShiftAmt); CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), SDValue(Op, 1)); return; } } case ISD::STORE: { // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to // X-form stores. StoreSDNode *ST = cast(N); if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) && ST->getAddressingMode() != ISD::PRE_INC) if (tryTLSXFormStore(ST)) return; break; } case ISD::LOAD: { // Handle preincrement loads. LoadSDNode *LD = cast(N); EVT LoadedVT = LD->getMemoryVT(); // Normal loads are handled by code generated from the .td file. if (LD->getAddressingMode() != ISD::PRE_INC) { // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to // X-form loads. if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI())) if (tryTLSXFormLoad(LD)) return; break; } SDValue Offset = LD->getOffset(); if (Offset.getOpcode() == ISD::TargetConstant || Offset.getOpcode() == ISD::TargetGlobalAddress) { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; if (LD->getValueType(0) != MVT::i64) { // Handle PPC32 integer and normal FP loads. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); case MVT::f64: Opcode = PPC::LFDU; break; case MVT::f32: Opcode = PPC::LFSU; break; case MVT::i32: Opcode = PPC::LWZU; break; case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break; case MVT::i1: case MVT::i8: Opcode = PPC::LBZU; break; } } else { assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); case MVT::i64: Opcode = PPC::LDU; break; case MVT::i32: Opcode = PPC::LWZU8; break; case MVT::i16: Opcode = isSExt ? 
PPC::LHAU8 : PPC::LHZU8; break; case MVT::i1: case MVT::i8: Opcode = PPC::LBZU8; break; } } SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; SDNode *MN = CurDAG->getMachineNode( Opcode, dl, LD->getValueType(0), PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); transferMemOperands(N, MN); ReplaceNode(N, MN); return; } else { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; if (LD->getValueType(0) != MVT::i64) { // Handle PPC32 integer and normal FP loads. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); case MVT::f64: Opcode = PPC::LFDUX; break; case MVT::f32: Opcode = PPC::LFSUX; break; case MVT::i32: Opcode = PPC::LWZUX; break; case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; case MVT::i1: case MVT::i8: Opcode = PPC::LBZUX; break; } } else { assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); case MVT::i64: Opcode = PPC::LDUX; break; case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break; case MVT::i1: case MVT::i8: Opcode = PPC::LBZUX8; break; } } SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; SDNode *MN = CurDAG->getMachineNode( Opcode, dl, LD->getValueType(0), PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); transferMemOperands(N, MN); ReplaceNode(N, MN); return; } } case ISD::AND: // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) || tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N)) return; // Other cases are autogenerated. break; case ISD::OR: { if (N->getValueType(0) == MVT::i32) if (tryBitfieldInsert(N)) return; int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0)); // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm); return; } } // If this is 'or' against an imm with consecutive ones and both sides zero, // try to emit rldimi if (tryAsSingleRLDIMI(N)) return; // OR with a 32-bit immediate can be handled by ori + oris // without creating an immediate in a GPR. uint64_t Imm64 = 0; bool IsPPC64 = Subtarget->isPPC64(); if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && (Imm64 & ~0xFFFFFFFFuLL) == 0) { // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later. uint64_t ImmHi = Imm64 >> 16; uint64_t ImmLo = Imm64 & 0xFFFF; if (ImmHi != 0 && ImmLo != 0) { SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, N->getOperand(0), getI16Imm(ImmLo, dl)); SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1); return; } } // Other cases are autogenerated. break; } case ISD::XOR: { // XOR with a 32-bit immediate can be handled by xori + xoris // without creating an immediate in a GPR. 
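// For example, (xor x, 0x12345678) is emitted as xori with 0x5678 followed by
// xoris with 0x1234, avoiding a separate instruction sequence just to
// materialize the 32-bit constant in a register.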
uint64_t Imm64 = 0; bool IsPPC64 = Subtarget->isPPC64(); if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && (Imm64 & ~0xFFFFFFFFuLL) == 0) { // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later. uint64_t ImmHi = Imm64 >> 16; uint64_t ImmLo = Imm64 & 0xFFFF; if (ImmHi != 0 && ImmLo != 0) { SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, N->getOperand(0), getI16Imm(ImmLo, dl)); SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1); return; } } break; } case ISD::ADD: { int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm); return; } break; } case ISD::SHL: { unsigned Imm, SH, MB, ME; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && isRotateAndMask(N, Imm, true, SH, MB, ME)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return; } // Other cases are autogenerated. break; } case ISD::SRL: { unsigned Imm, SH, MB, ME; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && isRotateAndMask(N, Imm, true, SH, MB, ME)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return; } // Other cases are autogenerated. break; } case ISD::MUL: { SDValue Op1 = N->getOperand(1); if (Op1.getOpcode() != ISD::Constant || (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32)) break; // If the multiplier fits int16, we can handle it with mulli. int64_t Imm = Op1->getAsZExtVal(); unsigned Shift = llvm::countr_zero(Imm); if (isInt<16>(Imm) || !Shift) break; // If the shifted value fits int16, we can do this transformation: // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2). uint64_t ImmSh = Imm >> Shift; if (!isInt<16>(ImmSh)) break; uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); if (Op1.getValueType() == MVT::i64) { SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64, N->getOperand(0), SDImm); SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl)}; CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); return; } else { SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32); SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32, N->getOperand(0), SDImm); SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl), getI32Imm(0, dl), getI32Imm(31 - Shift, dl)}; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return; } break; } // FIXME: Remove this once the ANDI glue bug is fixed: case PPCISD::ANDI_rec_1_EQ_BIT: case PPCISD::ANDI_rec_1_GT_BIT: { if (!ANDIGlueBug) break; EVT InVT = N->getOperand(0).getValueType(); assert((InVT == MVT::i64 || InVT == MVT::i32) && "Invalid input type for ANDI_rec_1_EQ_BIT"); unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec; SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, N->getOperand(0), CurDAG->getTargetConstant(1, dl, InVT)), 0); SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); SDValue SRIdxVal = CurDAG->getTargetConstant( N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? 
PPC::sub_eq : PPC::sub_gt, dl, MVT::i32); CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); return; } case ISD::SELECT_CC: { ISD::CondCode CC = cast(N->getOperand(4))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); // If this is a select of i1 operands, we'll pattern match it. if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1) break; if (Subtarget->isISA3_0() && Subtarget->isPPC64()) { bool NeedSwapOps = false; bool IsUnCmp = false; if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (NeedSwapOps) std::swap(LHS, RHS); // Make use of SelectCC to generate the comparison to set CR bits, for // equality comparisons having one literal operand, SelectCC probably // doesn't need to materialize the whole literal and just use xoris to // check it first, it leads the following comparison result can't // exactly represent GT/LT relationship. So to avoid this we specify // SETGT/SETUGT here instead of SETEQ. SDValue GenCC = SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl); CurDAG->SelectNodeTo( N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB, N->getValueType(0), GenCC); NumP9Setb++; return; } } // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc if (!isPPC64 && isNullConstant(N->getOperand(1)) && isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) && CC == ISD::SETNE && // FIXME: Implement this optzn for PPC64. N->getValueType(0) == MVT::i32) { SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, N->getOperand(0), getI32Imm(~0U, dl)); CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), N->getOperand(0), SDValue(Tmp, 1)); return; } SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); if (N->getValueType(0) == MVT::i1) { // An i1 select is: (c & t) | (!c & f). bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); unsigned SRI; switch (Idx) { default: llvm_unreachable("Invalid CC index"); case 0: SRI = PPC::sub_lt; break; case 1: SRI = PPC::sub_gt; break; case 2: SRI = PPC::sub_eq; break; case 3: SRI = PPC::sub_un; break; } SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, CCBit, CCBit), 0); SDValue C = Inv ? NotCCBit : CCBit, NotC = Inv ? 
CCBit : NotCCBit; SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, C, N->getOperand(2)), 0); SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, NotC, N->getOperand(3)), 0); CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); return; } unsigned BROpc = getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget); unsigned SelectCCOp; if (N->getValueType(0) == MVT::i32) SelectCCOp = PPC::SELECT_CC_I4; else if (N->getValueType(0) == MVT::i64) SelectCCOp = PPC::SELECT_CC_I8; else if (N->getValueType(0) == MVT::f32) { if (Subtarget->hasP8Vector()) SelectCCOp = PPC::SELECT_CC_VSSRC; else if (Subtarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE4; else SelectCCOp = PPC::SELECT_CC_F4; } else if (N->getValueType(0) == MVT::f64) { if (Subtarget->hasVSX()) SelectCCOp = PPC::SELECT_CC_VSFRC; else if (Subtarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE; else SelectCCOp = PPC::SELECT_CC_F8; } else if (N->getValueType(0) == MVT::f128) SelectCCOp = PPC::SELECT_CC_F16; else if (Subtarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE; else if (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64) SelectCCOp = PPC::SELECT_CC_VSRC; else SelectCCOp = PPC::SELECT_CC_VRRC; SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), getI32Imm(BROpc, dl) }; CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); return; } case ISD::VECTOR_SHUFFLE: if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64)) { ShuffleVectorSDNode *SVN = cast(N); SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1); unsigned DM[2]; for (int i = 0; i < 2; ++i) if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) DM[i] = 0; else DM[i] = 1; if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && isa(Op1.getOperand(0))) { LoadSDNode *LD = cast(Op1.getOperand(0)); SDValue Base, Offset; if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() && (LD->getMemoryVT() == MVT::f64 || LD->getMemoryVT() == MVT::i64) && SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; MachineMemOperand *MemOp = LD->getMemOperand(); SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, N->getValueType(0), Ops); CurDAG->setNodeMemRefs(cast(NewN), {MemOp}); return; } } // For little endian, we must swap the input operands and adjust // the mask elements (reverse and invert them). if (Subtarget->isLittleEndian()) { std::swap(Op1, Op2); unsigned tmp = DM[0]; DM[0] = 1 - DM[1]; DM[1] = 1 - tmp; } SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, MVT::i32); SDValue Ops[] = { Op1, Op2, DMV }; CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); return; } break; case PPCISD::BDNZ: case PPCISD::BDZ: { bool IsPPC64 = Subtarget->isPPC64(); SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), MVT::Other, Ops); return; } case PPCISD::COND_BRANCH: { // Op #0 is the Chain. // Op #1 is the PPC::PRED_* number. // Op #2 is the CR# // Op #3 is the Dest MBB // Op #4 is the Flag. // Prevent PPC::PRED_* from being selected into LI. 
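// When branch hints are enabled, getBranchHint may OR taken/not-taken hint
// bits into the predicate code based on the estimated probability of reaching
// the destination block, so PCC below can differ from the raw PRED_* operand.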
unsigned PCC = N->getConstantOperandVal(1); if (EnableBranchHint) PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3)); SDValue Pred = getI32Imm(PCC, dl); SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), N->getOperand(0), N->getOperand(4) }; CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); return; } case ISD::BR_CC: { if (tryFoldSWTestBRCC(N)) return; if (trySelectLoopCountIntrinsic(N)) return; ISD::CondCode CC = cast(N->getOperand(1))->get(); unsigned PCC = getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget); if (N->getOperand(2).getValueType() == MVT::i1) { unsigned Opc; bool Swap; switch (PCC) { default: llvm_unreachable("Unexpected Boolean-operand predicate"); case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; } // A signed comparison of i1 values produces the opposite result to an // unsigned one if the condition code includes less-than or greater-than. // This is because 1 is the most negative signed i1 number and the most // positive unsigned i1 number. The CR-logical operations used for such // comparisons are non-commutative so for signed comparisons vs. unsigned // ones, the input operands just need to be swapped. if (ISD::isSignedIntSetCC(CC)) Swap = !Swap; SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, N->getOperand(Swap ? 3 : 2), N->getOperand(Swap ? 2 : 3)), 0); CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), N->getOperand(0)); return; } if (EnableBranchHint) PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4)); SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, N->getOperand(4), N->getOperand(0) }; CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); return; } case ISD::BRIND: { // FIXME: Should custom lower this. SDValue Chain = N->getOperand(0); SDValue Target = N->getOperand(1); unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, Chain), 0); CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); return; } case PPCISD::TOC_ENTRY: { const bool isPPC64 = Subtarget->isPPC64(); const bool isELFABI = Subtarget->isSVR4ABI(); const bool isAIXABI = Subtarget->isAIXABI(); // PowerPC only support small, medium and large code model. const CodeModel::Model CModel = TM.getCodeModel(); assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && "PowerPC doesn't support tiny or kernel code models."); if (isAIXABI && CModel == CodeModel::Medium) report_fatal_error("Medium code model is not supported on AIX."); // For 64-bit ELF small code model, we allow SelectCodeCommon to handle // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX // small code model, we need to check for a toc-data attribute. 
if (isPPC64 && !isAIXABI && CModel == CodeModel::Small) break; auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry, EVT OperandTy) { SDValue GA = TocEntry->getOperand(0); SDValue TocBase = TocEntry->getOperand(1); SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase); transferMemOperands(TocEntry, MN); ReplaceNode(TocEntry, MN); }; // Handle 32-bit small code model. if (!isPPC64 && CModel == CodeModel::Small) { // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either // PPC::ADDItoc, or PPC::LWZtoc if (isELFABI) { assert(TM.isPositionIndependent() && "32-bit ELF can only have TOC entries in position independent" " code."); // 32-bit ELF always uses a small code model toc access. replaceWith(PPC::LWZtoc, N, MVT::i32); return; } assert(isAIXABI && "ELF ABI already handled"); if (hasTocDataAttr(N->getOperand(0), CurDAG->getDataLayout().getPointerSize())) { replaceWith(PPC::ADDItoc, N, MVT::i32); return; } replaceWith(PPC::LWZtoc, N, MVT::i32); return; } if (isPPC64 && CModel == CodeModel::Small) { assert(isAIXABI && "ELF ABI handled in common SelectCode"); if (hasTocDataAttr(N->getOperand(0), CurDAG->getDataLayout().getPointerSize())) { replaceWith(PPC::ADDItoc8, N, MVT::i64); return; } // Break if it doesn't have toc data attribute. Proceed with common // SelectCode. break; } assert(CModel != CodeModel::Small && "All small code models handled."); assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following."); // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We // generate two instructions as described below. The first source operand // is a symbol reference. If it must be toc-referenced according to // Subtarget, we generate: // [32-bit AIX] // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) // [64-bit ELF/AIX] // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) // Otherwise we generate: // ADDItocL(ADDIStocHA8(%x2, @sym), @sym) SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDNode *Tmp = CurDAG->getMachineNode( isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA); if (PPCLowering->isAccessedAsGotIndirect(GA)) { // If it is accessed as got-indirect, we need an extra LWZ/LD to load // the address. SDNode *MN = CurDAG->getMachineNode( isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0)); transferMemOperands(N, MN); ReplaceNode(N, MN); return; } // Build the address relative to the TOC-pointer. ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA)); return; } case PPCISD::PPC32_PICGOT: // Generate a PIC-safe GOT reference. assert(Subtarget->is32BitELFABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::i32); return; case PPCISD::VADD_SPLAT: { // This expands into one of three sequences, depending on whether // the first operand is odd or even, positive or negative. 
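// For example, a v4i32 splat of 18 (even) becomes vspltisw 9 followed by
// vadduwm, while a splat of 17 (odd, positive) becomes vspltisw 1 and
// vspltisw -16 combined with vsubuwm (1 - (-16) == 17).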
assert(isa(N->getOperand(0)) && isa(N->getOperand(1)) && "Invalid operand on VADD_SPLAT!"); int Elt = N->getConstantOperandVal(0); int EltSize = N->getConstantOperandVal(1); unsigned Opc1, Opc2, Opc3; EVT VT; if (EltSize == 1) { Opc1 = PPC::VSPLTISB; Opc2 = PPC::VADDUBM; Opc3 = PPC::VSUBUBM; VT = MVT::v16i8; } else if (EltSize == 2) { Opc1 = PPC::VSPLTISH; Opc2 = PPC::VADDUHM; Opc3 = PPC::VSUBUHM; VT = MVT::v8i16; } else { assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); Opc1 = PPC::VSPLTISW; Opc2 = PPC::VADDUWM; Opc3 = PPC::VSUBUWM; VT = MVT::v4i32; } if ((Elt & 1) == 0) { // Elt is even, in the range [-32,-18] + [16,30]. // // Convert: VADD_SPLAT elt, size // Into: tmp = VSPLTIS[BHW] elt // VADDU[BHW]M tmp, tmp // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 SDValue EltVal = getI32Imm(Elt >> 1, dl); SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); SDValue TmpVal = SDValue(Tmp, 0); ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); return; } else if (Elt > 0) { // Elt is odd and positive, in the range [17,31]. // // Convert: VADD_SPLAT elt, size // Into: tmp1 = VSPLTIS[BHW] elt-16 // tmp2 = VSPLTIS[BHW] -16 // VSUBU[BHW]M tmp1, tmp2 SDValue EltVal = getI32Imm(Elt - 16, dl); SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); EltVal = getI32Imm(-16, dl); SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), SDValue(Tmp2, 0))); return; } else { // Elt is odd and negative, in the range [-31,-17]. // // Convert: VADD_SPLAT elt, size // Into: tmp1 = VSPLTIS[BHW] elt+16 // tmp2 = VSPLTIS[BHW] -16 // VADDU[BHW]M tmp1, tmp2 SDValue EltVal = getI32Imm(Elt + 16, dl); SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); EltVal = getI32Imm(-16, dl); SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), SDValue(Tmp2, 0))); return; } } case PPCISD::LD_SPLAT: { // Here we want to handle splat load for type v16i8 and v8i16 when there is // no direct move, we don't need to use stack for this case. If target has // direct move, we should be able to get the best selection in the .td file. if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove()) break; EVT Type = N->getValueType(0); if (Type != MVT::v16i8 && Type != MVT::v8i16) break; // If the alignment for the load is 16 or bigger, we don't need the // permutated mask to get the required value. The value must be the 0 // element in big endian target or 7/15 in little endian target in the // result vsx register of lvx instruction. // Select the instruction in the .td file. if (cast(N)->getAlign() >= Align(16) && isOffsetMultipleOf(N, 16)) break; SDValue ZeroReg = CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO, Subtarget->isPPC64() ? MVT::i64 : MVT::i32); unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI; // v16i8 LD_SPLAT addr // ======> // Mask = LVSR/LVSL 0, addr // LoadLow = LVX 0, addr // Perm = VPERM LoadLow, LoadLow, Mask // Splat = VSPLTB 15/0, Perm // // v8i16 LD_SPLAT addr // ======> // Mask = LVSR/LVSL 0, addr // LoadLow = LVX 0, addr // LoadHigh = LVX (LI, 1), addr // Perm = VPERM LoadLow, LoadHigh, Mask // Splat = VSPLTH 7/0, Perm unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH; unsigned SplatElemIndex = Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0; SDNode *Mask = CurDAG->getMachineNode( Subtarget->isLittleEndian() ? 
PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg, N->getOperand(1)); SDNode *LoadLow = CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other, {ZeroReg, N->getOperand(1), N->getOperand(0)}); SDNode *LoadHigh = LoadLow; if (Type == MVT::v8i16) { LoadHigh = CurDAG->getMachineNode( PPC::LVX, dl, MVT::v16i8, MVT::Other, {SDValue(CurDAG->getMachineNode( LIOpcode, dl, MVT::i32, CurDAG->getTargetConstant(1, dl, MVT::i8)), 0), N->getOperand(1), SDValue(LoadLow, 1)}); } CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1)); transferMemOperands(N, LoadHigh); SDNode *Perm = CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0), SDValue(LoadHigh, 0), SDValue(Mask, 0)); CurDAG->SelectNodeTo(N, SplatOp, Type, CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8), SDValue(Perm, 0)); return; } } SelectCode(N); } // If the target supports the cmpb instruction, do the idiom recognition here. // We don't do this as a DAG combine because we don't want to do it as nodes // are being combined (because we might miss part of the eventual idiom). We // don't want to do it during instruction selection because we want to reuse // the logic for lowering the masking operations already part of the // instruction selector. SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { SDLoc dl(N); assert(N->getOpcode() == ISD::OR && "Only OR nodes are supported for CMPB"); SDValue Res; if (!Subtarget->hasCMPB()) return Res; if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return Res; EVT VT = N->getValueType(0); SDValue RHS, LHS; bool BytesFound[8] = {false, false, false, false, false, false, false, false}; uint64_t Mask = 0, Alt = 0; auto IsByteSelectCC = [this](SDValue O, unsigned &b, uint64_t &Mask, uint64_t &Alt, SDValue &LHS, SDValue &RHS) { if (O.getOpcode() != ISD::SELECT_CC) return false; ISD::CondCode CC = cast(O.getOperand(4))->get(); if (!isa(O.getOperand(2)) || !isa(O.getOperand(3))) return false; uint64_t PM = O.getConstantOperandVal(2); uint64_t PAlt = O.getConstantOperandVal(3); for (b = 0; b < 8; ++b) { uint64_t Mask = UINT64_C(0xFF) << (8*b); if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) break; } if (b == 8) return false; Mask |= PM; Alt |= PAlt; if (!isa(O.getOperand(1)) || O.getConstantOperandVal(1) != 0) { SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); if (Op1.getOpcode() == ISD::TRUNCATE) Op1 = Op1.getOperand(0); if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && isa(Op0.getOperand(1))) { unsigned Bits = Op0.getValueSizeInBits(); if (b != Bits/8-1) return false; if (Op0.getConstantOperandVal(1) != Bits-8) return false; LHS = Op0.getOperand(0); RHS = Op1.getOperand(0); return true; } // When we have small integers (i16 to be specific), the form present // post-legalization uses SETULT in the SELECT_CC for the // higher-order byte, depending on the fact that the // even-higher-order bytes are known to all be zero, for example: // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult // (so when the second byte is the same, because all higher-order // bits from bytes 3 and 4 are known to be zero, the result of the // xor can be at most 255) if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && isa(O.getOperand(1))) { uint64_t ULim = O.getConstantOperandVal(1); if (ULim != (UINT64_C(1) << b*8)) return false; // Now we need to make sure that the upper bytes are known to be // zero. 
unsigned Bits = Op0.getValueSizeInBits(); if (!CurDAG->MaskedValueIsZero( Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) return false; LHS = Op0.getOperand(0); RHS = Op0.getOperand(1); return true; } return false; } if (CC != ISD::SETEQ) return false; SDValue Op = O.getOperand(0); if (Op.getOpcode() == ISD::AND) { if (!isa(Op.getOperand(1))) return false; if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b))) return false; SDValue XOR = Op.getOperand(0); if (XOR.getOpcode() == ISD::TRUNCATE) XOR = XOR.getOperand(0); if (XOR.getOpcode() != ISD::XOR) return false; LHS = XOR.getOperand(0); RHS = XOR.getOperand(1); return true; } else if (Op.getOpcode() == ISD::SRL) { if (!isa(Op.getOperand(1))) return false; unsigned Bits = Op.getValueSizeInBits(); if (b != Bits/8-1) return false; if (Op.getConstantOperandVal(1) != Bits-8) return false; SDValue XOR = Op.getOperand(0); if (XOR.getOpcode() == ISD::TRUNCATE) XOR = XOR.getOperand(0); if (XOR.getOpcode() != ISD::XOR) return false; LHS = XOR.getOperand(0); RHS = XOR.getOperand(1); return true; } return false; }; SmallVector Queue(1, SDValue(N, 0)); while (!Queue.empty()) { SDValue V = Queue.pop_back_val(); for (const SDValue &O : V.getNode()->ops()) { unsigned b = 0; uint64_t M = 0, A = 0; SDValue OLHS, ORHS; if (O.getOpcode() == ISD::OR) { Queue.push_back(O); } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { if (!LHS) { LHS = OLHS; RHS = ORHS; BytesFound[b] = true; Mask |= M; Alt |= A; } else if ((LHS == ORHS && RHS == OLHS) || (RHS == ORHS && LHS == OLHS)) { BytesFound[b] = true; Mask |= M; Alt |= A; } else { return Res; } } else { return Res; } } } unsigned LastB = 0, BCnt = 0; for (unsigned i = 0; i < 8; ++i) if (BytesFound[LastB]) { ++BCnt; LastB = i; } if (!LastB || BCnt < 2) return Res; // Because we'll be zero-extending the output anyway if don't have a specific // value for each input byte (via the Mask), we can 'anyext' the inputs. if (LHS.getValueType() != VT) { LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); } Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1); if (NonTrivialMask && !Alt) { // Res = Mask & CMPB Res = CurDAG->getNode(ISD::AND, dl, VT, Res, CurDAG->getConstant(Mask, dl, VT)); } else if (Alt) { // Res = (CMPB & Mask) | (~CMPB & Alt) // Which, as suggested here: // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge // can be written as: // Res = Alt ^ ((Alt ^ Mask) & CMPB) // useful because the (Alt ^ Mask) can be pre-computed. Res = CurDAG->getNode(ISD::AND, dl, VT, Res, CurDAG->getConstant(Mask ^ Alt, dl, VT)); Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, CurDAG->getConstant(Alt, dl, VT)); } return Res; } // When CR bit registers are enabled, an extension of an i1 variable to a i32 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus // involves constant materialization of a 0 or a 1 or both. If the result of // the extension is then operated upon by some operator that can be constant // folded with a constant 0 or 1, and that constant can be materialized using // only one instruction (like a zero or one), then we should fold in those // operations with the select. 
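// For example, if the only user of (zext i1 %c to i32) is an add of the
// constant 5, the extension and the add fold into (select %c, 6, 5); the loop
// below then retries the same fold against the new select's single user.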
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { if (!Subtarget->useCRBits()) return; if (N->getOpcode() != ISD::ZERO_EXTEND && N->getOpcode() != ISD::SIGN_EXTEND && N->getOpcode() != ISD::ANY_EXTEND) return; if (N->getOperand(0).getValueType() != MVT::i1) return; if (!N->hasOneUse()) return; SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Cond = N->getOperand(0); SDValue ConstTrue = CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT); SDValue ConstFalse = CurDAG->getConstant(0, dl, VT); do { SDNode *User = *N->use_begin(); if (User->getNumOperands() != 2) break; auto TryFold = [this, N, User, dl](SDValue Val) { SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1); SDValue O0 = UserO0.getNode() == N ? Val : UserO0; SDValue O1 = UserO1.getNode() == N ? Val : UserO1; return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, User->getValueType(0), {O0, O1}); }; // FIXME: When the semantics of the interaction between select and undef // are clearly defined, it may turn out to be unnecessary to break here. SDValue TrueRes = TryFold(ConstTrue); if (!TrueRes || TrueRes.isUndef()) break; SDValue FalseRes = TryFold(ConstFalse); if (!FalseRes || FalseRes.isUndef()) break; // For us to materialize these using one instruction, we must be able to // represent them as signed 16-bit integers. uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal(); if (!isInt<16>(True) || !isInt<16>(False)) break; // We can replace User with a new SELECT node, and try again to see if we // can fold the select with its user. Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes); N = User; ConstTrue = TrueRes; ConstFalse = FalseRes; } while (N->hasOneUse()); } void PPCDAGToDAGISel::PreprocessISelDAG() { SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; if (N->use_empty()) continue; SDValue Res; switch (N->getOpcode()) { default: break; case ISD::OR: Res = combineToCMPB(N); break; } if (!Res) foldBoolExts(Res, N); if (Res) { LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: "); LLVM_DEBUG(N->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nNew: "); LLVM_DEBUG(Res.getNode()->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); MadeChange = true; } } if (MadeChange) CurDAG->RemoveDeadNodes(); } /// PostprocessISelDAG - Perform some late peephole optimizations /// on the DAG representation. void PPCDAGToDAGISel::PostprocessISelDAG() { // Skip peepholes at -O0. if (TM.getOptLevel() == CodeGenOptLevel::None) return; PeepholePPC64(); PeepholeCROps(); PeepholePPC64ZExt(); } // Check if all users of this node will become isel where the second operand // is the constant zero. If this is so, and if we can negate the condition, // then we can flip the true and false operands. This will allow the zero to // be folded with the isel so that we don't need to materialize a register // containing zero. bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { for (const SDNode *User : N->uses()) { if (!User->isMachineOpcode()) return false; if (User->getMachineOpcode() != PPC::SELECT_I4 && User->getMachineOpcode() != PPC::SELECT_I8) return false; SDNode *Op1 = User->getOperand(1).getNode(); SDNode *Op2 = User->getOperand(2).getNode(); // If we have a degenerate select with two equal operands, swapping will // not do anything, and we may run into an infinite loop. 
if (Op1 == Op2) return false; if (!Op2->isMachineOpcode()) return false; if (Op2->getMachineOpcode() != PPC::LI && Op2->getMachineOpcode() != PPC::LI8) return false; if (!isNullConstant(Op2->getOperand(0))) return false; } return true; } void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { SmallVector ToReplace; for (SDNode *User : N->uses()) { assert((User->getMachineOpcode() == PPC::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && "Must have all select users"); ToReplace.push_back(User); } for (SDNode *User : ToReplace) { SDNode *ResNode = CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), User->getValueType(0), User->getOperand(0), User->getOperand(2), User->getOperand(1)); LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); LLVM_DEBUG(User->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nNew: "); LLVM_DEBUG(ResNode->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); ReplaceUses(User, ResNode); } } void PPCDAGToDAGISel::PeepholeCROps() { bool IsModified; do { IsModified = false; for (SDNode &Node : CurDAG->allnodes()) { MachineSDNode *MachineNode = dyn_cast(&Node); if (!MachineNode || MachineNode->use_empty()) continue; SDNode *ResNode = MachineNode; bool Op1Set = false, Op1Unset = false, Op1Not = false, Op2Set = false, Op2Unset = false, Op2Not = false; unsigned Opcode = MachineNode->getMachineOpcode(); switch (Opcode) { default: break; case PPC::CRAND: case PPC::CRNAND: case PPC::CROR: case PPC::CRXOR: case PPC::CRNOR: case PPC::CREQV: case PPC::CRANDC: case PPC::CRORC: { SDValue Op = MachineNode->getOperand(1); if (Op.isMachineOpcode()) { if (Op.getMachineOpcode() == PPC::CRSET) Op2Set = true; else if (Op.getMachineOpcode() == PPC::CRUNSET) Op2Unset = true; else if ((Op.getMachineOpcode() == PPC::CRNOR && Op.getOperand(0) == Op.getOperand(1)) || Op.getMachineOpcode() == PPC::CRNOT) Op2Not = true; } [[fallthrough]]; } case PPC::BC: case PPC::BCn: case PPC::SELECT_I4: case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: case PPC::SELECT_SPE: case PPC::SELECT_SPE4: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: case PPC::SELECT_VSSRC: case PPC::SELECT_VSRC: { SDValue Op = MachineNode->getOperand(0); if (Op.isMachineOpcode()) { if (Op.getMachineOpcode() == PPC::CRSET) Op1Set = true; else if (Op.getMachineOpcode() == PPC::CRUNSET) Op1Unset = true; else if ((Op.getMachineOpcode() == PPC::CRNOR && Op.getOperand(0) == Op.getOperand(1)) || Op.getMachineOpcode() == PPC::CRNOT) Op1Not = true; } } break; } bool SelectSwap = false; switch (Opcode) { default: break; case PPC::CRAND: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // x & x = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Set) // 1 & y = y ResNode = MachineNode->getOperand(1).getNode(); else if (Op2Set) // x & 1 = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Unset || Op2Unset) // x & 0 = 0 & y = 0 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), MVT::i1); else if (Op1Not) // ~x & y = andc(y, x) ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(0). getOperand(0)); else if (Op2Not) // x & ~y = andc(x, y) ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). 
getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CRNAND: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // nand(x, x) -> nor(x, x) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(0)); else if (Op1Set) // nand(1, y) -> nor(y, y) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op2Set) // nand(x, 1) -> nor(x, x) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(0)); else if (Op1Unset || Op2Unset) // nand(x, 0) = nand(0, y) = 1 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), MVT::i1); else if (Op1Not) // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // nand(x, ~y) = ~x | y = orc(y, x) ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1). getOperand(0), MachineNode->getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CROR: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // x | x = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Set || Op2Set) // x | 1 = 1 | y = 1 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), MVT::i1); else if (Op1Unset) // 0 | y = y ResNode = MachineNode->getOperand(1).getNode(); else if (Op2Unset) // x | 0 = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Not) // ~x | y = orc(y, x) ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(0). getOperand(0)); else if (Op2Not) // x | ~y = orc(x, y) ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CRXOR: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // xor(x, x) = 0 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), MVT::i1); else if (Op1Set) // xor(1, y) -> nor(y, y) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op2Set) // xor(x, 1) -> nor(x, x) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(0)); else if (Op1Unset) // xor(0, y) = y ResNode = MachineNode->getOperand(1).getNode(); else if (Op2Unset) // xor(x, 0) = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Not) // xor(~x, y) = eqv(x, y) ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). 
getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // xor(x, ~y) = eqv(x, y) ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CRNOR: if (Op1Set || Op2Set) // nor(1, y) -> 0 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), MVT::i1); else if (Op1Unset) // nor(0, y) = ~y -> nor(y, y) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op2Unset) // nor(x, 0) = ~x ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(0)); else if (Op1Not) // nor(~x, y) = andc(x, y) ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // nor(x, ~y) = andc(y, x) ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1). getOperand(0), MachineNode->getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CREQV: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // eqv(x, x) = 1 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), MVT::i1); else if (Op1Set) // eqv(1, y) = y ResNode = MachineNode->getOperand(1).getNode(); else if (Op2Set) // eqv(x, 1) = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Unset) // eqv(0, y) = ~y -> nor(y, y) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op2Unset) // eqv(x, 0) = ~x ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(0)); else if (Op1Not) // eqv(~x, y) = xor(x, y) ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // eqv(x, ~y) = xor(x, y) ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CRANDC: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // andc(x, x) = 0 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), MVT::i1); else if (Op1Set) // andc(1, y) = ~y ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op1Unset || Op2Set) // andc(0, y) = andc(x, 1) = 0 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), MVT::i1); else if (Op2Unset) // andc(x, 0) = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Not) // andc(~x, y) = ~(x | y) = nor(x, y) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). 
getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // andc(x, ~y) = x & y ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(0)); SelectSwap = true; } break; case PPC::CRORC: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // orc(x, x) = 1 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), MVT::i1); else if (Op1Set || Op2Unset) // orc(1, y) = orc(x, 0) = 1 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), MVT::i1); else if (Op2Set) // orc(x, 1) = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Unset) // orc(0, y) = ~y ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op1Not) // orc(~x, y) = ~(x & y) = nand(x, y) ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // orc(x, ~y) = x | y ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(0)); SelectSwap = true; } break; case PPC::SELECT_I4: case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: case PPC::SELECT_SPE: case PPC::SELECT_SPE4: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: case PPC::SELECT_VSSRC: case PPC::SELECT_VSRC: if (Op1Set) ResNode = MachineNode->getOperand(1).getNode(); else if (Op1Unset) ResNode = MachineNode->getOperand(2).getNode(); else if (Op1Not) ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), SDLoc(MachineNode), MachineNode->getValueType(0), MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(2), MachineNode->getOperand(1)); break; case PPC::BC: case PPC::BCn: if (Op1Not) ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn : PPC::BC, SDLoc(MachineNode), MVT::Other, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1), MachineNode->getOperand(2)); // FIXME: Handle Op1Set, Op1Unset here too. break; } // If we're inverting this node because it is used only by selects that // we'd like to swap, then swap the selects before the node replacement. if (SelectSwap) SwapAllSelectUsers(MachineNode); if (ResNode != MachineNode) { LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); LLVM_DEBUG(MachineNode->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nNew: "); LLVM_DEBUG(ResNode->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); ReplaceUses(MachineNode, ResNode); IsModified = true; } } if (IsModified) CurDAG->RemoveDeadNodes(); } while (IsModified); } // Gather the set of 32-bit operations that are known to have their // higher-order 32 bits zero, where ToPromote contains all such operations. static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl &ToPromote) { if (!Op32.isMachineOpcode()) return false; // First, check for the "frontier" instructions (those that will clear the // higher-order 32 bits. // For RLWINM and RLWNM, we need to make sure that the mask does not wrap // around. If it does not, then these instructions will clear the // higher-order bits. 
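  // (Operands 2 and 3 of RLWINM/RLWNM are MB and ME: a non-wrapping mask,
  // MB <= ME, selects bits only from the low 32-bit word, so the high word of
  // the 64-bit result is zero; a wrapping mask can also keep bits from the
  // high word and cannot be treated as a frontier here.)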
if ((Op32.getMachineOpcode() == PPC::RLWINM || Op32.getMachineOpcode() == PPC::RLWNM) && Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) { ToPromote.insert(Op32.getNode()); return true; } // SLW and SRW always clear the higher-order bits. if (Op32.getMachineOpcode() == PPC::SLW || Op32.getMachineOpcode() == PPC::SRW) { ToPromote.insert(Op32.getNode()); return true; } // For LI and LIS, we need the immediate to be positive (so that it is not // sign extended). if (Op32.getMachineOpcode() == PPC::LI || Op32.getMachineOpcode() == PPC::LIS) { if (!isUInt<15>(Op32.getConstantOperandVal(0))) return false; ToPromote.insert(Op32.getNode()); return true; } // LHBRX and LWBRX always clear the higher-order bits. if (Op32.getMachineOpcode() == PPC::LHBRX || Op32.getMachineOpcode() == PPC::LWBRX) { ToPromote.insert(Op32.getNode()); return true; } // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended. if (Op32.getMachineOpcode() == PPC::CNTLZW || Op32.getMachineOpcode() == PPC::CNTTZW) { ToPromote.insert(Op32.getNode()); return true; } // Next, check for those instructions we can look through. // Assuming the mask does not wrap around, then the higher-order bits are // taken directly from the first operand. if (Op32.getMachineOpcode() == PPC::RLWIMI && Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) { SmallPtrSet ToPromote1; if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) return false; ToPromote.insert(Op32.getNode()); ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); return true; } // For OR, the higher-order bits are zero if that is true for both operands. // For SELECT_I4, the same is true (but the relevant operand numbers are // shifted by 1). if (Op32.getMachineOpcode() == PPC::OR || Op32.getMachineOpcode() == PPC::SELECT_I4) { unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0; SmallPtrSet ToPromote1; if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1)) return false; if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1)) return false; ToPromote.insert(Op32.getNode()); ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); return true; } // For ORI and ORIS, we need the higher-order bits of the first operand to be // zero, and also for the constant to be positive (so that it is not sign // extended). if (Op32.getMachineOpcode() == PPC::ORI || Op32.getMachineOpcode() == PPC::ORIS) { SmallPtrSet ToPromote1; if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) return false; if (!isUInt<15>(Op32.getConstantOperandVal(1))) return false; ToPromote.insert(Op32.getNode()); ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); return true; } // The higher-order bits of AND are zero if that is true for at least one of // the operands. if (Op32.getMachineOpcode() == PPC::AND) { SmallPtrSet ToPromote1, ToPromote2; bool Op0OK = PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); bool Op1OK = PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2); if (!Op0OK && !Op1OK) return false; ToPromote.insert(Op32.getNode()); if (Op0OK) ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); if (Op1OK) ToPromote.insert(ToPromote2.begin(), ToPromote2.end()); return true; } // For ANDI and ANDIS, the higher-order bits are zero if either that is true // of the first operand, or if the second operand is positive (so that it is // not sign extended). 
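  // (ANDI_rec and ANDIS_rec are the record forms, andi. and andis.; the extra
  // CR0 definition they carry does not affect the GPR result, so the reasoning
  // above applies unchanged.)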
if (Op32.getMachineOpcode() == PPC::ANDI_rec || Op32.getMachineOpcode() == PPC::ANDIS_rec) { SmallPtrSet ToPromote1; bool Op0OK = PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1)); if (!Op0OK && !Op1OK) return false; ToPromote.insert(Op32.getNode()); if (Op0OK) ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); return true; } return false; } void PPCDAGToDAGISel::PeepholePPC64ZExt() { if (!Subtarget->isPPC64()) return; // When we zero-extend from i32 to i64, we use a pattern like this: // def : Pat<(i64 (zext i32:$in)), // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32), // 0, 32)>; // There are several 32-bit shift/rotate instructions, however, that will // clear the higher-order bits of their output, rendering the RLDICL // unnecessary. When that happens, we remove it here, and redefine the // relevant 32-bit operation to be a 64-bit operation. SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. if (N->use_empty() || !N->isMachineOpcode()) continue; if (N->getMachineOpcode() != PPC::RLDICL) continue; if (N->getConstantOperandVal(1) != 0 || N->getConstantOperandVal(2) != 32) continue; SDValue ISR = N->getOperand(0); if (!ISR.isMachineOpcode() || ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG) continue; if (!ISR.hasOneUse()) continue; if (ISR.getConstantOperandVal(2) != PPC::sub_32) continue; SDValue IDef = ISR.getOperand(0); if (!IDef.isMachineOpcode() || IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF) continue; // We now know that we're looking at a canonical i32 -> i64 zext. See if we // can get rid of it. SDValue Op32 = ISR->getOperand(1); if (!Op32.isMachineOpcode()) continue; // There are some 32-bit instructions that always clear the high-order 32 // bits, there are also some instructions (like AND) that we can look // through. SmallPtrSet ToPromote; if (!PeepholePPC64ZExtGather(Op32, ToPromote)) continue; // If the ToPromote set contains nodes that have uses outside of the set // (except for the original INSERT_SUBREG), then abort the transformation. bool OutsideUse = false; for (SDNode *PN : ToPromote) { for (SDNode *UN : PN->uses()) { if (!ToPromote.count(UN) && UN != ISR.getNode()) { OutsideUse = true; break; } } if (OutsideUse) break; } if (OutsideUse) continue; MadeChange = true; // We now know that this zero extension can be removed by promoting to // nodes in ToPromote to 64-bit operations, where for operations in the // frontier of the set, we need to insert INSERT_SUBREGs for their // operands. 
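    // For example (illustrative):
    //   t1: i32 = SRW t0, t2                             ; high bits known zero
    //   t3: i64 = INSERT_SUBREG IMPLICIT_DEF, t1, sub_32
    //   t4: i64 = RLDICL t3, 0, 32                       ; canonical zext
    // Here SRW is morphed into SRW8 (with any remaining i32 operands wrapped
    // in new INSERT_SUBREGs), and users of t4 are redirected to the promoted
    // node, leaving the RLDICL and INSERT_SUBREG dead.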
for (SDNode *PN : ToPromote) { unsigned NewOpcode; switch (PN->getMachineOpcode()) { default: llvm_unreachable("Don't know the 64-bit variant of this instruction"); case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break; case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break; case PPC::SLW: NewOpcode = PPC::SLW8; break; case PPC::SRW: NewOpcode = PPC::SRW8; break; case PPC::LI: NewOpcode = PPC::LI8; break; case PPC::LIS: NewOpcode = PPC::LIS8; break; case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break; case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break; case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; case PPC::OR: NewOpcode = PPC::OR8; break; case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; case PPC::ORI: NewOpcode = PPC::ORI8; break; case PPC::ORIS: NewOpcode = PPC::ORIS8; break; case PPC::AND: NewOpcode = PPC::AND8; break; case PPC::ANDI_rec: NewOpcode = PPC::ANDI8_rec; break; case PPC::ANDIS_rec: NewOpcode = PPC::ANDIS8_rec; break; } // Note: During the replacement process, the nodes will be in an // inconsistent state (some instructions will have operands with values // of the wrong type). Once done, however, everything should be right // again. SmallVector Ops; for (const SDValue &V : PN->ops()) { if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 && !isa(V)) { SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) }; SDNode *ReplOp = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V), ISR.getNode()->getVTList(), ReplOpOps); Ops.push_back(SDValue(ReplOp, 0)); } else { Ops.push_back(V); } } // Because all to-be-promoted nodes only have users that are other // promoted nodes (or the original INSERT_SUBREG), we can safely replace // the i32 result value type with i64. SmallVector NewVTs; SDVTList VTs = PN->getVTList(); for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i) if (VTs.VTs[i] == MVT::i32) NewVTs.push_back(MVT::i64); else NewVTs.push_back(VTs.VTs[i]); LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: "); LLVM_DEBUG(PN->dump(CurDAG)); CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops); LLVM_DEBUG(dbgs() << "\nNew: "); LLVM_DEBUG(PN->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); } // Now we replace the original zero extend and its associated INSERT_SUBREG // with the value feeding the INSERT_SUBREG (which has now been promoted to // return an i64). LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: "); LLVM_DEBUG(N->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nNew: "); LLVM_DEBUG(Op32.getNode()->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); ReplaceUses(N, Op32.getNode()); } if (MadeChange) CurDAG->RemoveDeadNodes(); } static bool isVSXSwap(SDValue N) { if (!N->isMachineOpcode()) return false; unsigned Opc = N->getMachineOpcode(); // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate // operand is 2. if (Opc == PPC::XXPERMDIs) { return isa(N->getOperand(1)) && N->getConstantOperandVal(1) == 2; } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) { return N->getOperand(0) == N->getOperand(1) && isa(N->getOperand(2)) && N->getConstantOperandVal(2) == 2; } return false; } // TODO: Make this complete and replace with a table-gen bit. 
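// An operation is lane-insensitive if swapping the doublewords of both inputs
// and then of the result gives the same value as applying the operation
// directly (element-wise arithmetic and bitwise ops qualify); for such ops the
// three swaps in (xxswap (vec-op (xxswap a), (xxswap b))) cancel out.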
static bool isLaneInsensitive(SDValue N) { if (!N->isMachineOpcode()) return false; unsigned Opc = N->getMachineOpcode(); switch (Opc) { default: return false; case PPC::VAVGSB: case PPC::VAVGUB: case PPC::VAVGSH: case PPC::VAVGUH: case PPC::VAVGSW: case PPC::VAVGUW: case PPC::VMAXFP: case PPC::VMAXSB: case PPC::VMAXUB: case PPC::VMAXSH: case PPC::VMAXUH: case PPC::VMAXSW: case PPC::VMAXUW: case PPC::VMINFP: case PPC::VMINSB: case PPC::VMINUB: case PPC::VMINSH: case PPC::VMINUH: case PPC::VMINSW: case PPC::VMINUW: case PPC::VADDFP: case PPC::VADDUBM: case PPC::VADDUHM: case PPC::VADDUWM: case PPC::VSUBFP: case PPC::VSUBUBM: case PPC::VSUBUHM: case PPC::VSUBUWM: case PPC::VAND: case PPC::VANDC: case PPC::VOR: case PPC::VORC: case PPC::VXOR: case PPC::VNOR: case PPC::VMULUWM: return true; } } // Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is // lane-insensitive. static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) { // Our desired xxswap might be source of COPY_TO_REGCLASS. // TODO: Can we put this a common method for DAG? auto SkipRCCopy = [](SDValue V) { while (V->isMachineOpcode() && V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) { // All values in the chain should have single use. if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode())) return SDValue(); V = V->getOperand(0); } return V.hasOneUse() ? V : SDValue(); }; SDValue VecOp = SkipRCCopy(N->getOperand(0)); if (!VecOp || !isLaneInsensitive(VecOp)) return; SDValue LHS = SkipRCCopy(VecOp.getOperand(0)), RHS = SkipRCCopy(VecOp.getOperand(1)); if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS)) return; // These swaps may still have chain-uses here, count on dead code elimination // in following passes to remove them. DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0)); DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0)); DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0)); } void PPCDAGToDAGISel::PeepholePPC64() { SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. if (N->use_empty() || !N->isMachineOpcode()) continue; if (isVSXSwap(SDValue(N, 0))) reduceVSXSwap(N, CurDAG); unsigned FirstOp; unsigned StorageOpcode = N->getMachineOpcode(); bool RequiresMod4Offset = false; switch (StorageOpcode) { default: continue; case PPC::LWA: case PPC::LD: case PPC::DFLOADf64: case PPC::DFLOADf32: RequiresMod4Offset = true; [[fallthrough]]; case PPC::LBZ: case PPC::LBZ8: case PPC::LFD: case PPC::LFS: case PPC::LHA: case PPC::LHA8: case PPC::LHZ: case PPC::LHZ8: case PPC::LWZ: case PPC::LWZ8: FirstOp = 0; break; case PPC::STD: case PPC::DFSTOREf64: case PPC::DFSTOREf32: RequiresMod4Offset = true; [[fallthrough]]; case PPC::STB: case PPC::STB8: case PPC::STFD: case PPC::STFS: case PPC::STH: case PPC::STH8: case PPC::STW: case PPC::STW8: FirstOp = 1; break; } // If this is a load or store with a zero offset, or within the alignment, // we may be able to fold an add-immediate into the memory operation. 
// The check against alignment is below, as it can't occur until we check // the arguments to N if (!isa(N->getOperand(FirstOp))) continue; SDValue Base = N->getOperand(FirstOp + 1); if (!Base.isMachineOpcode()) continue; unsigned Flags = 0; bool ReplaceFlags = true; // When the feeding operation is an add-immediate of some sort, // determine whether we need to add relocation information to the // target flags on the immediate operand when we fold it into the // load instruction. // // For something like ADDItocL, the relocation information is // inferred from the opcode; when we process it in the AsmPrinter, // we add the necessary relocation there. A load, though, can receive // relocation from various flavors of ADDIxxx, so we need to carry // the relocation information in the target flags. switch (Base.getMachineOpcode()) { default: continue; case PPC::ADDI8: case PPC::ADDI: // In some cases (such as TLS) the relocation information // is already in place on the operand, so copying the operand // is sufficient. ReplaceFlags = false; break; case PPC::ADDIdtprelL: Flags = PPCII::MO_DTPREL_LO; break; case PPC::ADDItlsldL: Flags = PPCII::MO_TLSLD_LO; break; case PPC::ADDItocL: Flags = PPCII::MO_TOC_LO; break; } SDValue ImmOpnd = Base.getOperand(1); // On PPC64, the TOC base pointer is guaranteed by the ABI only to have // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, // we might have needed different @ha relocation values for the offset // pointers). int MaxDisplacement = 7; if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { const GlobalValue *GV = GA->getGlobal(); Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement); } bool UpdateHBase = false; SDValue HBase = Base.getOperand(0); int Offset = N->getConstantOperandVal(FirstOp); if (ReplaceFlags) { if (Offset < 0 || Offset > MaxDisplacement) { // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only // one use, then we can do this for any offset, we just need to also // update the offset (i.e. the symbol addend) on the addis also. if (Base.getMachineOpcode() != PPC::ADDItocL) continue; if (!HBase.isMachineOpcode() || HBase.getMachineOpcode() != PPC::ADDIStocHA8) continue; if (!Base.hasOneUse() || !HBase.hasOneUse()) continue; SDValue HImmOpnd = HBase.getOperand(1); if (HImmOpnd != ImmOpnd) continue; UpdateHBase = true; } } else { // Global addresses can be folded, but only if they are sufficiently // aligned. if (RequiresMod4Offset) { if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { const GlobalValue *GV = GA->getGlobal(); Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); if (Alignment < 4) continue; } } // If we're directly folding the addend from an addi instruction, then: // 1. In general, the offset on the memory access must be zero. // 2. If the addend is a constant, then it can be combined with a // non-zero offset, but only if the result meets the encoding // requirements. if (auto *C = dyn_cast(ImmOpnd)) { Offset += C->getSExtValue(); if (RequiresMod4Offset && (Offset % 4) != 0) continue; if (!isInt<16>(Offset)) continue; ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), ImmOpnd.getValueType()); } else if (Offset != 0) { continue; } } // We found an opportunity. Reverse the operands from the add // immediate and substitute them into the load or store. If // needed, update the target flags for the immediate operand to // reflect the necessary relocation information. 
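    // For example (illustrative):
    //   %addr = ADDI8 %base, 16
    //   %val  = LD 0, %addr
    // becomes
    //   %val  = LD 16, %base
    // after which the ADDI8 may become dead and is removed below.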
LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); LLVM_DEBUG(Base->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nN: "); LLVM_DEBUG(N->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); // If the relocation information isn't already present on the // immediate operand, add it now. if (ReplaceFlags) { if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); // We can't perform this optimization for data whose alignment // is insufficient for the instruction encoding. if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) { LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); } else if (ConstantPoolSDNode *CP = dyn_cast(ImmOpnd)) { const Constant *C = CP->getConstVal(); ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(), Offset, Flags); } } if (FirstOp == 1) // Store (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, Base.getOperand(0), N->getOperand(3)); else // Load (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), N->getOperand(2)); if (UpdateHBase) (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), ImmOpnd); // The add-immediate may now be dead, in which case remove it. if (Base.getNode()->use_empty()) CurDAG->RemoveDeadNode(Base.getNode()); } } /// createPPCISelDag - This pass converts a legalized DAG into a /// PowerPC-specific DAG, ready for instruction scheduling. /// FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, CodeGenOptLevel OptLevel) { return new PPCDAGToDAGISel(TM, OptLevel); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 249f4a7710e0..5d207dcfd18d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1,4936 +1,4936 @@ //===- InstructionCombining.cpp - Combine multiple instructions -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // InstructionCombining - Combine instructions to form fewer, simple // instructions. This pass does not modify the CFG. This pass is where // algebraic simplification happens. // // This pass combines things like: // %Y = add i32 %X, 1 // %Z = add i32 %Y, 1 // into: // %Z = add i32 %X, 2 // // This is a simple worklist driven algorithm. // // This pass guarantees that the following canonicalizations are performed on // the program: // 1. If a binary operator has a constant operand, it is moved to the RHS // 2. Bitwise operators with constant operands are always grouped so that // shifts are performed first, then or's, then and's, then xor's. // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible // 4. All cmp instructions on boolean values are replaced with logical ops // 5. add X, X is represented as (X*2) => (X << 1) // 6. Multiplies with a power-of-two constant argument are transformed into // shifts. // ... etc. 
// //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/EHPersonalities.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include #include #include #include #include #include #include #define DEBUG_TYPE "instcombine" #include "llvm/Transforms/Utils/InstructionWorklist.h" #include using namespace llvm; using namespace llvm::PatternMatch; STATISTIC(NumWorklistIterations, "Number of instruction combining iterations performed"); STATISTIC(NumOneIteration, "Number of functions with one iteration"); STATISTIC(NumTwoIterations, "Number of functions with two iterations"); STATISTIC(NumThreeIterations, "Number of functions with three iterations"); STATISTIC(NumFourOrMoreIterations, "Number of functions with four or more iterations"); STATISTIC(NumCombined , "Number of insts combined"); STATISTIC(NumConstProp, "Number of constant folds"); STATISTIC(NumDeadInst , "Number of dead inst eliminated"); STATISTIC(NumSunkInst , "Number of instructions sunk"); STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc , "Number of reassociations"); DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited"); static cl::opt 
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true)); static cl::opt MaxSinkNumUsers( "instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking")); static cl::opt MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine")); // FIXME: Remove this flag when it is no longer necessary to convert // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false // increases variable availability at the cost of accuracy. Variables that // cannot be promoted by mem2reg or SROA will be described as living in memory // for their entire lifetime. However, passes like DSE and instcombine can // delete stores to the alloca, leading to misleading and inaccurate debug // information. This flag can be removed when those passes are fixed. static cl::opt ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true)); std::optional InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) { // Handle target specific intrinsics if (II.getCalledFunction()->isTargetIntrinsic()) { return TTI.instCombineIntrinsic(*this, II); } return std::nullopt; } std::optional InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) { // Handle target specific intrinsics if (II.getCalledFunction()->isTargetIntrinsic()) { return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known, KnownBitsComputed); } return std::nullopt; } std::optional InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts, APInt &PoisonElts2, APInt &PoisonElts3, std::function SimplifyAndSetOp) { // Handle target specific intrinsics if (II.getCalledFunction()->isTargetIntrinsic()) { return TTI.simplifyDemandedVectorEltsIntrinsic( *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3, SimplifyAndSetOp); } return std::nullopt; } bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { return TTI.isValidAddrSpaceCast(FromAS, ToAS); } Value *InstCombinerImpl::EmitGEPOffset(User *GEP) { return llvm::emitGEPOffset(&Builder, DL, GEP); } /// Legal integers and common types are considered desirable. This is used to /// avoid creating instructions with types that may not be supported well by the /// the backend. /// NOTE: This treats i8, i16 and i32 specially because they are common /// types in frontend languages. bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const { switch (BitWidth) { case 8: case 16: case 32: return true; default: return DL.isLegalInteger(BitWidth); } } /// Return true if it is desirable to convert an integer computation from a /// given bit width to a new bit width. /// We don't want to convert from a legal or desirable type (like i8) to an /// illegal type or from a smaller to a larger illegal type. A width of '1' /// is always treated as a desirable type because i1 is a fundamental type in /// IR, and there are many specialized optimizations for i1 types. /// Common/desirable widths are equally treated as legal to convert to, in /// order to open up more combining opportunities. bool InstCombinerImpl::shouldChangeType(unsigned FromWidth, unsigned ToWidth) const { bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth); bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth); // Convert to desirable widths even if they are not legal types. 
// Only shrink types, to prevent infinite loops. if (ToWidth < FromWidth && isDesirableIntType(ToWidth)) return true; // If this is a legal or desiable integer from type, and the result would be // an illegal type, don't do the transformation. if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal) return false; // Otherwise, if both are illegal, do not increase the size of the result. We // do allow things like i160 -> i64, but not i64 -> i160. if (!FromLegal && !ToLegal && ToWidth > FromWidth) return false; return true; } /// Return true if it is desirable to convert a computation from 'From' to 'To'. /// We don't want to convert from a legal to an illegal type or from a smaller /// to a larger illegal type. i1 is always treated as a legal type because it is /// a fundamental type in IR, and there are many specialized optimizations for /// i1 types. bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const { // TODO: This could be extended to allow vectors. Datalayout changes might be // needed to properly support that. if (!From->isIntegerTy() || !To->isIntegerTy()) return false; unsigned FromWidth = From->getPrimitiveSizeInBits(); unsigned ToWidth = To->getPrimitiveSizeInBits(); return shouldChangeType(FromWidth, ToWidth); } // Return true, if No Signed Wrap should be maintained for I. // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", // where both B and C should be ConstantInts, results in a constant that does // not overflow. This function only handles the Add and Sub opcodes. For // all other opcodes, the function conservatively returns false. static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { auto *OBO = dyn_cast(&I); if (!OBO || !OBO->hasNoSignedWrap()) return false; // We reason about Add and Sub Only. Instruction::BinaryOps Opcode = I.getOpcode(); if (Opcode != Instruction::Add && Opcode != Instruction::Sub) return false; const APInt *BVal, *CVal; if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal))) return false; bool Overflow = false; if (Opcode == Instruction::Add) (void)BVal->sadd_ov(*CVal, Overflow); else (void)BVal->ssub_ov(*CVal, Overflow); return !Overflow; } static bool hasNoUnsignedWrap(BinaryOperator &I) { auto *OBO = dyn_cast(&I); return OBO && OBO->hasNoUnsignedWrap(); } static bool hasNoSignedWrap(BinaryOperator &I) { auto *OBO = dyn_cast(&I); return OBO && OBO->hasNoSignedWrap(); } /// Conservatively clears subclassOptionalData after a reassociation or /// commutation. We preserve fast-math flags when applicable as they can be /// preserved. static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { FPMathOperator *FPMO = dyn_cast(&I); if (!FPMO) { I.clearSubclassOptionalData(); return; } FastMathFlags FMF = I.getFastMathFlags(); I.clearSubclassOptionalData(); I.setFastMathFlags(FMF); } /// Combine constant operands of associative operations either before or after a /// cast to eliminate one of the associative operations: /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2))) /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2)) static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC) { auto *Cast = dyn_cast(BinOp1->getOperand(0)); if (!Cast || !Cast->hasOneUse()) return false; // TODO: Enhance logic for other casts and remove this check. auto CastOpcode = Cast->getOpcode(); if (CastOpcode != Instruction::ZExt) return false; // TODO: Enhance logic for other BinOps and remove this check. 
if (!BinOp1->isBitwiseLogicOp()) return false; auto AssocOpcode = BinOp1->getOpcode(); auto *BinOp2 = dyn_cast(Cast->getOperand(0)); if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode) return false; Constant *C1, *C2; if (!match(BinOp1->getOperand(1), m_Constant(C1)) || !match(BinOp2->getOperand(1), m_Constant(C2))) return false; // TODO: This assumes a zext cast. // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2 // to the destination type might lose bits. // Fold the constants together in the destination type: // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC) const DataLayout &DL = IC.getDataLayout(); Type *DestTy = C1->getType(); Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL); if (!CastC2) return false; Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL); if (!FoldedC) return false; IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0)); IC.replaceOperand(*BinOp1, 1, FoldedC); BinOp1->dropPoisonGeneratingFlags(); Cast->dropPoisonGeneratingFlags(); return true; } // Simplifies IntToPtr/PtrToInt RoundTrip Cast. // inttoptr ( ptrtoint (x) ) --> x Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) { auto *IntToPtr = dyn_cast(Val); if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) == DL.getTypeSizeInBits(IntToPtr->getSrcTy())) { auto *PtrToInt = dyn_cast(IntToPtr->getOperand(0)); Type *CastTy = IntToPtr->getDestTy(); if (PtrToInt && CastTy->getPointerAddressSpace() == PtrToInt->getSrcTy()->getPointerAddressSpace() && DL.getTypeSizeInBits(PtrToInt->getSrcTy()) == DL.getTypeSizeInBits(PtrToInt->getDestTy())) return PtrToInt->getOperand(0); } return nullptr; } /// This performs a few simplifications for operators that are associative or /// commutative: /// /// Commutative operators: /// /// 1. Order operands such that they are listed from right (least complex) to /// left (most complex). This puts constants before unary operators before /// binary operators. /// /// Associative operators: /// /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. /// /// Associative and commutative operators: /// /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" /// if C1 and C2 are constants. bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Instruction::BinaryOps Opcode = I.getOpcode(); bool Changed = false; do { // Order operands such that they are listed from right (least complex) to // left (most complex). This puts constants before unary operators before // binary operators. if (I.isCommutative() && getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) Changed = !I.swapOperands(); if (I.isCommutative()) { if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) { replaceOperand(I, 0, Pair->first); replaceOperand(I, 1, Pair->second); Changed = true; } } BinaryOperator *Op0 = dyn_cast(I.getOperand(0)); BinaryOperator *Op1 = dyn_cast(I.getOperand(1)); if (I.isAssociative()) { // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. if (Op0 && Op0->getOpcode() == Opcode) { Value *A = Op0->getOperand(0); Value *B = Op0->getOperand(1); Value *C = I.getOperand(1); // Does "B op C" simplify? 
if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "A op V". replaceOperand(I, 0, A); replaceOperand(I, 1, V); bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0); bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0); // Conservatively clear all optional flags since they may not be // preserved by the reassociation. Reset nsw/nuw based on the above // analysis. ClearSubclassDataAfterReassociation(I); // Note: this is only valid because SimplifyBinOp doesn't look at // the operands to Op0. if (IsNUW) I.setHasNoUnsignedWrap(true); if (IsNSW) I.setHasNoSignedWrap(true); Changed = true; ++NumReassoc; continue; } } // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. if (Op1 && Op1->getOpcode() == Opcode) { Value *A = I.getOperand(0); Value *B = Op1->getOperand(0); Value *C = Op1->getOperand(1); // Does "A op B" simplify? if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op C". replaceOperand(I, 0, V); replaceOperand(I, 1, C); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; } } } if (I.isAssociative() && I.isCommutative()) { if (simplifyAssocCastAssoc(&I, *this)) { Changed = true; ++NumReassoc; continue; } // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. if (Op0 && Op0->getOpcode() == Opcode) { Value *A = Op0->getOperand(0); Value *B = Op0->getOperand(1); Value *C = I.getOperand(1); // Does "C op A" simplify? if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op B". replaceOperand(I, 0, V); replaceOperand(I, 1, B); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; } } // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. if (Op1 && Op1->getOpcode() == Opcode) { Value *A = I.getOperand(0); Value *B = Op1->getOperand(0); Value *C = Op1->getOperand(1); // Does "C op A" simplify? if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "B op V". replaceOperand(I, 0, B); replaceOperand(I, 1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; } } // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" // if C1 and C2 are constants. Value *A, *B; Constant *C1, *C2, *CRes; if (Op0 && Op1 && Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) && match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) && (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) { bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0) && hasNoUnsignedWrap(*Op1); BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ? 
BinaryOperator::CreateNUW(Opcode, A, B) : BinaryOperator::Create(Opcode, A, B); if (isa(NewBO)) { FastMathFlags Flags = I.getFastMathFlags() & Op0->getFastMathFlags() & Op1->getFastMathFlags(); NewBO->setFastMathFlags(Flags); } InsertNewInstWith(NewBO, I.getIterator()); NewBO->takeName(Op1); replaceOperand(I, 0, NewBO); replaceOperand(I, 1, CRes); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. ClearSubclassDataAfterReassociation(I); if (IsNUW) I.setHasNoUnsignedWrap(true); Changed = true; continue; } } // No further simplifications. return Changed; } while (true); } /// Return whether "X LOp (Y ROp Z)" is always equal to /// "(X LOp Y) ROp (X LOp Z)". static bool leftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp) { // X & (Y | Z) <--> (X & Y) | (X & Z) // X & (Y ^ Z) <--> (X & Y) ^ (X & Z) if (LOp == Instruction::And) return ROp == Instruction::Or || ROp == Instruction::Xor; // X | (Y & Z) <--> (X | Y) & (X | Z) if (LOp == Instruction::Or) return ROp == Instruction::And; // X * (Y + Z) <--> (X * Y) + (X * Z) // X * (Y - Z) <--> (X * Y) - (X * Z) if (LOp == Instruction::Mul) return ROp == Instruction::Add || ROp == Instruction::Sub; return false; } /// Return whether "(X LOp Y) ROp Z" is always equal to /// "(X ROp Z) LOp (Y ROp Z)". static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp) { if (Instruction::isCommutative(ROp)) return leftDistributesOverRight(ROp, LOp); // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts. return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp); // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", // but this requires knowing that the addition does not overflow and other // such subtleties. } /// This function returns identity value for given opcode, which can be used to /// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1). static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) { if (isa(V)) return nullptr; return ConstantExpr::getBinOpIdentity(Opcode, V->getType()); } /// This function predicates factorization using distributive laws. By default, /// it just returns the 'Op' inputs. But for special-cases like /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to /// allow more factorization opportunities. static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) { assert(Op && "Expected a binary operator"); LHS = Op->getOperand(0); RHS = Op->getOperand(1); if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) { Constant *C; if (match(Op, m_Shl(m_Value(), m_Constant(C)))) { // X << C --> X * (1 << C) RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C); return Instruction::Mul; } // TODO: We can add other conversions e.g. shr => div etc. } if (Instruction::isBitwiseLogicOp(TopOpcode)) { if (OtherOp && OtherOp->getOpcode() == Instruction::AShr && match(Op, m_LShr(m_NonNegative(), m_Value()))) { // lshr nneg C, X --> ashr nneg C, X return Instruction::AShr; } } return Op->getOpcode(); } /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). 
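/// For example (illustrative):
///   %ab = mul i32 %a, %b
///   %ac = mul i32 %a, %c
///   %r  = add i32 %ab, %ac
/// is rewritten to
///   %bc = add i32 %b, %c
///   %r  = mul i32 %a, %bc
/// when "%b + %c" simplifies or one of the original multiplies would become
/// dead.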
static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, InstCombiner::BuilderTy &Builder, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D) { assert(A && B && C && D && "All values must be provided"); Value *V = nullptr; Value *RetVal = nullptr; Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // Does "X op' Y" always equal "Y op' X"? bool InnerCommutative = Instruction::isCommutative(InnerOpcode); // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) { // Does the instruction have the form "(A op' B) op (A op' D)" or, in the // commutative case, "(A op' B) op (C op' A)"? if (A == C || (InnerCommutative && A == D)) { if (A != C) std::swap(C, D); // Consider forming "A op' (B op D)". // If "B op D" simplifies then it can be formed with no cost. V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I)); // If "B op D" doesn't simplify then only go on if one of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); if (V) RetVal = Builder.CreateBinOp(InnerOpcode, A, V); } } // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) { // Does the instruction have the form "(A op' B) op (C op' B)" or, in the // commutative case, "(A op' B) op (B op' D)"? if (B == D || (InnerCommutative && B == C)) { if (B != D) std::swap(C, D); // Consider forming "(A op C) op' B". // If "A op C" simplifies then it can be formed with no cost. V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); // If "A op C" doesn't simplify then only go on if one of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); if (V) RetVal = Builder.CreateBinOp(InnerOpcode, V, B); } } if (!RetVal) return nullptr; ++NumFactor; RetVal->takeName(&I); // Try to add no-overflow flags to the final value. if (isa(RetVal)) { bool HasNSW = false; bool HasNUW = false; if (isa(&I)) { HasNSW = I.hasNoSignedWrap(); HasNUW = I.hasNoUnsignedWrap(); } if (auto *LOBO = dyn_cast(LHS)) { HasNSW &= LOBO->hasNoSignedWrap(); HasNUW &= LOBO->hasNoUnsignedWrap(); } if (auto *ROBO = dyn_cast(RHS)) { HasNSW &= ROBO->hasNoSignedWrap(); HasNUW &= ROBO->hasNoUnsignedWrap(); } if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) { // We can propagate 'nsw' if we know that // %Y = mul nsw i16 %X, C // %Z = add nsw i16 %Y, %X // => // %Z = mul nsw i16 %X, C+1 // // iff C+1 isn't INT_MIN const APInt *CInt; if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue()) cast(RetVal)->setHasNoSignedWrap(HasNSW); // nuw can be propagated with any constant or nuw value. cast(RetVal)->setHasNoUnsignedWrap(HasNUW); } } return RetVal; } // If `I` has one Const operand and the other matches `(ctpop (not x))`, // replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`. 
// This is only useful is the new subtract can fold so we only handle the // following cases: // 1) (add/sub/disjoint_or C, (ctpop (not x)) // -> (add/sub/disjoint_or C', (ctpop x)) // 1) (cmp pred C, (ctpop (not x)) // -> (cmp pred C', (ctpop x)) Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) { unsigned Opc = I->getOpcode(); unsigned ConstIdx = 1; switch (Opc) { default: return nullptr; // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x)) // We can fold the BitWidth(x) with add/sub/icmp as long the other operand // is constant. case Instruction::Sub: ConstIdx = 0; break; case Instruction::ICmp: // Signed predicates aren't correct in some edge cases like for i2 types, as // well since (ctpop x) is known [0, log2(BitWidth(x))] almost all signed // comparisons against it are simplfied to unsigned. if (cast(I)->isSigned()) return nullptr; break; case Instruction::Or: if (!match(I, m_DisjointOr(m_Value(), m_Value()))) return nullptr; [[fallthrough]]; case Instruction::Add: break; } Value *Op; // Find ctpop. if (!match(I->getOperand(1 - ConstIdx), m_OneUse(m_Intrinsic(m_Value(Op))))) return nullptr; Constant *C; // Check other operand is ImmConstant. if (!match(I->getOperand(ConstIdx), m_ImmConstant(C))) return nullptr; Type *Ty = Op->getType(); Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits()); // Need extra check for icmp. Note if this check is true, it generally means // the icmp will simplify to true/false. if (Opc == Instruction::ICmp && !cast(I)->isEquality() && !ConstantExpr::getICmp(ICmpInst::ICMP_UGT, C, BitWidthC)->isZeroValue()) return nullptr; // Check we can invert `(not x)` for free. bool Consumes = false; if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes) return nullptr; Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder); assert(NotOp != nullptr && "Desync between isFreeToInvert and getFreelyInverted"); Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp); Value *R = nullptr; // Do the transformation here to avoid potentially introducing an infinite // loop. switch (Opc) { case Instruction::Sub: R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC)); break; case Instruction::Or: case Instruction::Add: R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp); break; case Instruction::ICmp: R = Builder.CreateICmp(cast(I)->getSwappedPredicate(), CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C)); break; default: llvm_unreachable("Unhandled Opcode"); } assert(R != nullptr); return replaceInstUsesWith(*I, R); } // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C)) // IFF // 1) the logic_shifts match // 2) either both binops are binops and one is `and` or // BinOp1 is `and` // (logic_shift (inv_logic_shift C1, C), C) == C1 or // // -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C) // // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt)) // IFF // 1) the logic_shifts match // 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`). 
// // -> (BinOp (logic_shift (BinOp X, Y)), Mask) // // (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt)) // IFF // 1) Binop1 is bitwise logical operator `and`, `or` or `xor` // 2) Binop2 is `not` // // -> (arithmetic_shift Binop1((not X), Y), Amt) Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { const DataLayout &DL = I.getModule()->getDataLayout(); auto IsValidBinOpc = [](unsigned Opc) { switch (Opc) { default: return false; case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::Add: // Skip Sub as we only match constant masks which will canonicalize to use // add. return true; } }; // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra // constraints. auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2, unsigned ShOpc) { assert(ShOpc != Instruction::AShr); return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) || ShOpc == Instruction::Shl; }; auto GetInvShift = [](unsigned ShOpc) { assert(ShOpc != Instruction::AShr); return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr; }; auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2, unsigned ShOpc, Constant *CMask, Constant *CShift) { // If the BinOp1 is `and` we don't need to check the mask. if (BinOpc1 == Instruction::And) return true; // For all other possible transfers we need complete distributable // binop/shift (anything but `add` + `lshr`). if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc)) return false; // If BinOp2 is `and`, any mask works (this only really helps for non-splat // vecs, otherwise the mask will be simplified and the following check will // handle it). if (BinOpc2 == Instruction::And) return true; // Otherwise, need mask that meets the below requirement. // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask Constant *MaskInvShift = ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL); return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) == CMask; }; auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * { Constant *CMask, *CShift; Value *X, *Y, *ShiftedX, *Mask, *Shift; if (!match(I.getOperand(ShOpnum), m_OneUse(m_Shift(m_Value(Y), m_Value(Shift))))) return nullptr; if (!match(I.getOperand(1 - ShOpnum), m_BinOp(m_Value(ShiftedX), m_Value(Mask)))) return nullptr; if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift))))) return nullptr; // Make sure we are matching instruction shifts and not ConstantExpr auto *IY = dyn_cast(I.getOperand(ShOpnum)); auto *IX = dyn_cast(ShiftedX); if (!IY || !IX) return nullptr; // LHS and RHS need same shift opcode unsigned ShOpc = IY->getOpcode(); if (ShOpc != IX->getOpcode()) return nullptr; // Make sure binop is real instruction and not ConstantExpr auto *BO2 = dyn_cast(I.getOperand(1 - ShOpnum)); if (!BO2) return nullptr; unsigned BinOpc = BO2->getOpcode(); // Make sure we have valid binops. if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc)) return nullptr; if (ShOpc == Instruction::AShr) { if (Instruction::isBitwiseLogicOp(I.getOpcode()) && BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) { Value *NotX = Builder.CreateNot(X); Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX); return BinaryOperator::Create( static_cast(ShOpc), NewBinOp, Shift); } return nullptr; } // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just // distribute to drop the shift irrelevant of constants. 
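    // Illustrative sketch (example assumed by the editor, not taken from the
    // upstream source): with matching xor binops and matching lshr shifts,
    //   ((X >> 3) ^ M) ^ (Y >> 3)  -->  ((X ^ Y) >> 3) ^ M
    // because lshr distributes over xor, so the two shifts collapse into one.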
if (BinOpc == I.getOpcode() && IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) { Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y); Value *NewBinOp1 = Builder.CreateBinOp( static_cast(ShOpc), NewBinOp2, Shift); return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask); } // Otherwise we can only distribute by constant shifting the mask, so // ensure we have constants. if (!match(Shift, m_ImmConstant(CShift))) return nullptr; if (!match(Mask, m_ImmConstant(CMask))) return nullptr; // Check if we can distribute the binops. if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift)) return nullptr; Constant *NewCMask = ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL); Value *NewBinOp2 = Builder.CreateBinOp( static_cast(BinOpc), X, NewCMask); Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2); return BinaryOperator::Create(static_cast(ShOpc), NewBinOp1, CShift); }; if (Instruction *R = MatchBinOp(0)) return R; return MatchBinOp(1); } // (Binop (zext C), (select C, T, F)) // -> (select C, (binop 1, T), (binop 0, F)) // // (Binop (sext C), (select C, T, F)) // -> (select C, (binop -1, T), (binop 0, F)) // // Attempt to simplify binary operations into a select with folded args, when // one operand of the binop is a select instruction and the other operand is a // zext/sext extension, whose value is the select condition. Instruction * InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) { // TODO: this simplification may be extended to any speculatable instruction, // not just binops, and would possibly be handled better in FoldOpIntoSelect. Instruction::BinaryOps Opc = I.getOpcode(); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); Value *A, *CondVal, *TrueVal, *FalseVal; Value *CastOp; auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) { return match(CastOp, m_ZExtOrSExt(m_Value(A))) && A->getType()->getScalarSizeInBits() == 1 && match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal), m_Value(FalseVal))); }; // Make sure one side of the binop is a select instruction, and the other is a // zero/sign extension operating on a i1. if (MatchSelectAndCast(LHS, RHS)) CastOp = LHS; else if (MatchSelectAndCast(RHS, LHS)) CastOp = RHS; else return nullptr; auto NewFoldedConst = [&](bool IsTrueArm, Value *V) { bool IsCastOpRHS = (CastOp == RHS); bool IsZExt = isa(CastOp); Constant *C; if (IsTrueArm) { C = Constant::getNullValue(V->getType()); } else if (IsZExt) { unsigned BitWidth = V->getType()->getScalarSizeInBits(); C = Constant::getIntegerValue(V->getType(), APInt(BitWidth, 1)); } else { C = Constant::getAllOnesValue(V->getType()); } return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, C) : Builder.CreateBinOp(Opc, C, V); }; // If the value used in the zext/sext is the select condition, or the negated // of the select condition, the binop can be simplified. 
if (CondVal == A) { Value *NewTrueVal = NewFoldedConst(false, TrueVal); return SelectInst::Create(CondVal, NewTrueVal, NewFoldedConst(true, FalseVal)); } if (match(A, m_Not(m_Specific(CondVal)))) { Value *NewTrueVal = NewFoldedConst(true, TrueVal); return SelectInst::Create(CondVal, NewTrueVal, NewFoldedConst(false, FalseVal)); } return nullptr; } Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); Value *A, *B, *C, *D; Instruction::BinaryOps LHSOpcode, RHSOpcode; if (Op0) LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1); if (Op1) RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0); // The instruction has the form "(A op' B) op (C op' D)". Try to factorize // a common term. if (Op0 && Op1 && LHSOpcode == RHSOpcode) if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D)) return V; // The instruction has the form "(A op' B) op (C)". Try to factorize common // term. if (Op0) if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident)) return V; // The instruction has the form "(B) op (C op' D)". Try to factorize common // term. if (Op1) if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) if (Value *V = tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D)) return V; return nullptr; } /// This tries to simplify binary operations which some other binary operation /// distributes over either by factorizing out common terms /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win). /// Returns the simplified value, or null if it didn't simplify. Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // Factorization. if (Value *R = tryFactorizationFolds(I)) return R; // Expansion. if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { // The instruction has the form "(A op' B) op C". See if expanding it out // to "(A op C) op' (B op C)" results in simplifications. Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' // Disable the use of undef because it's not safe to distribute undef. auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef(); Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive); Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive); // Do "A op C" and "B op C" both simplify? if (L && R) { // They do! Return "L op' R". ++NumExpand; C = Builder.CreateBinOp(InnerOpcode, L, R); C->takeName(&I); return C; } // Does "A op C" simplify to the identity value for the inner opcode? if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) { // They do! Return "B op C". ++NumExpand; C = Builder.CreateBinOp(TopLevelOpcode, B, C); C->takeName(&I); return C; } // Does "B op C" simplify to the identity value for the inner opcode? if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) { // They do! Return "A op C". 
++NumExpand; C = Builder.CreateBinOp(TopLevelOpcode, A, C); C->takeName(&I); return C; } } if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) { // The instruction has the form "A op (B op' C)". See if expanding it out // to "(A op B) op' (A op C)" results in simplifications. Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' // Disable the use of undef because it's not safe to distribute undef. auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef(); Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive); Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive); // Do "A op B" and "A op C" both simplify? if (L && R) { // They do! Return "L op' R". ++NumExpand; A = Builder.CreateBinOp(InnerOpcode, L, R); A->takeName(&I); return A; } // Does "A op B" simplify to the identity value for the inner opcode? if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) { // They do! Return "A op C". ++NumExpand; A = Builder.CreateBinOp(TopLevelOpcode, A, C); A->takeName(&I); return A; } // Does "A op C" simplify to the identity value for the inner opcode? if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) { // They do! Return "A op B". ++NumExpand; A = Builder.CreateBinOp(TopLevelOpcode, A, B); A->takeName(&I); return A; } } return SimplifySelectsFeedingBinaryOp(I, LHS, RHS); } static std::optional> matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) { if (LHS->getParent() != RHS->getParent()) return std::nullopt; if (LHS->getNumIncomingValues() < 2) return std::nullopt; if (!equal(LHS->blocks(), RHS->blocks())) return std::nullopt; Value *L0 = LHS->getIncomingValue(0); Value *R0 = RHS->getIncomingValue(0); for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) { Value *L1 = LHS->getIncomingValue(I); Value *R1 = RHS->getIncomingValue(I); if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1)) continue; return std::nullopt; } return std::optional(std::pair(L0, R0)); } std::optional> InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) { Instruction *LHSInst = dyn_cast(LHS); Instruction *RHSInst = dyn_cast(RHS); if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode()) return std::nullopt; switch (LHSInst->getOpcode()) { case Instruction::PHI: return matchSymmetricPhiNodesPair(cast(LHS), cast(RHS)); case Instruction::Select: { Value *Cond = LHSInst->getOperand(0); Value *TrueVal = LHSInst->getOperand(1); Value *FalseVal = LHSInst->getOperand(2); if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) && FalseVal == RHSInst->getOperand(1)) return std::pair(TrueVal, FalseVal); return std::nullopt; } case Instruction::Call: { // Match min(a, b) and max(a, b) MinMaxIntrinsic *LHSMinMax = dyn_cast(LHSInst); MinMaxIntrinsic *RHSMinMax = dyn_cast(RHSInst); if (LHSMinMax && RHSMinMax && LHSMinMax->getPredicate() == ICmpInst::getSwappedPredicate(RHSMinMax->getPredicate()) && ((LHSMinMax->getLHS() == RHSMinMax->getLHS() && LHSMinMax->getRHS() == RHSMinMax->getRHS()) || (LHSMinMax->getLHS() == RHSMinMax->getRHS() && LHSMinMax->getRHS() == RHSMinMax->getLHS()))) return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS()); return std::nullopt; } default: return std::nullopt; } } Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS) { Value *A, *B, *C, *D, *E, *F; bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C))); bool RHSIsSelect = 
match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F))); if (!LHSIsSelect && !RHSIsSelect) return nullptr; FastMathFlags FMF; BuilderTy::FastMathFlagGuard Guard(Builder); if (isa(&I)) { FMF = I.getFastMathFlags(); Builder.setFastMathFlags(FMF); } Instruction::BinaryOps Opcode = I.getOpcode(); SimplifyQuery Q = SQ.getWithInstruction(&I); Value *Cond, *True = nullptr, *False = nullptr; // Special-case for add/negate combination. Replace the zero in the negation // with the trailing add operand: // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N) // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * { // We need an 'add' and exactly 1 arm of the select to have been simplified. if (Opcode != Instruction::Add || (!True && !False) || (True && False)) return nullptr; Value *N; if (True && match(FVal, m_Neg(m_Value(N)))) { Value *Sub = Builder.CreateSub(Z, N); return Builder.CreateSelect(Cond, True, Sub, I.getName()); } if (False && match(TVal, m_Neg(m_Value(N)))) { Value *Sub = Builder.CreateSub(Z, N); return Builder.CreateSelect(Cond, Sub, False, I.getName()); } return nullptr; }; if (LHSIsSelect && RHSIsSelect && A == D) { // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F) Cond = A; True = simplifyBinOp(Opcode, B, E, FMF, Q); False = simplifyBinOp(Opcode, C, F, FMF, Q); if (LHS->hasOneUse() && RHS->hasOneUse()) { if (False && !True) True = Builder.CreateBinOp(Opcode, B, E); else if (True && !False) False = Builder.CreateBinOp(Opcode, C, F); } } else if (LHSIsSelect && LHS->hasOneUse()) { // (A ? B : C) op Y -> A ? (B op Y) : (C op Y) Cond = A; True = simplifyBinOp(Opcode, B, RHS, FMF, Q); False = simplifyBinOp(Opcode, C, RHS, FMF, Q); if (Value *NewSel = foldAddNegate(B, C, RHS)) return NewSel; } else if (RHSIsSelect && RHS->hasOneUse()) { // X op (D ? E : F) -> D ? (X op E) : (X op F) Cond = D; True = simplifyBinOp(Opcode, LHS, E, FMF, Q); False = simplifyBinOp(Opcode, LHS, F, FMF, Q); if (Value *NewSel = foldAddNegate(E, F, LHS)) return NewSel; } if (!True || !False) return nullptr; Value *SI = Builder.CreateSelect(Cond, True, False); SI->takeName(&I); return SI; } /// Freely adapt every user of V as-if V was changed to !V. /// WARNING: only if canFreelyInvertAllUsersOf() said this can be done. void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) { assert(!isa(I) && "Shouldn't invert users of constant"); for (User *U : make_early_inc_range(I->users())) { if (U == IgnoredUser) continue; // Don't consider this user. switch (cast(U)->getOpcode()) { case Instruction::Select: { auto *SI = cast(U); SI->swapValues(); SI->swapProfMetadata(); break; } case Instruction::Br: cast(U)->swapSuccessors(); // swaps prof metadata too break; case Instruction::Xor: replaceInstUsesWith(cast(*U), I); // Add to worklist for DCE. addToWorklist(cast(U)); break; default: llvm_unreachable("Got unexpected user - out of sync with " "canFreelyInvertAllUsersOf() ?"); } } } /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a /// constant zero (which is the 'negate' form). Value *InstCombinerImpl::dyn_castNegVal(Value *V) const { Value *NegV; if (match(V, m_Neg(m_Value(NegV)))) return NegV; // Constants can be considered to be negated values if they can be folded. 
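  // Illustrative sketch (editor's example, not from the upstream source):
  // dyn_castNegVal(i32 -5) returns i32 5 (since -5 == -(5)); for a
  // non-constant it only matches the explicit "0 - X" form and returns X.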
if (ConstantInt *C = dyn_cast(V)) return ConstantExpr::getNeg(C); if (ConstantDataVector *C = dyn_cast(V)) if (C->getType()->getElementType()->isIntegerTy()) return ConstantExpr::getNeg(C); if (ConstantVector *CV = dyn_cast(V)) { for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { Constant *Elt = CV->getAggregateElement(i); if (!Elt) return nullptr; if (isa(Elt)) continue; if (!isa(Elt)) return nullptr; } return ConstantExpr::getNeg(CV); } // Negate integer vector splats. if (auto *CV = dyn_cast(V)) if (CV->getType()->isVectorTy() && CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue()) return ConstantExpr::getNeg(CV); return nullptr; } /// A binop with a constant operand and a sign-extended boolean operand may be /// converted into a select of constants by applying the binary operation to /// the constant with the two possible values of the extended boolean (0 or -1). Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { // TODO: Handle non-commutative binop (constant is operand 0). // TODO: Handle zext. // TODO: Peek through 'not' of cast. Value *BO0 = BO.getOperand(0); Value *BO1 = BO.getOperand(1); Value *X; Constant *C; if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) || !X->getType()->isIntOrIntVectorTy(1)) return nullptr; // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C) Constant *Ones = ConstantInt::getAllOnesValue(BO.getType()); Constant *Zero = ConstantInt::getNullValue(BO.getType()); Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C); Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C); return SelectInst::Create(X, TVal, FVal); } static Constant *constantFoldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, bool IsTrueArm) { SmallVector ConstOps; for (Value *Op : I.operands()) { CmpInst::Predicate Pred; Constant *C = nullptr; if (Op == SI) { C = dyn_cast(IsTrueArm ? SI->getTrueValue() : SI->getFalseValue()); } else if (match(SI->getCondition(), m_ICmp(Pred, m_Specific(Op), m_Constant(C))) && Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && isGuaranteedNotToBeUndefOrPoison(C)) { // Pass } else { C = dyn_cast(Op); } if (C == nullptr) return nullptr; ConstOps.push_back(C); } return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout()); } static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC) { Instruction *Clone = I.clone(); Clone->replaceUsesOfWith(SI, NewOp); IC.InsertNewInstBefore(Clone, SI->getIterator()); return Clone; } Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse) { // Don't modify shared select instructions unless set FoldWithMultiUse if (!SI->hasOneUse() && !FoldWithMultiUse) return nullptr; Value *TV = SI->getTrueValue(); Value *FV = SI->getFalseValue(); if (!(isa(TV) || isa(FV))) return nullptr; // Bool selects with constant operands can be folded to logical ops. if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; // If it's a bitcast involving vectors, make sure it has the same number of // elements on both sides. if (auto *BC = dyn_cast(&Op)) { VectorType *DestTy = dyn_cast(BC->getDestTy()); VectorType *SrcTy = dyn_cast(BC->getSrcTy()); // Verify that either both or neither are vectors. if ((SrcTy == nullptr) != (DestTy == nullptr)) return nullptr; // If vectors, verify that they have the same number of elements. 
if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount()) return nullptr; } // Test if a FCmpInst instruction is used exclusively by a select as // part of a minimum or maximum operation. If so, refrain from doing // any other folding. This helps out other analyses which understand // non-obfuscated minimum and maximum idioms. And in this case, at // least one of the comparison operands has at least one user besides // the compare (the select), which would often largely negate the // benefit of folding anyway. if (auto *CI = dyn_cast(SI->getCondition())) { if (CI->hasOneUse()) { Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) return nullptr; } } // Make sure that one of the select arms constant folds successfully. Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true); Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false); if (!NewTV && !NewFV) return nullptr; // Create an instruction for the arm that did not fold. if (!NewTV) NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this); if (!NewFV) NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this); return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN, Value *InValue, BasicBlock *InBB, const DataLayout &DL, const SimplifyQuery SQ) { // NB: It is a precondition of this transform that the operands be // phi translatable! This is usually trivially satisfied by limiting it // to constant ops, and for selects we do a more sophisticated check. SmallVector Ops; for (Value *Op : I.operands()) { if (Op == PN) Ops.push_back(InValue); else Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB)); } // Don't consider the simplification successful if we get back a constant // expression. That's just an instruction in hiding. // Also reject the case where we simplify back to the phi node. We wouldn't // be able to remove it in that case. Value *NewVal = simplifyInstructionWithOperands( &I, Ops, SQ.getWithInstruction(InBB->getTerminator())); if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr())) return NewVal; // Check if incoming PHI value can be replaced with constant // based on implied condition. BranchInst *TerminatorBI = dyn_cast(InBB->getTerminator()); const ICmpInst *ICmp = dyn_cast(&I); if (TerminatorBI && TerminatorBI->isConditional() && TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) { bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent(); std::optional ImpliedCond = isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(), Ops[0], Ops[1], DL, LHSIsTrue); if (ImpliedCond) return ConstantInt::getBool(I.getType(), ImpliedCond.value()); } return nullptr; } Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { unsigned NumPHIValues = PN->getNumIncomingValues(); if (NumPHIValues == 0) return nullptr; // We normally only transform phis with a single use. However, if a PHI has // multiple uses and they are all the same operation, we can fold *all* of the // uses into the PHI. if (!PN->hasOneUse()) { // Walk the use list for the instruction, comparing them to I. for (User *U : PN->users()) { Instruction *UI = cast(U); if (UI != &I && !I.isIdenticalTo(UI)) return nullptr; } // Otherwise, we can replace *all* users with the new PHI we form. } // Check to see whether the instruction can be folded into each phi operand. 
// If there is one operand that does not fold, remember the BB it is in. // If there is more than one or if *it* is a PHI, bail out. SmallVector NewPhiValues; BasicBlock *NonSimplifiedBB = nullptr; Value *NonSimplifiedInVal = nullptr; for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InVal = PN->getIncomingValue(i); BasicBlock *InBB = PN->getIncomingBlock(i); if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) { NewPhiValues.push_back(NewVal); continue; } if (NonSimplifiedBB) return nullptr; // More than one non-simplified value. NonSimplifiedBB = InBB; NonSimplifiedInVal = InVal; NewPhiValues.push_back(nullptr); // If the InVal is an invoke at the end of the pred block, then we can't // insert a computation after it without breaking the edge. if (isa(InVal)) if (cast(InVal)->getParent() == NonSimplifiedBB) return nullptr; // If the incoming non-constant value is reachable from the phis block, // we'll push the operation across a loop backedge. This could result in // an infinite combine loop, and is generally non-profitable (especially // if the operation was originally outside the loop). if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT, LI)) return nullptr; } // If there is exactly one non-simplified value, we can insert a copy of the // operation in that block. However, if this is a critical edge, we would be // inserting the computation on some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. // Also, make sure that the pred block is not dead code. if (NonSimplifiedBB != nullptr) { BranchInst *BI = dyn_cast(NonSimplifiedBB->getTerminator()); if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonSimplifiedBB)) return nullptr; } // Okay, we can do the transformation: create the new PHI node. PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues()); InsertNewInstBefore(NewPN, PN->getIterator()); NewPN->takeName(PN); NewPN->setDebugLoc(PN->getDebugLoc()); // If we are going to have to insert a new computation, do so right before the // predecessor's terminator. Instruction *Clone = nullptr; if (NonSimplifiedBB) { Clone = I.clone(); for (Use &U : Clone->operands()) { if (U == PN) U = NonSimplifiedInVal; else U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB); } InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator()); } for (unsigned i = 0; i != NumPHIValues; ++i) { if (NewPhiValues[i]) NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i)); else NewPN->addIncoming(Clone, PN->getIncomingBlock(i)); } for (User *U : make_early_inc_range(PN->users())) { Instruction *User = cast(U); if (User == &I) continue; replaceInstUsesWith(*User, NewPN); eraseInstFromFunction(*User); } replaceAllDbgUsesWith(const_cast(*PN), const_cast(*NewPN), const_cast(*PN), DT); return replaceInstUsesWith(I, NewPN); } Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { // TODO: This should be similar to the incoming values check in foldOpIntoPhi: // we are guarding against replicating the binop in >1 predecessor. // This could miss matching a phi with 2 constant incoming values. auto *Phi0 = dyn_cast(BO.getOperand(0)); auto *Phi1 = dyn_cast(BO.getOperand(1)); if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() || Phi0->getNumOperands() != Phi1->getNumOperands()) return nullptr; // TODO: Remove the restriction for binop being in the same block as the phis. 
if (BO.getParent() != Phi0->getParent() || BO.getParent() != Phi1->getParent()) return nullptr; // Fold if there is at least one specific constant value in phi0 or phi1's // incoming values that comes from the same block and this specific constant // value can be used to do optimization for specific binary operator. // For example: // %phi0 = phi i32 [0, %bb0], [%i, %bb1] // %phi1 = phi i32 [%j, %bb0], [0, %bb1] // %add = add i32 %phi0, %phi1 // ==> // %add = phi i32 [%j, %bb0], [%i, %bb1] Constant *C = ConstantExpr::getBinOpIdentity(BO.getOpcode(), BO.getType(), /*AllowRHSConstant*/ false); if (C) { SmallVector NewIncomingValues; auto CanFoldIncomingValuePair = [&](std::tuple T) { auto &Phi0Use = std::get<0>(T); auto &Phi1Use = std::get<1>(T); if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use)) return false; Value *Phi0UseV = Phi0Use.get(); Value *Phi1UseV = Phi1Use.get(); if (Phi0UseV == C) NewIncomingValues.push_back(Phi1UseV); else if (Phi1UseV == C) NewIncomingValues.push_back(Phi0UseV); else return false; return true; }; if (all_of(zip(Phi0->operands(), Phi1->operands()), CanFoldIncomingValuePair)) { PHINode *NewPhi = PHINode::Create(Phi0->getType(), Phi0->getNumOperands()); assert(NewIncomingValues.size() == Phi0->getNumOperands() && "The number of collected incoming values should equal the number " "of the original PHINode operands!"); for (unsigned I = 0; I < Phi0->getNumOperands(); I++) NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I)); return NewPhi; } } if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2) return nullptr; // Match a pair of incoming constants for one of the predecessor blocks. BasicBlock *ConstBB, *OtherBB; Constant *C0, *C1; if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) { ConstBB = Phi0->getIncomingBlock(0); OtherBB = Phi0->getIncomingBlock(1); } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) { ConstBB = Phi0->getIncomingBlock(1); OtherBB = Phi0->getIncomingBlock(0); } else { return nullptr; } if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1))) return nullptr; // The block that we are hoisting to must reach here unconditionally. // Otherwise, we could be speculatively executing an expensive or // non-speculative op. auto *PredBlockBranch = dyn_cast(OtherBB->getTerminator()); if (!PredBlockBranch || PredBlockBranch->isConditional() || !DT.isReachableFromEntry(OtherBB)) return nullptr; // TODO: This check could be tightened to only apply to binops (div/rem) that // are not safe to speculatively execute. But that could allow hoisting // potentially expensive instructions (fdiv for example). for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter) if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter)) return nullptr; // Fold constants for the predecessor block with constant incoming values. Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL); if (!NewC) return nullptr; // Make a new binop in the predecessor block with the non-constant incoming // values. Builder.SetInsertPoint(PredBlockBranch); Value *NewBO = Builder.CreateBinOp(BO.getOpcode(), Phi0->getIncomingValueForBlock(OtherBB), Phi1->getIncomingValueForBlock(OtherBB)); if (auto *NotFoldedNewBO = dyn_cast(NewBO)) NotFoldedNewBO->copyIRFlags(&BO); // Replace the binop with a phi of the new values. The old phis are dead. 
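  // Illustrative IR sketch (editor's example, not from the upstream source):
  //   %phi0 = phi i32 [ 7, %constbb ], [ %x, %otherbb ]
  //   %phi1 = phi i32 [ 3, %constbb ], [ %y, %otherbb ]
  //   %r    = add i32 %phi0, %phi1
  // becomes (with "%xy = add i32 %x, %y" hoisted into %otherbb):
  //   %r    = phi i32 [ 10, %constbb ], [ %xy, %otherbb ]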
PHINode *NewPhi = PHINode::Create(BO.getType(), 2); NewPhi->addIncoming(NewBO, OtherBB); NewPhi->addIncoming(NewC, ConstBB); return NewPhi; } Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { if (!isa(I.getOperand(1))) return nullptr; if (auto *Sel = dyn_cast(I.getOperand(0))) { if (Instruction *NewSel = FoldOpIntoSelect(I, Sel)) return NewSel; } else if (auto *PN = dyn_cast(I.getOperand(0))) { if (Instruction *NewPhi = foldOpIntoPhi(I, PN)) return NewPhi; } return nullptr; } static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { // If this GEP has only 0 indices, it is the same pointer as // Src. If Src is not a trivial GEP too, don't combine // the indices. if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && !Src.hasOneUse()) return false; return true; } Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { if (!isa(Inst.getType())) return nullptr; BinaryOperator::BinaryOps Opcode = Inst.getOpcode(); Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); assert(cast(LHS->getType())->getElementCount() == cast(Inst.getType())->getElementCount()); assert(cast(RHS->getType())->getElementCount() == cast(Inst.getType())->getElementCount()); // If both operands of the binop are vector concatenations, then perform the // narrow binop on each pair of the source operands followed by concatenation // of the results. Value *L0, *L1, *R0, *R1; ArrayRef Mask; if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) && match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) && LHS->hasOneUse() && RHS->hasOneUse() && cast(LHS)->isConcat() && cast(RHS)->isConcat()) { // This transform does not have the speculative execution constraint as // below because the shuffle is a concatenation. The new binops are // operating on exactly the same elements as the existing binop. // TODO: We could ease the mask requirement to allow different undef lanes, // but that requires an analysis of the binop-with-undef output value. Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0); if (auto *BO = dyn_cast(NewBO0)) BO->copyIRFlags(&Inst); Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1); if (auto *BO = dyn_cast(NewBO1)) BO->copyIRFlags(&Inst); return new ShuffleVectorInst(NewBO0, NewBO1, Mask); } auto createBinOpReverse = [&](Value *X, Value *Y) { Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName()); if (auto *BO = dyn_cast(V)) BO->copyIRFlags(&Inst); Module *M = Inst.getModule(); Function *F = Intrinsic::getDeclaration( M, Intrinsic::experimental_vector_reverse, V->getType()); return CallInst::Create(F, V); }; // NOTE: Reverse shuffles don't require the speculative execution protection // below because they don't affect which lanes take part in the computation. Value *V1, *V2; if (match(LHS, m_VecReverse(m_Value(V1)))) { // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)) if (match(RHS, m_VecReverse(m_Value(V2))) && (LHS->hasOneUse() || RHS->hasOneUse() || (LHS == RHS && LHS->hasNUses(2)))) return createBinOpReverse(V1, V2); // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat)) if (LHS->hasOneUse() && isSplatValue(RHS)) return createBinOpReverse(V1, RHS); } // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2)) else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2))))) return createBinOpReverse(LHS, V2); // It may not be safe to reorder shuffles and things like div, urem, etc. // because we may trap when executing those ops on unknown vector elements. // See PR20059. 
if (!isSafeToSpeculativelyExecute(&Inst)) return nullptr; auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef M) { Value *XY = Builder.CreateBinOp(Opcode, X, Y); if (auto *BO = dyn_cast(XY)) BO->copyIRFlags(&Inst); return new ShuffleVectorInst(XY, M); }; // If both arguments of the binary operation are shuffles that use the same // mask and shuffle within a single vector, move the shuffle after the binop. if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) && match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) && V1->getType() == V2->getType() && (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) { // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask) return createBinOpShuffle(V1, V2, Mask); } // If both arguments of a commutative binop are select-shuffles that use the // same mask with commuted operands, the shuffles are unnecessary. if (Inst.isCommutative() && match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) && match(RHS, m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) { auto *LShuf = cast(LHS); auto *RShuf = cast(RHS); // TODO: Allow shuffles that contain undefs in the mask? // That is legal, but it reduces undef knowledge. // TODO: Allow arbitrary shuffles by shuffling after binop? // That might be legal, but we have to deal with poison. if (LShuf->isSelect() && !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) && RShuf->isSelect() && !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) { // Example: // LHS = shuffle V1, V2, <0, 5, 6, 3> // RHS = shuffle V2, V1, <0, 5, 6, 3> // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2); NewBO->copyIRFlags(&Inst); return NewBO; } } // If one argument is a shuffle within one vector and the other is a constant, // try moving the shuffle after the binary operation. This canonicalization // intends to move shuffles closer to other shuffles and binops closer to // other binops, so they can be folded. It may also enable demanded elements // transforms. Constant *C; auto *InstVTy = dyn_cast(Inst.getType()); if (InstVTy && match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))), m_ImmConstant(C))) && cast(V1->getType())->getNumElements() <= InstVTy->getNumElements()) { assert(InstVTy->getScalarType() == V1->getType()->getScalarType() && "Shuffle should not change scalar type"); // Find constant NewC that has property: // shuffle(NewC, ShMask) = C // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>) // reorder is not possible. A 1-to-1 mapping is not required. Example: // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = bool ConstOp1 = isa(RHS); ArrayRef ShMask = Mask; unsigned SrcVecNumElts = cast(V1->getType())->getNumElements(); PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType()); SmallVector NewVecC(SrcVecNumElts, PoisonScalar); bool MayChange = true; unsigned NumElts = InstVTy->getNumElements(); for (unsigned I = 0; I < NumElts; ++I) { Constant *CElt = C->getAggregateElement(I); if (ShMask[I] >= 0) { assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle"); Constant *NewCElt = NewVecC[ShMask[I]]; // Bail out if: // 1. The constant vector contains a constant expression. // 2. The shuffle needs an element of the constant vector that can't // be mapped to a new constant vector. // 3. This is a widening shuffle that copies elements of V1 into the // extended elements (extending with poison is allowed). 
if (!CElt || (!isa(NewCElt) && NewCElt != CElt) || I >= SrcVecNumElts) { MayChange = false; break; } NewVecC[ShMask[I]] = CElt; } // If this is a widening shuffle, we must be able to extend with poison // elements. If the original binop does not produce a poison in the high // lanes, then this transform is not safe. // Similarly for poison lanes due to the shuffle mask, we can only // transform binops that preserve poison. // TODO: We could shuffle those non-poison constant values into the // result by using a constant vector (rather than an poison vector) // as operand 1 of the new binop, but that might be too aggressive // for target-independent shuffle creation. if (I >= SrcVecNumElts || ShMask[I] < 0) { Constant *MaybePoison = ConstOp1 ? ConstantFoldBinaryOpOperands(Opcode, PoisonScalar, CElt, DL) : ConstantFoldBinaryOpOperands(Opcode, CElt, PoisonScalar, DL); if (!MaybePoison || !isa(MaybePoison)) { MayChange = false; break; } } } if (MayChange) { Constant *NewC = ConstantVector::get(NewVecC); // It may not be safe to execute a binop on a vector with poison elements // because the entire instruction can be folded to undef or create poison // that did not exist in the original code. // TODO: The shift case should not be necessary. if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1)) NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1); // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask) // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask) Value *NewLHS = ConstOp1 ? V1 : NewC; Value *NewRHS = ConstOp1 ? NewC : V1; return createBinOpShuffle(NewLHS, NewRHS, Mask); } } // Try to reassociate to sink a splat shuffle after a binary operation. if (Inst.isAssociative() && Inst.isCommutative()) { // Canonicalize shuffle operand as LHS. if (isa(RHS)) std::swap(LHS, RHS); Value *X; ArrayRef MaskC; int SplatIndex; Value *Y, *OtherOp; if (!match(LHS, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) || !match(MaskC, m_SplatOrUndefMask(SplatIndex)) || X->getType() != Inst.getType() || !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp))))) return nullptr; // FIXME: This may not be safe if the analysis allows undef elements. By // moving 'Y' before the splat shuffle, we are implicitly assuming // that it is not undef/poison at the splat index. if (isSplatValue(OtherOp, SplatIndex)) { std::swap(Y, OtherOp); } else if (!isSplatValue(Y, SplatIndex)) { return nullptr; } // X and Y are splatted values, so perform the binary operation on those // values followed by a splat followed by the 2nd binary operation: // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp Value *NewBO = Builder.CreateBinOp(Opcode, X, Y); SmallVector NewMask(MaskC.size(), SplatIndex); Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask); Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp); // Intersect FMF on both new binops. Other (poison-generating) flags are // dropped to be safe. if (isa(R)) { R->copyFastMathFlags(&Inst); R->andIRFlags(RHS); } if (auto *NewInstBO = dyn_cast(NewBO)) NewInstBO->copyIRFlags(R); return R; } return nullptr; } /// Try to narrow the width of a binop if at least 1 operand is an extend of /// of a value. This requires a potentially expensive known bits check to make /// sure the narrow op does not overflow. Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) { // We need at least one extended operand. 
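  // Illustrative IR sketch (editor's example, not from the upstream source):
  // if known bits prove the 8-bit add cannot wrap, then
  //   %zx = zext i8 %x to i32
  //   %zy = zext i8 %y to i32
  //   %a  = add i32 %zx, %zy
  // can be narrowed to
  //   %n  = add nuw i8 %x, %y
  //   %a  = zext i8 %n to i32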
  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);

  // If this is a sub, we swap the operands since we always want an extension
  // on the RHS. The LHS can be an extension or a constant.
  if (BO.getOpcode() == Instruction::Sub)
    std::swap(Op0, Op1);

  Value *X;
  bool IsSext = match(Op0, m_SExt(m_Value(X)));
  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
    return nullptr;

  // If both operands are the same extension from the same source type and we
  // can eliminate at least one (hasOneUse), this might work.
  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
  Value *Y;
  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
        cast<Operator>(Op1)->getOpcode() == CastOpc &&
        (Op0->hasOneUse() || Op1->hasOneUse()))) {
    // If that did not match, see if we have a suitable constant operand.
    // Truncating and extending must produce the same constant.
    Constant *WideC;
    if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
      return nullptr;
    Constant *NarrowC = getLosslessTrunc(WideC, X->getType(), CastOpc);
    if (!NarrowC)
      return nullptr;
    Y = NarrowC;
  }

  // Swap back now that we found our operands.
  if (BO.getOpcode() == Instruction::Sub)
    std::swap(X, Y);

  // Both operands have narrow versions. Last step: the math must not overflow
  // in the narrow width.
  if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
    return nullptr;

  // bo (ext X), (ext Y) --> ext (bo X, Y)
  // bo (ext X), C       --> ext (bo X, C')
  Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
    if (IsSext)
      NewBinOp->setHasNoSignedWrap();
    else
      NewBinOp->setHasNoUnsignedWrap();
  }
  return CastInst::Create(CastOpc, NarrowBO, BO.getType());
}

static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
  // At least one GEP must be inbounds.
  if (!GEP1.isInBounds() && !GEP2.isInBounds())
    return false;

  return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
         (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
}

/// Thread a GEP operation with constant indices through the constant true/false
/// arms of a select.
static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
                                  InstCombiner::BuilderTy &Builder) {
  if (!GEP.hasAllConstantIndices())
    return nullptr;

  Instruction *Sel;
  Value *Cond;
  Constant *TrueC, *FalseC;
  if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
      !match(Sel,
             m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
    return nullptr;

  // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
  // Propagate 'inbounds' and metadata from existing instructions.
  // Note: using IRBuilder to create the constants for efficiency.
  SmallVector<Value *, 4> IndexC(GEP.indices());
  bool IsInBounds = GEP.isInBounds();
  Type *Ty = GEP.getSourceElementType();
  Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
  Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
  return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
}

Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
                                             GEPOperator *Src) {
  // Combine Indices - If the source pointer to this getelementptr instruction
  // is a getelementptr instruction with matching element type, combine the
  // indices of the two getelementptr instructions into a single instruction.
  if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
    return nullptr;

  // For constant GEPs, use a more general offset-based folding approach.
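  // Illustrative IR sketch (editor's example, not from the upstream source):
  //   %p = getelementptr inbounds i32, ptr %base, i64 1
  //   %q = getelementptr inbounds i32, ptr %p, i64 2
  // merges into
  //   %q = getelementptr inbounds i32, ptr %base, i64 3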
Type *PtrTy = Src->getType()->getScalarType(); if (GEP.hasAllConstantIndices() && (Src->hasOneUse() || Src->hasAllConstantIndices())) { // Split Src into a variable part and a constant suffix. gep_type_iterator GTI = gep_type_begin(*Src); Type *BaseType = GTI.getIndexedType(); bool IsFirstType = true; unsigned NumVarIndices = 0; for (auto Pair : enumerate(Src->indices())) { if (!isa(Pair.value())) { BaseType = GTI.getIndexedType(); IsFirstType = false; NumVarIndices = Pair.index() + 1; } ++GTI; } // Determine the offset for the constant suffix of Src. APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0); if (NumVarIndices != Src->getNumIndices()) { // FIXME: getIndexedOffsetInType() does not handled scalable vectors. if (BaseType->isScalableTy()) return nullptr; SmallVector ConstantIndices; if (!IsFirstType) ConstantIndices.push_back( Constant::getNullValue(Type::getInt32Ty(GEP.getContext()))); append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices)); Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices); } // Add the offset for GEP (which is fully constant). if (!GEP.accumulateConstantOffset(DL, Offset)) return nullptr; APInt OffsetOld = Offset; // Convert the total offset back into indices. SmallVector ConstIndices = DL.getGEPIndicesForOffset(BaseType, Offset); if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) { // If both GEP are constant-indexed, and cannot be merged in either way, // convert them to a GEP of i8. if (Src->hasAllConstantIndices()) return replaceInstUsesWith( GEP, Builder.CreateGEP( Builder.getInt8Ty(), Src->getOperand(0), Builder.getInt(OffsetOld), "", isMergedGEPInBounds(*Src, *cast(&GEP)))); return nullptr; } bool IsInBounds = isMergedGEPInBounds(*Src, *cast(&GEP)); SmallVector Indices; append_range(Indices, drop_end(Src->indices(), Src->getNumIndices() - NumVarIndices)); for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) { Indices.push_back(ConstantInt::get(GEP.getContext(), Idx)); // Even if the total offset is inbounds, we may end up representing it // by first performing a larger negative offset, and then a smaller // positive one. The large negative offset might go out of bounds. Only // preserve inbounds if all signs are the same. IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative(); } return replaceInstUsesWith( GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0), Indices, "", IsInBounds)); } if (Src->getResultElementType() != GEP.getSourceElementType()) return nullptr; SmallVector Indices; // Find out whether the last index in the source GEP is a sequential idx. bool EndsWithSequential = false; for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); I != E; ++I) EndsWithSequential = I.isSequential(); // Can we combine the two pointer arithmetics offsets? if (EndsWithSequential) { // Replace: gep (gep %P, long B), long A, ... // With: T = long A+B; gep %P, T, ... Value *SO1 = Src->getOperand(Src->getNumOperands()-1); Value *GO1 = GEP.getOperand(1); // If they aren't the same type, then the input hasn't been processed // by the loop above yet (which canonicalizes sequential index types to // intptr_t). Just avoid transforming this until the input has been // normalized. if (SO1->getType() != GO1->getType()) return nullptr; Value *Sum = simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP)); // Only do the combine when we are sure the cost after the // merge is never more than that before the merge. 
if (Sum == nullptr) return nullptr; // Update the GEP in place if possible. if (Src->getNumOperands() == 2) { GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast(&GEP))); replaceOperand(GEP, 0, Src->getOperand(0)); replaceOperand(GEP, 1, Sum); return &GEP; } Indices.append(Src->op_begin()+1, Src->op_end()-1); Indices.push_back(Sum); Indices.append(GEP.op_begin()+2, GEP.op_end()); } else if (isa(*GEP.idx_begin()) && cast(*GEP.idx_begin())->isNullValue() && Src->getNumOperands() != 1) { // Otherwise we can do the fold if the first index of the GEP is a zero Indices.append(Src->op_begin()+1, Src->op_end()); Indices.append(GEP.idx_begin()+1, GEP.idx_end()); } if (!Indices.empty()) return replaceInstUsesWith( GEP, Builder.CreateGEP( Src->getSourceElementType(), Src->getOperand(0), Indices, "", isMergedGEPInBounds(*Src, *cast(&GEP)))); return nullptr; } Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth) { static Value *const NonNull = reinterpret_cast(uintptr_t(1)); // ~(~(X)) -> X. Value *A, *B; if (match(V, m_Not(m_Value(A)))) { DoesConsume = true; return A; } Constant *C; // Constants can be considered to be not'ed values. if (match(V, m_ImmConstant(C))) return ConstantExpr::getNot(C); if (Depth++ >= MaxAnalysisRecursionDepth) return nullptr; // The rest of the cases require that we invert all uses so don't bother // doing the analysis if we know we can't use the result. if (!WillInvertAllUses) return nullptr; // Compares can be inverted if all of their uses are being modified to use // the ~V. if (auto *I = dyn_cast(V)) { if (Builder != nullptr) return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0), I->getOperand(1)); return NonNull; } // If `V` is of the form `A + B` then `-1 - V` can be folded into // `(-1 - B) - A` if we are willing to invert all of the uses. if (match(V, m_Add(m_Value(A), m_Value(B)))) { if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder, DoesConsume, Depth)) return Builder ? Builder->CreateSub(BV, A) : NonNull; if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, DoesConsume, Depth)) return Builder ? Builder->CreateSub(AV, B) : NonNull; return nullptr; } // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded // into `A ^ B` if we are willing to invert all of the uses. if (match(V, m_Xor(m_Value(A), m_Value(B)))) { if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder, DoesConsume, Depth)) return Builder ? Builder->CreateXor(A, BV) : NonNull; if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, DoesConsume, Depth)) return Builder ? Builder->CreateXor(AV, B) : NonNull; return nullptr; } // If `V` is of the form `B - A` then `-1 - V` can be folded into // `A + (-1 - B)` if we are willing to invert all of the uses. if (match(V, m_Sub(m_Value(A), m_Value(B)))) { if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, DoesConsume, Depth)) return Builder ? Builder->CreateAdd(AV, B) : NonNull; return nullptr; } // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded // into `A s>> B` if we are willing to invert all of the uses. if (match(V, m_AShr(m_Value(A), m_Value(B)))) { if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, DoesConsume, Depth)) return Builder ? Builder->CreateAShr(AV, B) : NonNull; return nullptr; } Value *Cond; // LogicOps are special in that we canonicalize them at the cost of an // instruction. 
bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) && !shouldAvoidAbsorbingNotIntoSelect(*cast(V)); // Selects/min/max with invertible operands are freely invertible if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) { if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr, DoesConsume, Depth)) return nullptr; if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder, DoesConsume, Depth)) { if (Builder != nullptr) { Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder, DoesConsume, Depth); assert(NotB != nullptr && "Unable to build inverted value for known freely invertable op"); if (auto *II = dyn_cast(V)) return Builder->CreateBinaryIntrinsic( getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB); return Builder->CreateSelect(Cond, NotA, NotB); } return NonNull; } } return nullptr; } Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *PtrOp = GEP.getOperand(0); SmallVector Indices(GEP.indices()); Type *GEPType = GEP.getType(); Type *GEPEltType = GEP.getSourceElementType(); bool IsGEPSrcEleScalable = GEPEltType->isScalableTy(); if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(), SQ.getWithInstruction(&GEP))) return replaceInstUsesWith(GEP, V); // For vector geps, use the generic demanded vector support. // Skip if GEP return type is scalable. The number of elements is unknown at // compile-time. if (auto *GEPFVTy = dyn_cast(GEPType)) { auto VWidth = GEPFVTy->getNumElements(); APInt PoisonElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask, PoisonElts)) { if (V != &GEP) return replaceInstUsesWith(GEP, V); return &GEP; } // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if // possible (decide on canonical form for pointer broadcast), 3) exploit // undef elements to decrease demanded bits } // Eliminate unneeded casts for indices, and replace indices which displace // by multiples of a zero size type with zero. bool MadeChange = false; // Index width may not be the same width as pointer width. // Data layout chooses the right type based on supported integer types. Type *NewScalarIndexTy = DL.getIndexType(GEP.getPointerOperandType()->getScalarType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; ++I, ++GTI) { // Skip indices into struct types. if (GTI.isStruct()) continue; Type *IndexTy = (*I)->getType(); Type *NewIndexType = IndexTy->isVectorTy() ? VectorType::get(NewScalarIndexTy, cast(IndexTy)->getElementCount()) : NewScalarIndexTy; // If the element type has zero size then any index over it is equivalent // to an index of zero, so replace it with zero if it is not zero already. Type *EltTy = GTI.getIndexedType(); if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero()) if (!isa(*I) || !match(I->get(), m_Zero())) { *I = Constant::getNullValue(NewIndexType); MadeChange = true; } if (IndexTy != NewIndexType) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. *I = Builder.CreateIntCast(*I, NewIndexType, true); MadeChange = true; } } if (MadeChange) return &GEP; // Check to see if the inputs to the PHI node are getelementptr instructions. 
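  // For example (illustrative), incoming GEPs that differ in a single index
  //   pred1: %g1 = getelementptr [4 x i32], ptr %base, i64 0, i64 %i
  //   pred2: %g2 = getelementptr [4 x i32], ptr %base, i64 0, i64 %j
  //   %p    = phi ptr [ %g1, %pred1 ], [ %g2, %pred2 ]
  // can be rewritten as a PHI over the differing index feeding one cloned GEP:
  //   %idx = phi i64 [ %i, %pred1 ], [ %j, %pred2 ]
  //   %p   = getelementptr [4 x i32], ptr %base, i64 0, i64 %idx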
if (auto *PN = dyn_cast(PtrOp)) { auto *Op1 = dyn_cast(PN->getOperand(0)); if (!Op1) return nullptr; // Don't fold a GEP into itself through a PHI node. This can only happen // through the back-edge of a loop. Folding a GEP into itself means that // the value of the previous iteration needs to be stored in the meantime, // thus requiring an additional register variable to be live, but not // actually achieving anything (the GEP still needs to be executed once per // loop iteration). if (Op1 == &GEP) return nullptr; int DI = -1; for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { auto *Op2 = dyn_cast(*I); if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() || Op1->getSourceElementType() != Op2->getSourceElementType()) return nullptr; // As for Op1 above, don't try to fold a GEP into itself. if (Op2 == &GEP) return nullptr; // Keep track of the type as we walk the GEP. Type *CurTy = nullptr; for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType()) return nullptr; if (Op1->getOperand(J) != Op2->getOperand(J)) { if (DI == -1) { // We have not seen any differences yet in the GEPs feeding the // PHI yet, so we record this one if it is allowed to be a // variable. // The first two arguments can vary for any GEP, the rest have to be // static for struct slots if (J > 1) { assert(CurTy && "No current type?"); if (CurTy->isStructTy()) return nullptr; } DI = J; } else { // The GEP is different by more than one input. While this could be // extended to support GEPs that vary by more than one variable it // doesn't make sense since it greatly increases the complexity and // would result in an R+R+R addressing mode which no backend // directly supports and would need to be broken into several // simpler instructions anyway. return nullptr; } } // Sink down a layer of the type for the next iteration. if (J > 0) { if (J == 1) { CurTy = Op1->getSourceElementType(); } else { CurTy = GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J)); } } } } // If not all GEPs are identical we'll have to create a new PHI node. // Check that the old PHI node has only one use so that it will get // removed. if (DI != -1 && !PN->hasOneUse()) return nullptr; auto *NewGEP = cast(Op1->clone()); if (DI == -1) { // All the GEPs feeding the PHI are identical. Clone one down into our // BB so that it can be merged with the current GEP. } else { // All the GEPs feeding the PHI differ at a single offset. Clone a GEP // into the current block so it can be merged, and create a new PHI to // set that index. PHINode *NewPN; { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.SetInsertPoint(PN); NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(), PN->getNumOperands()); } for (auto &I : PN->operands()) NewPN->addIncoming(cast(I)->getOperand(DI), PN->getIncomingBlock(I)); NewGEP->setOperand(DI, NewPN); } NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt()); return replaceOperand(GEP, 0, NewGEP); } if (auto *Src = dyn_cast(PtrOp)) if (Instruction *I = visitGEPOfGEP(GEP, Src)) return I; // Skip if GEP source element type is scalable. The type alloc size is unknown // at compile-time. 
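  // Single-index canonicalizations handled below, e.g. (illustrative):
  //   gep i8, ptr %x, (sub (ptrtoint %y), (ptrtoint %x)) --> %y
  //     (only when the result is used as an integer or both pointers share
  //      an underlying object, so provenance is preserved)
  //   gep i32, ptr %x, (ashr exact %v, 2) --> gep i8, ptr %x, %v
  // The m_Shr/m_IDiv patterns accept both the signed and unsigned forms of
  // the exact division by sizeof(T), i.e. ashr/lshr and sdiv/udiv.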
if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) { unsigned AS = GEP.getPointerAddressSpace(); if (GEP.getOperand(1)->getType()->getScalarSizeInBits() == DL.getIndexSizeInBits(AS)) { uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue(); if (TyAllocSize == 1) { // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), // but only if the result pointer is only used as if it were an integer, // or both point to the same underlying object (otherwise provenance is // not necessarily retained). Value *X = GEP.getPointerOperand(); Value *Y; if (match(GEP.getOperand(1), m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) && GEPType == Y->getType()) { bool HasSameUnderlyingObject = getUnderlyingObject(X) == getUnderlyingObject(Y); bool Changed = false; GEP.replaceUsesWithIf(Y, [&](Use &U) { bool ShouldReplace = HasSameUnderlyingObject || isa(U.getUser()) || isa(U.getUser()); Changed |= ShouldReplace; return ShouldReplace; }); return Changed ? &GEP : nullptr; } } else { // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V) Value *V; if ((has_single_bit(TyAllocSize) && match(GEP.getOperand(1), - m_Exact(m_AShr(m_Value(V), - m_SpecificInt(countr_zero(TyAllocSize)))))) || + m_Exact(m_Shr(m_Value(V), + m_SpecificInt(countr_zero(TyAllocSize)))))) || match(GEP.getOperand(1), - m_Exact(m_SDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) { + m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) { GetElementPtrInst *NewGEP = GetElementPtrInst::Create( Builder.getInt8Ty(), GEP.getPointerOperand(), V); NewGEP->setIsInBounds(GEP.isInBounds()); return NewGEP; } } } } // We do not handle pointer-vector geps here. if (GEPType->isVectorTy()) return nullptr; if (GEP.getNumIndices() == 1) { // Try to replace ADD + GEP with GEP + GEP. 
Value *Idx1, *Idx2; if (match(GEP.getOperand(1), m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) { // %idx = add i64 %idx1, %idx2 // %gep = getelementptr i32, ptr %ptr, i64 %idx // as: // %newptr = getelementptr i32, ptr %ptr, i64 %idx1 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2 auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(), GEP.getPointerOperand(), Idx1); return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr, Idx2); } ConstantInt *C; if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAdd( m_Value(Idx1), m_ConstantInt(C))))))) { // %add = add nsw i32 %idx1, idx2 // %sidx = sext i32 %add to i64 // %gep = getelementptr i32, ptr %ptr, i64 %sidx // as: // %newptr = getelementptr i32, ptr %ptr, i32 %idx1 // %newgep = getelementptr i32, ptr %newptr, i32 idx2 auto *NewPtr = Builder.CreateGEP( GEP.getResultElementType(), GEP.getPointerOperand(), Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType())); return GetElementPtrInst::Create( GEP.getResultElementType(), NewPtr, Builder.CreateSExt(C, GEP.getOperand(1)->getType())); } } if (!GEP.isInBounds()) { unsigned IdxWidth = DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace()); APInt BasePtrOffset(IdxWidth, 0); Value *UnderlyingPtrOp = PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset); bool CanBeNull, CanBeFreed; uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes( DL, CanBeNull, CanBeFreed); if (!CanBeNull && !CanBeFreed && DerefBytes != 0) { if (GEP.accumulateConstantOffset(DL, BasePtrOffset) && BasePtrOffset.isNonNegative()) { APInt AllocSize(IdxWidth, DerefBytes); if (BasePtrOffset.ule(AllocSize)) { return GetElementPtrInst::CreateInBounds( GEP.getSourceElementType(), PtrOp, Indices, GEP.getName()); } } } } if (Instruction *R = foldSelectGEP(GEP, Builder)) return R; return nullptr; } static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, Instruction *AI) { if (isa(V)) return true; if (auto *LI = dyn_cast(V)) return isa(LI->getPointerOperand()); // Two distinct allocations will never be equal. return isAllocLikeFn(V, &TLI) && V != AI; } /// Given a call CB which uses an address UsedV, return true if we can prove the /// call's only possible effect is storing to V. static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI) { if (!CB.use_empty()) // TODO: add recursion if returned attribute is present return false; if (CB.isTerminator()) // TODO: remove implementation restriction return false; if (!CB.willReturn() || !CB.doesNotThrow()) return false; // If the only possible side effect of the call is writing to the alloca, // and the result isn't used, we can safely remove any reads implied by the // call including those which might read the alloca itself. std::optional Dest = MemoryLocation::getForDest(&CB, TLI); return Dest && Dest->Ptr == UsedV; } static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users, const TargetLibraryInfo &TLI) { SmallVector Worklist; const std::optional Family = getAllocationFamily(AI, &TLI); Worklist.push_back(AI); do { Instruction *PI = Worklist.pop_back_val(); for (User *U : PI->users()) { Instruction *I = cast(U); switch (I->getOpcode()) { default: // Give up the moment we see something we can't handle. 
return false; case Instruction::AddrSpaceCast: case Instruction::BitCast: case Instruction::GetElementPtr: Users.emplace_back(I); Worklist.push_back(I); continue; case Instruction::ICmp: { ICmpInst *ICI = cast(I); // We can fold eq/ne comparisons with null to false/true, respectively. // We also fold comparisons in some conditions provided the alloc has // not escaped (see isNeverEqualToUnescapedAlloc). if (!ICI->isEquality()) return false; unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0; if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI)) return false; // Do not fold compares to aligned_alloc calls, as they may have to // return null in case the required alignment cannot be satisfied, // unless we can prove that both alignment and size are valid. auto AlignmentAndSizeKnownValid = [](CallBase *CB) { // Check if alignment and size of a call to aligned_alloc is valid, // that is alignment is a power-of-2 and the size is a multiple of the // alignment. const APInt *Alignment; const APInt *Size; return match(CB->getArgOperand(0), m_APInt(Alignment)) && match(CB->getArgOperand(1), m_APInt(Size)) && Alignment->isPowerOf2() && Size->urem(*Alignment).isZero(); }; auto *CB = dyn_cast(AI); LibFunc TheLibFunc; if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) && TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc && !AlignmentAndSizeKnownValid(CB)) return false; Users.emplace_back(I); continue; } case Instruction::Call: // Ignore no-op and store intrinsics. if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: return false; case Intrinsic::memmove: case Intrinsic::memcpy: case Intrinsic::memset: { MemIntrinsic *MI = cast(II); if (MI->isVolatile() || MI->getRawDest() != PI) return false; [[fallthrough]]; } case Intrinsic::assume: case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::objectsize: Users.emplace_back(I); continue; case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: Users.emplace_back(I); Worklist.push_back(I); continue; } } if (isRemovableWrite(*cast(I), PI, TLI)) { Users.emplace_back(I); continue; } if (getFreedOperand(cast(I), &TLI) == PI && getAllocationFamily(I, &TLI) == Family) { assert(Family); Users.emplace_back(I); continue; } if (getReallocatedOperand(cast(I)) == PI && getAllocationFamily(I, &TLI) == Family) { assert(Family); Users.emplace_back(I); Worklist.push_back(I); continue; } return false; case Instruction::Store: { StoreInst *SI = cast(I); if (SI->isVolatile() || SI->getPointerOperand() != PI) return false; Users.emplace_back(I); continue; } } llvm_unreachable("missing a return?"); } } while (!Worklist.empty()); return true; } Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { assert(isa(MI) || isRemovableAlloc(&cast(MI), &TLI)); // If we have a malloc call which is only used in any amount of comparisons to // null and free calls, delete the calls and replace the comparisons with true // or false as appropriate. // This is based on the principle that we can substitute our own allocation // function (which will never return null) rather than knowledge of the // specific function being called. In some sense this can change the permitted // outputs of a program (when we convert a malloc to an alloca, the fact that // the allocation is now on the stack is potentially visible, for example), // but we believe in a permissible manner. 
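  // For example (illustrative), the following collapses entirely:
  //   %p = call ptr @malloc(i64 16)
  //   %c = icmp eq ptr %p, null
  //   ...
  //   call void @free(ptr %p)
  // The allocation and the free are erased and %c is folded to false.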
SmallVector Users; // If we are removing an alloca with a dbg.declare, insert dbg.value calls // before each store. SmallVector DVIs; SmallVector DPVs; std::unique_ptr DIB; if (isa(MI)) { findDbgUsers(DVIs, &MI, &DPVs); DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false)); } if (isAllocSiteRemovable(&MI, Users, TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { // Lowering all @llvm.objectsize calls first because they may // use a bitcast/GEP of the alloca we are removing. if (!Users[i]) continue; Instruction *I = cast(&*Users[i]); if (IntrinsicInst *II = dyn_cast(I)) { if (II->getIntrinsicID() == Intrinsic::objectsize) { SmallVector InsertedInstructions; Value *Result = lowerObjectSizeCall( II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions); for (Instruction *Inserted : InsertedInstructions) Worklist.add(Inserted); replaceInstUsesWith(*I, Result); eraseInstFromFunction(*I); Users[i] = nullptr; // Skip examining in the next loop. } } } for (unsigned i = 0, e = Users.size(); i != e; ++i) { if (!Users[i]) continue; Instruction *I = cast(&*Users[i]); if (ICmpInst *C = dyn_cast(I)) { replaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()), C->isFalseWhenEqual())); } else if (auto *SI = dyn_cast(I)) { for (auto *DVI : DVIs) if (DVI->isAddressOfVariable()) ConvertDebugDeclareToDebugValue(DVI, SI, *DIB); for (auto *DPV : DPVs) if (DPV->isAddressOfVariable()) ConvertDebugDeclareToDebugValue(DPV, SI, *DIB); } else { // Casts, GEP, or anything else: we're about to delete this instruction, // so it can not have any valid uses. replaceInstUsesWith(*I, PoisonValue::get(I->getType())); } eraseInstFromFunction(*I); } if (InvokeInst *II = dyn_cast(&MI)) { // Replace invoke with a NOP intrinsic to maintain the original CFG Module *M = II->getModule(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), std::nullopt, "", II->getParent()); } // Remove debug intrinsics which describe the value contained within the // alloca. In addition to removing dbg.{declare,addr} which simply point to // the alloca, remove dbg.value(, ..., DW_OP_deref)'s as well, e.g.: // // ``` // define void @foo(i32 %0) { // %a = alloca i32 ; Deleted. // store i32 %0, i32* %a // dbg.value(i32 %0, "arg0") ; Not deleted. // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted. // call void @trivially_inlinable_no_op(i32* %a) // ret void // } // ``` // // This may not be required if we stop describing the contents of allocas // using dbg.value(, ..., DW_OP_deref), but we currently do this in // the LowerDbgDeclare utility. // // If there is a dead store to `%a` in @trivially_inlinable_no_op, the // "arg0" dbg.value may be stale after the call. However, failing to remove // the DW_OP_deref dbg.value causes large gaps in location coverage. // // FIXME: the Assignment Tracking project has now likely made this // redundant (and it's sometimes harmful). for (auto *DVI : DVIs) if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref()) DVI->eraseFromParent(); for (auto *DPV : DPVs) if (DPV->isAddressOfVariable() || DPV->getExpression()->startsWithDeref()) DPV->eraseFromParent(); return eraseInstFromFunction(MI); } return nullptr; } /// Move the call to free before a NULL test. /// /// Check if this free is accessed after its argument has been test /// against NULL (property 0). /// If yes, it is legal to move this call in its predecessor block. 
/// /// The move is performed only if the block containing the call to free /// will be removed, i.e.: /// 1. it has only one predecessor P, and P has two successors /// 2. it contains the call, noops, and an unconditional branch /// 3. its successor is the same as its predecessor's successor /// /// The profitability is out-of concern here and this function should /// be called only if the caller knows this transformation would be /// profitable (e.g., for code size). static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL) { Value *Op = FI.getArgOperand(0); BasicBlock *FreeInstrBB = FI.getParent(); BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); // Validate part of constraint #1: Only one predecessor // FIXME: We can extend the number of predecessor, but in that case, we // would duplicate the call to free in each predecessor and it may // not be profitable even for code size. if (!PredBB) return nullptr; // Validate constraint #2: Does this block contains only the call to // free, noops, and an unconditional branch? BasicBlock *SuccBB; Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator(); if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB))) return nullptr; // If there are only 2 instructions in the block, at this point, // this is the call to free and unconditional. // If there are more than 2 instructions, check that they are noops // i.e., they won't hurt the performance of the generated code. if (FreeInstrBB->size() != 2) { for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) { if (&Inst == &FI || &Inst == FreeInstrBBTerminator) continue; auto *Cast = dyn_cast(&Inst); if (!Cast || !Cast->isNoopCast(DL)) return nullptr; } } // Validate the rest of constraint #1 by matching on the pred branch. Instruction *TI = PredBB->getTerminator(); BasicBlock *TrueBB, *FalseBB; ICmpInst::Predicate Pred; if (!match(TI, m_Br(m_ICmp(Pred, m_CombineOr(m_Specific(Op), m_Specific(Op->stripPointerCasts())), m_Zero()), TrueBB, FalseBB))) return nullptr; if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) return nullptr; // Validate constraint #3: Ensure the null case just falls through. if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) return nullptr; assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && "Broken CFG: missing edge from predecessor to successor"); // At this point, we know that everything in FreeInstrBB can be moved // before TI. for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) { if (&Instr == FreeInstrBBTerminator) break; Instr.moveBeforePreserving(TI); } assert(FreeInstrBB->size() == 1 && "Only the branch instruction should remain"); // Now that we've moved the call to free before the NULL check, we have to // remove any attributes on its parameter that imply it's non-null, because // those attributes might have only been valid because of the NULL check, and // we can get miscompiles if we keep them. This is conservative if non-null is // also implied by something other than the NULL check, but it's guaranteed to // be correct, and the conservativeness won't matter in practice, since the // attributes are irrelevant for the call to free itself and the pointer // shouldn't be used after the call. 
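  // E.g. (illustrative): a parameter that was
  //   nonnull dereferenceable(8)
  // under the null check becomes
  //   dereferenceable_or_null(8)
  // once the call is hoisted above the check.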
  AttributeList Attrs = FI.getAttributes();
  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
  Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
  if (Dereferenceable.isValid()) {
    uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
    Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
                                       Attribute::Dereferenceable);
    Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
  }
  FI.setAttributes(Attrs);

  return &FI;
}

Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
  // free undef -> unreachable.
  if (isa<UndefValue>(Op)) {
    // Leave a marker since we can't modify the CFG here.
    CreateNonTerminatorUnreachable(&FI);
    return eraseInstFromFunction(FI);
  }

  // If we have 'free null' delete the instruction. This can happen in stl code
  // when lots of inlining happens.
  if (isa<ConstantPointerNull>(Op))
    return eraseInstFromFunction(FI);

  // If we had free(realloc(...)) with no intervening uses, then eliminate the
  // realloc() entirely.
  CallInst *CI = dyn_cast<CallInst>(Op);
  if (CI && CI->hasOneUse())
    if (Value *ReallocatedOp = getReallocatedOperand(CI))
      return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));

  // If we optimize for code size, try to move the call to free before the null
  // test so that simplify cfg can remove the empty block and dead code
  // eliminate the branch. I.e., helps to turn something like:
  //   if (foo) free(foo);
  // into
  //   free(foo);
  //
  // Note that we can only do this for 'free' and not for any flavor of
  // 'operator delete'; there is no 'operator delete' symbol for which we are
  // permitted to invent a call, even if we're passing in a null pointer.
  if (MinimizeSize) {
    LibFunc Func;
    if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
      if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
        return I;
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
  // Nothing for now.
  return nullptr;
}

// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
  // Try to remove the previous instruction if it must lead to unreachable.
  // This includes instructions like stores and "llvm.assume" that may not get
  // removed by simple dead code elimination.
  bool Changed = false;
  while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
    // While we theoretically can erase EH, that would result in a block that
    // used to start with an EH no longer starting with EH, which is invalid.
    // To make it valid, we'd need to fixup predecessors to no longer refer to
    // this block, but that changes CFG, which is not allowed in InstCombine.
    if (Prev->isEHPad())
      break; // Can not drop any more instructions. We're done here.

    if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // A value may still have uses before we process it here (for example, in
    // another unreachable block), so convert those to poison.
replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType())); eraseInstFromFunction(*Prev); Changed = true; } return Changed; } Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { removeInstructionsBeforeUnreachable(I); return nullptr; } Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { assert(BI.isUnconditional() && "Only for unconditional branches."); // If this store is the second-to-last instruction in the basic block // (excluding debug info and bitcasts of pointers) and if the block ends with // an unconditional branch, try to move the store to the successor block. auto GetLastSinkableStore = [](BasicBlock::iterator BBI) { auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) { return BBI->isDebugOrPseudoInst() || (isa(BBI) && BBI->getType()->isPointerTy()); }; BasicBlock::iterator FirstInstr = BBI->getParent()->begin(); do { if (BBI != FirstInstr) --BBI; } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI)); return dyn_cast(BBI); }; if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI))) if (mergeStoreIntoSuccessor(*SI)) return &BI; return nullptr; } void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl &Worklist) { if (!DeadEdges.insert({From, To}).second) return; // Replace phi node operands in successor with poison. for (PHINode &PN : To->phis()) for (Use &U : PN.incoming_values()) if (PN.getIncomingBlock(U) == From && !isa(U)) { replaceUse(U, PoisonValue::get(PN.getType())); addToWorklist(&PN); MadeIRChange = true; } Worklist.push_back(To); } // Under the assumption that I is unreachable, remove it and following // instructions. Changes are reported directly to MadeIRChange. void InstCombinerImpl::handleUnreachableFrom( Instruction *I, SmallVectorImpl &Worklist) { BasicBlock *BB = I->getParent(); for (Instruction &Inst : make_early_inc_range( make_range(std::next(BB->getTerminator()->getReverseIterator()), std::next(I->getReverseIterator())))) { if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) { replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType())); MadeIRChange = true; } if (Inst.isEHPad() || Inst.getType()->isTokenTy()) continue; // RemoveDIs: erase debug-info on this instruction manually. Inst.dropDbgValues(); eraseInstFromFunction(Inst); MadeIRChange = true; } // RemoveDIs: to match behaviour in dbg.value mode, drop debug-info on // terminator too. BB->getTerminator()->dropDbgValues(); // Handle potentially dead successors. for (BasicBlock *Succ : successors(BB)) addDeadEdge(BB, Succ, Worklist); } void InstCombinerImpl::handlePotentiallyDeadBlocks( SmallVectorImpl &Worklist) { while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); if (!all_of(predecessors(BB), [&](BasicBlock *Pred) { return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred); })) continue; handleUnreachableFrom(&BB->front(), Worklist); } } void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc) { SmallVector Worklist; for (BasicBlock *Succ : successors(BB)) { // The live successor isn't dead. if (Succ == LiveSucc) continue; addDeadEdge(BB, Succ, Worklist); } handlePotentiallyDeadBlocks(Worklist); } Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) { if (BI.isUnconditional()) return visitUnconditionalBranchInst(BI); // Change br (not X), label True, label False to: br X, label False, True Value *Cond = BI.getCondition(); Value *X; if (match(Cond, m_Not(m_Value(X))) && !isa(X)) { // Swap Destinations and condition... 
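    // E.g. (illustrative):
    //   %not = xor i1 %x, true
    //   br i1 %not, label %T, label %F
    // becomes
    //   br i1 %x, label %F, label %T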
BI.swapSuccessors(); return replaceOperand(BI, 0, X); } // Canonicalize logical-and-with-invert as logical-or-with-invert. // This is done by inverting the condition and swapping successors: // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T Value *Y; if (isa(Cond) && match(Cond, m_OneUse(m_LogicalAnd(m_Value(X), m_OneUse(m_Not(m_Value(Y))))))) { Value *NotX = Builder.CreateNot(X, "not." + X->getName()); Value *Or = Builder.CreateLogicalOr(NotX, Y); BI.swapSuccessors(); return replaceOperand(BI, 0, Or); } // If the condition is irrelevant, remove the use so that other // transforms on the condition become more effective. if (!isa(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1)) return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType())); // Canonicalize, for example, fcmp_one -> fcmp_oeq. CmpInst::Predicate Pred; if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) && !isCanonicalPredicate(Pred)) { // Swap destinations and condition. auto *Cmp = cast(Cond); Cmp->setPredicate(CmpInst::getInversePredicate(Pred)); BI.swapSuccessors(); Worklist.push(Cmp); return &BI; } if (isa(Cond)) { handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr); return nullptr; } if (auto *CI = dyn_cast(Cond)) { handlePotentiallyDeadSuccessors(BI.getParent(), BI.getSuccessor(!CI->getZExtValue())); return nullptr; } DC.registerBranch(&BI); return nullptr; } Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) { Value *Cond = SI.getCondition(); Value *Op0; ConstantInt *AddRHS; if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) { // Change 'switch (X+4) case 1:' into 'switch (X) case -3'. for (auto Case : SI.cases()) { Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS); assert(isa(NewCase) && "Result of expression should be constant"); Case.setValue(cast(NewCase)); } return replaceOperand(SI, 0, Op0); } ConstantInt *SubLHS; if (match(Cond, m_Sub(m_ConstantInt(SubLHS), m_Value(Op0)))) { // Change 'switch (1-X) case 1:' into 'switch (X) case 0'. for (auto Case : SI.cases()) { Constant *NewCase = ConstantExpr::getSub(SubLHS, Case.getCaseValue()); assert(isa(NewCase) && "Result of expression should be constant"); Case.setValue(cast(NewCase)); } return replaceOperand(SI, 0, Op0); } uint64_t ShiftAmt; if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) && ShiftAmt < Op0->getType()->getScalarSizeInBits() && all_of(SI.cases(), [&](const auto &Case) { return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt; })) { // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'. OverflowingBinaryOperator *Shl = cast(Cond); if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() || Shl->hasOneUse()) { Value *NewCond = Op0; if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) { // If the shift may wrap, we need to mask off the shifted bits. unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); NewCond = Builder.CreateAnd( Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt)); } for (auto Case : SI.cases()) { const APInt &CaseVal = Case.getCaseValue()->getValue(); APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt) : CaseVal.lshr(ShiftAmt); Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase)); } return replaceOperand(SI, 0, NewCond); } } // Fold switch(zext/sext(X)) into switch(X) if possible. 
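  // E.g. (illustrative):
  //   %w = zext i8 %x to i32
  //   switch i32 %w, label %def [ i32 1,   label %a
  //                               i32 200, label %b ]
  // becomes a switch on the original i8 %x with cases 1 and 200, provided
  // every case value fits in the narrow type (unsigned fit for zext, signed
  // fit for sext).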
  if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
    bool IsZExt = isa<ZExtInst>(Cond);
    Type *SrcTy = Op0->getType();
    unsigned NewWidth = SrcTy->getScalarSizeInBits();

    if (all_of(SI.cases(), [&](const auto &Case) {
          const APInt &CaseVal = Case.getCaseValue()->getValue();
          return IsZExt ? CaseVal.isIntN(NewWidth)
                        : CaseVal.isSignedIntN(NewWidth);
        })) {
      for (auto &Case : SI.cases()) {
        APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
        Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
      }
      return replaceOperand(SI, 0, Op0);
    }
  }

  KnownBits Known = computeKnownBits(Cond, 0, &SI);
  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();

  // Compute the number of leading bits we can ignore.
  // TODO: A better way to determine this would use ComputeNumSignBits().
  for (const auto &C : SI.cases()) {
    LeadingKnownZeros =
        std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
    LeadingKnownOnes =
        std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
  }

  unsigned NewWidth = Known.getBitWidth() -
                      std::max(LeadingKnownZeros, LeadingKnownOnes);

  // Shrink the condition operand if the new type is smaller than the old type.
  // But do not shrink to a non-standard type, because backend can't generate
  // good code for that yet.
  // TODO: We can make it aggressive again after fixing PR39569.
  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
      shouldChangeType(Known.getBitWidth(), NewWidth)) {
    IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
    Builder.SetInsertPoint(&SI);
    Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");

    for (auto Case : SI.cases()) {
      APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
      Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
    }
    return replaceOperand(SI, 0, NewCond);
  }

  if (isa<UndefValue>(Cond)) {
    handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
    return nullptr;
  }
  if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
    handlePotentiallyDeadSuccessors(SI.getParent(),
                                    SI.findCaseValue(CI)->getCaseSuccessor());
    return nullptr;
  }

  return nullptr;
}

Instruction *
InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
  auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
  if (!WO)
    return nullptr;

  Intrinsic::ID OvID = WO->getIntrinsicID();
  const APInt *C = nullptr;
  if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
    if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
                                 OvID == Intrinsic::umul_with_overflow)) {
      // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
      if (C->isAllOnes())
        return BinaryOperator::CreateNeg(WO->getLHS());
      // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
      if (C->isPowerOf2()) {
        return BinaryOperator::CreateShl(
            WO->getLHS(),
            ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
      }
    }
  }

  // We're extracting from an overflow intrinsic. See if we're the only user.
  // That allows us to simplify multiple result intrinsics to simpler things
  // that just get one value.
  if (!WO->hasOneUse())
    return nullptr;

  // Check if we're grabbing only the result of a 'with overflow' intrinsic
  // and replace it with a traditional binary instruction.
  if (*EV.idx_begin() == 0) {
    Instruction::BinaryOps BinOp = WO->getBinaryOp();
    Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
    // Replace the old instruction's uses with poison.
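    // The whole pattern, e.g. (illustrative):
    //   %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
    //   %v = extractvalue { i32, i1 } %s, 0
    // becomes a plain binary operation when %s has no other users:
    //   %v = add i32 %a, %b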
replaceInstUsesWith(*WO, PoisonValue::get(WO->getType())); eraseInstFromFunction(*WO); return BinaryOperator::Create(BinOp, LHS, RHS); } assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst"); // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS. if (OvID == Intrinsic::usub_with_overflow) return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS()); // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but // +1 is not possible because we assume signed values. if (OvID == Intrinsic::smul_with_overflow && WO->getLHS()->getType()->isIntOrIntVectorTy(1)) return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS()); // If only the overflow result is used, and the right hand side is a // constant (or constant splat), we can remove the intrinsic by directly // checking for overflow. if (C) { // Compute the no-wrap range for LHS given RHS=C, then construct an // equivalent icmp, potentially using an offset. ConstantRange NWR = ConstantRange::makeExactNoWrapRegion( WO->getBinaryOp(), *C, WO->getNoWrapKind()); CmpInst::Predicate Pred; APInt NewRHSC, Offset; NWR.getEquivalentICmp(Pred, NewRHSC, Offset); auto *OpTy = WO->getRHS()->getType(); auto *NewLHS = WO->getLHS(); if (Offset != 0) NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset)); return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS, ConstantInt::get(OpTy, NewRHSC)); } return nullptr; } Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) { Value *Agg = EV.getAggregateOperand(); if (!EV.hasIndices()) return replaceInstUsesWith(EV, Agg); if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(), SQ.getWithInstruction(&EV))) return replaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast(Agg)) { // We're extracting from an insertvalue instruction, compare the indices const unsigned *exti, *exte, *insi, *inse; for (exti = EV.idx_begin(), insi = IV->idx_begin(), exte = EV.idx_end(), inse = IV->idx_end(); exti != exte && insi != inse; ++exti, ++insi) { if (*insi != *exti) // The insert and extract both reference distinctly different elements. // This means the extract is not influenced by the insert, and we can // replace the aggregate operand of the extract with the aggregate // operand of the insert. i.e., replace // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 // %E = extractvalue { i32, { i32 } } %I, 0 // with // %E = extractvalue { i32, { i32 } } %A, 0 return ExtractValueInst::Create(IV->getAggregateOperand(), EV.getIndices()); } if (exti == exte && insi == inse) // Both iterators are at the end: Index lists are identical. Replace // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 // %C = extractvalue { i32, { i32 } } %B, 1, 0 // with "i32 42" return replaceInstUsesWith(EV, IV->getInsertedValueOperand()); if (exti == exte) { // The extract list is a prefix of the insert list. i.e. replace // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 // %E = extractvalue { i32, { i32 } } %I, 1 // with // %X = extractvalue { i32, { i32 } } %A, 1 // %E = insertvalue { i32 } %X, i32 42, 0 // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). 
Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(), EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), ArrayRef(insi, inse)); } if (insi == inse) // The insert list is a prefix of the extract list // We can simply remove the common indices from the extract and make it // operate on the inserted value instead of the insertvalue result. // i.e., replace // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 // %E = extractvalue { i32, { i32 } } %I, 1, 0 // with // %E extractvalue { i32 } { i32 42 }, 0 return ExtractValueInst::Create(IV->getInsertedValueOperand(), ArrayRef(exti, exte)); } if (Instruction *R = foldExtractOfOverflowIntrinsic(EV)) return R; if (LoadInst *L = dyn_cast(Agg)) { // Bail out if the aggregate contains scalable vector type if (auto *STy = dyn_cast(Agg->getType()); STy && STy->containsScalableVectorType()) return nullptr; // If the (non-volatile) load only has one use, we can rewrite this to a // load from a GEP. This reduces the size of the load. If a load is used // only by extractvalue instructions then this either must have been // optimized before, or it is a struct with padding, in which case we // don't want to do the transformation as it loses padding knowledge. if (L->isSimple() && L->hasOneUse()) { // extractvalue has integer indices, getelementptr has Value*s. Convert. SmallVector Indices; // Prefix an i32 0 since we need the first element. Indices.push_back(Builder.getInt32(0)); for (unsigned Idx : EV.indices()) Indices.push_back(Builder.getInt32(Idx)); // We need to insert these at the location of the old load, not at that of // the extractvalue. Builder.SetInsertPoint(L); Value *GEP = Builder.CreateInBoundsGEP(L->getType(), L->getPointerOperand(), Indices); Instruction *NL = Builder.CreateLoad(EV.getType(), GEP); // Whatever aliasing information we had for the orignal load must also // hold for the smaller load, so propagate the annotations. NL->setAAMetadata(L->getAAMetadata()); // Returning the load directly will cause the main loop to insert it in // the wrong spot, so use replaceInstUsesWith(). return replaceInstUsesWith(EV, NL); } } if (auto *PN = dyn_cast(Agg)) if (Instruction *Res = foldOpIntoPhi(EV, PN)) return Res; // We could simplify extracts from other values. Note that nested extracts may // already be simplified implicitly by the above: extract (extract (insert) ) // will be translated into extract ( insert ( extract ) ) first and then just // the value inserted, if appropriate. Similarly for extracts from single-use // loads: extract (extract (load)) will be translated to extract (load (gep)) // and if again single-use then via load (gep (gep)) to load (gep). // However, double extracts from e.g. function arguments or return values // aren't handled yet. return nullptr; } /// Return 'true' if the given typeinfo will match anything. static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { switch (Personality) { case EHPersonality::GNU_C: case EHPersonality::GNU_C_SjLj: case EHPersonality::Rust: // The GCC C EH and Rust personality only exists to support cleanups, so // it's not clear what the semantics of catch clauses are. return false; case EHPersonality::Unknown: return false; case EHPersonality::GNU_Ada: // While __gnat_all_others_value will match any Ada exception, it doesn't // match foreign exceptions (or didn't, before gcc-4.7). 
return false; case EHPersonality::GNU_CXX: case EHPersonality::GNU_CXX_SjLj: case EHPersonality::GNU_ObjC: case EHPersonality::MSVC_X86SEH: case EHPersonality::MSVC_TableSEH: case EHPersonality::MSVC_CXX: case EHPersonality::CoreCLR: case EHPersonality::Wasm_CXX: case EHPersonality::XL_CXX: return TypeInfo->isNullValue(); } llvm_unreachable("invalid enum"); } static bool shorter_filter(const Value *LHS, const Value *RHS) { return cast(LHS->getType())->getNumElements() < cast(RHS->getType())->getNumElements(); } Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) { // The logic here should be correct for any real-world personality function. // However if that turns out not to be true, the offending logic can always // be conditioned on the personality function, like the catch-all logic is. EHPersonality Personality = classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn()); // Simplify the list of clauses, eg by removing repeated catch clauses // (these are often created by inlining). bool MakeNewInstruction = false; // If true, recreate using the following: SmallVector NewClauses; // - Clauses for the new instruction; bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. SmallPtrSet AlreadyCaught; // Typeinfos known caught already. for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { bool isLastClause = i + 1 == e; if (LI.isCatch(i)) { // A catch clause. Constant *CatchClause = LI.getClause(i); Constant *TypeInfo = CatchClause->stripPointerCasts(); // If we already saw this clause, there is no point in having a second // copy of it. if (AlreadyCaught.insert(TypeInfo).second) { // This catch clause was not already seen. NewClauses.push_back(CatchClause); } else { // Repeated catch clause - drop the redundant copy. MakeNewInstruction = true; } // If this is a catch-all then there is no point in keeping any following // clauses or marking the landingpad as having a cleanup. if (isCatchAll(Personality, TypeInfo)) { if (!isLastClause) MakeNewInstruction = true; CleanupFlag = false; break; } } else { // A filter clause. If any of the filter elements were already caught // then they can be dropped from the filter. It is tempting to try to // exploit the filter further by saying that any typeinfo that does not // occur in the filter can't be caught later (and thus can be dropped). // However this would be wrong, since typeinfos can match without being // equal (for example if one represents a C++ class, and the other some // class derived from it). assert(LI.isFilter(i) && "Unsupported landingpad clause!"); Constant *FilterClause = LI.getClause(i); ArrayType *FilterType = cast(FilterClause->getType()); unsigned NumTypeInfos = FilterType->getNumElements(); // An empty filter catches everything, so there is no point in keeping any // following clauses or marking the landingpad as having a cleanup. By // dealing with this case here the following code is made a bit simpler. if (!NumTypeInfos) { NewClauses.push_back(FilterClause); if (!isLastClause) MakeNewInstruction = true; CleanupFlag = false; break; } bool MakeNewFilter = false; // If true, make a new filter. SmallVector NewFilterElts; // New elements. if (isa(FilterClause)) { // Not an empty filter - it contains at least one null typeinfo. assert(NumTypeInfos > 0 && "Should have handled empty filter already!"); Constant *TypeInfo = Constant::getNullValue(FilterType->getElementType()); // If this typeinfo is a catch-all then the filter can never match. 
if (isCatchAll(Personality, TypeInfo)) { // Throw the filter away. MakeNewInstruction = true; continue; } // There is no point in having multiple copies of this typeinfo, so // discard all but the first copy if there is more than one. NewFilterElts.push_back(TypeInfo); if (NumTypeInfos > 1) MakeNewFilter = true; } else { ConstantArray *Filter = cast(FilterClause); SmallPtrSet SeenInFilter; // For uniquing the elements. NewFilterElts.reserve(NumTypeInfos); // Remove any filter elements that were already caught or that already // occurred in the filter. While there, see if any of the elements are // catch-alls. If so, the filter can be discarded. bool SawCatchAll = false; for (unsigned j = 0; j != NumTypeInfos; ++j) { Constant *Elt = Filter->getOperand(j); Constant *TypeInfo = Elt->stripPointerCasts(); if (isCatchAll(Personality, TypeInfo)) { // This element is a catch-all. Bail out, noting this fact. SawCatchAll = true; break; } // Even if we've seen a type in a catch clause, we don't want to // remove it from the filter. An unexpected type handler may be // set up for a call site which throws an exception of the same // type caught. In order for the exception thrown by the unexpected // handler to propagate correctly, the filter must be correctly // described for the call site. // // Example: // // void unexpected() { throw 1;} // void foo() throw (int) { // std::set_unexpected(unexpected); // try { // throw 2.0; // } catch (int i) {} // } // There is no point in having multiple copies of the same typeinfo in // a filter, so only add it if we didn't already. if (SeenInFilter.insert(TypeInfo).second) NewFilterElts.push_back(cast(Elt)); } // A filter containing a catch-all cannot match anything by definition. if (SawCatchAll) { // Throw the filter away. MakeNewInstruction = true; continue; } // If we dropped something from the filter, make a new one. if (NewFilterElts.size() < NumTypeInfos) MakeNewFilter = true; } if (MakeNewFilter) { FilterType = ArrayType::get(FilterType->getElementType(), NewFilterElts.size()); FilterClause = ConstantArray::get(FilterType, NewFilterElts); MakeNewInstruction = true; } NewClauses.push_back(FilterClause); // If the new filter is empty then it will catch everything so there is // no point in keeping any following clauses or marking the landingpad // as having a cleanup. The case of the original filter being empty was // already handled above. if (MakeNewFilter && !NewFilterElts.size()) { assert(MakeNewInstruction && "New filter but not a new instruction!"); CleanupFlag = false; break; } } } // If several filters occur in a row then reorder them so that the shortest // filters come first (those with the smallest number of elements). This is // advantageous because shorter filters are more likely to match, speeding up // unwinding, but mostly because it increases the effectiveness of the other // filter optimizations below. for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { unsigned j; // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. for (j = i; j != e; ++j) if (!isa(NewClauses[j]->getType())) break; // Check whether the filters are already sorted by length. We need to know // if sorting them is actually going to do anything so that we only make a // new landingpad instruction if it does. for (unsigned k = i; k + 1 < j; ++k) if (shorter_filter(NewClauses[k+1], NewClauses[k])) { // Not sorted, so sort the filters now. Doing an unstable sort would be // correct too but reordering filters pointlessly might confuse users. 
std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j, shorter_filter); MakeNewInstruction = true; break; } // Look for the next batch of filters. i = j + 1; } // If typeinfos matched if and only if equal, then the elements of a filter L // that occurs later than a filter F could be replaced by the intersection of // the elements of F and L. In reality two typeinfos can match without being // equal (for example if one represents a C++ class, and the other some class // derived from it) so it would be wrong to perform this transform in general. // However the transform is correct and useful if F is a subset of L. In that // case L can be replaced by F, and thus removed altogether since repeating a // filter is pointless. So here we look at all pairs of filters F and L where // L follows F in the list of clauses, and remove L if every element of F is // an element of L. This can occur when inlining C++ functions with exception // specifications. for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { // Examine each filter in turn. Value *Filter = NewClauses[i]; ArrayType *FTy = dyn_cast(Filter->getType()); if (!FTy) // Not a filter - skip it. continue; unsigned FElts = FTy->getNumElements(); // Examine each filter following this one. Doing this backwards means that // we don't have to worry about filters disappearing under us when removed. for (unsigned j = NewClauses.size() - 1; j != i; --j) { Value *LFilter = NewClauses[j]; ArrayType *LTy = dyn_cast(LFilter->getType()); if (!LTy) // Not a filter - skip it. continue; // If Filter is a subset of LFilter, i.e. every element of Filter is also // an element of LFilter, then discard LFilter. SmallVectorImpl::iterator J = NewClauses.begin() + j; // If Filter is empty then it is a subset of LFilter. if (!FElts) { // Discard LFilter. NewClauses.erase(J); MakeNewInstruction = true; // Move on to the next filter. continue; } unsigned LElts = LTy->getNumElements(); // If Filter is longer than LFilter then it cannot be a subset of it. if (FElts > LElts) // Move on to the next filter. continue; // At this point we know that LFilter has at least one element. if (isa(LFilter)) { // LFilter only contains zeros. // Filter is a subset of LFilter iff Filter contains only zeros (as we // already know that Filter is not longer than LFilter). if (isa(Filter)) { assert(FElts <= LElts && "Should have handled this case earlier!"); // Discard LFilter. NewClauses.erase(J); MakeNewInstruction = true; } // Move on to the next filter. continue; } ConstantArray *LArray = cast(LFilter); if (isa(Filter)) { // Filter only contains zeros. // Since Filter is non-empty and contains only zeros, it is a subset of // LFilter iff LFilter contains a zero. assert(FElts > 0 && "Should have eliminated the empty filter earlier!"); for (unsigned l = 0; l != LElts; ++l) if (LArray->getOperand(l)->isNullValue()) { // LFilter contains a zero - discard it. NewClauses.erase(J); MakeNewInstruction = true; break; } // Move on to the next filter. continue; } // At this point we know that both filters are ConstantArrays. Loop over // operands to see whether every element of Filter is also an element of // LFilter. Since filters tend to be short this is probably faster than // using a method that scales nicely. 
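      // E.g. (illustrative): given the clauses
      //   filter [1 x ptr] [ptr @_ZTIi]
      //   filter [2 x ptr] [ptr @_ZTIi, ptr @_ZTId]
      // every element of the first filter occurs in the second, so the
      // second (later) filter is redundant and is erased.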
ConstantArray *FArray = cast(Filter); bool AllFound = true; for (unsigned f = 0; f != FElts; ++f) { Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts(); AllFound = false; for (unsigned l = 0; l != LElts; ++l) { Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts(); if (LTypeInfo == FTypeInfo) { AllFound = true; break; } } if (!AllFound) break; } if (AllFound) { // Discard LFilter. NewClauses.erase(J); MakeNewInstruction = true; } // Move on to the next filter. } } // If we changed any of the clauses, replace the old landingpad instruction // with a new one. if (MakeNewInstruction) { LandingPadInst *NLI = LandingPadInst::Create(LI.getType(), NewClauses.size()); for (unsigned i = 0, e = NewClauses.size(); i != e; ++i) NLI->addClause(NewClauses[i]); // A landing pad with no clauses must have the cleanup flag set. It is // theoretically possible, though highly unlikely, that we eliminated all // clauses. If so, force the cleanup flag to true. if (NewClauses.empty()) CleanupFlag = true; NLI->setCleanup(CleanupFlag); return NLI; } // Even if none of the clauses changed, we may nonetheless have understood // that the cleanup flag is pointless. Clear it if so. if (LI.isCleanup() != CleanupFlag) { assert(!CleanupFlag && "Adding a cleanup, not removing one?!"); LI.setCleanup(CleanupFlag); return &LI; } return nullptr; } Value * InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) { // Try to push freeze through instructions that propagate but don't produce // poison as far as possible. If an operand of freeze follows three // conditions 1) one-use, 2) does not produce poison, and 3) has all but one // guaranteed-non-poison operands then push the freeze through to the one // operand that is not guaranteed non-poison. The actual transform is as // follows. // Op1 = ... ; Op1 can be posion // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only have // ; single guaranteed-non-poison operands // ... = Freeze(Op0) // => // Op1 = ... // Op1.fr = Freeze(Op1) // ... = Inst(Op1.fr, NonPoisonOps...) auto *OrigOp = OrigFI.getOperand(0); auto *OrigOpInst = dyn_cast(OrigOp); // While we could change the other users of OrigOp to use freeze(OrigOp), that // potentially reduces their optimization potential, so let's only do this iff // the OrigOp is only used by the freeze. if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa(OrigOp)) return nullptr; // We can't push the freeze through an instruction which can itself create // poison. If the only source of new poison is flags, we can simply // strip them (since we know the only use is the freeze and nothing can // benefit from them.) if (canCreateUndefOrPoison(cast(OrigOp), /*ConsiderFlagsAndMetadata*/ false)) return nullptr; // If operand is guaranteed not to be poison, there is no need to add freeze // to the operand. So we first find the operand that is not guaranteed to be // poison. Use *MaybePoisonOperand = nullptr; for (Use &U : OrigOpInst->operands()) { if (isa(U.get()) || isGuaranteedNotToBeUndefOrPoison(U.get())) continue; if (!MaybePoisonOperand) MaybePoisonOperand = &U; else return nullptr; } OrigOpInst->dropPoisonGeneratingFlagsAndMetadata(); // If all operands are guaranteed to be non-poison, we can drop freeze. 
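  // Otherwise the rewrite looks like, e.g. (illustrative):
  //   %a = add nsw i32 %x, 1        ; %x may be poison; nsw itself can
  //   %f = freeze i32 %a            ; introduce poison
  // becomes
  //   %x.fr = freeze i32 %x
  //   %a    = add i32 %x.fr, 1      ; flags dropped, freeze pushed up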
if (!MaybePoisonOperand) return OrigOp; Builder.SetInsertPoint(OrigOpInst); auto *FrozenMaybePoisonOperand = Builder.CreateFreeze( MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr"); replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand); return OrigOp; } Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI, PHINode *PN) { // Detect whether this is a recurrence with a start value and some number of // backedge values. We'll check whether we can push the freeze through the // backedge values (possibly dropping poison flags along the way) until we // reach the phi again. In that case, we can move the freeze to the start // value. Use *StartU = nullptr; SmallVector Worklist; for (Use &U : PN->incoming_values()) { if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) { // Add backedge value to worklist. Worklist.push_back(U.get()); continue; } // Don't bother handling multiple start values. if (StartU) return nullptr; StartU = &U; } if (!StartU || Worklist.empty()) return nullptr; // Not a recurrence. Value *StartV = StartU->get(); BasicBlock *StartBB = PN->getIncomingBlock(*StartU); bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV); // We can't insert freeze if the start value is the result of the // terminator (e.g. an invoke). if (StartNeedsFreeze && StartBB->getTerminator() == StartV) return nullptr; SmallPtrSet Visited; SmallVector DropFlags; while (!Worklist.empty()) { Value *V = Worklist.pop_back_val(); if (!Visited.insert(V).second) continue; if (Visited.size() > 32) return nullptr; // Limit the total number of values we inspect. // Assume that PN is non-poison, because it will be after the transform. if (V == PN || isGuaranteedNotToBeUndefOrPoison(V)) continue; Instruction *I = dyn_cast(V); if (!I || canCreateUndefOrPoison(cast(I), /*ConsiderFlagsAndMetadata*/ false)) return nullptr; DropFlags.push_back(I); append_range(Worklist, I->operands()); } for (Instruction *I : DropFlags) I->dropPoisonGeneratingFlagsAndMetadata(); if (StartNeedsFreeze) { Builder.SetInsertPoint(StartBB->getTerminator()); Value *FrozenStartV = Builder.CreateFreeze(StartV, StartV->getName() + ".fr"); replaceUse(*StartU, FrozenStartV); } return replaceInstUsesWith(FI, PN); } bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) { Value *Op = FI.getOperand(0); if (isa(Op) || Op->hasOneUse()) return false; // Move the freeze directly after the definition of its operand, so that // it dominates the maximum number of uses. Note that it may not dominate // *all* uses if the operand is an invoke/callbr and the use is in a phi on // the normal/default destination. This is why the domination check in the // replacement below is still necessary. BasicBlock::iterator MoveBefore; if (isa(Op)) { MoveBefore = FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca(); } else { auto MoveBeforeOpt = cast(Op)->getInsertionPointAfterDef(); if (!MoveBeforeOpt) return false; MoveBefore = *MoveBeforeOpt; } // Don't move to the position of a debug intrinsic. 
if (isa(MoveBefore)) MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator(); // Re-point iterator to come after any debug-info records, if we're // running in "RemoveDIs" mode MoveBefore.setHeadBit(false); bool Changed = false; if (&FI != &*MoveBefore) { FI.moveBefore(*MoveBefore->getParent(), MoveBefore); Changed = true; } Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool { bool Dominates = DT.dominates(&FI, U); Changed |= Dominates; return Dominates; }); return Changed; } // Check if any direct or bitcast user of this value is a shuffle instruction. static bool isUsedWithinShuffleVector(Value *V) { for (auto *U : V->users()) { if (isa(U)) return true; else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U)) return true; } return false; } Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { Value *Op0 = I.getOperand(0); if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // freeze (phi const, x) --> phi const, (freeze x) if (auto *PN = dyn_cast(Op0)) { if (Instruction *NV = foldOpIntoPhi(I, PN)) return NV; if (Instruction *NV = foldFreezeIntoRecurrence(I, PN)) return NV; } if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I)) return replaceInstUsesWith(I, NI); // If I is freeze(undef), check its uses and fold it to a fixed constant. // - or: pick -1 // - select's condition: if the true value is constant, choose it by making // the condition true. // - default: pick 0 // // Note that this transform is intentionally done here rather than // via an analysis in InstSimplify or at individual user sites. That is // because we must produce the same value for all uses of the freeze - // it's the reason "freeze" exists! // // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid // duplicating logic for binops at least. auto getUndefReplacement = [&I](Type *Ty) { Constant *BestValue = nullptr; Constant *NullValue = Constant::getNullValue(Ty); for (const auto *U : I.users()) { Constant *C = NullValue; if (match(U, m_Or(m_Value(), m_Value()))) C = ConstantInt::getAllOnesValue(Ty); else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value()))) C = ConstantInt::getTrue(Ty); if (!BestValue) BestValue = C; else if (BestValue != C) BestValue = NullValue; } assert(BestValue && "Must have at least one use"); return BestValue; }; if (match(Op0, m_Undef())) { // Don't fold freeze(undef/poison) if it's used as a vector operand in // a shuffle. This may improve codegen for shuffles that allow // unspecified inputs. if (isUsedWithinShuffleVector(&I)) return nullptr; return replaceInstUsesWith(I, getUndefReplacement(I.getType())); } Constant *C; if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) { Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType()); return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC)); } // Replace uses of Op with freeze(Op). if (freezeOtherUses(I)) return &I; return nullptr; } /// Check for case where the call writes to an otherwise dead alloca. This /// shows up for unused out-params in idiomatic C/C++ code. Note that this /// helper *only* analyzes the write; doesn't check any other legality aspect. static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) { auto *CB = dyn_cast(I); if (!CB) // TODO: handle e.g. store to alloca here - only worth doing if we extend // to allow reload along used path as described below. Otherwise, this // is simply a store to a dead allocation which will be removed. 
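  // Editor's note (illustrative examples for getUndefReplacement in visitFreeze
  // above; hypothetical IR):
  //   %f = freeze i32 undef  ; only used by "or i32 %f, %x"        -> folds to -1
  //   %f = freeze i1 undef   ; only used as a select condition with
  //                          ; a constant true arm                 -> folds to true
  //   %f = freeze i32 undef  ; any other use                       -> folds to 0
  // If different users would prefer different constants, 0 is used for all of
  // them so that every use of the freeze sees the same value.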
return false; std::optional Dest = MemoryLocation::getForDest(CB, TLI); if (!Dest) return false; auto *AI = dyn_cast(getUnderlyingObject(Dest->Ptr)); if (!AI) // TODO: allow malloc? return false; // TODO: allow memory access dominated by move point? Note that since AI // could have a reference to itself captured by the call, we would need to // account for cycles in doing so. SmallVector AllocaUsers; SmallPtrSet Visited; auto pushUsers = [&](const Instruction &I) { for (const User *U : I.users()) { if (Visited.insert(U).second) AllocaUsers.push_back(U); } }; pushUsers(*AI); while (!AllocaUsers.empty()) { auto *UserI = cast(AllocaUsers.pop_back_val()); if (isa(UserI) || isa(UserI) || isa(UserI)) { pushUsers(*UserI); continue; } if (UserI == CB) continue; // TODO: support lifetime.start/end here return false; } return true; } /// Try to move the specified instruction from its current block into the /// beginning of DestBlock, which can only happen if it's safe to move the /// instruction past all of the instructions between it and the end of its /// block. bool InstCombinerImpl::tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { BasicBlock *SrcBlock = I->getParent(); // Cannot move control-flow-involving, volatile loads, vaarg, etc. if (isa(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || I->isTerminator()) return false; // Do not sink static or dynamic alloca instructions. Static allocas must // remain in the entry block, and dynamic allocas must not be sunk in between // a stacksave / stackrestore pair, which would incorrectly shorten its // lifetime. if (isa(I)) return false; // Do not sink into catchswitch blocks. if (isa(DestBlock->getTerminator())) return false; // Do not sink convergent call instructions. if (auto *CI = dyn_cast(I)) { if (CI->isConvergent()) return false; } // Unless we can prove that the memory write isn't visibile except on the // path we're sinking to, we must bail. if (I->mayWriteToMemory()) { if (!SoleWriteToDeadLocal(I, TLI)) return false; } // We can only sink load instructions if there is nothing between the load and // the end of block that could change the value. if (I->mayReadFromMemory()) { // We don't want to do any sophisticated alias analysis, so we only check // the instructions after I in I's parent block if we try to sink to its // successor block. if (DestBlock->getUniquePredecessor() != I->getParent()) return false; for (BasicBlock::iterator Scan = std::next(I->getIterator()), E = I->getParent()->end(); Scan != E; ++Scan) if (Scan->mayWriteToMemory()) return false; } I->dropDroppableUses([&](const Use *U) { auto *I = dyn_cast(U->getUser()); if (I && I->getParent() != DestBlock) { Worklist.add(I); return true; } return false; }); /// FIXME: We could remove droppable uses that are not dominated by /// the new position. BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); I->moveBefore(*DestBlock, InsertPos); ++NumSunkInst; // Also sink all related debug uses from the source basic block. Otherwise we // get debug use before the def. Attempt to salvage debug uses first, to // maximise the range variables have location for. If we cannot salvage, then // mark the location undef: we know it was supposed to receive a new location // here, but that computation has been sunk. SmallVector DbgUsers; findDbgUsers(DbgUsers, I); // For all debug values in the destination block, the sunk instruction // will still be available, so they do not need to be dropped. 
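  // Editor's note (illustrative sketch, hypothetical IR): if
  //   bb1:  %v = add i32 %a, %b
  //         call void @llvm.dbg.value(metadata i32 %v, ...)
  //   bb2:  ... single use of %v ...
  // and %v is sunk into bb2, the dbg.value left behind in bb1 would describe a
  // value that is no longer computed there; the code below clones it next to
  // the sunk definition and salvages (or undefs) the original.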
SmallVector DbgUsersToSalvage; SmallVector DPValuesToSalvage; for (auto &DbgUser : DbgUsers) if (DbgUser->getParent() != DestBlock) DbgUsersToSalvage.push_back(DbgUser); // Process the sinking DbgUsersToSalvage in reverse order, as we only want // to clone the last appearing debug intrinsic for each given variable. SmallVector DbgUsersToSink; for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage) if (DVI->getParent() == SrcBlock) DbgUsersToSink.push_back(DVI); llvm::sort(DbgUsersToSink, [](auto *A, auto *B) { return B->comesBefore(A); }); SmallVector DIIClones; SmallSet SunkVariables; for (auto *User : DbgUsersToSink) { // A dbg.declare instruction should not be cloned, since there can only be // one per variable fragment. It should be left in the original place // because the sunk instruction is not an alloca (otherwise we could not be // here). if (isa(User)) continue; DebugVariable DbgUserVariable = DebugVariable(User->getVariable(), User->getExpression(), User->getDebugLoc()->getInlinedAt()); if (!SunkVariables.insert(DbgUserVariable).second) continue; // Leave dbg.assign intrinsics in their original positions and there should // be no need to insert a clone. if (isa(User)) continue; DIIClones.emplace_back(cast(User->clone())); if (isa(User) && isa(I)) DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0)); LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n'); } // Perform salvaging without the clones, then sink the clones. if (!DIIClones.empty()) { // RemoveDIs: pass in empty vector of DPValues until we get to instrumenting // this pass. SmallVector DummyDPValues; salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, DummyDPValues); // The clones are in reverse order of original appearance, reverse again to // maintain the original order. for (auto &DIIClone : llvm::reverse(DIIClones)) { DIIClone->insertBefore(&*InsertPos); LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n'); } } return true; } bool InstCombinerImpl::run() { while (!Worklist.isEmpty()) { // Walk deferred instructions in reverse order, and push them to the // worklist, which means they'll end up popped from the worklist in-order. while (Instruction *I = Worklist.popDeferred()) { // Check to see if we can DCE the instruction. We do this already here to // reduce the number of uses and thus allow other folds to trigger. // Note that eraseInstFromFunction() may push additional instructions on // the deferred worklist, so this will DCE whole instruction chains. if (isInstructionTriviallyDead(I, &TLI)) { eraseInstFromFunction(*I); ++NumDeadInst; continue; } Worklist.push(I); } Instruction *I = Worklist.removeOne(); if (I == nullptr) continue; // skip null values. // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I, &TLI)) { eraseInstFromFunction(*I); ++NumDeadInst; continue; } if (!DebugCounter::shouldExecute(VisitCounter)) continue; // See if we can trivially sink this instruction to its user if we can // prove that the successor is not executed more frequently than our block. // Return the UserBlock if successful. auto getOptionalSinkBlockForInst = [this](Instruction *I) -> std::optional { if (!EnableCodeSinking) return std::nullopt; BasicBlock *BB = I->getParent(); BasicBlock *UserParent = nullptr; unsigned NumUsers = 0; for (auto *U : I->users()) { if (U->isDroppable()) continue; if (NumUsers > MaxSinkNumUsers) return std::nullopt; Instruction *UserInst = cast(U); // Special handling for Phi nodes - get the block the use occurs in. 
if (PHINode *PN = dyn_cast(UserInst)) { for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { if (PN->getIncomingValue(i) == I) { // Bail out if we have uses in different blocks. We don't do any // sophisticated analysis (i.e finding NearestCommonDominator of // these use blocks). if (UserParent && UserParent != PN->getIncomingBlock(i)) return std::nullopt; UserParent = PN->getIncomingBlock(i); } } assert(UserParent && "expected to find user block!"); } else { if (UserParent && UserParent != UserInst->getParent()) return std::nullopt; UserParent = UserInst->getParent(); } // Make sure these checks are done only once, naturally we do the checks // the first time we get the userparent, this will save compile time. if (NumUsers == 0) { // Try sinking to another block. If that block is unreachable, then do // not bother. SimplifyCFG should handle it. if (UserParent == BB || !DT.isReachableFromEntry(UserParent)) return std::nullopt; auto *Term = UserParent->getTerminator(); // See if the user is one of our successors that has only one // predecessor, so that we don't have to split the critical edge. // Another option where we can sink is a block that ends with a // terminator that does not pass control to other block (such as // return or unreachable or resume). In this case: // - I dominates the User (by SSA form); // - the User will be executed at most once. // So sinking I down to User is always profitable or neutral. if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term)) return std::nullopt; assert(DT.dominates(BB, UserParent) && "Dominance relation broken?"); } NumUsers++; } // No user or only has droppable users. if (!UserParent) return std::nullopt; return UserParent; }; auto OptBB = getOptionalSinkBlockForInst(I); if (OptBB) { auto *UserParent = *OptBB; // Okay, the CFG is simple enough, try to sink this instruction. if (tryToSinkInstruction(I, UserParent)) { LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); MadeIRChange = true; // We'll add uses of the sunk instruction below, but since // sinking can expose opportunities for it's *operands* add // them to the worklist for (Use &U : I->operands()) if (Instruction *OpI = dyn_cast(U.get())) Worklist.push(OpI); } } // Now that we have an instruction, try combining it to simplify it. Builder.SetInsertPoint(I); Builder.CollectMetadataToCopy( I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation}); #ifndef NDEBUG std::string OrigI; #endif LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); if (Instruction *Result = visit(*I)) { ++NumCombined; // Should we replace the old instruction with a new one? if (Result != I) { LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); Result->copyMetadata(*I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation}); // Everything uses the new instruction now. I->replaceAllUsesWith(Result); // Move the name to the new instruction first. Result->takeName(I); // Insert the new instruction into the basic block... BasicBlock *InstParent = I->getParent(); BasicBlock::iterator InsertPos = I->getIterator(); // Are we replace a PHI with something that isn't a PHI, or vice versa? if (isa(Result) != isa(I)) { // We need to fix up the insertion point. if (isa(I)) // PHI -> Non-PHI InsertPos = InstParent->getFirstInsertionPt(); else // Non-PHI -> PHI InsertPos = InstParent->getFirstNonPHIIt(); } Result->insertInto(InstParent, InsertPos); // Push the new instruction and any users onto the worklist. 
Worklist.pushUsersToWorkList(*Result); Worklist.push(Result); eraseInstFromFunction(*I); } else { LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' << " New = " << *I << '\n'); // If the instruction was modified, it's possible that it is now dead. // if so, remove it. if (isInstructionTriviallyDead(I, &TLI)) { eraseInstFromFunction(*I); } else { Worklist.pushUsersToWorkList(*I); Worklist.push(I); } } MadeIRChange = true; } } Worklist.zap(); return MadeIRChange; } // Track the scopes used by !alias.scope and !noalias. In a function, a // @llvm.experimental.noalias.scope.decl is only useful if that scope is used // by both sets. If not, the declaration of the scope can be safely omitted. // The MDNode of the scope can be omitted as well for the instructions that are // part of this function. We do not do that at this point, as this might become // too time consuming to do. class AliasScopeTracker { SmallPtrSet UsedAliasScopesAndLists; SmallPtrSet UsedNoAliasScopesAndLists; public: void analyse(Instruction *I) { // This seems to be faster than checking 'mayReadOrWriteMemory()'. if (!I->hasMetadataOtherThanDebugLoc()) return; auto Track = [](Metadata *ScopeList, auto &Container) { const auto *MDScopeList = dyn_cast_or_null(ScopeList); if (!MDScopeList || !Container.insert(MDScopeList).second) return; for (const auto &MDOperand : MDScopeList->operands()) if (auto *MDScope = dyn_cast(MDOperand)) Container.insert(MDScope); }; Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists); Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists); } bool isNoAliasScopeDeclDead(Instruction *Inst) { NoAliasScopeDeclInst *Decl = dyn_cast(Inst); if (!Decl) return false; assert(Decl->use_empty() && "llvm.experimental.noalias.scope.decl in use ?"); const MDNode *MDSL = Decl->getScopeList(); assert(MDSL->getNumOperands() == 1 && "llvm.experimental.noalias.scope should refer to a single scope"); auto &MDOperand = MDSL->getOperand(0); if (auto *MD = dyn_cast(MDOperand)) return !UsedAliasScopesAndLists.contains(MD) || !UsedNoAliasScopesAndLists.contains(MD); // Not an MDNode ? throw away. return true; } }; /// Populate the IC worklist from a function, by walking it in reverse /// post-order and adding all reachable code to the worklist. /// /// This has a couple of tricks to make the code faster and more powerful. In /// particular, we constant fold and DCE instructions as we go, to avoid adding /// them to the worklist (this significantly speeds up instcombine on code where /// many instructions are dead or constant). Additionally, if we find a branch /// whose condition is a known constant, we only visit the reachable successors. 
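// Editor's note (illustrative sketch, hypothetical IR): for a block ending in
//   br i1 true, label %taken, label %dead
// only %taken is treated as live below; %dead's PHI inputs coming from this
// block are replaced with poison, and instructions in blocks that never became
// live are removed afterwards.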
bool InstCombinerImpl::prepareWorklist( Function &F, ReversePostOrderTraversal &RPOT) { bool MadeIRChange = false; SmallPtrSet LiveBlocks; SmallVector InstrsForInstructionWorklist; DenseMap FoldedConstants; AliasScopeTracker SeenAliasScopes; auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) { for (BasicBlock *Succ : successors(BB)) if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second) for (PHINode &PN : Succ->phis()) for (Use &U : PN.incoming_values()) if (PN.getIncomingBlock(U) == BB && !isa(U)) { U.set(PoisonValue::get(PN.getType())); MadeIRChange = true; } }; for (BasicBlock *BB : RPOT) { if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) { return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred); })) { HandleOnlyLiveSuccessor(BB, nullptr); continue; } LiveBlocks.insert(BB); for (Instruction &Inst : llvm::make_early_inc_range(*BB)) { // ConstantProp instruction if trivially constant. if (!Inst.use_empty() && (Inst.getNumOperands() == 0 || isa(Inst.getOperand(0)))) if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) { LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst << '\n'); Inst.replaceAllUsesWith(C); ++NumConstProp; if (isInstructionTriviallyDead(&Inst, &TLI)) Inst.eraseFromParent(); MadeIRChange = true; continue; } // See if we can constant fold its operands. for (Use &U : Inst.operands()) { if (!isa(U) && !isa(U)) continue; auto *C = cast(U); Constant *&FoldRes = FoldedConstants[C]; if (!FoldRes) FoldRes = ConstantFoldConstant(C, DL, &TLI); if (FoldRes != C) { LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst << "\n Old = " << *C << "\n New = " << *FoldRes << '\n'); U = FoldRes; MadeIRChange = true; } } // Skip processing debug and pseudo intrinsics in InstCombine. Processing // these call instructions consumes non-trivial amount of time and // provides no value for the optimization. if (!Inst.isDebugOrPseudoInst()) { InstrsForInstructionWorklist.push_back(&Inst); SeenAliasScopes.analyse(&Inst); } } // If this is a branch or switch on a constant, mark only the single // live successor. Otherwise assume all successors are live. Instruction *TI = BB->getTerminator(); if (BranchInst *BI = dyn_cast(TI); BI && BI->isConditional()) { if (isa(BI->getCondition())) { // Branch on undef is UB. HandleOnlyLiveSuccessor(BB, nullptr); continue; } if (auto *Cond = dyn_cast(BI->getCondition())) { bool CondVal = Cond->getZExtValue(); HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal)); continue; } } else if (SwitchInst *SI = dyn_cast(TI)) { if (isa(SI->getCondition())) { // Switch on undef is UB. HandleOnlyLiveSuccessor(BB, nullptr); continue; } if (auto *Cond = dyn_cast(SI->getCondition())) { HandleOnlyLiveSuccessor(BB, SI->findCaseValue(Cond)->getCaseSuccessor()); continue; } } } // Remove instructions inside unreachable blocks. This prevents the // instcombine code from having to deal with some bad special cases, and // reduces use counts of instructions. for (BasicBlock &BB : F) { if (LiveBlocks.count(&BB)) continue; unsigned NumDeadInstInBB; unsigned NumDeadDbgInstInBB; std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) = removeAllNonTerminatorAndEHPadInstructions(&BB); MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0; NumDeadInst += NumDeadInstInBB; } // Once we've found all of the instructions to add to instcombine's worklist, // add them in reverse order. This way instcombine will visit from the top // of the function down. 
This jives well with the way that it adds all uses // of instructions to the worklist after doing a transformation, thus avoiding // some N^2 behavior in pathological cases. Worklist.reserve(InstrsForInstructionWorklist.size()); for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) { // DCE instruction if trivially dead. As we iterate in reverse program // order here, we will clean up whole chains of dead instructions. if (isInstructionTriviallyDead(Inst, &TLI) || SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) { ++NumDeadInst; LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); salvageDebugInfo(*Inst); Inst->eraseFromParent(); MadeIRChange = true; continue; } Worklist.push(Inst); } return MadeIRChange; } static bool combineInstructionsOverFunction( Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, LoopInfo *LI, const InstCombineOptions &Opts) { auto &DL = F.getParent()->getDataLayout(); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. IRBuilder Builder( F.getContext(), TargetFolder(DL), IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) { Worklist.add(I); if (auto *Assume = dyn_cast(I)) AC.registerAssumption(Assume); })); ReversePostOrderTraversal RPOT(&F.front()); // Lower dbg.declare intrinsics otherwise their value may be clobbered // by instcombiner. bool MadeIRChange = false; if (ShouldLowerDbgDeclare) MadeIRChange = LowerDbgDeclare(F); // Iterate while there is work to do. unsigned Iteration = 0; while (true) { ++Iteration; if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) { LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations << " on " << F.getName() << " reached; stopping without verifying fixpoint\n"); break; } ++NumWorklistIterations; LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, ORE, BFI, PSI, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT); MadeChangeInThisIteration |= IC.run(); if (!MadeChangeInThisIteration) break; MadeIRChange = true; if (Iteration > Opts.MaxIterations) { report_fatal_error( "Instruction Combining did not reach a fixpoint after " + Twine(Opts.MaxIterations) + " iterations"); } } if (Iteration == 1) ++NumOneIteration; else if (Iteration == 2) ++NumTwoIterations; else if (Iteration == 3) ++NumThreeIterations; else ++NumFourOrMoreIterations; return MadeIRChange; } InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {} void InstCombinePass::printPipeline( raw_ostream &OS, function_ref MapClassName2PassName) { static_cast *>(this)->printPipeline( OS, MapClassName2PassName); OS << '<'; OS << "max-iterations=" << Options.MaxIterations << ";"; OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;"; OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint"; OS << '>'; } PreservedAnalyses InstCombinePass::run(Function &F, FunctionAnalysisManager &AM) { auto &AC = AM.getResult(F); auto &DT = AM.getResult(F); auto &TLI = AM.getResult(F); auto &ORE = AM.getResult(F); auto &TTI = AM.getResult(F); // TODO: Only use LoopInfo when the option is set. This requires that the // callers in the pass pipeline explicitly set the option. 
auto *LI = AM.getCachedResult(F); if (!LI && Options.UseLoopInfo) LI = &AM.getResult(F); auto *AA = &AM.getResult(F); auto &MAMProxy = AM.getResult(F); ProfileSummaryInfo *PSI = MAMProxy.getCachedResult(*F.getParent()); auto *BFI = (PSI && PSI->hasProfileSummary()) ? &AM.getResult(F) : nullptr; if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, BFI, PSI, LI, Options)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); // Mark all the analyses that instcombine updates as preserved. PreservedAnalyses PA; PA.preserveSet(); return PA; } void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addRequired(); LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); } bool InstructionCombiningPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; // Required analyses. auto AA = &getAnalysis().getAAResults(); auto &AC = getAnalysis().getAssumptionCache(F); auto &TLI = getAnalysis().getTLI(F); auto &TTI = getAnalysis().getTTI(F); auto &DT = getAnalysis().getDomTree(); auto &ORE = getAnalysis().getORE(); // Optional analyses. auto *LIWP = getAnalysisIfAvailable(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; ProfileSummaryInfo *PSI = &getAnalysis().getPSI(); BlockFrequencyInfo *BFI = (PSI && PSI->hasProfileSummary()) ? &getAnalysis().getBFI() : nullptr; return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, BFI, PSI, LI, InstCombineOptions()); } char InstructionCombiningPass::ID = 0; InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) { initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry()); } INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) // Initialization Routines void llvm::initializeInstCombine(PassRegistry &Registry) { initializeInstructionCombiningPassPass(Registry); } FunctionPass *llvm::createInstructionCombiningPass() { return new InstructionCombiningPass(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index a1d7f0f9ba0f..a3951fdf8a15 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1,2358 +1,2303 @@ //===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains the implementation of the scalar evolution expander, // which is used to generate the code corresponding to a given scalar evolution // expression. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/LoopUtils.h" #ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS #define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X) #else #define SCEV_DEBUG_WITH_TYPE(TYPE, X) #endif using namespace llvm; cl::opt llvm::SCEVCheapExpansionBudget( "scev-cheap-expansion-budget", cl::Hidden, cl::init(4), cl::desc("When performing SCEV expansion only if it is cheap to do, this " "controls the budget that is considered cheap (default = 4)")); using namespace PatternMatch; /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, /// reusing an existing cast if a suitable one (= dominating IP) exists, or /// creating a new one. Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, BasicBlock::iterator IP) { // This function must be called with the builder having a valid insertion // point. It doesn't need to be the actual IP where the uses of the returned // cast will be added, but it must dominate such IP. // We use this precondition to produce a cast that will dominate all its // uses. In particular, this is crucial for the case where the builder's // insertion point *is* the point where we were asked to put the cast. // Since we don't know the builder's insertion point is actually // where the uses will be added (only that it dominates it), we are // not allowed to move it. BasicBlock::iterator BIP = Builder.GetInsertPoint(); Value *Ret = nullptr; // Check to see if there is already a cast! for (User *U : V->users()) { if (U->getType() != Ty) continue; CastInst *CI = dyn_cast(U); if (!CI || CI->getOpcode() != Op) continue; // Found a suitable cast that is at IP or comes before IP. Use it. Note that // the cast must also properly dominate the Builder's insertion point. if (IP->getParent() == CI->getParent() && &*BIP != CI && (&*IP == CI || CI->comesBefore(&*IP))) { Ret = CI; break; } } // Create a new cast. if (!Ret) { SCEVInsertPointGuard Guard(Builder, this); Builder.SetInsertPoint(&*IP); Ret = Builder.CreateCast(Op, V, Ty, V->getName()); } // We assert at the end of the function since IP might point to an // instruction with different dominance properties than a cast // (an invoke for example) and not dominate BIP (but the cast does). 
assert(!isa(Ret) || SE.DT.dominates(cast(Ret), &*BIP)); return Ret; } BasicBlock::iterator SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) const { BasicBlock::iterator IP = ++I->getIterator(); if (auto *II = dyn_cast(I)) IP = II->getNormalDest()->begin(); while (isa(IP)) ++IP; if (isa(IP) || isa(IP)) { ++IP; } else if (isa(IP)) { IP = MustDominate->getParent()->getFirstInsertionPt(); } else { assert(!IP->isEHPad() && "unexpected eh pad!"); } // Adjust insert point to be after instructions inserted by the expander, so // we can re-use already inserted instructions. Avoid skipping past the // original \p MustDominate, in case it is an inserted instruction. while (isInsertedInstruction(&*IP) && &*IP != MustDominate) ++IP; return IP; } BasicBlock::iterator SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const { // Cast the argument at the beginning of the entry block, after // any bitcasts of other arguments. if (Argument *A = dyn_cast(V)) { BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); while ((isa(IP) && isa(cast(IP)->getOperand(0)) && cast(IP)->getOperand(0) != A) || isa(IP)) ++IP; return IP; } // Cast the instruction immediately after the instruction. if (Instruction *I = dyn_cast(V)) return findInsertPointAfter(I, &*Builder.GetInsertPoint()); // Otherwise, this must be some kind of a constant, // so let's plop this cast into the function's entry block. assert(isa(V) && "Expected the cast argument to be a global/constant"); return Builder.GetInsertBlock() ->getParent() ->getEntryBlock() .getFirstInsertionPt(); } /// InsertNoopCastOfTo - Insert a cast of V to the specified type, /// which must be possible with a noop cast, doing what we can to share /// the casts. Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); assert((Op == Instruction::BitCast || Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && "InsertNoopCastOfTo cannot perform non-noop casts!"); assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && "InsertNoopCastOfTo cannot change sizes!"); // inttoptr only works for integral pointers. For non-integral pointers, we // can create a GEP on null with the integral value as index. Note that // it is safe to use GEP of null instead of inttoptr here, because only // expressions already based on a GEP of null should be converted to pointers // during expansion. if (Op == Instruction::IntToPtr) { auto *PtrTy = cast(Ty); if (DL.isNonIntegralPointerType(PtrTy)) return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep"); } // Short-circuit unnecessary bitcasts. if (Op == Instruction::BitCast) { if (V->getType() == Ty) return V; if (CastInst *CI = dyn_cast(V)) { if (CI->getOperand(0)->getType() == Ty) return CI->getOperand(0); } } // Short-circuit unnecessary inttoptr<->ptrtoint casts. 
if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { if (CastInst *CI = dyn_cast(V)) if ((CI->getOpcode() == Instruction::PtrToInt || CI->getOpcode() == Instruction::IntToPtr) && SE.getTypeSizeInBits(CI->getType()) == SE.getTypeSizeInBits(CI->getOperand(0)->getType())) return CI->getOperand(0); if (ConstantExpr *CE = dyn_cast(V)) if ((CE->getOpcode() == Instruction::PtrToInt || CE->getOpcode() == Instruction::IntToPtr) && SE.getTypeSizeInBits(CE->getType()) == SE.getTypeSizeInBits(CE->getOperand(0)->getType())) return CE->getOperand(0); } // Fold a cast of a constant. if (Constant *C = dyn_cast(V)) return ConstantExpr::getCast(Op, C, Ty); // Try to reuse existing cast, or insert one. return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V)); } /// InsertBinop - Insert the specified binary operator, doing a small amount /// of work to avoid inserting an obviously redundant operation, and hoisting /// to an outer loop when the opportunity is there and it is safe. Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, SCEV::NoWrapFlags Flags, bool IsSafeToHoist) { // Fold a binop with constant operands. if (Constant *CLHS = dyn_cast(LHS)) if (Constant *CRHS = dyn_cast(RHS)) if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL)) return Res; // Do a quick scan to see if we have this binop nearby. If so, reuse it. unsigned ScanLimit = 6; BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); // Scanning starts from the last instruction before the insertion point. BasicBlock::iterator IP = Builder.GetInsertPoint(); if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { // Don't count dbg.value against the ScanLimit, to avoid perturbing the // generated code. if (isa(IP)) ScanLimit++; auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) { // Ensure that no-wrap flags match. if (isa(I)) { if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW)) return true; if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW)) return true; } // Conservatively, do not use any instruction which has any of exact // flags installed. if (isa(I) && I->isExact()) return true; return false; }; if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP)) return &*IP; if (IP == BlockBegin) break; } } // Save the original insertion point so we can restore it when we're done. DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc(); SCEVInsertPointGuard Guard(Builder, this); if (IsSafeToHoist) { // Move the insertion point out of as many loops as we can. while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) break; // Ok, move up a level. Builder.SetInsertPoint(Preheader->getTerminator()); } } // If we haven't found this binop, insert it. // TODO: Use the Builder, which will make CreateBinOp below fold with // InstSimplifyFolder. Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS)); BO->setDebugLoc(Loc); if (Flags & SCEV::FlagNUW) BO->setHasNoUnsignedWrap(); if (Flags & SCEV::FlagNSW) BO->setHasNoSignedWrap(); return BO; } /// expandAddToGEP - Expand an addition expression with a pointer type into /// a GEP instead of using ptrtoint+arithmetic+inttoptr. 
This helps /// BasicAliasAnalysis and other passes analyze the result. See the rules /// for getelementptr vs. inttoptr in /// http://llvm.org/docs/LangRef.html#pointeraliasing /// for details. /// /// Design note: The correctness of using getelementptr here depends on /// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as /// they may introduce pointer arithmetic which may not be safely converted /// into getelementptr. /// /// Design note: It might seem desirable for this function to be more /// loop-aware. If some of the indices are loop-invariant while others /// aren't, it might seem desirable to emit multiple GEPs, keeping the /// loop-invariant portions of the overall computation outside the loop. /// However, there are a few reasons this is not done here. Hoisting simple /// arithmetic is a low-level optimization that often isn't very /// important until late in the optimization process. In fact, passes /// like InstructionCombining will combine GEPs, even if it means /// pushing loop-invariant computation down into loops, so even if the /// GEPs were split here, the work would quickly be undone. The /// LoopStrengthReduction pass, which is usually run quite late (and /// after the last InstructionCombining pass), takes care of hoisting /// loop-invariant portions of expressions, after considering what /// can be folded using target addressing modes. /// Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) { assert(!isa(V) || SE.DT.dominates(cast(V), &*Builder.GetInsertPoint())); Value *Idx = expand(Offset); // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast(V)) if (Constant *CRHS = dyn_cast(Idx)) return Builder.CreatePtrAdd(CLHS, CRHS); // Do a quick scan to see if we have this GEP nearby. If so, reuse it. unsigned ScanLimit = 6; BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); // Scanning starts from the last instruction before the insertion point. BasicBlock::iterator IP = Builder.GetInsertPoint(); if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { // Don't count dbg.value against the ScanLimit, to avoid perturbing the // generated code. if (isa(IP)) ScanLimit++; if (IP->getOpcode() == Instruction::GetElementPtr && IP->getOperand(0) == V && IP->getOperand(1) == Idx && cast(&*IP)->getSourceElementType() == Builder.getInt8Ty()) return &*IP; if (IP == BlockBegin) break; } } // Save the original insertion point so we can restore it when we're done. SCEVInsertPointGuard Guard(Builder, this); // Move the insertion point out of as many loops as we can. while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) break; // Ok, move up a level. Builder.SetInsertPoint(Preheader->getTerminator()); } // Emit a GEP. return Builder.CreatePtrAdd(V, Idx, "scevgep"); } /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for /// SCEV expansion. If they are nested, this is the most nested. If they are /// neighboring, pick the later. static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, DominatorTree &DT) { if (!A) return B; if (!B) return A; if (A->contains(B)) return B; if (B->contains(A)) return A; if (DT.dominates(A->getHeader(), B->getHeader())) return B; if (DT.dominates(B->getHeader(), A->getHeader())) return A; return A; // Arbitrarily break the tie. 
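  // Editor's note: e.g. for two sibling loops where A's header dominates B's
  // header, B is chosen (it runs "later"); if neither loop contains or
  // dominates the other, the arbitrary tie-break above returns A.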
} /// getRelevantLoop - Get the most relevant loop associated with the given /// expression, according to PickMostRelevantLoop. const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { // Test whether we've already computed the most relevant loop for this SCEV. auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr)); if (!Pair.second) return Pair.first->second; switch (S->getSCEVType()) { case scConstant: case scVScale: return nullptr; // A constant has no relevant loops. case scTruncate: case scZeroExtend: case scSignExtend: case scPtrToInt: case scAddExpr: case scMulExpr: case scUDivExpr: case scAddRecExpr: case scUMaxExpr: case scSMaxExpr: case scUMinExpr: case scSMinExpr: case scSequentialUMinExpr: { const Loop *L = nullptr; if (const SCEVAddRecExpr *AR = dyn_cast(S)) L = AR->getLoop(); for (const SCEV *Op : S->operands()) L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT); return RelevantLoops[S] = L; } case scUnknown: { const SCEVUnknown *U = cast(S); if (const Instruction *I = dyn_cast(U->getValue())) return Pair.first->second = SE.LI.getLoopFor(I->getParent()); // A non-instruction has no relevant loops. return nullptr; } case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); } llvm_unreachable("Unexpected SCEV type!"); } namespace { /// LoopCompare - Compare loops by PickMostRelevantLoop. class LoopCompare { DominatorTree &DT; public: explicit LoopCompare(DominatorTree &dt) : DT(dt) {} bool operator()(std::pair LHS, std::pair RHS) const { // Keep pointer operands sorted at the end. if (LHS.second->getType()->isPointerTy() != RHS.second->getType()->isPointerTy()) return LHS.second->getType()->isPointerTy(); // Compare loops with PickMostRelevantLoop. if (LHS.first != RHS.first) return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first; // If one operand is a non-constant negative and the other is not, // put the non-constant negative on the right so that a sub can // be used instead of a negate and add. if (LHS.second->isNonConstantNegative()) { if (!RHS.second->isNonConstantNegative()) return false; } else if (RHS.second->isNonConstantNegative()) return true; // Otherwise they are equivalent according to this comparison. return false; } }; } Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Collect all the add operands in a loop, along with their associated loops. // Iterate in reverse so that constants are emitted last, all else equal, and // so that pointer operands are inserted first, which the code below relies on // to form more involved GEPs. SmallVector, 8> OpsAndLoops; for (const SCEV *Op : reverse(S->operands())) OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op)); // Sort by loop. Use a stable sort so that constants follow non-constants and // pointer operands precede non-pointer operands. llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT)); // Emit instructions to add all the operands. Hoist as much as possible // out of loops, and form meaningful getelementptrs where possible. Value *Sum = nullptr; for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) { const Loop *CurLoop = I->first; const SCEV *Op = I->second; if (!Sum) { // This is the first operand. Just expand it. Sum = expand(Op); ++I; continue; } assert(!Op->getType()->isPointerTy() && "Only first op can be pointer"); if (isa(Sum->getType())) { // The running sum expression is a pointer. Try to form a getelementptr // at this level with that as the base. 
SmallVector NewOps; for (; I != E && I->first == CurLoop; ++I) { // If the operand is SCEVUnknown and not instructions, peek through // it, to enable more of it to be folded into the GEP. const SCEV *X = I->second; if (const SCEVUnknown *U = dyn_cast(X)) if (!isa(U->getValue())) X = SE.getSCEV(U->getValue()); NewOps.push_back(X); } Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum); } else if (Op->isNonConstantNegative()) { // Instead of doing a negate and add, just do a subtract. Value *W = expand(SE.getNegativeSCEV(Op)); Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); ++I; } else { // A simple add. Value *W = expand(Op); // Canonicalize a constant to the RHS. if (isa(Sum)) std::swap(Sum, W); Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(), /*IsSafeToHoist*/ true); ++I; } } return Sum; } Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { Type *Ty = S->getType(); // Collect all the mul operands in a loop, along with their associated loops. // Iterate in reverse so that constants are emitted last, all else equal. SmallVector, 8> OpsAndLoops; for (const SCEV *Op : reverse(S->operands())) OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op)); // Sort by loop. Use a stable sort so that constants follow non-constants. llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT)); // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. Value *Prod = nullptr; auto I = OpsAndLoops.begin(); // Expand the calculation of X pow N in the following manner: // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then: // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK). const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() { auto E = I; // Calculate how many times the same operand from the same loop is included // into this power. uint64_t Exponent = 0; const uint64_t MaxExponent = UINT64_MAX >> 1; // No one sane will ever try to calculate such huge exponents, but if we // need this, we stop on UINT64_MAX / 2 because we need to exit the loop // below when the power of 2 exceeds our Exponent, and we want it to be // 1u << 31 at most to not deal with unsigned overflow. while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) { ++Exponent; ++E; } assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?"); // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them // that are needed into the result. Value *P = expand(I->second); Value *Result = nullptr; if (Exponent & 1) Result = P; for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) { P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); if (Exponent & BinExp) Result = Result ? InsertBinop(Instruction::Mul, Result, P, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true) : P; } I = E; assert(Result && "Nothing was expanded?"); return Result; }; while (I != OpsAndLoops.end()) { if (!Prod) { // This is the first operand. Just expand it. Prod = ExpandOpBinPowN(); } else if (I->second->isAllOnesValue()) { // Instead of doing a multiply by negative one, just do a negate. Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); ++I; } else { // A simple mul. Value *W = ExpandOpBinPowN(); // Canonicalize a constant to the RHS. 
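  // Editor's note (illustrative sketch of ExpandOpBinPowN above): for
  // X*X*X*X*X (Exponent == 5 == 0b101) the expansion emits
  //   %x2 = mul %x,  %x      ; X^2
  //   %x4 = mul %x2, %x2     ; X^4
  //   %r  = mul %x,  %x4     ; X^5 == X^1 * X^4
  // i.e. O(log N) multiplies instead of N-1.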
if (isa(Prod)) std::swap(Prod, W); const APInt *RHS; if (match(W, m_Power2(RHS))) { // Canonicalize Prod*(1<isVectorTy() && "vector types are not SCEVable"); auto NWFlags = S->getNoWrapFlags(); // clear nsw flag if shl will produce poison value. if (RHS->logBase2() == RHS->getBitWidth() - 1) NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW); Prod = InsertBinop(Instruction::Shl, Prod, ConstantInt::get(Ty, RHS->logBase2()), NWFlags, /*IsSafeToHoist*/ true); } else { Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(), /*IsSafeToHoist*/ true); } } } return Prod; } Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { Value *LHS = expand(S->getLHS()); if (const SCEVConstant *SC = dyn_cast(S->getRHS())) { const APInt &RHS = SC->getAPInt(); if (RHS.isPowerOf2()) return InsertBinop(Instruction::LShr, LHS, ConstantInt::get(SC->getType(), RHS.logBase2()), SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); } Value *RHS = expand(S->getRHS()); return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS())); } /// Determine if this is a well-behaved chain of instructions leading back to /// the PHI. If so, it may be reused by expanded expressions. bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L) { if (IncV->getNumOperands() == 0 || isa(IncV) || (isa(IncV) && !isa(IncV))) return false; // If any of the operands don't dominate the insert position, bail. // Addrec operands are always loop-invariant, so this can only happen // if there are instructions which haven't been hoisted. if (L == IVIncInsertLoop) { for (Use &Op : llvm::drop_begin(IncV->operands())) if (Instruction *OInst = dyn_cast(Op)) if (!SE.DT.dominates(OInst, IVIncInsertPos)) return false; } // Advance to the next instruction. IncV = dyn_cast(IncV->getOperand(0)); if (!IncV) return false; if (IncV->mayHaveSideEffects()) return false; if (IncV == PN) return true; return isNormalAddRecExprPHI(PN, IncV, L); } /// getIVIncOperand returns an induction variable increment's induction /// variable operand. /// /// If allowScale is set, any type of GEP is allowed as long as the nonIV /// operands dominate InsertPos. /// /// If allowScale is not set, ensure that a GEP increment conforms to one of the /// simple patterns generated by getAddRecExprPHILiterally and /// expandAddtoGEP. If the pattern isn't recognized, return NULL. Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, Instruction *InsertPos, bool allowScale) { if (IncV == InsertPos) return nullptr; switch (IncV->getOpcode()) { default: return nullptr; // Check for a simple Add/Sub or GEP of a loop invariant step. case Instruction::Add: case Instruction::Sub: { Instruction *OInst = dyn_cast(IncV->getOperand(1)); if (!OInst || SE.DT.dominates(OInst, InsertPos)) return dyn_cast(IncV->getOperand(0)); return nullptr; } case Instruction::BitCast: return dyn_cast(IncV->getOperand(0)); case Instruction::GetElementPtr: for (Use &U : llvm::drop_begin(IncV->operands())) { if (isa(U)) continue; if (Instruction *OInst = dyn_cast(U)) { if (!SE.DT.dominates(OInst, InsertPos)) return nullptr; } if (allowScale) { // allow any kind of GEP as long as it can be hoisted. continue; } // GEPs produced by SCEVExpander use i8 element type. 
if (!cast(IncV)->getSourceElementType()->isIntegerTy(8)) return nullptr; break; } return dyn_cast(IncV->getOperand(0)); } } /// If the insert point of the current builder or any of the builders on the /// stack of saved builders has 'I' as its insert point, update it to point to /// the instruction after 'I'. This is intended to be used when the instruction /// 'I' is being moved. If this fixup is not done and 'I' is moved to a /// different block, the inconsistent insert point (with a mismatched /// Instruction and Block) can lead to an instruction being inserted in a block /// other than its parent. void SCEVExpander::fixupInsertPoints(Instruction *I) { BasicBlock::iterator It(*I); BasicBlock::iterator NewInsertPt = std::next(It); if (Builder.GetInsertPoint() == It) Builder.SetInsertPoint(&*NewInsertPt); for (auto *InsertPtGuard : InsertPointGuards) if (InsertPtGuard->GetInsertPoint() == It) InsertPtGuard->SetInsertPoint(NewInsertPt); } /// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make /// it available to other uses in this loop. Recursively hoist any operands, /// until we reach a value that dominates InsertPos. bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos, bool RecomputePoisonFlags) { auto FixupPoisonFlags = [this](Instruction *I) { // Drop flags that are potentially inferred from old context and infer flags // in new context. I->dropPoisonGeneratingFlags(); if (auto *OBO = dyn_cast(I)) if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) { auto *BO = cast(I); BO->setHasNoUnsignedWrap( ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW); BO->setHasNoSignedWrap( ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW); } }; if (SE.DT.dominates(IncV, InsertPos)) { if (RecomputePoisonFlags) FixupPoisonFlags(IncV); return true; } // InsertPos must itself dominate IncV so that IncV's new position satisfies // its existing users. if (isa(InsertPos) || !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) return false; if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) return false; // Check that the chain of IV operands leading back to Phi can be hoisted. SmallVector IVIncs; for(;;) { Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true); if (!Oper) return false; // IncV is safe to hoist. IVIncs.push_back(IncV); IncV = Oper; if (SE.DT.dominates(IncV, InsertPos)) break; } for (Instruction *I : llvm::reverse(IVIncs)) { fixupInsertPoints(I); I->moveBefore(InsertPos); if (RecomputePoisonFlags) FixupPoisonFlags(I); } return true; } /// Determine if this cyclic phi is in a form that would have been generated by /// LSR. We don't care if the phi was actually expanded in this pass, as long /// as it is in a low-cost form, for example, no implied multiplication. This /// should match any patterns generated by getAddRecExprPHILiterally and /// expandAddtoGEP. bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L) { for(Instruction *IVOper = IncV; (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(), /*allowScale=*/false));) { if (IVOper == PN) return true; } return false; } /// expandIVInc - Expand an IV increment at Builder's current InsertPos. /// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may /// need to materialize IV increments elsewhere to handle difficult situations. 
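// Editor's note (illustrative, hypothetical names): for a pointer-typed IV the
// increment below is emitted as a GEP, e.g.
//   %scevgep = getelementptr i8, ptr %iv, i64 %step
// while an integer IV with a non-constant negative step -%n uses a subtract,
//   %x.iv.next = sub i32 %iv, %n
// (constant negative steps stay as adds, since a subtract of a constant would
// be canonicalized back to an add anyway).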
Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, bool useSubtract) { Value *IncV; // If the PHI is a pointer, use a GEP, otherwise use an add or sub. if (PN->getType()->isPointerTy()) { IncV = expandAddToGEP(SE.getSCEV(StepV), PN); } else { IncV = useSubtract ? Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); } return IncV; } /// Check whether we can cheaply express the requested SCEV in terms of /// the available PHI SCEV by truncation and/or inversion of the step. static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, bool &InvertStep) { // We can't transform to match a pointer PHI. Type *PhiTy = Phi->getType(); Type *RequestedTy = Requested->getType(); if (PhiTy->isPointerTy() || RequestedTy->isPointerTy()) return false; if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth()) return false; // Try truncate it if necessary. Phi = dyn_cast(SE.getTruncateOrNoop(Phi, RequestedTy)); if (!Phi) return false; // Check whether truncation will help. if (Phi == Requested) { InvertStep = false; return true; } // Check whether inverting will help: {R,+,-1} == R - {0,+,1}. if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) { InvertStep = true; return true; } return false; } static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { if (!isa(AR->getType())) return false; unsigned BitWidth = cast(AR->getType())->getBitWidth(); Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); const SCEV *Step = AR->getStepRecurrence(SE); const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy), SE.getSignExtendExpr(AR, WideTy)); const SCEV *ExtendAfterOp = SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy); return ExtendAfterOp == OpAfterExtend; } static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { if (!isa(AR->getType())) return false; unsigned BitWidth = cast(AR->getType())->getBitWidth(); Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); const SCEV *Step = AR->getStepRecurrence(SE); const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy), SE.getZeroExtendExpr(AR, WideTy)); const SCEV *ExtendAfterOp = SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy); return ExtendAfterOp == OpAfterExtend; } /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand /// the base addrec, which is the addrec without any non-loop-dominating /// values, and return the PHI. PHINode * SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, const Loop *L, Type *&TruncTy, bool &InvertStep) { assert((!IVIncInsertLoop || IVIncInsertPos) && "Uninitialized insert position"); // Reuse a previously-inserted PHI, if present. BasicBlock *LatchBlock = L->getLoopLatch(); if (LatchBlock) { PHINode *AddRecPhiMatch = nullptr; Instruction *IncV = nullptr; TruncTy = nullptr; InvertStep = false; // Only try partially matching scevs that need truncation and/or // step-inversion if we know this loop is outside the current loop. bool TryNonMatchingSCEV = IVIncInsertLoop && SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); for (PHINode &PN : L->getHeader()->phis()) { if (!SE.isSCEVable(PN.getType())) continue; // We should not look for a incomplete PHI. Getting SCEV for a incomplete // PHI has no meaning at all. 
if (!PN.isComplete()) { SCEV_DEBUG_WITH_TYPE( DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n"); continue; } const SCEVAddRecExpr *PhiSCEV = dyn_cast(SE.getSCEV(&PN)); if (!PhiSCEV) continue; bool IsMatchingSCEV = PhiSCEV == Normalized; // We only handle truncation and inversion of phi recurrences for the // expanded expression if the expanded expression's loop dominates the // loop we insert to. Check now, so we can bail out early. if (!IsMatchingSCEV && !TryNonMatchingSCEV) continue; // TODO: this possibly can be reworked to avoid this cast at all. Instruction *TempIncV = dyn_cast(PN.getIncomingValueForBlock(LatchBlock)); if (!TempIncV) continue; // Check whether we can reuse this PHI node. if (LSRMode) { if (!isExpandedAddRecExprPHI(&PN, TempIncV, L)) continue; } else { if (!isNormalAddRecExprPHI(&PN, TempIncV, L)) continue; } // Stop if we have found an exact match SCEV. if (IsMatchingSCEV) { IncV = TempIncV; TruncTy = nullptr; InvertStep = false; AddRecPhiMatch = &PN; break; } // Try whether the phi can be translated into the requested form // (truncated and/or offset by a constant). if ((!TruncTy || InvertStep) && canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) { // Record the phi node. But don't stop we might find an exact match // later. AddRecPhiMatch = &PN; IncV = TempIncV; TruncTy = Normalized->getType(); } } if (AddRecPhiMatch) { // Ok, the add recurrence looks usable. // Remember this PHI, even in post-inc mode. InsertedValues.insert(AddRecPhiMatch); // Remember the increment. rememberInstruction(IncV); // Those values were not actually inserted but re-used. ReusedValues.insert(AddRecPhiMatch); ReusedValues.insert(IncV); return AddRecPhiMatch; } } // Save the original insertion point so we can restore it when we're done. SCEVInsertPointGuard Guard(Builder, this); // Another AddRec may need to be recursively expanded below. For example, if // this AddRec is quadratic, the StepV may itself be an AddRec in this // loop. Remove this loop from the PostIncLoops set before expanding such // AddRecs. Otherwise, we cannot find a valid position for the step // (i.e. StepV can never dominate its loop header). Ideally, we could do // SavedIncLoops.swap(PostIncLoops), but we generally have a single element, // so it's not worth implementing SmallPtrSet::swap. PostIncLoopSet SavedPostIncLoops = PostIncLoops; PostIncLoops.clear(); // Expand code for the start value into the loop preheader. assert(L->getLoopPreheader() && "Can't expand add recurrences without a loop preheader!"); Value *StartV = expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator()); // StartV must have been be inserted into L's preheader to dominate the new // phi. assert(!isa(StartV) || SE.DT.properlyDominates(cast(StartV)->getParent(), L->getHeader())); // Expand code for the step value. Do this before creating the PHI so that PHI // reuse code doesn't see an incomplete PHI. const SCEV *Step = Normalized->getStepRecurrence(SE); Type *ExpandTy = Normalized->getType(); // If the stride is negative, insert a sub instead of an add for the increment // (unless it's a constant, because subtracts of constants are canonicalized // to adds). bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); if (useSubtract) Step = SE.getNegativeSCEV(Step); // Expand the step somewhere that dominates the loop header. 
Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt()); // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if // we actually do emit an addition. It does not apply if we emit a // subtraction. bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized); bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized); // Create the PHI. BasicBlock *Header = L->getHeader(); Builder.SetInsertPoint(Header, Header->begin()); pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), Twine(IVName) + ".iv"); // Create the step instructions and populate the PHI. for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *Pred = *HPI; // Add a start value. if (!L->contains(Pred)) { PN->addIncoming(StartV, Pred); continue; } // Create a step value and add it to the PHI. // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the // instructions at IVIncInsertPos. Instruction *InsertPos = L == IVIncInsertLoop ? IVIncInsertPos : Pred->getTerminator(); Builder.SetInsertPoint(InsertPos); Value *IncV = expandIVInc(PN, StepV, L, useSubtract); if (isa(IncV)) { if (IncrementIsNUW) cast(IncV)->setHasNoUnsignedWrap(); if (IncrementIsNSW) cast(IncV)->setHasNoSignedWrap(); } PN->addIncoming(IncV, Pred); } // After expanding subexpressions, restore the PostIncLoops set so the caller // can ensure that IVIncrement dominates the current uses. PostIncLoops = SavedPostIncLoops; // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most // effective when we are able to use an IV inserted here, so record it. InsertedValues.insert(PN); InsertedIVs.push_back(PN); return PN; } Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { const Loop *L = S->getLoop(); // Determine a normalized form of this expression, which is the expression // before any post-inc adjustment is made. const SCEVAddRecExpr *Normalized = S; if (PostIncLoops.count(L)) { PostIncLoopSet Loops; Loops.insert(L); Normalized = cast( normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false)); } [[maybe_unused]] const SCEV *Start = Normalized->getStart(); const SCEV *Step = Normalized->getStepRecurrence(SE); assert(SE.properlyDominates(Start, L->getHeader()) && "Start does not properly dominate loop header"); assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header"); // In some cases, we decide to reuse an existing phi node but need to truncate // it and/or invert the step. Type *TruncTy = nullptr; bool InvertStep = false; PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep); // Accommodate post-inc mode, if necessary. Value *Result; if (!PostIncLoops.count(L)) Result = PN; else { // In PostInc mode, use the post-incremented value. BasicBlock *LatchBlock = L->getLoopLatch(); assert(LatchBlock && "PostInc mode requires a unique loop latch!"); Result = PN->getIncomingValueForBlock(LatchBlock); // We might be introducing a new use of the post-inc IV that is not poison // safe, in which case we should drop poison generating flags. Only keep // those flags for which SCEV has proven that they always hold. if (isa(Result)) { auto *I = cast(Result); if (!S->hasNoUnsignedWrap()) I->setHasNoUnsignedWrap(false); if (!S->hasNoSignedWrap()) I->setHasNoSignedWrap(false); } // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. 
if (isa(Result) && !SE.DT.dominates(cast(Result), &*Builder.GetInsertPoint())) { // The induction variable's postinc expansion does not dominate this use. // IVUsers tries to prevent this case, so it is rare. However, it can // happen when an IVUser outside the loop is not dominated by the latch // block. Adjusting IVIncInsertPos before expansion begins cannot handle // all cases. Consider a phi outside whose operand is replaced during // expansion with the value of the postinc user. Without fundamentally // changing the way postinc users are tracked, the only remedy is // inserting an extra IV increment. StepV might fold into PostLoopOffset, // but hopefully expandCodeFor handles that. bool useSubtract = !S->getType()->isPointerTy() && Step->isNonConstantNegative(); if (useSubtract) Step = SE.getNegativeSCEV(Step); Value *StepV; { // Expand the step somewhere that dominates the loop header. SCEVInsertPointGuard Guard(Builder, this); StepV = expand(Step, L->getHeader()->getFirstInsertionPt()); } Result = expandIVInc(PN, StepV, L, useSubtract); } } // We have decided to reuse an induction variable of a dominating loop. Apply // truncation and/or inversion of the step. if (TruncTy) { // Truncate the result. if (TruncTy != Result->getType()) Result = Builder.CreateTrunc(Result, TruncTy); // Invert the result. if (InvertStep) Result = Builder.CreateSub(expand(Normalized->getStart()), Result); } return Result; } Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // In canonical mode we compute the addrec as an expression of a canonical IV // using evaluateAtIteration and expand the resulting SCEV expression. This // way we avoid introducing new IVs to carry on the computation of the addrec // throughout the loop. // // For nested addrecs evaluateAtIteration might need a canonical IV of a // type wider than the addrec itself. Emitting a canonical IV of the // proper type might produce non-legal types, for example expanding an i64 // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall // back to non-canonical mode for nested addrecs. if (!CanonicalMode || (S->getNumOperands() > 2)) return expandAddRecExprLiterally(S); Type *Ty = SE.getEffectiveSCEVType(S->getType()); const Loop *L = S->getLoop(); // First check for an existing canonical IV in a suitable type. PHINode *CanonicalIV = nullptr; if (PHINode *PN = L->getCanonicalInductionVariable()) if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) CanonicalIV = PN; // Rewrite an AddRec in terms of the canonical induction variable, if // its type is more narrow. if (CanonicalIV && SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) && !S->getType()->isPointerTy()) { SmallVector NewOps(S->getNumOperands()); for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType()); Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = findInsertPointAfter(cast(V), &*Builder.GetInsertPoint()); V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt); return V; } // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { if (isa(S->getType())) { Value *StartV = expand(SE.getPointerBase(S)); return expandAddToGEP(SE.removePointerBase(S), StartV); } SmallVector NewOps(S->operands()); NewOps[0] = SE.getConstant(Ty, 0); const SCEV *Rest = SE.getAddRecExpr(NewOps, L, S->getNoWrapFlags(SCEV::FlagNW)); // Just do a normal add. 
    // Pre-expand the operands to suppress folding.
    //
    // The LHS and RHS values are factored out of the expand call to make the
    // output independent of the argument evaluation order.
    const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
    const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
    return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
  }

  // If we don't yet have a canonical IV, create one.
  if (!CanonicalIV) {
    // Create and insert the PHI node for the induction variable in the
    // specified loop.
    BasicBlock *Header = L->getHeader();
    pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
    CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
    CanonicalIV->insertBefore(Header->begin());
    rememberInstruction(CanonicalIV);

    SmallSet<BasicBlock *, 4> PredSeen;
    Constant *One = ConstantInt::get(Ty, 1);
    for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
      BasicBlock *HP = *HPI;
      if (!PredSeen.insert(HP).second) {
        // There must be an incoming value for each predecessor, even the
        // duplicates!
        CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
        continue;
      }

      if (L->contains(HP)) {
        // Insert a unit add instruction right before the terminator
        // corresponding to the back-edge.
        Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
                                                     "indvar.next",
                                                     HP->getTerminator());
        Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
        rememberInstruction(Add);
        CanonicalIV->addIncoming(Add, HP);
      } else {
        CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
      }
    }
  }

  // {0,+,1} --> Insert a canonical induction variable into the loop!
  if (S->isAffine() && S->getOperand(1)->isOne()) {
    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
           "IVs with types different from the canonical IV should "
           "already have been handled!");
    return CanonicalIV;
  }

  // {0,+,F} --> {0,+,1} * F
  // If this is a simple linear addrec, emit it now as a special case.
  if (S->isAffine())    // {0,+,F} --> i*F
    return expand(SE.getTruncateOrNoop(
        SE.getMulExpr(SE.getUnknown(CanonicalIV),
                      SE.getNoopOrAnyExtend(S->getOperand(1),
                                            CanonicalIV->getType())),
        Ty));

  // If this is a chain of recurrences, turn it into a closed form, using the
  // folders, then expandCodeFor the closed form. This allows the folders to
  // simplify the expression without having to build a bunch of special code
  // into this folder.
  const SCEV *IH = SE.getUnknown(CanonicalIV);   // Get I as a "symbolic" SCEV.

  // Promote S up to the canonical IV type, if the cast is foldable.
  const SCEV *NewS = S;
  const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
  if (isa<SCEVAddRecExpr>(Ext))
    NewS = Ext;

  const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);

  // Truncate the result down to the original type, if needed.
const SCEV *T = SE.getTruncateOrNoop(V, Ty); return expand(T); } Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) { Value *V = expand(S->getOperand()); return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt, GetOptimalInsertionPointForCastOf(V)); } Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { Value *V = expand(S->getOperand()); return Builder.CreateTrunc(V, S->getType()); } Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { Value *V = expand(S->getOperand()); return Builder.CreateZExt(V, S->getType(), "", SE.isKnownNonNegative(S->getOperand())); } Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { Value *V = expand(S->getOperand()); return Builder.CreateSExt(V, S->getType()); } Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S, Intrinsic::ID IntrinID, Twine Name, bool IsSequential) { Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); Type *Ty = LHS->getType(); if (IsSequential) LHS = Builder.CreateFreeze(LHS); for (int i = S->getNumOperands() - 2; i >= 0; --i) { Value *RHS = expand(S->getOperand(i)); if (IsSequential && i != 0) RHS = Builder.CreateFreeze(RHS); Value *Sel; if (Ty->isIntegerTy()) Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS}, /*FMFSource=*/nullptr, Name); else { Value *ICmp = Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS); Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name); } LHS = Sel; } return LHS; } Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { return expandMinMaxExpr(S, Intrinsic::smax, "smax"); } Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { return expandMinMaxExpr(S, Intrinsic::umax, "umax"); } Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { return expandMinMaxExpr(S, Intrinsic::smin, "smin"); } Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { return expandMinMaxExpr(S, Intrinsic::umin, "umin"); } Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) { return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true); } Value *SCEVExpander::visitVScale(const SCEVVScale *S) { return Builder.CreateVScale(ConstantInt::get(S->getType(), 1)); } Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator IP) { setInsertPoint(IP); Value *V = expandCodeFor(SH, Ty); return V; } Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { // Expand the code for this SCEV. Value *V = expand(SH); if (Ty) { assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && "non-trivial casts should be done with the SCEVs directly!"); V = InsertNoopCastOfTo(V, Ty); } return V; } -static bool -canReuseInstruction(ScalarEvolution &SE, const SCEV *S, Instruction *I, - SmallVectorImpl &DropPoisonGeneratingInsts) { - // If the instruction cannot be poison, it's always safe to reuse. - if (programUndefinedIfPoison(I)) - return true; - - // Otherwise, it is possible that I is more poisonous that S. Collect the - // poison-contributors of S, and then check whether I has any additional - // poison-contributors. Poison that is contributed through poison-generating - // flags is handled by dropping those flags instead. - SmallPtrSet PoisonVals; - SE.getPoisonGeneratingValues(PoisonVals, S); - - SmallVector Worklist; - SmallPtrSet Visited; - Worklist.push_back(I); - while (!Worklist.empty()) { - Value *V = Worklist.pop_back_val(); - if (!Visited.insert(V).second) - continue; - - // Avoid walking large instruction graphs. 
- if (Visited.size() > 16) - return false; - - // Either the value can't be poison, or the S would also be poison if it - // is. - if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V)) - continue; - - auto *I = dyn_cast(V); - if (!I) - return false; - - // FIXME: Ignore vscale, even though it technically could be poison. Do this - // because SCEV currently assumes it can't be poison. Remove this special - // case once we proper model when vscale can be poison. - if (auto *II = dyn_cast(I); - II && II->getIntrinsicID() == Intrinsic::vscale) - continue; - - if (canCreatePoison(cast(I), /*ConsiderFlagsAndMetadata*/ false)) - return false; - - // If the instruction can't create poison, we can recurse to its operands. - if (I->hasPoisonGeneratingFlagsOrMetadata()) - DropPoisonGeneratingInsts.push_back(I); - - for (Value *Op : I->operands()) - Worklist.push_back(Op); - } - return true; -} - Value *SCEVExpander::FindValueInExprValueMap( const SCEV *S, const Instruction *InsertPt, SmallVectorImpl &DropPoisonGeneratingInsts) { // If the expansion is not in CanonicalMode, and the SCEV contains any // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. if (!CanonicalMode && SE.containsAddRecurrence(S)) return nullptr; // If S is a constant, it may be worse to reuse an existing Value. if (isa(S)) return nullptr; for (Value *V : SE.getSCEVValues(S)) { Instruction *EntInst = dyn_cast(V); if (!EntInst) continue; // Choose a Value from the set which dominates the InsertPt. // InsertPt should be inside the Value's parent loop so as not to break // the LCSSA form. assert(EntInst->getFunction() == InsertPt->getFunction()); if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) || !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr || SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) continue; // Make sure reusing the instruction is poison-safe. - if (canReuseInstruction(SE, S, EntInst, DropPoisonGeneratingInsts)) + if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts)) return V; DropPoisonGeneratingInsts.clear(); } return nullptr; } // The expansion of SCEV will either reuse a previous Value in ExprValueMap, // or expand the SCEV literally. Specifically, if the expansion is in LSRMode, // and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded // literally, to prevent LSR's transformed SCEV from being reverted. Otherwise, // the expansion will try to reuse Value from ExprValueMap, and only when it // fails, expand the SCEV literally. Value *SCEVExpander::expand(const SCEV *S) { // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. BasicBlock::iterator InsertPt = Builder.GetInsertPoint(); // We can move insertion point only if there is no div or rem operations // otherwise we are risky to move it over the check for zero denominator. auto SafeToHoist = [](const SCEV *S) { return !SCEVExprContains(S, [](const SCEV *S) { if (const auto *D = dyn_cast(S)) { if (const auto *SC = dyn_cast(D->getRHS())) // Division by non-zero constants can be hoisted. return SC->getValue()->isZero(); // All other divisions should not be moved as they may be // divisions by zero and should be kept within the // conditions of the surrounding loops that guard their // execution (see PR35406). 
return true; } return false; }); }; if (SafeToHoist(S)) { for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; L = L->getParentLoop()) { if (SE.isLoopInvariant(S, L)) { if (!L) break; if (BasicBlock *Preheader = L->getLoopPreheader()) { InsertPt = Preheader->getTerminator()->getIterator(); } else { // LSR sets the insertion point for AddRec start/step values to the // block start to simplify value reuse, even though it's an invalid // position. SCEVExpander must correct for this in all cases. InsertPt = L->getHeader()->getFirstInsertionPt(); } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) InsertPt = L->getHeader()->getFirstInsertionPt(); while (InsertPt != Builder.GetInsertPoint() && (isInsertedInstruction(&*InsertPt) || isa(&*InsertPt))) { InsertPt = std::next(InsertPt); } break; } } } // Check to see if we already expanded this here. auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt)); if (I != InsertedExpressions.end()) return I->second; SCEVInsertPointGuard Guard(Builder, this); Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); // Expand the expression into instructions. SmallVector DropPoisonGeneratingInsts; Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts); if (!V) { V = visit(S); V = fixupLCSSAFormFor(V); } else { for (Instruction *I : DropPoisonGeneratingInsts) { I->dropPoisonGeneratingFlagsAndMetadata(); // See if we can re-infer from first principles any of the flags we just // dropped. if (auto *OBO = dyn_cast(I)) if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) { auto *BO = cast(I); BO->setHasNoUnsignedWrap( ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW); BO->setHasNoSignedWrap( ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW); } if (auto *NNI = dyn_cast(I)) { auto *Src = NNI->getOperand(0); if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src, Constant::getNullValue(Src->getType()), I, DL).value_or(false)) NNI->setNonNeg(true); } } } // Remember the expanded value for this SCEV at this location. // // This is independent of PostIncLoops. The mapped value simply materializes // the expression at this insertion point. If the mapped value happened to be // a postinc expansion, it could be reused by a non-postinc user, but only if // its insertion point was already at the head of the loop. InsertedExpressions[std::make_pair(S, &*InsertPt)] = V; return V; } void SCEVExpander::rememberInstruction(Value *I) { auto DoInsert = [this](Value *V) { if (!PostIncLoops.empty()) InsertedPostIncValues.insert(V); else InsertedValues.insert(V); }; DoInsert(I); } /// replaceCongruentIVs - Check for congruent phis in this loop header and /// replace them with their most canonical representative. Return the number of /// phis eliminated. /// /// This does not depend on any SCEVExpander state but should be used in /// the same context that SCEVExpander is used. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl &DeadInsts, const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. 
SmallVector Phis; for (PHINode &PN : L->getHeader()->phis()) Phis.push_back(&PN); if (TTI) // Use stable_sort to preserve order of equivalent PHIs, so the order // of the sorted Phis is the same from run to run on the same loop. llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) { // Put pointers at the back and make sure pointer < pointer = false. if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy(); return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() < LHS->getType()->getPrimitiveSizeInBits().getFixedValue(); }); unsigned NumElim = 0; DenseMap ExprToIVMap; // Process phis from wide to narrow. Map wide phis to their truncation // so narrow phis can reuse them. for (PHINode *Phi : Phis) { auto SimplifyPHINode = [&](PHINode *PN) -> Value * { if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC})) return V; if (!SE.isSCEVable(PN->getType())) return nullptr; auto *Const = dyn_cast(SE.getSCEV(PN)); if (!Const) return nullptr; return Const->getValue(); }; // Fold constant phis. They may be congruent to other constant phis and // would confuse the logic below that expects proper IVs. if (Value *V = SimplifyPHINode(Phi)) { if (V->getType() != Phi->getType()) continue; SE.forgetValue(Phi); Phi->replaceAllUsesWith(V); DeadInsts.emplace_back(Phi); ++NumElim; SCEV_DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); continue; } if (!SE.isSCEVable(Phi->getType())) continue; PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; if (!OrigPhiRef) { OrigPhiRef = Phi; if (Phi->getType()->isIntegerTy() && TTI && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { // Make sure we only rewrite using simple induction variables; // otherwise, we can make the trip count of a loop unanalyzable // to SCEV. const SCEV *PhiExpr = SE.getSCEV(Phi); if (isa(PhiExpr)) { // This phi can be freely truncated to the narrowest phi type. Map the // truncated expression to it so it will be reused for narrow types. const SCEV *TruncExpr = SE.getTruncateExpr(PhiExpr, Phis.back()->getType()); ExprToIVMap[TruncExpr] = Phi; } } continue; } // Replacing a pointer phi with an integer phi or vice-versa doesn't make // sense. if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy()) continue; if (BasicBlock *LatchBlock = L->getLoopLatch()) { Instruction *OrigInc = dyn_cast( OrigPhiRef->getIncomingValueForBlock(LatchBlock)); Instruction *IsomorphicInc = dyn_cast(Phi->getIncomingValueForBlock(LatchBlock)); if (OrigInc && IsomorphicInc) { // If this phi has the same width but is more canonical, replace the // original with it. As part of the "more canonical" determination, // respect a prior decision to use an IV chain. if (OrigPhiRef->getType() == Phi->getType() && !(ChainedPhis.count(Phi) || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) && (ChainedPhis.count(Phi) || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { std::swap(OrigPhiRef, Phi); std::swap(OrigInc, IsomorphicInc); } // Replacing the congruent phi is sufficient because acyclic // redundancy elimination, CSE/GVN, should handle the // rest. However, once SCEV proves that a phi is congruent, // it's often the head of an IV user cycle that is isomorphic // with the original phi. It's worth eagerly cleaning up the // common case of a single IV increment so that DeleteDeadPHIs // can remove cycles that had postinc uses. 
// Because we may potentially introduce a new use of OrigIV that didn't // exist before at this point, its poison flags need readjustment. const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType()); if (OrigInc != IsomorphicInc && TruncExpr == SE.getSCEV(IsomorphicInc) && SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) && hoistIVInc(OrigInc, IsomorphicInc, /*RecomputePoisonFlags*/ true)) { SCEV_DEBUG_WITH_TYPE( DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: " << *IsomorphicInc << '\n'); Value *NewInc = OrigInc; if (OrigInc->getType() != IsomorphicInc->getType()) { BasicBlock::iterator IP; if (PHINode *PN = dyn_cast(OrigInc)) IP = PN->getParent()->getFirstInsertionPt(); else IP = OrigInc->getNextNonDebugInstruction()->getIterator(); IRBuilder<> Builder(IP->getParent(), IP); Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); NewInc = Builder.CreateTruncOrBitCast( OrigInc, IsomorphicInc->getType(), IVName); } IsomorphicInc->replaceAllUsesWith(NewInc); DeadInsts.emplace_back(IsomorphicInc); } } } SCEV_DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); SCEV_DEBUG_WITH_TYPE( DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n'); ++NumElim; Value *NewIV = OrigPhiRef; if (OrigPhiRef->getType() != Phi->getType()) { IRBuilder<> Builder(L->getHeader(), L->getHeader()->getFirstInsertionPt()); Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); } Phi->replaceAllUsesWith(NewIV); DeadInsts.emplace_back(Phi); } return NumElim; } bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L) { using namespace llvm::PatternMatch; SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); // Look for suitable value in simple conditions at the loop exits. for (BasicBlock *BB : ExitingBlocks) { ICmpInst::Predicate Pred; Instruction *LHS, *RHS; if (!match(BB->getTerminator(), m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), m_BasicBlock(), m_BasicBlock()))) continue; if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) return true; if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) return true; } // Use expand's logic which is used for reusing a previous Value in // ExprValueMap. Note that we don't currently model the cost of // needing to drop poison generating flags on the instruction if we // want to reuse it. We effectively assume that has zero cost. SmallVector DropPoisonGeneratingInsts; return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr; } template static InstructionCost costAndCollectOperands( const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, SmallVectorImpl &Worklist) { const T *S = cast(WorkItem.S); InstructionCost Cost = 0; // Object to help map SCEV operands to expanded IR instructions. struct OperationIndices { OperationIndices(unsigned Opc, size_t min, size_t max) : Opcode(Opc), MinIdx(min), MaxIdx(max) { } unsigned Opcode; size_t MinIdx; size_t MaxIdx; }; // Collect the operations of all the instructions that will be needed to // expand the SCEVExpr. This is so that when we come to cost the operands, // we know what the generated user(s) will be. 
SmallVector Operations; auto CastCost = [&](unsigned Opcode) -> InstructionCost { Operations.emplace_back(Opcode, 0, 0); return TTI.getCastInstrCost(Opcode, S->getType(), S->getOperand(0)->getType(), TTI::CastContextHint::None, CostKind); }; auto ArithCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx = 0, unsigned MaxIdx = 1) -> InstructionCost { Operations.emplace_back(Opcode, MinIdx, MaxIdx); return NumRequired * TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind); }; auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx, unsigned MaxIdx) -> InstructionCost { Operations.emplace_back(Opcode, MinIdx, MaxIdx); Type *OpType = S->getType(); return NumRequired * TTI.getCmpSelInstrCost( Opcode, OpType, CmpInst::makeCmpResultType(OpType), CmpInst::BAD_ICMP_PREDICATE, CostKind); }; switch (S->getSCEVType()) { case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); case scUnknown: case scConstant: case scVScale: return 0; case scPtrToInt: Cost = CastCost(Instruction::PtrToInt); break; case scTruncate: Cost = CastCost(Instruction::Trunc); break; case scZeroExtend: Cost = CastCost(Instruction::ZExt); break; case scSignExtend: Cost = CastCost(Instruction::SExt); break; case scUDivExpr: { unsigned Opcode = Instruction::UDiv; if (auto *SC = dyn_cast(S->getOperand(1))) if (SC->getAPInt().isPowerOf2()) Opcode = Instruction::LShr; Cost = ArithCost(Opcode, 1); break; } case scAddExpr: Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1); break; case scMulExpr: // TODO: this is a very pessimistic cost modelling for Mul, // because of Bin Pow algorithm actually used by the expander, // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1); break; case scSMaxExpr: case scUMaxExpr: case scSMinExpr: case scUMinExpr: case scSequentialUMinExpr: { // FIXME: should this ask the cost for Intrinsic's? // The reduction tree. Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1); Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2); switch (S->getSCEVType()) { case scSequentialUMinExpr: { // The safety net against poison. // FIXME: this is broken. Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0); Cost += ArithCost(Instruction::Or, S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0); Cost += CmpSelCost(Instruction::Select, 1, 0, 1); break; } default: assert(!isa(S) && "Unhandled SCEV expression type?"); break; } break; } case scAddRecExpr: { // In this polynominal, we may have some zero operands, and we shouldn't // really charge for those. So how many non-zero coefficients are there? int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) { return !Op->isZero(); }); assert(NumTerms >= 1 && "Polynominal should have at least one term."); assert(!(*std::prev(S->operands().end()))->isZero() && "Last operand should not be zero"); // Ignoring constant term (operand 0), how many of the coefficients are u> 1? int NumNonZeroDegreeNonOneTerms = llvm::count_if(S->operands(), [](const SCEV *Op) { auto *SConst = dyn_cast(Op); return !SConst || SConst->getAPInt().ugt(1); }); // Much like with normal add expr, the polynominal will require // one less addition than the number of it's terms. InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1, /*MinIdx*/ 1, /*MaxIdx*/ 1); // Here, *each* one of those will require a multiplication. 
InstructionCost MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); Cost = AddCost + MulCost; // What is the degree of this polynominal? int PolyDegree = S->getNumOperands() - 1; assert(PolyDegree >= 1 && "Should be at least affine."); // The final term will be: // Op_{PolyDegree} * x ^ {PolyDegree} // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. // FIXME: this is conservatively correct, but might be overly pessimistic. Cost += MulCost * (PolyDegree - 1); break; } } for (auto &CostOp : Operations) { for (auto SCEVOp : enumerate(S->operands())) { // Clamp the index to account for multiple IR operations being chained. size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx); size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx); Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value()); } } return Cost; } bool SCEVExpander::isHighCostExpansionHelper( const SCEVOperand &WorkItem, Loop *L, const Instruction &At, InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI, SmallPtrSetImpl &Processed, SmallVectorImpl &Worklist) { if (Cost > Budget) return true; // Already run out of budget, give up. const SCEV *S = WorkItem.S; // Was the cost of expansion of this expression already accounted for? if (!isa(S) && !Processed.insert(S).second) return false; // We have already accounted for this expression. // If we can find an existing value for this scev available at the point "At" // then consider the expression cheap. if (hasRelatedExistingExpansion(S, &At, L)) return false; // Consider the expression to be free. TargetTransformInfo::TargetCostKind CostKind = L->getHeader()->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_RecipThroughput; switch (S->getSCEVType()) { case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); case scUnknown: case scVScale: // Assume to be zero-cost. return false; case scConstant: { // Only evalulate the costs of constants when optimizing for size. if (CostKind != TargetTransformInfo::TCK_CodeSize) return false; const APInt &Imm = cast(S)->getAPInt(); Type *Ty = S->getType(); Cost += TTI.getIntImmCostInst( WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind); return Cost > Budget; } case scTruncate: case scPtrToInt: case scZeroExtend: case scSignExtend: { Cost += costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); return false; // Will answer upon next entry into this function. } case scUDivExpr: { // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or // HowManyLessThans produced to compute a precise expression, rather than a // UDiv from the user's code. If we can't find a UDiv in the code with some // simple searching, we need to account for it's cost. // At the beginning of this function we already tried to find existing // value for plain 'S'. Now try to lookup 'S + 1' since it is common // pattern involving division. This is just a simple search heuristic. if (hasRelatedExistingExpansion( SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L)) return false; // Consider it to be free. Cost += costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); return false; // Will answer upon next entry into this function. 
  }
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
           "Nary expr should have more than 1 operand.");
    // The simple nary expr will require one less op (or pair of ops)
    // than the number of it's terms.
    Cost += costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind,
                                                 Worklist);
    return Cost > Budget;
  }
  case scAddRecExpr: {
    assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
           "Polynomial should be at least linear");
    Cost += costAndCollectOperands<SCEVAddRecExpr>(
        WorkItem, TTI, CostKind, Worklist);
    return Cost > Budget;
  }
  }
  llvm_unreachable("Unknown SCEV kind!");
}

Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
                                            Instruction *IP) {
  assert(IP);
  switch (Pred->getKind()) {
  case SCEVPredicate::P_Union:
    return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
  case SCEVPredicate::P_Compare:
    return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
  case SCEVPredicate::P_Wrap: {
    auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
    return expandWrapPredicate(AddRecPred, IP);
  }
  }
  llvm_unreachable("Unknown SCEV predicate type");
}

Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
                                            Instruction *IP) {
  Value *Expr0 = expand(Pred->getLHS(), IP);
  Value *Expr1 = expand(Pred->getRHS(), IP);

  Builder.SetInsertPoint(IP);
  auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
  auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
  return I;
}

Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
                                           Instruction *Loc, bool Signed) {
  assert(AR->isAffine() && "Cannot generate RT check for "
                           "non-affine expression");

  // FIXME: It is highly suspicious that we're ignoring the predicates here.
  SmallVector<const SCEVPredicate *, 4> Pred;
  const SCEV *ExitCount =
      SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);

  assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");

  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *Start = AR->getStart();

  Type *ARTy = AR->getType();
  unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
  unsigned DstBits = SE.getTypeSizeInBits(ARTy);

  // The expression {Start,+,Step} has nusw/nssw if
  //   Step < 0, Start - |Step| * Backedge <= Start
  //   Step >= 0, Start + |Step| * Backedge > Start
  // and |Step| * Backedge doesn't unsigned overflow.

  Builder.SetInsertPoint(Loc);
  Value *TripCountVal = expand(ExitCount, Loc);

  IntegerType *Ty =
      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));

  Value *StepValue = expand(Step, Loc);
  Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
  Value *StartValue = expand(Start, Loc);

  ConstantInt *Zero =
      ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));

  Builder.SetInsertPoint(Loc);
  // Compute |Step|
  Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
  Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);

  // Compute |Step| * Backedge
  // Compute:
  //   1. Start + |Step| * Backedge < Start
  //   2. Start - |Step| * Backedge > Start
  //
  // And select either 1. or 2. depending on whether step is positive or
  // negative. If Step is known to be positive or negative, only create
  // either 1. or 2.
  auto ComputeEndCheck = [&]() -> Value * {
    // Checking <u 0 is always false.
    if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
      return ConstantInt::getFalse(Loc->getContext());

    // Get the backedge taken count and truncate or extended to the AR type.
    Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);

    Value *MulV, *OfMul;
    if (Step->isOne()) {
      // Special-case Step of one.
      // Potentially-costly `umul_with_overflow` isn't
      // needed, there is never an overflow, so to avoid artificially inflating
      // the cost of the check, directly emit the optimized IR.
      MulV = TruncTripCount;
      OfMul = ConstantInt::getFalse(MulV->getContext());
    } else {
      auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
                                             Intrinsic::umul_with_overflow, Ty);
      CallInst *Mul =
          Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
      MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
      OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
    }

    Value *Add = nullptr, *Sub = nullptr;
    bool NeedPosCheck = !SE.isKnownNegative(Step);
    bool NeedNegCheck = !SE.isKnownPositive(Step);

    if (isa<PointerType>(ARTy)) {
      Value *NegMulV = Builder.CreateNeg(MulV);
      if (NeedPosCheck)
        Add = Builder.CreatePtrAdd(StartValue, MulV);
      if (NeedNegCheck)
        Sub = Builder.CreatePtrAdd(StartValue, NegMulV);
    } else {
      if (NeedPosCheck)
        Add = Builder.CreateAdd(StartValue, MulV);
      if (NeedNegCheck)
        Sub = Builder.CreateSub(StartValue, MulV);
    }

    Value *EndCompareLT = nullptr;
    Value *EndCompareGT = nullptr;
    Value *EndCheck = nullptr;
    if (NeedPosCheck)
      EndCheck = EndCompareLT = Builder.CreateICmp(
          Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
    if (NeedNegCheck)
      EndCheck = EndCompareGT = Builder.CreateICmp(
          Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
    if (NeedPosCheck && NeedNegCheck) {
      // Select the answer based on the sign of Step.
      EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
    }
    return Builder.CreateOr(EndCheck, OfMul);
  };
  Value *EndCheck = ComputeEndCheck();

  // If the backedge taken count type is larger than the AR type,
  // check that we don't drop any bits by truncating it. If we are
  // dropping bits, then we have overflow (unless the step is zero).
  if (SrcBits > DstBits) {
    auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
    auto *BackedgeCheck =
        Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
                           ConstantInt::get(Loc->getContext(), MaxVal));
    BackedgeCheck = Builder.CreateAnd(
        BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));

    EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
  }

  return EndCheck;
}

Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
                                         Instruction *IP) {
  const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
  Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;

  // Add a check for NUSW
  if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
    NUSWCheck = generateOverflowCheck(A, IP, false);

  // Add a check for NSSW
  if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
    NSSWCheck = generateOverflowCheck(A, IP, true);

  if (NUSWCheck && NSSWCheck)
    return Builder.CreateOr(NUSWCheck, NSSWCheck);

  if (NUSWCheck)
    return NUSWCheck;

  if (NSSWCheck)
    return NSSWCheck;

  return ConstantInt::getFalse(IP->getContext());
}

Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
                                          Instruction *IP) {
  // Loop over all checks in this set.
SmallVector Checks; for (const auto *Pred : Union->getPredicates()) { Checks.push_back(expandCodeForPredicate(Pred, IP)); Builder.SetInsertPoint(IP); } if (Checks.empty()) return ConstantInt::getFalse(IP->getContext()); return Builder.CreateOr(Checks); } Value *SCEVExpander::fixupLCSSAFormFor(Value *V) { auto *DefI = dyn_cast(V); if (!PreserveLCSSA || !DefI) return V; Instruction *InsertPt = &*Builder.GetInsertPoint(); Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent()); Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent()); if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop)) return V; // Create a temporary instruction to at the current insertion point, so we // can hand it off to the helper to create LCSSA PHIs if required for the // new use. // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor) // would accept a insertion point and return an LCSSA phi for that // insertion point, so there is no need to insert & remove the temporary // instruction. Type *ToTy; if (DefI->getType()->isIntegerTy()) ToTy = PointerType::get(DefI->getContext(), 0); else ToTy = Type::getInt32Ty(DefI->getContext()); Instruction *User = CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt); auto RemoveUserOnExit = make_scope_exit([User]() { User->eraseFromParent(); }); SmallVector ToUpdate; ToUpdate.push_back(DefI); SmallVector PHIsToRemove; SmallVector InsertedPHIs; formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove, &InsertedPHIs); for (PHINode *PN : InsertedPHIs) rememberInstruction(PN); for (PHINode *PN : PHIsToRemove) { if (!PN->use_empty()) continue; InsertedValues.erase(PN); InsertedPostIncValues.erase(PN); PN->eraseFromParent(); } return User->getOperand(0); } namespace { // Search for a SCEV subexpression that is not safe to expand. Any expression // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely // UDiv expressions. We don't know if the UDiv is derived from an IR divide // instruction, but the important thing is that we prove the denominator is // nonzero before expansion. // // IVUsers already checks that IV-derived expressions are safe. So this check is // only needed when the expression includes some subexpression that is not IV // derived. // // Currently, we only allow division by a value provably non-zero here. // // We cannot generally expand recurrences unless the step dominates the loop // header. The expander handles the special case of affine recurrences by // scaling the recurrence outside the loop, but this technique isn't generally // applicable. Expanding a nested recurrence outside a loop requires computing // binomial coefficients. This could be done, but the recurrence has to be in a // perfectly reduced form, which can't be guaranteed. struct SCEVFindUnsafe { ScalarEvolution &SE; bool CanonicalMode; bool IsUnsafe = false; SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode) : SE(SE), CanonicalMode(CanonicalMode) {} bool follow(const SCEV *S) { if (const SCEVUDivExpr *D = dyn_cast(S)) { if (!SE.isKnownNonZero(D->getRHS())) { IsUnsafe = true; return false; } } if (const SCEVAddRecExpr *AR = dyn_cast(S)) { // For non-affine addrecs or in non-canonical mode we need a preheader // to insert into. 
if (!AR->getLoop()->getLoopPreheader() && (!CanonicalMode || !AR->isAffine())) { IsUnsafe = true; return false; } } return true; } bool isDone() const { return IsUnsafe; } }; } // namespace bool SCEVExpander::isSafeToExpand(const SCEV *S) const { SCEVFindUnsafe Search(SE, CanonicalMode); visitAll(S, Search); return !Search.IsUnsafe; } bool SCEVExpander::isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint) const { if (!isSafeToExpand(S)) return false; // We have to prove that the expanded site of S dominates InsertionPoint. // This is easy when not in the same block, but hard when S is an instruction // to be expanded somewhere inside the same block as our insertion point. // What we really need here is something analogous to an OrderedBasicBlock, // but for the moment, we paper over the problem by handling two common and // cheap to check cases. if (SE.properlyDominates(S, InsertionPoint->getParent())) return true; if (SE.dominates(S, InsertionPoint->getParent())) { if (InsertionPoint->getParent()->getTerminator() == InsertionPoint) return true; if (const SCEVUnknown *U = dyn_cast(S)) if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue())) return true; } return false; } void SCEVExpanderCleaner::cleanup() { // Result is used, nothing to remove. if (ResultUsed) return; auto InsertedInstructions = Expander.getAllInsertedInstructions(); #ifndef NDEBUG SmallPtrSet InsertedSet(InsertedInstructions.begin(), InsertedInstructions.end()); (void)InsertedSet; #endif // Remove sets with value handles. Expander.clear(); // Remove all inserted instructions. for (Instruction *I : reverse(InsertedInstructions)) { #ifndef NDEBUG assert(all_of(I->users(), [&InsertedSet](Value *U) { return InsertedSet.contains(cast(U)); }) && "removed instruction should only be used by instructions inserted " "during expansion"); #endif assert(!I->getType()->isVoidTy() && "inserted instruction should have non-void types"); I->replaceAllUsesWith(PoisonValue::get(I->getType())); I->eraseFromParent(); } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 0ed3324a27b6..1b142f14d811 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -1,2125 +1,2139 @@ //===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements induction variable simplification. It does // not define any actual pass or policy, but provides a single function to // simplify a loop's induction variables based on ScalarEvolution. 
// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" using namespace llvm; using namespace llvm::PatternMatch; #define DEBUG_TYPE "indvars" STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); STATISTIC(NumFoldedUser, "Number of IV users folded into a constant"); STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); STATISTIC( NumSimplifiedSDiv, "Number of IV signed division operations converted to unsigned division"); STATISTIC( NumSimplifiedSRem, "Number of IV signed remainder operations converted to unsigned remainder"); STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); namespace { /// This is a utility for simplifying induction variables /// based on ScalarEvolution. It is the primary instrument of the /// IndvarSimplify pass, but it may also be directly invoked to cleanup after /// other loop passes that preserve SCEV. class SimplifyIndvar { Loop *L; LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; const TargetTransformInfo *TTI; SCEVExpander &Rewriter; SmallVectorImpl &DeadInsts; bool Changed = false; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, SmallVectorImpl &Dead) : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter), DeadInsts(Dead) { assert(LI && "IV simplification requires LoopInfo"); } bool hasChanged() const { return Changed; } /// Iteratively perform simplification on a worklist of users of the /// specified induction variable. This is the top-level driver that applies /// all simplifications to users of an IV. void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); bool replaceIVUserWithLoopInvariant(Instruction *UseInst); bool replaceFloatIVWithIntegerIV(Instruction *UseInst); bool eliminateOverflowIntrinsic(WithOverflowInst *WO); bool eliminateSaturatingIntrinsic(SaturatingInst *SI); bool eliminateTrunc(TruncInst *TI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand); void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand); void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand, bool IsSigned); void replaceRemWithNumerator(BinaryOperator *Rem); void replaceRemWithNumeratorOrZero(BinaryOperator *Rem); void replaceSRemWithURem(BinaryOperator *Rem); bool eliminateSDiv(BinaryOperator *SDiv); bool strengthenBinaryOp(BinaryOperator *BO, Instruction *IVOperand); bool strengthenOverflowingOperation(BinaryOperator *OBO, Instruction *IVOperand); bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand); }; } /// Find a point in code which dominates all given instructions. 
We can safely /// assume that, whatever fact we can prove at the found point, this fact is /// also true for each of the given instructions. static Instruction *findCommonDominator(ArrayRef Instructions, DominatorTree &DT) { Instruction *CommonDom = nullptr; for (auto *Insn : Instructions) CommonDom = CommonDom ? DT.findNearestCommonDominator(CommonDom, Insn) : Insn; assert(CommonDom && "Common dominator not found?"); return CommonDom; } /// Fold an IV operand into its use. This removes increments of an /// aligned IV when used by a instruction that ignores the low bits. /// /// IVOperand is guaranteed SCEVable, but UseInst may not be. /// /// Return the operand of IVOperand for this induction variable if IVOperand can /// be folded (in case more folding opportunities have been exposed). /// Otherwise return null. Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { Value *IVSrc = nullptr; const unsigned OperIdx = 0; const SCEV *FoldedExpr = nullptr; bool MustDropExactFlag = false; switch (UseInst->getOpcode()) { default: return nullptr; case Instruction::UDiv: case Instruction::LShr: // We're only interested in the case where we know something about // the numerator and have a constant denominator. if (IVOperand != UseInst->getOperand(OperIdx) || !isa(UseInst->getOperand(1))) return nullptr; // Attempt to fold a binary operator with constant operand. // e.g. ((I + 1) >> 2) => I >> 2 if (!isa(IVOperand) || !isa(IVOperand->getOperand(1))) return nullptr; IVSrc = IVOperand->getOperand(0); // IVSrc must be the (SCEVable) IV, since the other operand is const. assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand"); ConstantInt *D = cast(UseInst->getOperand(1)); if (UseInst->getOpcode() == Instruction::LShr) { // Get a constant for the divisor. See createSCEV. uint32_t BitWidth = cast(UseInst->getType())->getBitWidth(); if (D->getValue().uge(BitWidth)) return nullptr; D = ConstantInt::get(UseInst->getContext(), APInt::getOneBitSet(BitWidth, D->getZExtValue())); } const auto *LHS = SE->getSCEV(IVSrc); const auto *RHS = SE->getSCEV(D); FoldedExpr = SE->getUDivExpr(LHS, RHS); // We might have 'exact' flag set at this point which will no longer be // correct after we make the replacement. if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS)) MustDropExactFlag = true; } // We have something that might fold it's operand. Compare SCEVs. if (!SE->isSCEVable(UseInst->getType())) return nullptr; // Bypass the operand if SCEV can prove it has no effect. 
if (SE->getSCEV(UseInst) != FoldedExpr) return nullptr; LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand << " -> " << *UseInst << '\n'); UseInst->setOperand(OperIdx, IVSrc); assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper"); if (MustDropExactFlag) UseInst->dropPoisonGeneratingFlags(); ++NumElimOperand; Changed = true; if (IVOperand->use_empty()) DeadInsts.emplace_back(IVOperand); return IVSrc; } bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand) { auto *Preheader = L->getLoopPreheader(); if (!Preheader) return false; unsigned IVOperIdx = 0; ICmpInst::Predicate Pred = ICmp->getPredicate(); if (IVOperand != ICmp->getOperand(0)) { // Swapped assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); IVOperIdx = 1; Pred = ICmpInst::getSwappedPredicate(Pred); } // Get the SCEVs for the ICmp operands (in the specific context of the // current loop) const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L, ICmp); if (!LIP) return false; ICmpInst::Predicate InvariantPredicate = LIP->Pred; const SCEV *InvariantLHS = LIP->LHS; const SCEV *InvariantRHS = LIP->RHS; // Do not generate something ridiculous. auto *PHTerm = Preheader->getTerminator(); if (Rewriter.isHighCostExpansion({InvariantLHS, InvariantRHS}, L, 2 * SCEVCheapExpansionBudget, TTI, PHTerm) || !Rewriter.isSafeToExpandAt(InvariantLHS, PHTerm) || !Rewriter.isSafeToExpandAt(InvariantRHS, PHTerm)) return false; auto *NewLHS = Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm); auto *NewRHS = Rewriter.expandCodeFor(InvariantRHS, IVOperand->getType(), PHTerm); LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); ICmp->setPredicate(InvariantPredicate); ICmp->setOperand(0, NewLHS); ICmp->setOperand(1, NewRHS); return true; } /// SimplifyIVUsers helper for eliminating useless /// comparisons against an induction variable. void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand) { unsigned IVOperIdx = 0; ICmpInst::Predicate Pred = ICmp->getPredicate(); ICmpInst::Predicate OriginalPred = Pred; if (IVOperand != ICmp->getOperand(0)) { // Swapped assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); IVOperIdx = 1; Pred = ICmpInst::getSwappedPredicate(Pred); } // Get the SCEVs for the ICmp operands (in the specific context of the // current loop) const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); // If the condition is always true or always false in the given context, // replace it with a constant value. 
SmallVector Users; for (auto *U : ICmp->users()) Users.push_back(cast(U)); const Instruction *CtxI = findCommonDominator(Users, *DT); if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) { SE->forgetValue(ICmp); ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev)); DeadInsts.emplace_back(ICmp); LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); } else if (makeIVComparisonInvariant(ICmp, IVOperand)) { // fallthrough to end of function } else if (ICmpInst::isSigned(OriginalPred) && SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) { // If we were unable to make anything above, all we can is to canonicalize // the comparison hoping that it will open the doors for other // optimizations. If we find out that we compare two non-negative values, // we turn the instruction's predicate to its unsigned version. Note that // we cannot rely on Pred here unless we check if we have swapped it. assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?"); LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n'); ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred)); } else return; ++NumElimCmp; Changed = true; } bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) { // Get the SCEVs for the ICmp operands. auto *N = SE->getSCEV(SDiv->getOperand(0)); auto *D = SE->getSCEV(SDiv->getOperand(1)); // Simplify unnecessary loops away. const Loop *L = LI->getLoopFor(SDiv->getParent()); N = SE->getSCEVAtScope(N, L); D = SE->getSCEVAtScope(D, L); // Replace sdiv by udiv if both of the operands are non-negative if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) { auto *UDiv = BinaryOperator::Create( BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1), SDiv->getName() + ".udiv", SDiv); UDiv->setIsExact(SDiv->isExact()); SDiv->replaceAllUsesWith(UDiv); LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n'); ++NumSimplifiedSDiv; Changed = true; DeadInsts.push_back(SDiv); return true; } return false; } // i %s n -> i %u n if i >= 0 and n >= 0 void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) { auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D, Rem->getName() + ".urem", Rem); Rem->replaceAllUsesWith(URem); LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n'); ++NumSimplifiedSRem; Changed = true; DeadInsts.emplace_back(Rem); } // i % n --> i if i is in [0,n). void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) { Rem->replaceAllUsesWith(Rem->getOperand(0)); LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); ++NumElimRem; Changed = true; DeadInsts.emplace_back(Rem); } // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) { auto *T = Rem->getType(); auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D); SelectInst *Sel = SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem); Rem->replaceAllUsesWith(Sel); LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); ++NumElimRem; Changed = true; DeadInsts.emplace_back(Rem); } /// SimplifyIVUsers helper for eliminating useless remainder operations /// operating on an induction variable or replacing srem by urem. 
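///
/// A hedged illustration (not part of the original comment): when both
/// operands are provably non-negative,
///   %r = srem i32 %iv, %n      ==>   %r = urem i32 %iv, %n
/// and when SCEV additionally knows %iv is in [0, %n), the remainder is just
/// the numerator and the instruction can be removed outright.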
void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand, bool IsSigned) { auto *NValue = Rem->getOperand(0); auto *DValue = Rem->getOperand(1); // We're only interested in the case where we know something about // the numerator, unless it is a srem, because we want to replace srem by urem // in general. bool UsedAsNumerator = IVOperand == NValue; if (!UsedAsNumerator && !IsSigned) return; const SCEV *N = SE->getSCEV(NValue); // Simplify unnecessary loops away. const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); N = SE->getSCEVAtScope(N, ICmpLoop); bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N); // Do not proceed if the Numerator may be negative if (!IsNumeratorNonNegative) return; const SCEV *D = SE->getSCEV(DValue); D = SE->getSCEVAtScope(D, ICmpLoop); if (UsedAsNumerator) { auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; if (SE->isKnownPredicate(LT, N, D)) { replaceRemWithNumerator(Rem); return; } auto *T = Rem->getType(); const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T)); if (SE->isKnownPredicate(LT, NLessOne, D)) { replaceRemWithNumeratorOrZero(Rem); return; } } // Try to replace SRem with URem, if both N and D are known non-negative. // Since we had already check N, we only need to check D now if (!IsSigned || !SE->isKnownNonNegative(D)) return; replaceSRemWithURem(Rem); } bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { const SCEV *LHS = SE->getSCEV(WO->getLHS()); const SCEV *RHS = SE->getSCEV(WO->getRHS()); if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS)) return false; // Proved no overflow, nuke the overflow check and, if possible, the overflow // intrinsic as well. BinaryOperator *NewResult = BinaryOperator::Create( WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO); if (WO->isSigned()) NewResult->setHasNoSignedWrap(true); else NewResult->setHasNoUnsignedWrap(true); SmallVector ToDelete; for (auto *U : WO->users()) { if (auto *EVI = dyn_cast(U)) { if (EVI->getIndices()[0] == 1) EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext())); else { assert(EVI->getIndices()[0] == 0 && "Only two possibilities!"); EVI->replaceAllUsesWith(NewResult); } ToDelete.push_back(EVI); } } for (auto *EVI : ToDelete) EVI->eraseFromParent(); if (WO->use_empty()) WO->eraseFromParent(); Changed = true; return true; } bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) { const SCEV *LHS = SE->getSCEV(SI->getLHS()); const SCEV *RHS = SE->getSCEV(SI->getRHS()); if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS)) return false; BinaryOperator *BO = BinaryOperator::Create( SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI); if (SI->isSigned()) BO->setHasNoSignedWrap(); else BO->setHasNoUnsignedWrap(); SI->replaceAllUsesWith(BO); DeadInsts.emplace_back(SI); Changed = true; return true; } bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { // It is always legal to replace // icmp i32 trunc(iv), n // with // icmp i64 sext(trunc(iv)), sext(n), if pred is signed predicate. // Or with // icmp i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate. // Or with either of these if pred is an equality predicate. // // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for // every comparison which uses trunc, it means that we can replace each of // them with comparison of iv against sext/zext(n). We no longer need trunc // after that. 
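  //
  // A sketch of the rewrite (illustrative only; assumes %n is loop-invariant
  // and that %iv == sext(trunc(%iv)) holds):
  //   %t = trunc i64 %iv to i32
  //   %c = icmp slt i32 %t, %n
  // becomes
  //   %n.ext = sext i32 %n to i64
  //   %c     = icmp slt i64 %iv, %n.ext
  // leaving %t dead so it can be dropped together with the other trunc users.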
// // TODO: Should we do this if we can widen *some* comparisons, but not all // of them? Sometimes it is enough to enable other optimizations, but the // trunc instruction will stay in the loop. Value *IV = TI->getOperand(0); Type *IVTy = IV->getType(); const SCEV *IVSCEV = SE->getSCEV(IV); const SCEV *TISCEV = SE->getSCEV(TI); // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can // get rid of trunc bool DoesSExtCollapse = false; bool DoesZExtCollapse = false; if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy)) DoesSExtCollapse = true; if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy)) DoesZExtCollapse = true; // If neither sext nor zext does collapse, it is not profitable to do any // transform. Bail. if (!DoesSExtCollapse && !DoesZExtCollapse) return false; // Collect users of the trunc that look like comparisons against invariants. // Bail if we find something different. SmallVector ICmpUsers; for (auto *U : TI->users()) { // We don't care about users in unreachable blocks. if (isa(U) && !DT->isReachableFromEntry(cast(U)->getParent())) continue; ICmpInst *ICI = dyn_cast(U); if (!ICI) return false; assert(L->contains(ICI->getParent()) && "LCSSA form broken?"); if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) && !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0)))) return false; // If we cannot get rid of trunc, bail. if (ICI->isSigned() && !DoesSExtCollapse) return false; if (ICI->isUnsigned() && !DoesZExtCollapse) return false; // For equality, either signed or unsigned works. ICmpUsers.push_back(ICI); } auto CanUseZExt = [&](ICmpInst *ICI) { // Unsigned comparison can be widened as unsigned. if (ICI->isUnsigned()) return true; // Is it profitable to do zext? if (!DoesZExtCollapse) return false; // For equality, we can safely zext both parts. if (ICI->isEquality()) return true; // Otherwise we can only use zext when comparing two non-negative or two // negative values. But in practice, we will never pass DoesZExtCollapse // check for a negative value, because zext(trunc(x)) is non-negative. So // it only make sense to check for non-negativity here. const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0)); const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1)); return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2); }; // Replace all comparisons against trunc with comparisons against IV. for (auto *ICI : ICmpUsers) { bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0)); auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1); IRBuilder<> Builder(ICI); Value *Ext = nullptr; // For signed/unsigned predicate, replace the old comparison with comparison // of immediate IV against sext/zext of the invariant argument. If we can // use either sext or zext (i.e. we are dealing with equality predicate), // then prefer zext as a more canonical form. // TODO: If we see a signed comparison which can be turned into unsigned, // we can do it here for canonicalization purposes. 
ICmpInst::Predicate Pred = ICI->getPredicate(); if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred); if (CanUseZExt(ICI)) { assert(DoesZExtCollapse && "Unprofitable zext?"); Ext = Builder.CreateZExt(Op1, IVTy, "zext"); Pred = ICmpInst::getUnsignedPredicate(Pred); } else { assert(DoesSExtCollapse && "Unprofitable sext?"); Ext = Builder.CreateSExt(Op1, IVTy, "sext"); assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!"); } bool Changed; L->makeLoopInvariant(Ext, Changed); (void)Changed; auto *NewCmp = Builder.CreateICmp(Pred, IV, Ext); ICI->replaceAllUsesWith(NewCmp); DeadInsts.emplace_back(ICI); } // Trunc no longer needed. TI->replaceAllUsesWith(PoisonValue::get(TI->getType())); DeadInsts.emplace_back(TI); return true; } /// Eliminate an operation that consumes a simple IV and has no observable /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, /// but UseInst may not be. bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, Instruction *IVOperand) { if (ICmpInst *ICmp = dyn_cast(UseInst)) { eliminateIVComparison(ICmp, IVOperand); return true; } if (BinaryOperator *Bin = dyn_cast(UseInst)) { bool IsSRem = Bin->getOpcode() == Instruction::SRem; if (IsSRem || Bin->getOpcode() == Instruction::URem) { simplifyIVRemainder(Bin, IVOperand, IsSRem); return true; } if (Bin->getOpcode() == Instruction::SDiv) return eliminateSDiv(Bin); } if (auto *WO = dyn_cast(UseInst)) if (eliminateOverflowIntrinsic(WO)) return true; if (auto *SI = dyn_cast(UseInst)) if (eliminateSaturatingIntrinsic(SI)) return true; if (auto *TI = dyn_cast(UseInst)) if (eliminateTrunc(TI)) return true; if (eliminateIdentitySCEV(UseInst, IVOperand)) return true; return false; } static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) { if (auto *BB = L->getLoopPreheader()) return BB->getTerminator(); return Hint; } /// Replace the UseInst with a loop invariant expression if it is safe. bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { if (!SE->isSCEVable(I->getType())) return false; // Get the symbolic expression for this instruction. const SCEV *S = SE->getSCEV(I); if (!SE->isLoopInvariant(S, L)) return false; // Do not generate something ridiculous even if S is loop invariant. if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I)) return false; auto *IP = GetLoopInvariantInsertPosition(L, I); if (!Rewriter.isSafeToExpandAt(S, IP)) { LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I << " with non-speculable loop invariant: " << *S << '\n'); return false; } auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP); I->replaceAllUsesWith(Invariant); LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I << " with loop invariant: " << *S << '\n'); ++NumFoldedUser; Changed = true; DeadInsts.emplace_back(I); return true; } /// Eliminate redundant type cast between integer and float. bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { if (UseInst->getOpcode() != CastInst::SIToFP && UseInst->getOpcode() != CastInst::UIToFP) return false; Instruction *IVOperand = cast(UseInst->getOperand(0)); // Get the symbolic expression for this instruction. 
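  // Illustrative target pattern (assumed IR, not from upstream): when the IV
  // fits in the destination's mantissa, the round trip through floating point
  //   %f = sitofp i32 %iv to double
  //   %i = fptosi double %f to i64
  // carries no information and the code below replaces %i with a plain
  // integer extension of %iv (sext here; zext if either cast is unsigned).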
  const SCEV *IV = SE->getSCEV(IVOperand);
  int MaskBits;
  if (UseInst->getOpcode() == CastInst::SIToFP)
    MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits();
  else
    MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits();
  int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
  if (MaskBits <= DestNumSigBits) {
    for (User *U : UseInst->users()) {
      // Match for fptosi/fptoui of sitofp and with same type.
      auto *CI = dyn_cast<CastInst>(U);
      if (!CI)
        continue;

      CastInst::CastOps Opcode = CI->getOpcode();
      if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI)
        continue;

      Value *Conv = nullptr;
      if (IVOperand->getType() != CI->getType()) {
        IRBuilder<> Builder(CI);
        StringRef Name = IVOperand->getName();
        // To match InstCombine logic, we only need sext if both fptosi and
        // sitofp are used. If one of them is unsigned, then we can use zext.
        if (SE->getTypeSizeInBits(IVOperand->getType()) >
            SE->getTypeSizeInBits(CI->getType())) {
          Conv = Builder.CreateTrunc(IVOperand, CI->getType(), Name + ".trunc");
        } else if (Opcode == CastInst::FPToUI ||
                   UseInst->getOpcode() == CastInst::UIToFP) {
          Conv = Builder.CreateZExt(IVOperand, CI->getType(), Name + ".zext");
        } else {
          Conv = Builder.CreateSExt(IVOperand, CI->getType(), Name + ".sext");
        }
      } else
        Conv = IVOperand;

      CI->replaceAllUsesWith(Conv);
      DeadInsts.push_back(CI);
      LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
                        << " with: " << *Conv << '\n');

      ++NumFoldedUser;
      Changed = true;
    }
  }

  return Changed;
}

/// Eliminate any operation that SCEV can prove is an identity function.
bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
                                           Instruction *IVOperand) {
  if (!SE->isSCEVable(UseInst->getType()) ||
-      (UseInst->getType() != IVOperand->getType()) ||
-      (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+      UseInst->getType() != IVOperand->getType())
+    return false;
+
+  const SCEV *UseSCEV = SE->getSCEV(UseInst);
+  if (UseSCEV != SE->getSCEV(IVOperand))
    return false;

  // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
  // dominator tree, even if X is an operand to Y. For instance, in
  //
  //     %iv = phi i32 {0,+,1}
  //     br %cond, label %left, label %merge
  //
  //   left:
  //     %X = add i32 %iv, 0
  //     br label %merge
  //
  //   merge:
  //     %M = phi (%X, %iv)
  //
  // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
  // %M.replaceAllUsesWith(%X) would be incorrect.

  if (isa<PHINode>(UseInst))
    // If UseInst is not a PHI node then we know that IVOperand dominates
    // UseInst directly from the legality of SSA.
    if (!DT || !DT->dominates(IVOperand, UseInst))
      return false;

  if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
    return false;

+  // Make sure the operand is not more poisonous than the instruction.
+  if (!impliesPoison(IVOperand, UseInst)) {
+    SmallVector<Instruction *> DropPoisonGeneratingInsts;
+    if (!SE->canReuseInstruction(UseSCEV, IVOperand, DropPoisonGeneratingInsts))
+      return false;
+
+    for (Instruction *I : DropPoisonGeneratingInsts)
+      I->dropPoisonGeneratingFlagsAndMetadata();
+  }
+
  LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');

  SE->forgetValue(UseInst);
  UseInst->replaceAllUsesWith(IVOperand);
  ++NumElimIdentity;
  Changed = true;
  DeadInsts.emplace_back(UseInst);
  return true;
}

bool SimplifyIndvar::strengthenBinaryOp(BinaryOperator *BO,
                                        Instruction *IVOperand) {
  return (isa<OverflowingBinaryOperator>(BO) &&
          strengthenOverflowingOperation(BO, IVOperand)) ||
         (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand));
}

/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
/// unsigned-overflow.
Returns true if anything changed, false otherwise. bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, Instruction *IVOperand) { auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp( cast(BO)); if (!Flags) return false; BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW); BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW); // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap // flags on addrecs while performing zero/sign extensions. We could call // forgetValue() here to make sure those flags also propagate to any other // SCEV expressions based on the addrec. However, this can have pathological // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384. return true; } /// Annotate the Shr in (X << IVOperand) >> C as exact using the /// information from the IV's range. Returns true if anything changed, false /// otherwise. bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand) { if (BO->getOpcode() == Instruction::Shl) { bool Changed = false; ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand)); for (auto *U : BO->users()) { const APInt *C; if (match(U, m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) || match(U, m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) { BinaryOperator *Shr = cast(U); if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) { Shr->setIsExact(true); Changed = true; } } } return Changed; } return false; } /// Add all uses of Def to the current IV's worklist. static void pushIVUsers( Instruction *Def, Loop *L, SmallPtrSet &Simplified, SmallVectorImpl< std::pair > &SimpleIVUsers) { for (User *U : Def->users()) { Instruction *UI = cast(U); // Avoid infinite or exponential worklist processing. // Also ensure unique worklist users. // If Def is a LoopPhi, it may not be in the Simplified set, so check for // self edges first. if (UI == Def) continue; // Only change the current Loop, do not change the other parts (e.g. other // Loops). if (!L->contains(UI)) continue; // Do not push the same instruction more than once. if (!Simplified.insert(UI).second) continue; SimpleIVUsers.push_back(std::make_pair(UI, Def)); } } /// Return true if this instruction generates a simple SCEV /// expression in terms of that IV. /// /// This is similar to IVUsers' isInteresting() but processes each instruction /// non-recursively when the operand is already known to be a simpleIVUser. /// static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { if (!SE->isSCEVable(I->getType())) return false; // Get the symbolic expression for this instruction. const SCEV *S = SE->getSCEV(I); // Only consider affine recurrences. const SCEVAddRecExpr *AR = dyn_cast(S); if (AR && AR->getLoop() == L) return true; return false; } /// Iteratively perform simplification on a worklist of users /// of the specified induction variable. Each successive simplification may push /// more users which may themselves be candidates for simplification. /// /// This algorithm does not require IVUsers analysis. Instead, it simplifies /// instructions in-place during analysis. Rather than rewriting induction /// variables bottom-up from their users, it transforms a chain of IVUsers /// top-down, updating the IR only when it encounters a clear optimization /// opportunity. /// /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. 
/// void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { if (!SE->isSCEVable(CurrIV->getType())) return; // Instructions processed by SimplifyIndvar for CurrIV. SmallPtrSet Simplified; // Use-def pairs if IV users waiting to be processed for CurrIV. SmallVector, 8> SimpleIVUsers; // Push users of the current LoopPhi. In rare cases, pushIVUsers may be // called multiple times for the same LoopPhi. This is the proper thing to // do for loop header phis that use each other. pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers); while (!SimpleIVUsers.empty()) { std::pair UseOper = SimpleIVUsers.pop_back_val(); Instruction *UseInst = UseOper.first; // If a user of the IndVar is trivially dead, we prefer just to mark it dead // rather than try to do some complex analysis or transformation (such as // widening) basing on it. // TODO: Propagate TLI and pass it here to handle more cases. if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) { DeadInsts.emplace_back(UseInst); continue; } // Bypass back edges to avoid extra work. if (UseInst == CurrIV) continue; // Try to replace UseInst with a loop invariant before any other // simplifications. if (replaceIVUserWithLoopInvariant(UseInst)) continue; // Go further for the bitcast 'prtoint ptr to i64' or if the cast is done // by truncation if ((isa(UseInst)) || (isa(UseInst))) for (Use &U : UseInst->uses()) { Instruction *User = cast(U.getUser()); if (replaceIVUserWithLoopInvariant(User)) break; // done replacing } Instruction *IVOperand = UseOper.second; for (unsigned N = 0; IVOperand; ++N) { assert(N <= Simplified.size() && "runaway iteration"); (void) N; Value *NewOper = foldIVUser(UseInst, IVOperand); if (!NewOper) break; // done folding IVOperand = dyn_cast(NewOper); } if (!IVOperand) continue; if (eliminateIVUser(UseInst, IVOperand)) { pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); continue; } if (BinaryOperator *BO = dyn_cast(UseInst)) { if (strengthenBinaryOp(BO, IVOperand)) { // re-queue uses of the now modified binary operator and fall // through to the checks that remain. pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); } } // Try to use integer induction for FPToSI of float induction directly. if (replaceFloatIVWithIntegerIV(UseInst)) { // Re-queue the potentially new direct uses of IVOperand. pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); continue; } CastInst *Cast = dyn_cast(UseInst); if (V && Cast) { V->visitCast(Cast); continue; } if (isSimpleIVUser(UseInst, L, SE)) { pushIVUsers(UseInst, L, Simplified, SimpleIVUsers); } } } namespace llvm { void IVVisitor::anchor() { } /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl &Dead, SCEVExpander &Rewriter, IVVisitor *V) { SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI, Rewriter, Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } /// Simplify users of induction variables within this /// loop. This does not actually change or add IVs. 
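///
/// Minimal illustrative call site (hypothetical driver code, not part of this
/// file), assuming the usual loop analyses are already available:
///   SmallVector<WeakTrackingVH, 16> Dead;
///   bool Changed = simplifyLoopIVs(L, SE, DT, LI, TTI, Dead);
///   // The caller is expected to delete the instructions collected in Dead.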
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl &Dead) { SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { Changed |= simplifyUsersOfIV(cast(I), SE, DT, LI, TTI, Dead, Rewriter); } return Changed; } } // namespace llvm namespace { //===----------------------------------------------------------------------===// // Widen Induction Variables - Extend the width of an IV to cover its // widest uses. //===----------------------------------------------------------------------===// class WidenIV { // Parameters PHINode *OrigPhi; Type *WideType; // Context LoopInfo *LI; Loop *L; ScalarEvolution *SE; DominatorTree *DT; // Does the module have any calls to the llvm.experimental.guard intrinsic // at all? If not we can avoid scanning instructions looking for guards. bool HasGuards; bool UsePostIncrementRanges; // Statistics unsigned NumElimExt = 0; unsigned NumWidened = 0; // Result PHINode *WidePhi = nullptr; Instruction *WideInc = nullptr; const SCEV *WideIncExpr = nullptr; SmallVectorImpl &DeadInsts; SmallPtrSet Widened; enum class ExtendKind { Zero, Sign, Unknown }; // A map tracking the kind of extension used to widen each narrow IV // and narrow IV user. // Key: pointer to a narrow IV or IV user. // Value: the kind of extension used to widen this Instruction. DenseMap, ExtendKind> ExtendKindMap; using DefUserPair = std::pair, AssertingVH>; // A map with control-dependent ranges for post increment IV uses. The key is // a pair of IV def and a use of this def denoting the context. The value is // a ConstantRange representing possible values of the def at the given // context. DenseMap PostIncRangeInfos; std::optional getPostIncRangeInfo(Value *Def, Instruction *UseI) { DefUserPair Key(Def, UseI); auto It = PostIncRangeInfos.find(Key); return It == PostIncRangeInfos.end() ? std::optional(std::nullopt) : std::optional(It->second); } void calculatePostIncRanges(PHINode *OrigPhi); void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { DefUserPair Key(Def, UseI); auto It = PostIncRangeInfos.find(Key); if (It == PostIncRangeInfos.end()) PostIncRangeInfos.insert({Key, R}); else It->second = R.intersectWith(It->second); } public: /// Record a link in the Narrow IV def-use chain along with the WideIV that /// computes the same value as the Narrow IV def. This avoids caching Use* /// pointers. struct NarrowIVDefUse { Instruction *NarrowDef = nullptr; Instruction *NarrowUse = nullptr; Instruction *WideDef = nullptr; // True if the narrow def is never negative. Tracking this information lets // us use a sign extension instead of a zero extension or vice versa, when // profitable and legal. 
bool NeverNegative = false; NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, bool NeverNegative) : NarrowDef(ND), NarrowUse(NU), WideDef(WD), NeverNegative(NeverNegative) {} }; WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, SmallVectorImpl &DI, bool HasGuards, bool UsePostIncrementRanges = true); PHINode *createWideIV(SCEVExpander &Rewriter); unsigned getNumElimExt() { return NumElimExt; }; unsigned getNumWidened() { return NumWidened; }; protected: Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, Instruction *Use); Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); ExtendKind getExtendKind(Instruction *I); using WidenedRecTy = std::pair; WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, unsigned OpCode) const; Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); bool widenLoopCompare(NarrowIVDefUse DU); bool widenWithVariantUse(NarrowIVDefUse DU); void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); private: SmallVector NarrowIVUsers; }; } // namespace /// Determine the insertion point for this user. By default, insert immediately /// before the user. SCEVExpander or LICM will hoist loop invariants out of the /// loop. For PHI nodes, there may be multiple uses, so compute the nearest /// common dominator for the incoming blocks. A nullptr can be returned if no /// viable location is found: it may happen if User is a PHI and Def only comes /// to this PHI from unreachable blocks. static Instruction *getInsertPointForUses(Instruction *User, Value *Def, DominatorTree *DT, LoopInfo *LI) { PHINode *PHI = dyn_cast(User); if (!PHI) return User; Instruction *InsertPt = nullptr; for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { if (PHI->getIncomingValue(i) != Def) continue; BasicBlock *InsertBB = PHI->getIncomingBlock(i); if (!DT->isReachableFromEntry(InsertBB)) continue; if (!InsertPt) { InsertPt = InsertBB->getTerminator(); continue; } InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); InsertPt = InsertBB->getTerminator(); } // If we have skipped all inputs, it means that Def only comes to Phi from // unreachable blocks. if (!InsertPt) return nullptr; auto *DefI = dyn_cast(Def); if (!DefI) return InsertPt; assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); auto *L = LI->getLoopFor(DefI->getParent()); assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) if (LI->getLoopFor(DTN->getBlock()) == L) return DTN->getBlock()->getTerminator(); llvm_unreachable("DefI dominates InsertPt!"); } WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, SmallVectorImpl &DI, bool HasGuards, bool UsePostIncrementRanges) : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); ExtendKindMap[OrigPhi] = WI.IsSigned ? 
ExtendKind::Sign : ExtendKind::Zero; } Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, Instruction *Use) { // Set the debug location and conservative insertion point. IRBuilder<> Builder(Use); // Hoist the insertion point into loop preheaders as far as possible. for (const Loop *L = LI->getLoopFor(Use->getParent()); L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); L = L->getParentLoop()) Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : Builder.CreateZExt(NarrowOper, WideType); } /// Instantiate a wide operation to replace a narrow operation. This only needs /// to handle operations that can evaluation to SCEVAddRec. It can safely return /// 0 for any operation we decide not to clone. Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR) { unsigned Opcode = DU.NarrowUse->getOpcode(); switch (Opcode) { default: return nullptr; case Instruction::Add: case Instruction::Mul: case Instruction::UDiv: case Instruction::Sub: return cloneArithmeticIVUser(DU, WideAR); case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: return cloneBitwiseIVUser(DU); } } Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) { Instruction *NarrowUse = DU.NarrowUse; Instruction *NarrowDef = DU.NarrowDef; Instruction *WideDef = DU.WideDef; LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything // about the narrow operand yet so must insert a [sz]ext. It is probably loop // invariant and will be folded or hoisted. If it actually comes from a // widened IV, it should be removed during a future call to widenIVUse. bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign; Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : createExtendInst(NarrowUse->getOperand(0), WideType, IsSigned, NarrowUse); Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef : createExtendInst(NarrowUse->getOperand(1), WideType, IsSigned, NarrowUse); auto *NarrowBO = cast(NarrowUse); auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, NarrowBO->getName()); IRBuilder<> Builder(NarrowUse); Builder.Insert(WideBO); WideBO->copyIRFlags(NarrowBO); return WideBO; } Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR) { Instruction *NarrowUse = DU.NarrowUse; Instruction *NarrowDef = DU.NarrowDef; Instruction *WideDef = DU.WideDef; LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; // We're trying to find X such that // // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X // // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), // and check using SCEV if any of them are correct. // Returns true if extending NonIVNarrowDef according to `SignExt` is a // correct solution to X. 
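  // Hedged example (assumed operands, not from the sources): when widening
  //   %u = add i32 %narrow.iv, %b
  // to i64, the non-IV operand %b may be extended as either sext(%b) or
  // zext(%b); the guess below keeps whichever choice makes
  // 'WideDef op ext(%b)' equal to WideAR according to SCEV.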
auto GuessNonIVOperand = [&](bool SignExt) { const SCEV *WideLHS; const SCEV *WideRHS; auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { if (SignExt) return SE->getSignExtendExpr(S, Ty); return SE->getZeroExtendExpr(S, Ty); }; if (IVOpIdx == 0) { WideLHS = SE->getSCEV(WideDef); const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); WideRHS = GetExtend(NarrowRHS, WideType); } else { const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); WideLHS = GetExtend(NarrowLHS, WideType); WideRHS = SE->getSCEV(WideDef); } // WideUse is "WideDef `op.wide` X" as described in the comment. const SCEV *WideUse = getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode()); return WideUse == WideAR; }; bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign; if (!GuessNonIVOperand(SignExtend)) { SignExtend = !SignExtend; if (!GuessNonIVOperand(SignExtend)) return nullptr; } Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : createExtendInst(NarrowUse->getOperand(0), WideType, SignExtend, NarrowUse); Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef : createExtendInst(NarrowUse->getOperand(1), WideType, SignExtend, NarrowUse); auto *NarrowBO = cast(NarrowUse); auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, NarrowBO->getName()); IRBuilder<> Builder(NarrowUse); Builder.Insert(WideBO); WideBO->copyIRFlags(NarrowBO); return WideBO; } WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { auto It = ExtendKindMap.find(I); assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); return It->second; } const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, unsigned OpCode) const { switch (OpCode) { case Instruction::Add: return SE->getAddExpr(LHS, RHS); case Instruction::Sub: return SE->getMinusSCEV(LHS, RHS); case Instruction::Mul: return SE->getMulExpr(LHS, RHS); case Instruction::UDiv: return SE->getUDivExpr(LHS, RHS); default: llvm_unreachable("Unsupported opcode."); }; } /// No-wrap operations can transfer sign extension of their result to their /// operands. Generate the SCEV value for the widened operation without /// actually modifying the IR yet. If the expression after extending the /// operands is an AddRec for this loop, return the AddRec and the kind of /// extension used. WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { // Handle the common case of add const unsigned OpCode = DU.NarrowUse->getOpcode(); // Only Add/Sub/Mul instructions supported yet. if (OpCode != Instruction::Add && OpCode != Instruction::Sub && OpCode != Instruction::Mul) return {nullptr, ExtendKind::Unknown}; // One operand (NarrowDef) has already been extended to WideDef. Now determine // if extending the other will lead to a recurrence. const unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); const OverflowingBinaryOperator *OBO = cast(DU.NarrowUse); ExtendKind ExtKind = getExtendKind(DU.NarrowDef); if (!(ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) && !(ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())) { ExtKind = ExtendKind::Unknown; // For a non-negative NarrowDef, we can choose either type of // extension. We want to use the current extend kind if legal // (see above), and we only hit this code if we need to check // the opposite case. 
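    // Hedged aside (not in the original comment): for a non-negative narrow
    // value the two extensions agree, e.g. sext i8 5 to i32 and zext i8 5 to
    // i32 both yield 5, which is why either no-wrap flag is acceptable here.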
if (DU.NeverNegative) { if (OBO->hasNoSignedWrap()) { ExtKind = ExtendKind::Sign; } else if (OBO->hasNoUnsignedWrap()) { ExtKind = ExtendKind::Zero; } } } const SCEV *ExtendOperExpr = SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)); if (ExtKind == ExtendKind::Sign) ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType); else if (ExtKind == ExtendKind::Zero) ExtendOperExpr = SE->getZeroExtendExpr(ExtendOperExpr, WideType); else return {nullptr, ExtendKind::Unknown}; // When creating this SCEV expr, don't apply the current operations NSW or NUW // flags. This instruction may be guarded by control flow that the no-wrap // behavior depends on. Non-control-equivalent instructions can be mapped to // the same SCEV expression, and it would be incorrect to transfer NSW/NUW // semantics to those operations. const SCEV *lhs = SE->getSCEV(DU.WideDef); const SCEV *rhs = ExtendOperExpr; // Let's swap operands to the initial order for the case of non-commutative // operations, like SUB. See PR21014. if (ExtendOperIdx == 0) std::swap(lhs, rhs); const SCEVAddRecExpr *AddRec = dyn_cast(getSCEVByOpCode(lhs, rhs, OpCode)); if (!AddRec || AddRec->getLoop() != L) return {nullptr, ExtendKind::Unknown}; return {AddRec, ExtKind}; } /// Is this instruction potentially interesting for further simplification after /// widening it's type? In other words, can the extend be safely hoisted out of /// the loop with SCEV reducing the value to a recurrence on the same loop. If /// so, return the extended recurrence and the kind of extension used. Otherwise /// return {nullptr, ExtendKind::Unknown}. WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { if (!DU.NarrowUse->getType()->isIntegerTy()) return {nullptr, ExtendKind::Unknown}; const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= SE->getTypeSizeInBits(WideType)) { // NarrowUse implicitly widens its operand. e.g. a gep with a narrow // index. So don't follow this use. return {nullptr, ExtendKind::Unknown}; } const SCEV *WideExpr; ExtendKind ExtKind; if (DU.NeverNegative) { WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); if (isa(WideExpr)) ExtKind = ExtendKind::Sign; else { WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); ExtKind = ExtendKind::Zero; } } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) { WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); ExtKind = ExtendKind::Sign; } else { WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); ExtKind = ExtendKind::Zero; } const SCEVAddRecExpr *AddRec = dyn_cast(WideExpr); if (!AddRec || AddRec->getLoop() != L) return {nullptr, ExtendKind::Unknown}; return {AddRec, ExtKind}; } /// This IV user cannot be widened. Replace this use of the original narrow IV /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); if (!InsertPt) return; LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " << *DU.NarrowUse << "\n"); IRBuilder<> Builder(InsertPt); Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); } /// If the narrow use is a compare instruction, then widen the compare // (and possibly the other operand). The extend operation is hoisted into the // loop preheader as far as possible. 
bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { ICmpInst *Cmp = dyn_cast(DU.NarrowUse); if (!Cmp) return false; // We can legally widen the comparison in the following two cases: // // - The signedness of the IV extension and comparison match // // - The narrow IV is always positive (and thus its sign extension is equal // to its zero extension). For instance, let's say we're zero extending // %narrow for the following use // // icmp slt i32 %narrow, %val ... (A) // // and %narrow is always positive. Then // // (A) == icmp slt i32 sext(%narrow), sext(%val) // == icmp slt i32 zext(%narrow), sext(%val) bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign; if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) return false; Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); unsigned IVWidth = SE->getTypeSizeInBits(WideType); assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); // Widen the compare instruction. DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); // Widen the other operand of the compare, if necessary. if (CastWidth < IVWidth) { Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); } return true; } // The widenIVUse avoids generating trunc by evaluating the use as AddRec, this // will not work when: // 1) SCEV traces back to an instruction inside the loop that SCEV can not // expand, eg. add %indvar, (load %addr) // 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant // While SCEV fails to avoid trunc, we can still try to use instruction // combining approach to prove trunc is not required. This can be further // extended with other instruction combining checks, but for now we handle the // following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext") // // Src: // %c = sub nsw %b, %indvar // %d = sext %c to i64 // Dst: // %indvar.ext1 = sext %indvar to i64 // %m = sext %b to i64 // %d = sub nsw i64 %m, %indvar.ext1 // Therefore, as long as the result of add/sub/mul is extended to wide type, no // trunc is required regardless of how %b is generated. This pattern is common // when calculating address in 64 bit architecture bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { Instruction *NarrowUse = DU.NarrowUse; Instruction *NarrowDef = DU.NarrowDef; Instruction *WideDef = DU.WideDef; // Handle the common case of add const unsigned OpCode = NarrowUse->getOpcode(); // Only Add/Sub/Mul instructions are supported. if (OpCode != Instruction::Add && OpCode != Instruction::Sub && OpCode != Instruction::Mul) return false; // The operand that is not defined by NarrowDef of DU. Let's call it the // other operand. assert((NarrowUse->getOperand(0) == NarrowDef || NarrowUse->getOperand(1) == NarrowDef) && "bad DU"); const OverflowingBinaryOperator *OBO = cast(NarrowUse); ExtendKind ExtKind = getExtendKind(NarrowDef); bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap(); bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap(); auto AnotherOpExtKind = ExtKind; // Check that all uses are either: // - narrow def (in case of we are widening the IV increment); // - single-input LCSSA Phis; // - comparison of the chosen type; // - extend of the chosen type (raison d'etre). 
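  // Illustrative user set (assumed IR names): for %c = sub nsw i32 %b, %iv
  // the scan below accepts
  //   %c.ext = sext i32 %c to i64            ; ExtUsers
  //   %lcssa = phi i32 [ %c, %exiting ]      ; LCSSAPhiUsers, single input
  //   %cmp   = icmp slt i32 %c, %n           ; ICmpUsers, sign-compatible
  // and gives up on any other kind of use.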
SmallVector ExtUsers; SmallVector LCSSAPhiUsers; SmallVector ICmpUsers; for (Use &U : NarrowUse->uses()) { Instruction *User = cast(U.getUser()); if (User == NarrowDef) continue; if (!L->contains(User)) { auto *LCSSAPhi = cast(User); // Make sure there is only 1 input, so that we don't have to split // critical edges. if (LCSSAPhi->getNumOperands() != 1) return false; LCSSAPhiUsers.push_back(LCSSAPhi); continue; } if (auto *ICmp = dyn_cast(User)) { auto Pred = ICmp->getPredicate(); // We have 3 types of predicates: signed, unsigned and equality // predicates. For equality, it's legal to widen icmp for either sign and // zero extend. For sign extend, we can also do so for signed predicates, // likeweise for zero extend we can widen icmp for unsigned predicates. if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred)) return false; if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred)) return false; ICmpUsers.push_back(ICmp); continue; } if (ExtKind == ExtendKind::Sign) User = dyn_cast(User); else User = dyn_cast(User); if (!User || User->getType() != WideType) return false; ExtUsers.push_back(User); } if (ExtUsers.empty()) { DeadInsts.emplace_back(NarrowUse); return true; } // We'll prove some facts that should be true in the context of ext users. If // there is no users, we are done now. If there are some, pick their common // dominator as context. const Instruction *CtxI = findCommonDominator(ExtUsers, *DT); if (!CanSignExtend && !CanZeroExtend) { // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we // will most likely not see it. Let's try to prove it. if (OpCode != Instruction::Add) return false; if (ExtKind != ExtendKind::Zero) return false; const SCEV *LHS = SE->getSCEV(OBO->getOperand(0)); const SCEV *RHS = SE->getSCEV(OBO->getOperand(1)); // TODO: Support case for NarrowDef = NarrowUse->getOperand(1). if (NarrowUse->getOperand(0) != NarrowDef) return false; if (!SE->isKnownNegative(RHS)) return false; bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), CtxI); if (!ProvedSubNUW) return false; // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as // neg(zext(neg(op))), which is basically sext(op). AnotherOpExtKind = ExtendKind::Sign; } // Verifying that Defining operand is an AddRec const SCEV *Op1 = SE->getSCEV(WideDef); const SCEVAddRecExpr *AddRecOp1 = dyn_cast(Op1); if (!AddRecOp1 || AddRecOp1->getLoop() != L) return false; LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); // Generating a widening use instruction. Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : createExtendInst(NarrowUse->getOperand(0), WideType, AnotherOpExtKind == ExtendKind::Sign, NarrowUse); Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? 
WideDef : createExtendInst(NarrowUse->getOperand(1), WideType, AnotherOpExtKind == ExtendKind::Sign, NarrowUse); auto *NarrowBO = cast(NarrowUse); auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, NarrowBO->getName()); IRBuilder<> Builder(NarrowUse); Builder.Insert(WideBO); WideBO->copyIRFlags(NarrowBO); ExtendKindMap[NarrowUse] = ExtKind; for (Instruction *User : ExtUsers) { assert(User->getType() == WideType && "Checked before!"); LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " << *WideBO << "\n"); ++NumElimExt; User->replaceAllUsesWith(WideBO); DeadInsts.emplace_back(User); } for (PHINode *User : LCSSAPhiUsers) { assert(User->getNumOperands() == 1 && "Checked before!"); Builder.SetInsertPoint(User); auto *WidePN = Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide"); BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor(); assert(LoopExitingBlock && L->contains(LoopExitingBlock) && "Not a LCSSA Phi?"); WidePN->addIncoming(WideBO, LoopExitingBlock); Builder.SetInsertPoint(User->getParent(), User->getParent()->getFirstInsertionPt()); auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType()); User->replaceAllUsesWith(TruncPN); DeadInsts.emplace_back(User); } for (ICmpInst *User : ICmpUsers) { Builder.SetInsertPoint(User); auto ExtendedOp = [&](Value * V)->Value * { if (V == NarrowUse) return WideBO; if (ExtKind == ExtendKind::Zero) return Builder.CreateZExt(V, WideBO->getType()); else return Builder.CreateSExt(V, WideBO->getType()); }; auto Pred = User->getPredicate(); auto *LHS = ExtendedOp(User->getOperand(0)); auto *RHS = ExtendedOp(User->getOperand(1)); auto *WideCmp = Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide"); User->replaceAllUsesWith(WideCmp); DeadInsts.emplace_back(User); } return true; } /// Determine whether an individual user of the narrow IV can be widened. If so, /// return the wide clone of the user. Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) { assert(ExtendKindMap.count(DU.NarrowDef) && "Should already know the kind of extension used to widen NarrowDef"); // Stop traversing the def-use chain at inner-loop phis or post-loop phis. if (PHINode *UsePhi = dyn_cast(DU.NarrowUse)) { if (LI->getLoopFor(UsePhi->getParent()) != L) { // For LCSSA phis, sink the truncate outside the loop. // After SimplifyCFG most loop exit targets have a single predecessor. // Otherwise fall back to a truncate within the loop. if (UsePhi->getNumOperands() != 1) truncateIVUse(DU, DT, LI); else { // Widening the PHI requires us to insert a trunc. The logical place // for this trunc is in the same BB as the PHI. This is not possible if // the BB is terminated by a catchswitch. if (isa(UsePhi->getParent()->getTerminator())) return nullptr; PHINode *WidePhi = PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", UsePhi); WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); BasicBlock *WidePhiBB = WidePhi->getParent(); IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt()); Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); UsePhi->replaceAllUsesWith(Trunc); DeadInsts.emplace_back(UsePhi); LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " << *WidePhi << "\n"); } return nullptr; } } // This narrow use can be widened by a sext if it's non-negative or its narrow // def was widened by a sext. Same for zext. 
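  // Sketch of the payoff (illustrative, not from the sources): once %iv has a
  // widened twin %iv.wide of type i64, a use such as
  //   %e = sext i32 %iv to i64
  // is replaced outright by %iv.wide and queued for deletion, which is what
  // the code right below implements.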
auto canWidenBySExt = [&]() { return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign; }; auto canWidenByZExt = [&]() { return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero; }; // Our raison d'etre! Eliminate sign and zero extension. if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && canWidenBySExt()) || (isa(DU.NarrowUse) && canWidenByZExt())) { Value *NewDef = DU.WideDef; if (DU.NarrowUse->getType() != WideType) { unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); unsigned IVWidth = SE->getTypeSizeInBits(WideType); if (CastWidth < IVWidth) { // The cast isn't as wide as the IV, so insert a Trunc. IRBuilder<> Builder(DU.NarrowUse); NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); } else { // A wider extend was hidden behind a narrower one. This may induce // another round of IV widening in which the intermediate IV becomes // dead. It should be very rare. LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi << " not wide enough to subsume " << *DU.NarrowUse << "\n"); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); NewDef = DU.NarrowUse; } } if (NewDef != DU.NarrowUse) { LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse << " replaced by " << *DU.WideDef << "\n"); ++NumElimExt; DU.NarrowUse->replaceAllUsesWith(NewDef); DeadInsts.emplace_back(DU.NarrowUse); } // Now that the extend is gone, we want to expose it's uses for potential // further simplification. We don't need to directly inform SimplifyIVUsers // of the new users, because their parent IV will be processed later as a // new loop phi. If we preserved IVUsers analysis, we would also want to // push the uses of WideDef here. // No further widening is needed. The deceased [sz]ext had done it for us. return nullptr; } auto tryAddRecExpansion = [&]() -> Instruction* { // Does this user itself evaluate to a recurrence after widening? WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); if (!WideAddRec.first) WideAddRec = getWideRecurrence(DU); assert((WideAddRec.first == nullptr) == (WideAddRec.second == ExtendKind::Unknown)); if (!WideAddRec.first) return nullptr; // Reuse the IV increment that SCEVExpander created as long as it dominates // NarrowUse. Instruction *WideUse = nullptr; if (WideAddRec.first == WideIncExpr && Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) WideUse = WideInc; else { WideUse = cloneIVUser(DU, WideAddRec.first); if (!WideUse) return nullptr; } // Evaluation of WideAddRec ensured that the narrow expression could be // extended outside the loop without overflow. This suggests that the wide use // evaluates to the same expression as the extended narrow use, but doesn't // absolutely guarantee it. Hence the following failsafe check. In rare cases // where it fails, we simply throw away the newly created wide use. if (WideAddRec.first != SE->getSCEV(WideUse)) { LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first << "\n"); DeadInsts.emplace_back(WideUse); return nullptr; }; // if we reached this point then we are going to replace // DU.NarrowUse with WideUse. Reattach DbgValue then. replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); ExtendKindMap[DU.NarrowUse] = WideAddRec.second; // Returning WideUse pushes it on the worklist. return WideUse; }; if (auto *I = tryAddRecExpansion()) return I; // If use is a loop condition, try to promote the condition instead of // truncating the IV first. 
if (widenLoopCompare(DU)) return nullptr; // We are here about to generate a truncate instruction that may hurt // performance because the scalar evolution expression computed earlier // in WideAddRec.first does not indicate a polynomial induction expression. // In that case, look at the operands of the use instruction to determine // if we can still widen the use instead of truncating its operand. if (widenWithVariantUse(DU)) return nullptr; // This user does not evaluate to a recurrence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. truncateIVUse(DU, DT, LI); return nullptr; } /// Add eligible users of NarrowDef to NarrowIVUsers. void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); bool NonNegativeDef = SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, SE->getZero(NarrowSCEV->getType())); for (User *U : NarrowDef->users()) { Instruction *NarrowUser = cast(U); // Handle data flow merges and bizarre phi cycles. if (!Widened.insert(NarrowUser).second) continue; bool NonNegativeUse = false; if (!NonNegativeDef) { // We might have a control-dependent range information for this context. if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); } NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, NonNegativeDef || NonNegativeUse); } } /// Process a single induction variable. First use the SCEVExpander to create a /// wide induction variable that evaluates to the same recurrence as the /// original narrow IV. Then use a worklist to forward traverse the narrow IV's /// def-use chain. After widenIVUse has processed all interesting IV users, the /// narrow IV will be isolated for removal by DeleteDeadPHIs. /// /// It would be simpler to delete uses as they are processed, but we must avoid /// invalidating SCEV expressions. PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast(SE->getSCEV(OrigPhi)); if (!AddRec) return nullptr; // Widen the induction variable expression. const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign ? SE->getSignExtendExpr(AddRec, WideType) : SE->getZeroExtendExpr(AddRec, WideType); assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && "Expect the new IV expression to preserve its type"); // Can the IV be extended outside the loop without overflow? AddRec = dyn_cast(WideIVExpr); if (!AddRec || AddRec->getLoop() != L) return nullptr; // An AddRec must have loop-invariant operands. Since this AddRec is // materialized by a loop header phi, the expression cannot have any post-loop // operands, so they must dominate the loop header. assert( SE->properlyDominates(AddRec->getStart(), L->getHeader()) && SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && "Loop header phi recurrence inputs do not dominate the loop"); // Iterate over IV uses (including transitive ones) looking for IV increments // of the form 'add nsw %iv, '. For each increment and each use of // the increment calculate control-dependent range information basing on // dominating conditions inside of the loop (e.g. a range check inside of the // loop). Calculated ranges are stored in PostIncRangeInfos map. 
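  //
  // Hedged example (assumed IR): given %iv.next = add nsw i32 %iv, 1 and a
  // dominating guard such as
  //   %ok = icmp sgt i32 %iv, -1
  //   br i1 %ok, label %guarded, label %exit
  // uses of %iv.next inside %guarded get the constrained range [1, INT_MAX]
  // recorded in PostIncRangeInfos, which later lets pushNarrowIVUsers treat
  // them as never negative.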
// // Control-dependent range information is later used to prove that a narrow // definition is not negative (see pushNarrowIVUsers). It's difficult to do // this on demand because when pushNarrowIVUsers needs this information some // of the dominating conditions might be already widened. if (UsePostIncrementRanges) calculatePostIncRanges(OrigPhi); // The rewriter provides a value for the desired IV expression. This may // either find an existing phi or materialize a new one. Either way, we // expect a well-formed cyclic phi-with-increments. i.e. any operand not part // of the phi-SCC dominates the loop entry. Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); // If the wide phi is not a phi node, for example a cast node, like bitcast, // inttoptr, ptrtoint, just skip for now. if (!(WidePhi = dyn_cast(ExpandInst))) { // if the cast node is an inserted instruction without any user, we should // remove it to make sure the pass don't touch the function as we can not // wide the phi. if (ExpandInst->hasNUses(0) && Rewriter.isInsertedInstruction(cast(ExpandInst))) DeadInsts.emplace_back(ExpandInst); return nullptr; } // Remembering the WideIV increment generated by SCEVExpander allows // widenIVUse to reuse it when widening the narrow IV's increment. We don't // employ a general reuse mechanism because the call above is the only call to // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. if (BasicBlock *LatchBlock = L->getLoopLatch()) { WideInc = dyn_cast(WidePhi->getIncomingValueForBlock(LatchBlock)); if (WideInc) { WideIncExpr = SE->getSCEV(WideInc); // Propagate the debug location associated with the original loop // increment to the new (widened) increment. auto *OrigInc = cast(OrigPhi->getIncomingValueForBlock(LatchBlock)); WideInc->setDebugLoc(OrigInc->getDebugLoc()); } } LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); ++NumWidened; // Traverse the def-use chain using a worklist starting at the original IV. assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); Widened.insert(OrigPhi); pushNarrowIVUsers(OrigPhi, WidePhi); while (!NarrowIVUsers.empty()) { WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); // Process a def-use edge. This may replace the use, so don't hold a // use_iterator across it. Instruction *WideUse = widenIVUse(DU, Rewriter); // Follow all def-use edges from the previous narrow use. if (WideUse) pushNarrowIVUsers(DU.NarrowUse, WideUse); // widenIVUse may have removed the def-use edge. if (DU.NarrowDef->use_empty()) DeadInsts.emplace_back(DU.NarrowDef); } // Attach any debug information to the new PHI. replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); return WidePhi; } /// Calculates control-dependent range for the given def at the given context /// by looking at dominating conditions inside of the loop void WidenIV::calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser) { Value *NarrowDefLHS; const APInt *NarrowDefRHS; if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), m_APInt(NarrowDefRHS))) || !NarrowDefRHS->isNonNegative()) return; auto UpdateRangeFromCondition = [&] (Value *Condition, bool TrueDest) { CmpInst::Predicate Pred; Value *CmpRHS; if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), m_Value(CmpRHS)))) return; CmpInst::Predicate P = TrueDest ? 
Pred : CmpInst::getInversePredicate(Pred); auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); auto CmpConstrainedLHSRange = ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); }; auto UpdateRangeFromGuards = [&](Instruction *Ctx) { if (!HasGuards) return; for (Instruction &I : make_range(Ctx->getIterator().getReverse(), Ctx->getParent()->rend())) { Value *C = nullptr; if (match(&I, m_Intrinsic(m_Value(C)))) UpdateRangeFromCondition(C, /*TrueDest=*/true); } }; UpdateRangeFromGuards(NarrowUser); BasicBlock *NarrowUserBB = NarrowUser->getParent(); // If NarrowUserBB is statically unreachable asking dominator queries may // yield surprising results. (e.g. the block may not have a dom tree node) if (!DT->isReachableFromEntry(NarrowUserBB)) return; for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); L->contains(DTB->getBlock()); DTB = DTB->getIDom()) { auto *BB = DTB->getBlock(); auto *TI = BB->getTerminator(); UpdateRangeFromGuards(TI); auto *BI = dyn_cast(TI); if (!BI || !BI->isConditional()) continue; auto *TrueSuccessor = BI->getSuccessor(0); auto *FalseSuccessor = BI->getSuccessor(1); auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { return BBE.isSingleEdge() && DT->dominates(BBE, NarrowUser->getParent()); }; if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); } } /// Calculates PostIncRangeInfos map for the given IV void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(OrigPhi); Visited.insert(OrigPhi); while (!Worklist.empty()) { Instruction *NarrowDef = Worklist.pop_back_val(); for (Use &U : NarrowDef->uses()) { auto *NarrowUser = cast(U.getUser()); // Don't go looking outside the current loop. auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) continue; if (!Visited.insert(NarrowUser).second) continue; Worklist.push_back(NarrowUser); calculatePostIncRange(NarrowDef, NarrowUser); } } } PHINode *llvm::createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges) { WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges); PHINode *WidePHI = Widener.createWideIV(Rewriter); NumElimExt = Widener.getNumElimExt(); NumWidened = Widener.getNumWidened(); return WidePHI; } diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp index f15307181fad..f63e5c61e802 100644 --- a/contrib/llvm-project/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -1,1373 +1,1375 @@ //===- ObjcopyOptions.cpp -------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "ObjcopyOptions.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/ObjCopy/CommonConfig.h" #include "llvm/ObjCopy/ConfigManager.h" #include "llvm/ObjCopy/MachO/MachOConfig.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/CRC.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" using namespace llvm; using namespace llvm::objcopy; using namespace llvm::opt; namespace { enum ObjcopyID { OBJCOPY_INVALID = 0, // This is not an option ID. #define OPTION(...) LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(OBJCOPY_, __VA_ARGS__), #include "ObjcopyOpts.inc" #undef OPTION }; namespace objcopy_opt { #define PREFIX(NAME, VALUE) \ static constexpr StringLiteral NAME##_init[] = VALUE; \ static constexpr ArrayRef NAME(NAME##_init, \ std::size(NAME##_init) - 1); #include "ObjcopyOpts.inc" #undef PREFIX static constexpr opt::OptTable::Info ObjcopyInfoTable[] = { #define OPTION(...) \ LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(OBJCOPY_, __VA_ARGS__), #include "ObjcopyOpts.inc" #undef OPTION }; } // namespace objcopy_opt class ObjcopyOptTable : public opt::GenericOptTable { public: ObjcopyOptTable() : opt::GenericOptTable(objcopy_opt::ObjcopyInfoTable) { setGroupedShortOptions(true); } }; enum InstallNameToolID { INSTALL_NAME_TOOL_INVALID = 0, // This is not an option ID. #define OPTION(...) \ LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(INSTALL_NAME_TOOL_, __VA_ARGS__), #include "InstallNameToolOpts.inc" #undef OPTION }; namespace install_name_tool { #define PREFIX(NAME, VALUE) \ static constexpr StringLiteral NAME##_init[] = VALUE; \ static constexpr ArrayRef NAME(NAME##_init, \ std::size(NAME##_init) - 1); #include "InstallNameToolOpts.inc" #undef PREFIX static constexpr opt::OptTable::Info InstallNameToolInfoTable[] = { #define OPTION(...) \ LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(INSTALL_NAME_TOOL_, __VA_ARGS__), #include "InstallNameToolOpts.inc" #undef OPTION }; } // namespace install_name_tool class InstallNameToolOptTable : public opt::GenericOptTable { public: InstallNameToolOptTable() : GenericOptTable(install_name_tool::InstallNameToolInfoTable) {} }; enum BitcodeStripID { BITCODE_STRIP_INVALID = 0, // This is not an option ID. #define OPTION(...) \ LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(BITCODE_STRIP_, __VA_ARGS__), #include "BitcodeStripOpts.inc" #undef OPTION }; namespace bitcode_strip { #define PREFIX(NAME, VALUE) \ static constexpr StringLiteral NAME##_init[] = VALUE; \ static constexpr ArrayRef NAME(NAME##_init, \ std::size(NAME##_init) - 1); #include "BitcodeStripOpts.inc" #undef PREFIX static constexpr opt::OptTable::Info BitcodeStripInfoTable[] = { #define OPTION(...) \ LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(BITCODE_STRIP_, __VA_ARGS__), #include "BitcodeStripOpts.inc" #undef OPTION }; } // namespace bitcode_strip class BitcodeStripOptTable : public opt::GenericOptTable { public: BitcodeStripOptTable() : opt::GenericOptTable(bitcode_strip::BitcodeStripInfoTable) {} }; enum StripID { STRIP_INVALID = 0, // This is not an option ID. #define OPTION(...) 
LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(STRIP_, __VA_ARGS__), #include "StripOpts.inc" #undef OPTION }; namespace strip { #define PREFIX(NAME, VALUE) \ static constexpr StringLiteral NAME##_init[] = VALUE; \ static constexpr ArrayRef NAME(NAME##_init, \ std::size(NAME##_init) - 1); #include "StripOpts.inc" #undef PREFIX static constexpr opt::OptTable::Info StripInfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(STRIP_, __VA_ARGS__), #include "StripOpts.inc" #undef OPTION }; } // namespace strip class StripOptTable : public opt::GenericOptTable { public: StripOptTable() : GenericOptTable(strip::StripInfoTable) { setGroupedShortOptions(true); } }; } // namespace static SectionFlag parseSectionRenameFlag(StringRef SectionName) { return llvm::StringSwitch(SectionName) .CaseLower("alloc", SectionFlag::SecAlloc) .CaseLower("load", SectionFlag::SecLoad) .CaseLower("noload", SectionFlag::SecNoload) .CaseLower("readonly", SectionFlag::SecReadonly) .CaseLower("debug", SectionFlag::SecDebug) .CaseLower("code", SectionFlag::SecCode) .CaseLower("data", SectionFlag::SecData) .CaseLower("rom", SectionFlag::SecRom) .CaseLower("merge", SectionFlag::SecMerge) .CaseLower("strings", SectionFlag::SecStrings) .CaseLower("contents", SectionFlag::SecContents) .CaseLower("share", SectionFlag::SecShare) .CaseLower("exclude", SectionFlag::SecExclude) .CaseLower("large", SectionFlag::SecLarge) .Default(SectionFlag::SecNone); } static Expected parseSectionFlagSet(ArrayRef SectionFlags) { SectionFlag ParsedFlags = SectionFlag::SecNone; for (StringRef Flag : SectionFlags) { SectionFlag ParsedFlag = parseSectionRenameFlag(Flag); if (ParsedFlag == SectionFlag::SecNone) return createStringError( errc::invalid_argument, "unrecognized section flag '%s'. Flags supported for GNU " "compatibility: alloc, load, noload, readonly, exclude, debug, " "code, data, rom, share, contents, merge, strings, large", Flag.str().c_str()); ParsedFlags |= ParsedFlag; } return ParsedFlags; } static Expected parseRenameSectionValue(StringRef FlagValue) { if (!FlagValue.contains('=')) return createStringError(errc::invalid_argument, "bad format for --rename-section: missing '='"); // Initial split: ".foo" = ".bar,f1,f2,..." auto Old2New = FlagValue.split('='); SectionRename SR; SR.OriginalName = Old2New.first; // Flags split: ".bar" "f1" "f2" ... SmallVector NameAndFlags; Old2New.second.split(NameAndFlags, ','); SR.NewName = NameAndFlags[0]; if (NameAndFlags.size() > 1) { Expected ParsedFlagSet = parseSectionFlagSet(ArrayRef(NameAndFlags).drop_front()); if (!ParsedFlagSet) return ParsedFlagSet.takeError(); SR.NewFlags = *ParsedFlagSet; } return SR; } static Expected> parseSetSectionAttribute(StringRef Option, StringRef FlagValue) { if (!FlagValue.contains('=')) return make_error("bad format for " + Option + ": missing '='", errc::invalid_argument); auto Split = StringRef(FlagValue).split('='); if (Split.first.empty()) return make_error("bad format for " + Option + ": missing section name", errc::invalid_argument); uint64_t Value; if (Split.second.getAsInteger(0, Value)) return make_error("invalid value for " + Option + ": '" + Split.second + "'", errc::invalid_argument); return std::make_pair(Split.first, Value); } static Expected parseSetSectionFlagValue(StringRef FlagValue) { if (!StringRef(FlagValue).contains('=')) return createStringError(errc::invalid_argument, "bad format for --set-section-flags: missing '='"); // Initial split: ".foo" = "f1,f2,..." 
auto Section2Flags = StringRef(FlagValue).split('='); SectionFlagsUpdate SFU; SFU.Name = Section2Flags.first; // Flags split: "f1" "f2" ... SmallVector SectionFlags; Section2Flags.second.split(SectionFlags, ','); Expected ParsedFlagSet = parseSectionFlagSet(SectionFlags); if (!ParsedFlagSet) return ParsedFlagSet.takeError(); SFU.NewFlags = *ParsedFlagSet; return SFU; } namespace { struct TargetInfo { FileFormat Format; MachineInfo Machine; }; } // namespace // FIXME: consolidate with the bfd parsing used by lld. static const StringMap TargetMap{ // Name, {EMachine, 64bit, LittleEndian} // x86 {"elf32-i386", {ELF::EM_386, false, true}}, {"elf32-x86-64", {ELF::EM_X86_64, false, true}}, {"elf64-x86-64", {ELF::EM_X86_64, true, true}}, // Intel MCU {"elf32-iamcu", {ELF::EM_IAMCU, false, true}}, // ARM {"elf32-littlearm", {ELF::EM_ARM, false, true}}, // ARM AArch64 {"elf64-aarch64", {ELF::EM_AARCH64, true, true}}, {"elf64-littleaarch64", {ELF::EM_AARCH64, true, true}}, // RISC-V {"elf32-littleriscv", {ELF::EM_RISCV, false, true}}, {"elf64-littleriscv", {ELF::EM_RISCV, true, true}}, // PowerPC {"elf32-powerpc", {ELF::EM_PPC, false, false}}, {"elf32-powerpcle", {ELF::EM_PPC, false, true}}, {"elf64-powerpc", {ELF::EM_PPC64, true, false}}, {"elf64-powerpcle", {ELF::EM_PPC64, true, true}}, // MIPS {"elf32-bigmips", {ELF::EM_MIPS, false, false}}, {"elf32-ntradbigmips", {ELF::EM_MIPS, false, false}}, {"elf32-ntradlittlemips", {ELF::EM_MIPS, false, true}}, {"elf32-tradbigmips", {ELF::EM_MIPS, false, false}}, {"elf32-tradlittlemips", {ELF::EM_MIPS, false, true}}, {"elf64-tradbigmips", {ELF::EM_MIPS, true, false}}, {"elf64-tradlittlemips", {ELF::EM_MIPS, true, true}}, // SPARC {"elf32-sparc", {ELF::EM_SPARC, false, false}}, {"elf32-sparcel", {ELF::EM_SPARC, false, true}}, // Hexagon {"elf32-hexagon", {ELF::EM_HEXAGON, false, true}}, // LoongArch {"elf32-loongarch", {ELF::EM_LOONGARCH, false, true}}, {"elf64-loongarch", {ELF::EM_LOONGARCH, true, true}}, + // SystemZ + {"elf64-s390", {ELF::EM_S390, true, false}}, }; static Expected getOutputTargetInfoByTargetName(StringRef TargetName) { StringRef OriginalTargetName = TargetName; bool IsFreeBSD = TargetName.consume_back("-freebsd"); auto Iter = TargetMap.find(TargetName); if (Iter == std::end(TargetMap)) return createStringError(errc::invalid_argument, "invalid output format: '%s'", OriginalTargetName.str().c_str()); MachineInfo MI = Iter->getValue(); if (IsFreeBSD) MI.OSABI = ELF::ELFOSABI_FREEBSD; FileFormat Format; if (TargetName.starts_with("elf")) Format = FileFormat::ELF; else // This should never happen because `TargetName` is valid (it certainly // exists in the TargetMap). llvm_unreachable("unknown target prefix"); return {TargetInfo{Format, MI}}; } static Error addSymbolsFromFile(NameMatcher &Symbols, BumpPtrAllocator &Alloc, StringRef Filename, MatchStyle MS, function_ref ErrorCallback) { StringSaver Saver(Alloc); SmallVector Lines; auto BufOrErr = MemoryBuffer::getFile(Filename); if (!BufOrErr) return createFileError(Filename, BufOrErr.getError()); BufOrErr.get()->getBuffer().split(Lines, '\n'); for (StringRef Line : Lines) { // Ignore everything after '#', trim whitespace, and only add the symbol if // it's not empty. 
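The comment above states the line-handling rule that the statement following it applies: drop everything after '#', trim whitespace, and skip lines that end up empty. A standalone sketch of that rule; parseSymbolList and its types are hypothetical stand-ins, not the NameMatcher-based implementation:

// Hypothetical helper, illustrative only.
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

static std::vector<std::string> parseSymbolList(std::string_view Buffer) {
  std::vector<std::string> Symbols;
  while (!Buffer.empty()) {
    size_t NL = Buffer.find('\n');
    std::string_view Line = Buffer.substr(0, NL);
    Buffer = (NL == std::string_view::npos) ? std::string_view{}
                                            : Buffer.substr(NL + 1);
    Line = Line.substr(0, Line.find('#'));      // ignore '#' comments
    size_t B = Line.find_first_not_of(" \t\r"); // trim whitespace
    size_t E = Line.find_last_not_of(" \t\r");
    if (B == std::string_view::npos)
      continue;                                 // blank or comment-only line
    Symbols.emplace_back(Line.substr(B, E - B + 1));
  }
  return Symbols;
}

int main() {
  for (const std::string &S : parseSymbolList("foo # keep\n  # whole-line\n bar*\n"))
    std::cout << S << '\n';                     // prints "foo" then "bar*"
}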
auto TrimmedLine = Line.split('#').first.trim(); if (!TrimmedLine.empty()) if (Error E = Symbols.addMatcher(NameOrPattern::create( Saver.save(TrimmedLine), MS, ErrorCallback))) return E; } return Error::success(); } static Error addSymbolsToRenameFromFile(StringMap &SymbolsToRename, BumpPtrAllocator &Alloc, StringRef Filename) { StringSaver Saver(Alloc); SmallVector Lines; auto BufOrErr = MemoryBuffer::getFile(Filename); if (!BufOrErr) return createFileError(Filename, BufOrErr.getError()); BufOrErr.get()->getBuffer().split(Lines, '\n'); size_t NumLines = Lines.size(); for (size_t LineNo = 0; LineNo < NumLines; ++LineNo) { StringRef TrimmedLine = Lines[LineNo].split('#').first.trim(); if (TrimmedLine.empty()) continue; std::pair Pair = Saver.save(TrimmedLine).split(' '); StringRef NewName = Pair.second.trim(); if (NewName.empty()) return createStringError(errc::invalid_argument, "%s:%zu: missing new symbol name", Filename.str().c_str(), LineNo + 1); SymbolsToRename.insert({Pair.first, NewName}); } return Error::success(); } template static ErrorOr getAsInteger(StringRef Val) { T Result; if (Val.getAsInteger(0, Result)) return errc::invalid_argument; return Result; } namespace { enum class ToolType { Objcopy, Strip, InstallNameTool, BitcodeStrip }; } // anonymous namespace static void printHelp(const opt::OptTable &OptTable, raw_ostream &OS, ToolType Tool) { StringRef HelpText, ToolName; switch (Tool) { case ToolType::Objcopy: ToolName = "llvm-objcopy"; HelpText = " [options] input [output]"; break; case ToolType::Strip: ToolName = "llvm-strip"; HelpText = " [options] inputs..."; break; case ToolType::InstallNameTool: ToolName = "llvm-install-name-tool"; HelpText = " [options] input"; break; case ToolType::BitcodeStrip: ToolName = "llvm-bitcode-strip"; HelpText = " [options] input"; break; } OptTable.printHelp(OS, (ToolName + HelpText).str().c_str(), (ToolName + " tool").str().c_str()); // TODO: Replace this with libOption call once it adds extrahelp support. // The CommandLine library has a cl::extrahelp class to support this, // but libOption does not have that yet. OS << "\nPass @FILE as argument to read options from FILE.\n"; } static Expected parseNewSymbolInfo(StringRef FlagValue) { // Parse value given with --add-symbol option and create the // new symbol if possible. The value format for --add-symbol is: // // =[
<section>:]<value>[,<flags>] // // where: // <name> - symbol name, can be empty string // <section>
- optional section name. If not given ABS symbol is created // - symbol value, can be decimal or hexadecimal number prefixed // with 0x. // - optional flags affecting symbol type, binding or visibility. NewSymbolInfo SI; StringRef Value; std::tie(SI.SymbolName, Value) = FlagValue.split('='); if (Value.empty()) return createStringError( errc::invalid_argument, "bad format for --add-symbol, missing '=' after '%s'", SI.SymbolName.str().c_str()); if (Value.contains(':')) { std::tie(SI.SectionName, Value) = Value.split(':'); if (SI.SectionName.empty() || Value.empty()) return createStringError( errc::invalid_argument, "bad format for --add-symbol, missing section name or symbol value"); } SmallVector Flags; Value.split(Flags, ','); if (Flags[0].getAsInteger(0, SI.Value)) return createStringError(errc::invalid_argument, "bad symbol value: '%s'", Flags[0].str().c_str()); using Functor = std::function; SmallVector UnsupportedFlags; for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I) static_cast( StringSwitch(Flags[I]) .CaseLower("global", [&] { SI.Flags.push_back(SymbolFlag::Global); }) .CaseLower("local", [&] { SI.Flags.push_back(SymbolFlag::Local); }) .CaseLower("weak", [&] { SI.Flags.push_back(SymbolFlag::Weak); }) .CaseLower("default", [&] { SI.Flags.push_back(SymbolFlag::Default); }) .CaseLower("hidden", [&] { SI.Flags.push_back(SymbolFlag::Hidden); }) .CaseLower("protected", [&] { SI.Flags.push_back(SymbolFlag::Protected); }) .CaseLower("file", [&] { SI.Flags.push_back(SymbolFlag::File); }) .CaseLower("section", [&] { SI.Flags.push_back(SymbolFlag::Section); }) .CaseLower("object", [&] { SI.Flags.push_back(SymbolFlag::Object); }) .CaseLower("function", [&] { SI.Flags.push_back(SymbolFlag::Function); }) .CaseLower( "indirect-function", [&] { SI.Flags.push_back(SymbolFlag::IndirectFunction); }) .CaseLower("debug", [&] { SI.Flags.push_back(SymbolFlag::Debug); }) .CaseLower("constructor", [&] { SI.Flags.push_back(SymbolFlag::Constructor); }) .CaseLower("warning", [&] { SI.Flags.push_back(SymbolFlag::Warning); }) .CaseLower("indirect", [&] { SI.Flags.push_back(SymbolFlag::Indirect); }) .CaseLower("synthetic", [&] { SI.Flags.push_back(SymbolFlag::Synthetic); }) .CaseLower("unique-object", [&] { SI.Flags.push_back(SymbolFlag::UniqueObject); }) .StartsWithLower("before=", [&] { StringRef SymNamePart = Flags[I].split('=').second; if (!SymNamePart.empty()) SI.BeforeSyms.push_back(SymNamePart); }) .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))(); if (!UnsupportedFlags.empty()) return createStringError(errc::invalid_argument, "unsupported flag%s for --add-symbol: '%s'", UnsupportedFlags.size() > 1 ? "s" : "", join(UnsupportedFlags, "', '").c_str()); return SI; } // Parse input option \p ArgValue and load section data. This function // extracts section name and name of the file keeping section data from // ArgValue, loads data from the file, and stores section name and data // into the vector of new sections \p NewSections. 
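loadNewSectionData, defined next, splits the option value on its first '=' and loads the named file. A hedged standalone sketch of that name=file handling, with hypothetical names (parseAddSection, NewSection) and plain iostreams standing in for MemoryBuffer:

// Illustrative stand-in, not the llvm-objcopy implementation.
#include <fstream>
#include <iostream>
#include <iterator>
#include <optional>
#include <string>

struct NewSection {
  std::string Name;
  std::string Data;
};

static std::optional<NewSection> parseAddSection(const std::string &Arg) {
  size_t Eq = Arg.find('=');
  if (Eq == std::string::npos || Eq + 1 == Arg.size())
    return std::nullopt; // missing '=' or missing file name
  std::ifstream In(Arg.substr(Eq + 1), std::ios::binary);
  if (!In)
    return std::nullopt; // file could not be opened
  std::string Data((std::istreambuf_iterator<char>(In)),
                   std::istreambuf_iterator<char>());
  return NewSection{Arg.substr(0, Eq), std::move(Data)};
}

int main(int argc, char **argv) {
  if (argc > 1) {
    if (auto S = parseAddSection(argv[1]))
      std::cout << S->Name << ": " << S->Data.size() << " bytes\n";
    else
      std::cerr << "bad --add-section value\n";
  }
}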
static Error loadNewSectionData(StringRef ArgValue, StringRef OptionName, std::vector &NewSections) { if (!ArgValue.contains('=')) return createStringError(errc::invalid_argument, "bad format for " + OptionName + ": missing '='"); std::pair SecPair = ArgValue.split("="); if (SecPair.second.empty()) return createStringError(errc::invalid_argument, "bad format for " + OptionName + ": missing file name"); ErrorOr> BufOrErr = MemoryBuffer::getFile(SecPair.second); if (!BufOrErr) return createFileError(SecPair.second, errorCodeToError(BufOrErr.getError())); NewSections.push_back({SecPair.first, std::move(*BufOrErr)}); return Error::success(); } // parseObjcopyOptions returns the config and sets the input arguments. If a // help flag is set then parseObjcopyOptions will print the help messege and // exit. Expected objcopy::parseObjcopyOptions(ArrayRef RawArgsArr, function_ref ErrorCallback) { DriverConfig DC; ObjcopyOptTable T; const char *const *DashDash = llvm::find_if(RawArgsArr, [](StringRef Str) { return Str == "--"; }); ArrayRef ArgsArr = ArrayRef(RawArgsArr.begin(), DashDash); if (DashDash != RawArgsArr.end()) DashDash = std::next(DashDash); unsigned MissingArgumentIndex, MissingArgumentCount; llvm::opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (InputArgs.size() == 0 && DashDash == RawArgsArr.end()) { printHelp(T, errs(), ToolType::Objcopy); exit(1); } if (InputArgs.hasArg(OBJCOPY_help)) { printHelp(T, outs(), ToolType::Objcopy); exit(0); } if (InputArgs.hasArg(OBJCOPY_version)) { outs() << "llvm-objcopy, compatible with GNU objcopy\n"; cl::PrintVersionMessage(); exit(0); } SmallVector Positional; for (auto *Arg : InputArgs.filtered(OBJCOPY_UNKNOWN)) return createStringError(errc::invalid_argument, "unknown argument '%s'", Arg->getAsString(InputArgs).c_str()); for (auto *Arg : InputArgs.filtered(OBJCOPY_INPUT)) Positional.push_back(Arg->getValue()); std::copy(DashDash, RawArgsArr.end(), std::back_inserter(Positional)); if (Positional.empty()) return createStringError(errc::invalid_argument, "no input file specified"); if (Positional.size() > 2) return createStringError(errc::invalid_argument, "too many positional arguments"); ConfigManager ConfigMgr; CommonConfig &Config = ConfigMgr.Common; COFFConfig &COFFConfig = ConfigMgr.COFF; ELFConfig &ELFConfig = ConfigMgr.ELF; MachOConfig &MachOConfig = ConfigMgr.MachO; Config.InputFilename = Positional[0]; Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1]; if (InputArgs.hasArg(OBJCOPY_target) && (InputArgs.hasArg(OBJCOPY_input_target) || InputArgs.hasArg(OBJCOPY_output_target))) return createStringError( errc::invalid_argument, "--target cannot be used with --input-target or --output-target"); if (InputArgs.hasArg(OBJCOPY_regex) && InputArgs.hasArg(OBJCOPY_wildcard)) return createStringError(errc::invalid_argument, "--regex and --wildcard are incompatible"); MatchStyle SectionMatchStyle = InputArgs.hasArg(OBJCOPY_regex) ? MatchStyle::Regex : MatchStyle::Wildcard; MatchStyle SymbolMatchStyle = InputArgs.hasArg(OBJCOPY_regex) ? MatchStyle::Regex : InputArgs.hasArg(OBJCOPY_wildcard) ? 
MatchStyle::Wildcard : MatchStyle::Literal; StringRef InputFormat, OutputFormat; if (InputArgs.hasArg(OBJCOPY_target)) { InputFormat = InputArgs.getLastArgValue(OBJCOPY_target); OutputFormat = InputArgs.getLastArgValue(OBJCOPY_target); } else { InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target); OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target); } // FIXME: Currently, we ignore the target for non-binary/ihex formats // explicitly specified by -I option (e.g. -Ielf32-x86-64) and guess the // format by llvm::object::createBinary regardless of the option value. Config.InputFormat = StringSwitch(InputFormat) .Case("binary", FileFormat::Binary) .Case("ihex", FileFormat::IHex) .Default(FileFormat::Unspecified); if (InputArgs.hasArg(OBJCOPY_new_symbol_visibility)) { const uint8_t Invalid = 0xff; StringRef VisibilityStr = InputArgs.getLastArgValue(OBJCOPY_new_symbol_visibility); ELFConfig.NewSymbolVisibility = StringSwitch(VisibilityStr) .Case("default", ELF::STV_DEFAULT) .Case("hidden", ELF::STV_HIDDEN) .Case("internal", ELF::STV_INTERNAL) .Case("protected", ELF::STV_PROTECTED) .Default(Invalid); if (ELFConfig.NewSymbolVisibility == Invalid) return createStringError(errc::invalid_argument, "'%s' is not a valid symbol visibility", VisibilityStr.str().c_str()); } for (const auto *Arg : InputArgs.filtered(OBJCOPY_subsystem)) { StringRef Subsystem, Version; std::tie(Subsystem, Version) = StringRef(Arg->getValue()).split(':'); COFFConfig.Subsystem = StringSwitch(Subsystem.lower()) .Case("boot_application", COFF::IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION) .Case("console", COFF::IMAGE_SUBSYSTEM_WINDOWS_CUI) .Case("efi_application", COFF::IMAGE_SUBSYSTEM_EFI_APPLICATION) .Case("efi_boot_service_driver", COFF::IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER) .Case("efi_rom", COFF::IMAGE_SUBSYSTEM_EFI_ROM) .Case("efi_runtime_driver", COFF::IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER) .Case("native", COFF::IMAGE_SUBSYSTEM_NATIVE) .Case("posix", COFF::IMAGE_SUBSYSTEM_POSIX_CUI) .Case("windows", COFF::IMAGE_SUBSYSTEM_WINDOWS_GUI) .Default(COFF::IMAGE_SUBSYSTEM_UNKNOWN); if (*COFFConfig.Subsystem == COFF::IMAGE_SUBSYSTEM_UNKNOWN) return createStringError(errc::invalid_argument, "'%s' is not a valid subsystem", Subsystem.str().c_str()); if (!Version.empty()) { StringRef Major, Minor; std::tie(Major, Minor) = Version.split('.'); unsigned Number; if (Major.getAsInteger(10, Number)) return createStringError(errc::invalid_argument, "'%s' is not a valid subsystem major version", Major.str().c_str()); COFFConfig.MajorSubsystemVersion = Number; Number = 0; if (!Minor.empty() && Minor.getAsInteger(10, Number)) return createStringError(errc::invalid_argument, "'%s' is not a valid subsystem minor version", Minor.str().c_str()); COFFConfig.MinorSubsystemVersion = Number; } } Config.OutputFormat = StringSwitch(OutputFormat) .Case("binary", FileFormat::Binary) .Case("ihex", FileFormat::IHex) .Default(FileFormat::Unspecified); if (Config.OutputFormat == FileFormat::Unspecified) { if (OutputFormat.empty()) { Config.OutputFormat = Config.InputFormat; } else { Expected Target = getOutputTargetInfoByTargetName(OutputFormat); if (!Target) return Target.takeError(); Config.OutputFormat = Target->Format; Config.OutputArch = Target->Machine; } } if (const auto *A = InputArgs.getLastArg(OBJCOPY_compress_debug_sections)) { Config.CompressionType = StringSwitch(A->getValue()) .Case("zlib", DebugCompressionType::Zlib) .Case("zstd", DebugCompressionType::Zstd) .Default(DebugCompressionType::None); if (Config.CompressionType == 
DebugCompressionType::None) { return createStringError( errc::invalid_argument, "invalid or unsupported --compress-debug-sections format: %s", A->getValue()); } if (const char *Reason = compression::getReasonIfUnsupported( compression::formatFor(Config.CompressionType))) return createStringError(errc::invalid_argument, Reason); } Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink); // The gnu_debuglink's target is expected to not change or else its CRC would // become invalidated and get rejected. We can avoid recalculating the // checksum for every target file inside an archive by precomputing the CRC // here. This prevents a significant amount of I/O. if (!Config.AddGnuDebugLink.empty()) { auto DebugOrErr = MemoryBuffer::getFile(Config.AddGnuDebugLink); if (!DebugOrErr) return createFileError(Config.AddGnuDebugLink, DebugOrErr.getError()); auto Debug = std::move(*DebugOrErr); Config.GnuDebugLinkCRC32 = llvm::crc32(arrayRefFromStringRef(Debug->getBuffer())); } Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo); Config.SymbolsPrefix = InputArgs.getLastArgValue(OBJCOPY_prefix_symbols); Config.AllocSectionsPrefix = InputArgs.getLastArgValue(OBJCOPY_prefix_alloc_sections); if (auto Arg = InputArgs.getLastArg(OBJCOPY_extract_partition)) Config.ExtractPartition = Arg->getValue(); if (const auto *A = InputArgs.getLastArg(OBJCOPY_gap_fill)) { if (Config.OutputFormat != FileFormat::Binary) return createStringError( errc::invalid_argument, "'--gap-fill' is only supported for binary output"); ErrorOr Val = getAsInteger(A->getValue()); if (!Val) return createStringError(Val.getError(), "--gap-fill: bad number: %s", A->getValue()); uint8_t ByteVal = Val.get(); if (ByteVal != Val.get()) return createStringError(std::errc::value_too_large, "gap-fill value %s is out of range (0 to 0xff)", A->getValue()); Config.GapFill = ByteVal; } if (const auto *A = InputArgs.getLastArg(OBJCOPY_pad_to)) { if (Config.OutputFormat != FileFormat::Binary) return createStringError( errc::invalid_argument, "'--pad-to' is only supported for binary output"); ErrorOr Addr = getAsInteger(A->getValue()); if (!Addr) return createStringError(Addr.getError(), "--pad-to: bad number: %s", A->getValue()); Config.PadTo = *Addr; } for (auto *Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) { if (!StringRef(Arg->getValue()).contains('=')) return createStringError(errc::invalid_argument, "bad format for --redefine-sym"); auto Old2New = StringRef(Arg->getValue()).split('='); if (!Config.SymbolsToRename.insert(Old2New).second) return createStringError(errc::invalid_argument, "multiple redefinition of symbol '%s'", Old2New.first.str().c_str()); } for (auto *Arg : InputArgs.filtered(OBJCOPY_redefine_symbols)) if (Error E = addSymbolsToRenameFromFile(Config.SymbolsToRename, DC.Alloc, Arg->getValue())) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_rename_section)) { Expected SR = parseRenameSectionValue(StringRef(Arg->getValue())); if (!SR) return SR.takeError(); if (!Config.SectionsToRename.try_emplace(SR->OriginalName, *SR).second) return createStringError(errc::invalid_argument, "multiple renames of section '%s'", SR->OriginalName.str().c_str()); } for (auto *Arg : InputArgs.filtered(OBJCOPY_set_section_alignment)) { Expected> NameAndAlign = parseSetSectionAttribute("--set-section-alignment", Arg->getValue()); if (!NameAndAlign) return NameAndAlign.takeError(); Config.SetSectionAlignment[NameAndAlign->first] = NameAndAlign->second; } for (auto *Arg : 
InputArgs.filtered(OBJCOPY_set_section_flags)) { Expected SFU = parseSetSectionFlagValue(Arg->getValue()); if (!SFU) return SFU.takeError(); if (!Config.SetSectionFlags.try_emplace(SFU->Name, *SFU).second) return createStringError( errc::invalid_argument, "--set-section-flags set multiple times for section '%s'", SFU->Name.str().c_str()); } for (auto *Arg : InputArgs.filtered(OBJCOPY_set_section_type)) { Expected> NameAndType = parseSetSectionAttribute("--set-section-type", Arg->getValue()); if (!NameAndType) return NameAndType.takeError(); Config.SetSectionType[NameAndType->first] = NameAndType->second; } // Prohibit combinations of --set-section-{flags,type} when the section name // is used as the destination of a --rename-section. for (const auto &E : Config.SectionsToRename) { const SectionRename &SR = E.second; auto Err = [&](const char *Option) { return createStringError( errc::invalid_argument, "--set-section-%s=%s conflicts with --rename-section=%s=%s", Option, SR.NewName.str().c_str(), SR.OriginalName.str().c_str(), SR.NewName.str().c_str()); }; if (Config.SetSectionFlags.count(SR.NewName)) return Err("flags"); if (Config.SetSectionType.count(SR.NewName)) return Err("type"); } for (auto *Arg : InputArgs.filtered(OBJCOPY_remove_section)) if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( Arg->getValue(), SectionMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_keep_section)) if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( Arg->getValue(), SectionMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_only_section)) if (Error E = Config.OnlySection.addMatcher(NameOrPattern::create( Arg->getValue(), SectionMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_add_section)) { if (Error Err = loadNewSectionData(Arg->getValue(), "--add-section", Config.AddSection)) return std::move(Err); } for (auto *Arg : InputArgs.filtered(OBJCOPY_update_section)) { if (Error Err = loadNewSectionData(Arg->getValue(), "--update-section", Config.UpdateSection)) return std::move(Err); } for (auto *Arg : InputArgs.filtered(OBJCOPY_dump_section)) { StringRef Value(Arg->getValue()); if (Value.split('=').second.empty()) return createStringError( errc::invalid_argument, "bad format for --dump-section, expected section=file"); Config.DumpSection.push_back(Value); } Config.StripAll = InputArgs.hasArg(OBJCOPY_strip_all); Config.StripAllGNU = InputArgs.hasArg(OBJCOPY_strip_all_gnu); Config.StripDebug = InputArgs.hasArg(OBJCOPY_strip_debug); Config.StripDWO = InputArgs.hasArg(OBJCOPY_strip_dwo); Config.StripSections = InputArgs.hasArg(OBJCOPY_strip_sections); Config.StripNonAlloc = InputArgs.hasArg(OBJCOPY_strip_non_alloc); Config.StripUnneeded = InputArgs.hasArg(OBJCOPY_strip_unneeded); Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo); Config.ExtractMainPartition = InputArgs.hasArg(OBJCOPY_extract_main_partition); ELFConfig.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden); Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken); if (auto *Arg = InputArgs.getLastArg(OBJCOPY_discard_all, OBJCOPY_discard_locals)) { Config.DiscardMode = Arg->getOption().matches(OBJCOPY_discard_all) ? 
DiscardType::All : DiscardType::Locals; } Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug); ELFConfig.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols); MachOConfig.KeepUndefined = InputArgs.hasArg(OBJCOPY_keep_undefined); Config.DecompressDebugSections = InputArgs.hasArg(OBJCOPY_decompress_debug_sections); if (Config.DiscardMode == DiscardType::All) { Config.StripDebug = true; ELFConfig.KeepFileSymbols = true; } for (auto *Arg : InputArgs.filtered(OBJCOPY_localize_symbol)) if (Error E = Config.SymbolsToLocalize.addMatcher(NameOrPattern::create( Arg->getValue(), SymbolMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_localize_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToLocalize, DC.Alloc, Arg->getValue(), SymbolMatchStyle, ErrorCallback)) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol)) if (Error E = Config.SymbolsToKeepGlobal.addMatcher(NameOrPattern::create( Arg->getValue(), SymbolMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToKeepGlobal, DC.Alloc, Arg->getValue(), SymbolMatchStyle, ErrorCallback)) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_globalize_symbol)) if (Error E = Config.SymbolsToGlobalize.addMatcher(NameOrPattern::create( Arg->getValue(), SymbolMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_globalize_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToGlobalize, DC.Alloc, Arg->getValue(), SymbolMatchStyle, ErrorCallback)) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_weaken_symbol)) if (Error E = Config.SymbolsToWeaken.addMatcher(NameOrPattern::create( Arg->getValue(), SymbolMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_weaken_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToWeaken, DC.Alloc, Arg->getValue(), SymbolMatchStyle, ErrorCallback)) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_strip_symbol)) if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( Arg->getValue(), SymbolMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_strip_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToRemove, DC.Alloc, Arg->getValue(), SymbolMatchStyle, ErrorCallback)) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbol)) if (Error E = Config.UnneededSymbolsToRemove.addMatcher(NameOrPattern::create( Arg->getValue(), SymbolMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbols)) if (Error E = addSymbolsFromFile(Config.UnneededSymbolsToRemove, DC.Alloc, Arg->getValue(), SymbolMatchStyle, ErrorCallback)) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_keep_symbol)) if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( Arg->getValue(), SymbolMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_keep_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, Arg->getValue(), SymbolMatchStyle, ErrorCallback)) return std::move(E); for (auto *Arg : InputArgs.filtered(OBJCOPY_add_symbol)) { Expected SymInfo = parseNewSymbolInfo(Arg->getValue()); if (!SymInfo) return SymInfo.takeError(); Config.SymbolsToAdd.push_back(*SymInfo); } 
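The symbol-selection loops above all funnel their patterns into matchers whose behaviour depends on the chosen SymbolMatchStyle: literal comparison by default, shell-style wildcards with --wildcard, or regular expressions with --regex. A simplified standalone sketch of those three styles, using std::regex as a stand-in for LLVM's GlobPattern/Regex; it is illustrative only and does not reproduce GNU objcopy's exact wildcard semantics:

// Hypothetical types, not llvm::objcopy's NameMatcher.
#include <iostream>
#include <regex>
#include <string>

enum class MatchStyle { Literal, Wildcard, Regex };

static bool matches(const std::string &Pattern, const std::string &Name,
                    MatchStyle MS) {
  switch (MS) {
  case MatchStyle::Literal:
    return Pattern == Name;
  case MatchStyle::Wildcard: {
    // Translate '*' and '?' into an anchored regex; escape regex metacharacters.
    const std::string Meta = "\\^$.|?+()[]{}";
    std::string RE;
    for (char C : Pattern) {
      if (C == '*')
        RE += ".*";
      else if (C == '?')
        RE += '.';
      else if (Meta.find(C) != std::string::npos) {
        RE += '\\';
        RE += C;
      } else
        RE += C;
    }
    return std::regex_match(Name, std::regex(RE));
  }
  case MatchStyle::Regex:
    return std::regex_match(Name, std::regex(Pattern));
  }
  return false;
}

int main() {
  std::cout << matches("foo*", "foo_bar", MatchStyle::Wildcard) << '\n'; // 1
  std::cout << matches("foo*", "foo_bar", MatchStyle::Literal) << '\n';  // 0
}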
ELFConfig.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links); Config.DeterministicArchives = InputArgs.hasFlag( OBJCOPY_enable_deterministic_archives, OBJCOPY_disable_deterministic_archives, /*default=*/true); Config.PreserveDates = InputArgs.hasArg(OBJCOPY_preserve_dates); if (Config.PreserveDates && (Config.OutputFilename == "-" || Config.InputFilename == "-")) return createStringError(errc::invalid_argument, "--preserve-dates requires a file"); for (auto *Arg : InputArgs) if (Arg->getOption().matches(OBJCOPY_set_start)) { auto EAddr = getAsInteger(Arg->getValue()); if (!EAddr) return createStringError( EAddr.getError(), "bad entry point address: '%s'", Arg->getValue()); ELFConfig.EntryExpr = [EAddr](uint64_t) { return *EAddr; }; } else if (Arg->getOption().matches(OBJCOPY_change_start)) { auto EIncr = getAsInteger(Arg->getValue()); if (!EIncr) return createStringError(EIncr.getError(), "bad entry point increment: '%s'", Arg->getValue()); auto Expr = ELFConfig.EntryExpr ? std::move(ELFConfig.EntryExpr) : [](uint64_t A) { return A; }; ELFConfig.EntryExpr = [Expr, EIncr](uint64_t EAddr) { return Expr(EAddr) + *EIncr; }; } if (Config.DecompressDebugSections && Config.CompressionType != DebugCompressionType::None) { return createStringError( errc::invalid_argument, "cannot specify both --compress-debug-sections and " "--decompress-debug-sections"); } if (Config.ExtractPartition && Config.ExtractMainPartition) return createStringError(errc::invalid_argument, "cannot specify --extract-partition together with " "--extract-main-partition"); DC.CopyConfigs.push_back(std::move(ConfigMgr)); return std::move(DC); } // parseInstallNameToolOptions returns the config and sets the input arguments. // If a help flag is set then parseInstallNameToolOptions will print the help // messege and exit. Expected objcopy::parseInstallNameToolOptions(ArrayRef ArgsArr) { DriverConfig DC; ConfigManager ConfigMgr; CommonConfig &Config = ConfigMgr.Common; MachOConfig &MachOConfig = ConfigMgr.MachO; InstallNameToolOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; llvm::opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (MissingArgumentCount) return createStringError( errc::invalid_argument, "missing argument to " + StringRef(InputArgs.getArgString(MissingArgumentIndex)) + " option"); if (InputArgs.size() == 0) { printHelp(T, errs(), ToolType::InstallNameTool); exit(1); } if (InputArgs.hasArg(INSTALL_NAME_TOOL_help)) { printHelp(T, outs(), ToolType::InstallNameTool); exit(0); } if (InputArgs.hasArg(INSTALL_NAME_TOOL_version)) { outs() << "llvm-install-name-tool, compatible with cctools " "install_name_tool\n"; cl::PrintVersionMessage(); exit(0); } for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_add_rpath)) MachOConfig.RPathToAdd.push_back(Arg->getValue()); for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_prepend_rpath)) MachOConfig.RPathToPrepend.push_back(Arg->getValue()); for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_delete_rpath)) { StringRef RPath = Arg->getValue(); // Cannot add and delete the same rpath at the same time. 
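The check that follows rejects an rpath that was also passed to -add_rpath or -prepend_rpath. A tiny hypothetical sketch of that conflict test, with std::find standing in for llvm::is_contained:

// Illustrative stand-in only.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

static bool conflicting(const std::vector<std::string> &ToAdd,
                        const std::vector<std::string> &ToDelete) {
  for (const std::string &R : ToDelete)
    if (std::find(ToAdd.begin(), ToAdd.end(), R) != ToAdd.end())
      return true; // same rpath both added and deleted
  return false;
}

int main() {
  std::cout << conflicting({"@loader_path/../lib"}, {"@loader_path/../lib"})
            << '\n'; // 1: such a combination would be rejected
}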
if (is_contained(MachOConfig.RPathToAdd, RPath)) return createStringError( errc::invalid_argument, "cannot specify both -add_rpath '%s' and -delete_rpath '%s'", RPath.str().c_str(), RPath.str().c_str()); if (is_contained(MachOConfig.RPathToPrepend, RPath)) return createStringError( errc::invalid_argument, "cannot specify both -prepend_rpath '%s' and -delete_rpath '%s'", RPath.str().c_str(), RPath.str().c_str()); MachOConfig.RPathsToRemove.insert(RPath); } for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_rpath)) { StringRef Old = Arg->getValue(0); StringRef New = Arg->getValue(1); auto Match = [=](StringRef RPath) { return RPath == Old || RPath == New; }; // Cannot specify duplicate -rpath entries auto It1 = find_if( MachOConfig.RPathsToUpdate, [&Match](const DenseMap::value_type &OldNew) { return Match(OldNew.getFirst()) || Match(OldNew.getSecond()); }); if (It1 != MachOConfig.RPathsToUpdate.end()) return createStringError(errc::invalid_argument, "cannot specify both -rpath '" + It1->getFirst() + "' '" + It1->getSecond() + "' and -rpath '" + Old + "' '" + New + "'"); // Cannot specify the same rpath under both -delete_rpath and -rpath auto It2 = find_if(MachOConfig.RPathsToRemove, Match); if (It2 != MachOConfig.RPathsToRemove.end()) return createStringError(errc::invalid_argument, "cannot specify both -delete_rpath '" + *It2 + "' and -rpath '" + Old + "' '" + New + "'"); // Cannot specify the same rpath under both -add_rpath and -rpath auto It3 = find_if(MachOConfig.RPathToAdd, Match); if (It3 != MachOConfig.RPathToAdd.end()) return createStringError(errc::invalid_argument, "cannot specify both -add_rpath '" + *It3 + "' and -rpath '" + Old + "' '" + New + "'"); // Cannot specify the same rpath under both -prepend_rpath and -rpath. auto It4 = find_if(MachOConfig.RPathToPrepend, Match); if (It4 != MachOConfig.RPathToPrepend.end()) return createStringError(errc::invalid_argument, "cannot specify both -prepend_rpath '" + *It4 + "' and -rpath '" + Old + "' '" + New + "'"); MachOConfig.RPathsToUpdate.insert({Old, New}); } if (auto *Arg = InputArgs.getLastArg(INSTALL_NAME_TOOL_id)) { MachOConfig.SharedLibId = Arg->getValue(); if (MachOConfig.SharedLibId->empty()) return createStringError(errc::invalid_argument, "cannot specify an empty id"); } for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_change)) MachOConfig.InstallNamesToUpdate.insert( {Arg->getValue(0), Arg->getValue(1)}); MachOConfig.RemoveAllRpaths = InputArgs.hasArg(INSTALL_NAME_TOOL_delete_all_rpaths); SmallVector Positional; for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_UNKNOWN)) return createStringError(errc::invalid_argument, "unknown argument '%s'", Arg->getAsString(InputArgs).c_str()); for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_INPUT)) Positional.push_back(Arg->getValue()); if (Positional.empty()) return createStringError(errc::invalid_argument, "no input file specified"); if (Positional.size() > 1) return createStringError( errc::invalid_argument, "llvm-install-name-tool expects a single input file"); Config.InputFilename = Positional[0]; Config.OutputFilename = Positional[0]; DC.CopyConfigs.push_back(std::move(ConfigMgr)); return std::move(DC); } Expected objcopy::parseBitcodeStripOptions(ArrayRef ArgsArr, function_ref ErrorCallback) { DriverConfig DC; ConfigManager ConfigMgr; CommonConfig &Config = ConfigMgr.Common; MachOConfig &MachOConfig = ConfigMgr.MachO; BitcodeStripOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, 
MissingArgumentIndex, MissingArgumentCount); if (InputArgs.size() == 0) { printHelp(T, errs(), ToolType::BitcodeStrip); exit(1); } if (InputArgs.hasArg(BITCODE_STRIP_help)) { printHelp(T, outs(), ToolType::BitcodeStrip); exit(0); } if (InputArgs.hasArg(BITCODE_STRIP_version)) { outs() << "llvm-bitcode-strip, compatible with cctools " "bitcode_strip\n"; cl::PrintVersionMessage(); exit(0); } for (auto *Arg : InputArgs.filtered(BITCODE_STRIP_UNKNOWN)) return createStringError(errc::invalid_argument, "unknown argument '%s'", Arg->getAsString(InputArgs).c_str()); SmallVector Positional; for (auto *Arg : InputArgs.filtered(BITCODE_STRIP_INPUT)) Positional.push_back(Arg->getValue()); if (Positional.size() > 1) return createStringError(errc::invalid_argument, "llvm-bitcode-strip expects a single input file"); assert(!Positional.empty()); Config.InputFilename = Positional[0]; if (!InputArgs.hasArg(BITCODE_STRIP_output)) { return createStringError(errc::invalid_argument, "-o is a required argument"); } Config.OutputFilename = InputArgs.getLastArgValue(BITCODE_STRIP_output); if (!InputArgs.hasArg(BITCODE_STRIP_remove)) return createStringError(errc::invalid_argument, "no action specified"); // We only support -r for now, which removes all bitcode sections and // the __LLVM segment if it's now empty. cantFail(Config.ToRemove.addMatcher(NameOrPattern::create( "__LLVM,__asm", MatchStyle::Literal, ErrorCallback))); cantFail(Config.ToRemove.addMatcher(NameOrPattern::create( "__LLVM,__bitcode", MatchStyle::Literal, ErrorCallback))); cantFail(Config.ToRemove.addMatcher(NameOrPattern::create( "__LLVM,__bundle", MatchStyle::Literal, ErrorCallback))); cantFail(Config.ToRemove.addMatcher(NameOrPattern::create( "__LLVM,__cmdline", MatchStyle::Literal, ErrorCallback))); cantFail(Config.ToRemove.addMatcher(NameOrPattern::create( "__LLVM,__swift_cmdline", MatchStyle::Literal, ErrorCallback))); MachOConfig.EmptySegmentsToRemove.insert("__LLVM"); DC.CopyConfigs.push_back(std::move(ConfigMgr)); return std::move(DC); } // parseStripOptions returns the config and sets the input arguments. If a // help flag is set then parseStripOptions will print the help messege and // exit. 
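parseStripOptions, defined next, shares the "--" convention with parseObjcopyOptions: options are parsed only up to the first "--", and everything after it is taken verbatim as positional input files. A standalone sketch of that split, with a crude stand-in for the real libOption parsing:

// Hypothetical stand-alone code, not the libOption-based implementation.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

int main(int argc, char **argv) {
  std::vector<std::string> Raw(argv + 1, argv + argc);
  auto DashDash = std::find(Raw.begin(), Raw.end(), "--");

  std::vector<std::string> ToParse(Raw.begin(), DashDash); // option parsing input
  std::vector<std::string> Positional;
  for (const std::string &A : ToParse)
    if (!A.empty() && A[0] != '-') // crude stand-in for OPT_INPUT filtering
      Positional.push_back(A);
  if (DashDash != Raw.end())
    Positional.insert(Positional.end(), std::next(DashDash), Raw.end());

  for (const std::string &P : Positional)
    std::cout << "input: " << P << '\n';
}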
Expected objcopy::parseStripOptions(ArrayRef RawArgsArr, function_ref ErrorCallback) { const char *const *DashDash = llvm::find_if(RawArgsArr, [](StringRef Str) { return Str == "--"; }); ArrayRef ArgsArr = ArrayRef(RawArgsArr.begin(), DashDash); if (DashDash != RawArgsArr.end()) DashDash = std::next(DashDash); StripOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; llvm::opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (InputArgs.size() == 0 && DashDash == RawArgsArr.end()) { printHelp(T, errs(), ToolType::Strip); exit(1); } if (InputArgs.hasArg(STRIP_help)) { printHelp(T, outs(), ToolType::Strip); exit(0); } if (InputArgs.hasArg(STRIP_version)) { outs() << "llvm-strip, compatible with GNU strip\n"; cl::PrintVersionMessage(); exit(0); } SmallVector Positional; for (auto *Arg : InputArgs.filtered(STRIP_UNKNOWN)) return createStringError(errc::invalid_argument, "unknown argument '%s'", Arg->getAsString(InputArgs).c_str()); for (auto *Arg : InputArgs.filtered(STRIP_INPUT)) Positional.push_back(Arg->getValue()); std::copy(DashDash, RawArgsArr.end(), std::back_inserter(Positional)); if (Positional.empty()) return createStringError(errc::invalid_argument, "no input file specified"); if (Positional.size() > 1 && InputArgs.hasArg(STRIP_output)) return createStringError( errc::invalid_argument, "multiple input files cannot be used in combination with -o"); ConfigManager ConfigMgr; CommonConfig &Config = ConfigMgr.Common; ELFConfig &ELFConfig = ConfigMgr.ELF; MachOConfig &MachOConfig = ConfigMgr.MachO; if (InputArgs.hasArg(STRIP_regex) && InputArgs.hasArg(STRIP_wildcard)) return createStringError(errc::invalid_argument, "--regex and --wildcard are incompatible"); MatchStyle SectionMatchStyle = InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex : MatchStyle::Wildcard; MatchStyle SymbolMatchStyle = InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex : InputArgs.hasArg(STRIP_wildcard) ? MatchStyle::Wildcard : MatchStyle::Literal; ELFConfig.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links); Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug); if (auto *Arg = InputArgs.getLastArg(STRIP_discard_all, STRIP_discard_locals)) Config.DiscardMode = Arg->getOption().matches(STRIP_discard_all) ? 
DiscardType::All : DiscardType::Locals; Config.StripSections = InputArgs.hasArg(STRIP_strip_sections); Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded); if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all)) Config.StripAll = Arg->getOption().getID() == STRIP_strip_all; Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu); MachOConfig.StripSwiftSymbols = InputArgs.hasArg(STRIP_strip_swift_symbols); Config.OnlyKeepDebug = InputArgs.hasArg(STRIP_only_keep_debug); ELFConfig.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols); MachOConfig.KeepUndefined = InputArgs.hasArg(STRIP_keep_undefined); for (auto *Arg : InputArgs.filtered(STRIP_keep_section)) if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( Arg->getValue(), SectionMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(STRIP_remove_section)) if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( Arg->getValue(), SectionMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(STRIP_strip_symbol)) if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( Arg->getValue(), SymbolMatchStyle, ErrorCallback))) return std::move(E); for (auto *Arg : InputArgs.filtered(STRIP_keep_symbol)) if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( Arg->getValue(), SymbolMatchStyle, ErrorCallback))) return std::move(E); if (!InputArgs.hasArg(STRIP_no_strip_all) && !Config.StripDebug && !Config.OnlyKeepDebug && !Config.StripUnneeded && Config.DiscardMode == DiscardType::None && !Config.StripAllGNU && Config.SymbolsToRemove.empty()) Config.StripAll = true; if (Config.DiscardMode == DiscardType::All) { Config.StripDebug = true; ELFConfig.KeepFileSymbols = true; } Config.DeterministicArchives = InputArgs.hasFlag(STRIP_enable_deterministic_archives, STRIP_disable_deterministic_archives, /*default=*/true); Config.PreserveDates = InputArgs.hasArg(STRIP_preserve_dates); Config.InputFormat = FileFormat::Unspecified; Config.OutputFormat = FileFormat::Unspecified; DriverConfig DC; if (Positional.size() == 1) { Config.InputFilename = Positional[0]; Config.OutputFilename = InputArgs.getLastArgValue(STRIP_output, Positional[0]); DC.CopyConfigs.push_back(std::move(ConfigMgr)); } else { StringMap InputFiles; for (StringRef Filename : Positional) { if (InputFiles[Filename]++ == 1) { if (Filename == "-") return createStringError( errc::invalid_argument, "cannot specify '-' as an input file more than once"); if (Error E = ErrorCallback(createStringError( errc::invalid_argument, "'%s' was already specified", Filename.str().c_str()))) return std::move(E); } Config.InputFilename = Filename; Config.OutputFilename = Filename; DC.CopyConfigs.push_back(ConfigMgr); } } if (Config.PreserveDates && (is_contained(Positional, "-") || InputArgs.getLastArgValue(STRIP_output) == "-")) return createStringError(errc::invalid_argument, "--preserve-dates requires a file"); return std::move(DC); } diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp b/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp index 34861ee92128..fda99bd6d33e 100644 --- a/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-objdump/ELFDump.cpp @@ -1,433 +1,436 @@ //===-- ELFDump.cpp - ELF-specific dumper -----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file implements the ELF-specific dumper for llvm-objdump. /// //===----------------------------------------------------------------------===// #include "ELFDump.h" #include "llvm-objdump.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::object; using namespace llvm::objdump; namespace { template class ELFDumper : public Dumper { public: ELFDumper(const ELFObjectFile &O) : Dumper(O), Obj(O) {} void printPrivateHeaders() override; void printDynamicRelocations() override; private: const ELFObjectFile &Obj; const ELFFile &getELFFile() const { return Obj.getELFFile(); } void printDynamicSection(); void printProgramHeaders(); void printSymbolVersion(); void printSymbolVersionDependency(const typename ELFT::Shdr &Sec); }; } // namespace template static std::unique_ptr createDumper(const ELFObjectFile &Obj) { return std::make_unique>(Obj); } std::unique_ptr objdump::createELFDumper(const object::ELFObjectFileBase &Obj) { if (const auto *O = dyn_cast(&Obj)) return createDumper(*O); if (const auto *O = dyn_cast(&Obj)) return createDumper(*O); if (const auto *O = dyn_cast(&Obj)) return createDumper(*O); return createDumper(cast(Obj)); } template static Expected getDynamicStrTab(const ELFFile &Elf) { auto DynamicEntriesOrError = Elf.dynamicEntries(); if (!DynamicEntriesOrError) return DynamicEntriesOrError.takeError(); for (const typename ELFT::Dyn &Dyn : *DynamicEntriesOrError) { if (Dyn.d_tag == ELF::DT_STRTAB) { auto MappedAddrOrError = Elf.toMappedAddr(Dyn.getPtr()); if (!MappedAddrOrError) consumeError(MappedAddrOrError.takeError()); return StringRef(reinterpret_cast(*MappedAddrOrError)); } } // If the dynamic segment is not present, we fall back on the sections. auto SectionsOrError = Elf.sections(); if (!SectionsOrError) return SectionsOrError.takeError(); for (const typename ELFT::Shdr &Sec : *SectionsOrError) { if (Sec.sh_type == ELF::SHT_DYNSYM) return Elf.getStringTableForSymtab(Sec); } return createError("dynamic string table not found"); } template static Error getRelocationValueString(const ELFObjectFile *Obj, const RelocationRef &RelRef, SmallVectorImpl &Result) { const ELFFile &EF = Obj->getELFFile(); DataRefImpl Rel = RelRef.getRawDataRefImpl(); auto SecOrErr = EF.getSection(Rel.d.a); if (!SecOrErr) return SecOrErr.takeError(); int64_t Addend = 0; // If there is no Symbol associated with the relocation, we set the undef // boolean value to 'true'. This will prevent us from calling functions that // requires the relocation to be associated with a symbol. // // In SHT_REL case we would need to read the addend from section data. // GNU objdump does not do that and we just follow for simplicity atm. bool Undef = false; if ((*SecOrErr)->sh_type == ELF::SHT_RELA) { const typename ELFT::Rela *ERela = Obj->getRela(Rel); Addend = ERela->r_addend; Undef = ERela->getSymbol(false) == 0; } else if ((*SecOrErr)->sh_type == ELF::SHT_REL) { const typename ELFT::Rel *ERel = Obj->getRel(Rel); Undef = ERel->getSymbol(false) == 0; } else { return make_error(); } // Default scheme is to print Target, as well as "+ " for nonzero // addend. Should be acceptable for all normal purposes. 
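The comment above describes the default formatting: print the target (or "*ABS*" when the relocation has no associated symbol) and append "+0x..." or "-0x..." for a nonzero addend. A plain C++ sketch of that formatting, not the llvm-objdump code itself; formatRelocValue is a hypothetical helper:

// Illustrative stand-in only.
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <string>

static std::string formatRelocValue(const std::string &Sym, int64_t Addend) {
  std::string Out = Sym.empty() ? "*ABS*" : Sym;
  if (Addend != 0) {
    char Buf[32];
    uint64_t Mag = Addend < 0 ? -static_cast<uint64_t>(Addend)
                              : static_cast<uint64_t>(Addend);
    std::snprintf(Buf, sizeof(Buf), "%c0x%" PRIx64,
                  Addend < 0 ? '-' : '+', Mag);
    Out += Buf;
  }
  return Out;
}

int main() {
  std::printf("%s\n", formatRelocValue("printf", 0x18).c_str()); // printf+0x18
  std::printf("%s\n", formatRelocValue("", -8).c_str());         // *ABS*-0x8
}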
std::string FmtBuf; raw_string_ostream Fmt(FmtBuf); if (!Undef) { symbol_iterator SI = RelRef.getSymbol(); Expected SymOrErr = Obj->getSymbol(SI->getRawDataRefImpl()); // TODO: test this error. if (!SymOrErr) return SymOrErr.takeError(); if ((*SymOrErr)->getType() == ELF::STT_SECTION) { Expected SymSI = SI->getSection(); if (!SymSI) return SymSI.takeError(); const typename ELFT::Shdr *SymSec = Obj->getSection((*SymSI)->getRawDataRefImpl()); auto SecName = EF.getSectionName(*SymSec); if (!SecName) return SecName.takeError(); Fmt << *SecName; } else { Expected SymName = SI->getName(); if (!SymName) return SymName.takeError(); Fmt << (Demangle ? demangle(*SymName) : *SymName); } } else { Fmt << "*ABS*"; } if (Addend != 0) { Fmt << (Addend < 0 ? "-" : "+") << format("0x%" PRIx64, (Addend < 0 ? -(uint64_t)Addend : (uint64_t)Addend)); } Fmt.flush(); Result.append(FmtBuf.begin(), FmtBuf.end()); return Error::success(); } Error objdump::getELFRelocationValueString(const ELFObjectFileBase *Obj, const RelocationRef &Rel, SmallVectorImpl &Result) { if (auto *ELF32LE = dyn_cast(Obj)) return getRelocationValueString(ELF32LE, Rel, Result); if (auto *ELF64LE = dyn_cast(Obj)) return getRelocationValueString(ELF64LE, Rel, Result); if (auto *ELF32BE = dyn_cast(Obj)) return getRelocationValueString(ELF32BE, Rel, Result); auto *ELF64BE = cast(Obj); return getRelocationValueString(ELF64BE, Rel, Result); } template static uint64_t getSectionLMA(const ELFFile &Obj, const object::ELFSectionRef &Sec) { auto PhdrRangeOrErr = Obj.program_headers(); if (!PhdrRangeOrErr) report_fatal_error(Twine(toString(PhdrRangeOrErr.takeError()))); // Search for a PT_LOAD segment containing the requested section. Use this // segment's p_addr to calculate the section's LMA. for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr) if ((Phdr.p_type == ELF::PT_LOAD) && (isSectionInSegment( Phdr, *cast>(Sec.getObject()) ->getSection(Sec.getRawDataRefImpl())))) return Sec.getAddress() - Phdr.p_vaddr + Phdr.p_paddr; // Return section's VMA if it isn't in a PT_LOAD segment. return Sec.getAddress(); } uint64_t objdump::getELFSectionLMA(const object::ELFSectionRef &Sec) { if (const auto *ELFObj = dyn_cast(Sec.getObject())) return getSectionLMA(ELFObj->getELFFile(), Sec); else if (const auto *ELFObj = dyn_cast(Sec.getObject())) return getSectionLMA(ELFObj->getELFFile(), Sec); else if (const auto *ELFObj = dyn_cast(Sec.getObject())) return getSectionLMA(ELFObj->getELFFile(), Sec); const auto *ELFObj = cast(Sec.getObject()); return getSectionLMA(ELFObj->getELFFile(), Sec); } template void ELFDumper::printDynamicSection() { const ELFFile &Elf = getELFFile(); auto DynamicEntriesOrErr = Elf.dynamicEntries(); if (!DynamicEntriesOrErr) { reportWarning(toString(DynamicEntriesOrErr.takeError()), Obj.getFileName()); return; } ArrayRef DynamicEntries = *DynamicEntriesOrErr; // Find the maximum tag name length to format the value column properly. size_t MaxLen = 0; for (const typename ELFT::Dyn &Dyn : DynamicEntries) MaxLen = std::max(MaxLen, Elf.getDynamicTagAsString(Dyn.d_tag).size()); std::string TagFmt = " %-" + std::to_string(MaxLen) + "s "; outs() << "\nDynamic Section:\n"; for (const typename ELFT::Dyn &Dyn : DynamicEntries) { if (Dyn.d_tag == ELF::DT_NULL) continue; std::string Str = Elf.getDynamicTagAsString(Dyn.d_tag); outs() << format(TagFmt.c_str(), Str.c_str()); const char *Fmt = ELFT::Is64Bits ? 
"0x%016" PRIx64 "\n" : "0x%08" PRIx64 "\n"; if (Dyn.d_tag == ELF::DT_NEEDED || Dyn.d_tag == ELF::DT_RPATH || Dyn.d_tag == ELF::DT_RUNPATH || Dyn.d_tag == ELF::DT_SONAME || Dyn.d_tag == ELF::DT_AUXILIARY || Dyn.d_tag == ELF::DT_FILTER) { Expected StrTabOrErr = getDynamicStrTab(Elf); if (StrTabOrErr) { const char *Data = StrTabOrErr.get().data(); outs() << (Data + Dyn.d_un.d_val) << "\n"; continue; } reportWarning(toString(StrTabOrErr.takeError()), Obj.getFileName()); consumeError(StrTabOrErr.takeError()); } outs() << format(Fmt, (uint64_t)Dyn.d_un.d_val); } } template void ELFDumper::printProgramHeaders() { outs() << "\nProgram Header:\n"; auto ProgramHeaderOrError = getELFFile().program_headers(); if (!ProgramHeaderOrError) { reportWarning("unable to read program headers: " + toString(ProgramHeaderOrError.takeError()), Obj.getFileName()); return; } for (const typename ELFT::Phdr &Phdr : *ProgramHeaderOrError) { switch (Phdr.p_type) { case ELF::PT_DYNAMIC: outs() << " DYNAMIC "; break; case ELF::PT_GNU_EH_FRAME: outs() << "EH_FRAME "; break; case ELF::PT_GNU_RELRO: outs() << " RELRO "; break; case ELF::PT_GNU_PROPERTY: outs() << " PROPERTY "; break; case ELF::PT_GNU_STACK: outs() << " STACK "; break; case ELF::PT_INTERP: outs() << " INTERP "; break; case ELF::PT_LOAD: outs() << " LOAD "; break; case ELF::PT_NOTE: outs() << " NOTE "; break; case ELF::PT_OPENBSD_BOOTDATA: outs() << "OPENBSD_BOOTDATA "; break; case ELF::PT_OPENBSD_MUTABLE: outs() << "OPENBSD_MUTABLE "; break; case ELF::PT_OPENBSD_NOBTCFI: outs() << "OPENBSD_NOBTCFI "; break; case ELF::PT_OPENBSD_RANDOMIZE: outs() << "OPENBSD_RANDOMIZE "; break; + case ELF::PT_OPENBSD_SYSCALLS: + outs() << "OPENBSD_SYSCALLS "; + break; case ELF::PT_OPENBSD_WXNEEDED: outs() << "OPENBSD_WXNEEDED "; break; case ELF::PT_PHDR: outs() << " PHDR "; break; case ELF::PT_TLS: outs() << " TLS "; break; default: outs() << " UNKNOWN "; } const char *Fmt = ELFT::Is64Bits ? "0x%016" PRIx64 " " : "0x%08" PRIx64 " "; outs() << "off " << format(Fmt, (uint64_t)Phdr.p_offset) << "vaddr " << format(Fmt, (uint64_t)Phdr.p_vaddr) << "paddr " << format(Fmt, (uint64_t)Phdr.p_paddr) << format("align 2**%u\n", llvm::countr_zero(Phdr.p_align)) << " filesz " << format(Fmt, (uint64_t)Phdr.p_filesz) << "memsz " << format(Fmt, (uint64_t)Phdr.p_memsz) << "flags " << ((Phdr.p_flags & ELF::PF_R) ? "r" : "-") << ((Phdr.p_flags & ELF::PF_W) ? "w" : "-") << ((Phdr.p_flags & ELF::PF_X) ? "x" : "-") << "\n"; } } template void ELFDumper::printDynamicRelocations() { if (!any_of(Obj.sections(), [](const ELFSectionRef Sec) { return Sec.getType() == ELF::SHT_DYNAMIC; })) { reportError(Obj.getFileName(), "not a dynamic object"); return; } std::vector DynRelSec = cast(Obj).dynamic_relocation_sections(); if (DynRelSec.empty()) return; outs() << "\nDYNAMIC RELOCATION RECORDS\n"; const uint32_t OffsetPadding = (Obj.getBytesInAddress() > 4 ? 16 : 8); const uint32_t TypePadding = 24; outs() << left_justify("OFFSET", OffsetPadding) << ' ' << left_justify("TYPE", TypePadding) << " VALUE\n"; StringRef Fmt = Obj.getBytesInAddress() > 4 ? 
"%016" PRIx64 : "%08" PRIx64; for (const SectionRef &Section : DynRelSec) for (const RelocationRef &Reloc : Section.relocations()) { uint64_t Address = Reloc.getOffset(); SmallString<32> RelocName; SmallString<32> ValueStr; Reloc.getTypeName(RelocName); if (Error E = getELFRelocationValueString(&Obj, Reloc, ValueStr)) reportError(std::move(E), Obj.getFileName()); outs() << format(Fmt.data(), Address) << ' ' << left_justify(RelocName, TypePadding) << ' ' << ValueStr << '\n'; } } template void ELFDumper::printSymbolVersionDependency( const typename ELFT::Shdr &Sec) { outs() << "\nVersion References:\n"; Expected> V = getELFFile().getVersionDependencies(Sec, this->WarningHandler); if (!V) { reportWarning(toString(V.takeError()), Obj.getFileName()); return; } raw_fd_ostream &OS = outs(); for (const VerNeed &VN : *V) { OS << " required from " << VN.File << ":\n"; for (const VernAux &Aux : VN.AuxV) OS << format(" 0x%08x 0x%02x %02u %s\n", Aux.Hash, Aux.Flags, Aux.Other, Aux.Name.c_str()); } } template static void printSymbolVersionDefinition(const typename ELFT::Shdr &Shdr, ArrayRef Contents, StringRef StrTab) { outs() << "\nVersion definitions:\n"; const uint8_t *Buf = Contents.data(); uint32_t VerdefIndex = 1; // sh_info contains the number of entries in the SHT_GNU_verdef section. To // make the index column have consistent width, we should insert blank spaces // according to sh_info. uint16_t VerdefIndexWidth = std::to_string(Shdr.sh_info).size(); while (Buf) { auto *Verdef = reinterpret_cast(Buf); outs() << format_decimal(VerdefIndex++, VerdefIndexWidth) << " " << format("0x%02" PRIx16 " ", (uint16_t)Verdef->vd_flags) << format("0x%08" PRIx32 " ", (uint32_t)Verdef->vd_hash); const uint8_t *BufAux = Buf + Verdef->vd_aux; uint16_t VerdauxIndex = 0; while (BufAux) { auto *Verdaux = reinterpret_cast(BufAux); if (VerdauxIndex) outs() << std::string(VerdefIndexWidth + 17, ' '); outs() << StringRef(StrTab.drop_front(Verdaux->vda_name).data()) << '\n'; BufAux = Verdaux->vda_next ? BufAux + Verdaux->vda_next : nullptr; ++VerdauxIndex; } Buf = Verdef->vd_next ? Buf + Verdef->vd_next : nullptr; } } template void ELFDumper::printSymbolVersion() { const ELFFile &Elf = getELFFile(); StringRef FileName = Obj.getFileName(); ArrayRef Sections = unwrapOrError(Elf.sections(), FileName); for (const typename ELFT::Shdr &Shdr : Sections) { if (Shdr.sh_type != ELF::SHT_GNU_verneed && Shdr.sh_type != ELF::SHT_GNU_verdef) continue; ArrayRef Contents = unwrapOrError(Elf.getSectionContents(Shdr), FileName); const typename ELFT::Shdr *StrTabSec = unwrapOrError(Elf.getSection(Shdr.sh_link), FileName); StringRef StrTab = unwrapOrError(Elf.getStringTable(*StrTabSec), FileName); if (Shdr.sh_type == ELF::SHT_GNU_verneed) printSymbolVersionDependency(Shdr); else printSymbolVersionDefinition(Shdr, Contents, StrTab); } } template void ELFDumper::printPrivateHeaders() { printProgramHeaders(); printDynamicSection(); printSymbolVersion(); } diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp index f369a63add11..387124ad53e4 100644 --- a/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1,8105 +1,8106 @@ //===- ELFDumper.cpp - ELF-specific dumper --------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file implements the ELF-specific dumper for llvm-readobj. /// //===----------------------------------------------------------------------===// #include "ARMEHABIPrinter.h" #include "DwarfCFIEHPrinter.h" #include "ObjDumper.h" #include "StackMapPrinter.h" #include "llvm-readobj.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MsgPackDocument.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ELFTypes.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/RelocationResolver.h" #include "llvm/Object/StackMapParser.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/ARMAttributeParser.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MSP430AttributeParser.h" #include "llvm/Support/MSP430Attributes.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MipsABIFlags.h" #include "llvm/Support/RISCVAttributeParser.h" #include "llvm/Support/RISCVAttributes.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/SystemZ/zOSSupport.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include #include #include #include #include using namespace llvm; using namespace llvm::object; using namespace ELF; #define LLVM_READOBJ_ENUM_CASE(ns, enum) \ case ns::enum: \ return #enum; #define ENUM_ENT(enum, altName) \ { #enum, altName, ELF::enum } #define ENUM_ENT_1(enum) \ { #enum, #enum, ELF::enum } namespace { template struct RelSymbol { RelSymbol(const typename ELFT::Sym *S, StringRef N) : Sym(S), Name(N.str()) {} const typename ELFT::Sym *Sym; std::string Name; }; /// Represents a contiguous uniform range in the file. We cannot just create a /// range directly because when creating one of these from the .dynamic table /// the size, entity size and virtual address are different entries in arbitrary /// order (DT_REL, DT_RELSZ, DT_RELENT for example). struct DynRegionInfo { DynRegionInfo(const Binary &Owner, const ObjDumper &D) : Obj(&Owner), Dumper(&D) {} DynRegionInfo(const Binary &Owner, const ObjDumper &D, const uint8_t *A, uint64_t S, uint64_t ES) : Addr(A), Size(S), EntSize(ES), Obj(&Owner), Dumper(&D) {} /// Address in current address space. const uint8_t *Addr = nullptr; /// Size in bytes of the region. uint64_t Size = 0; /// Size of each entity in the region. uint64_t EntSize = 0; /// Owner object. Used for error reporting. const Binary *Obj; /// Dumper used for error reporting. const ObjDumper *Dumper; /// Error prefix. Used for error reporting to provide more information. 
std::string Context; /// Region size name. Used for error reporting. StringRef SizePrintName = "size"; /// Entry size name. Used for error reporting. If this field is empty, errors /// will not mention the entry size. StringRef EntSizePrintName = "entry size"; template ArrayRef getAsArrayRef() const { const Type *Start = reinterpret_cast(Addr); if (!Start) return {Start, Start}; const uint64_t Offset = Addr - (const uint8_t *)Obj->getMemoryBufferRef().getBufferStart(); const uint64_t ObjSize = Obj->getMemoryBufferRef().getBufferSize(); if (Size > ObjSize - Offset) { Dumper->reportUniqueWarning( "unable to read data at 0x" + Twine::utohexstr(Offset) + " of size 0x" + Twine::utohexstr(Size) + " (" + SizePrintName + "): it goes past the end of the file of size 0x" + Twine::utohexstr(ObjSize)); return {Start, Start}; } if (EntSize == sizeof(Type) && (Size % EntSize == 0)) return {Start, Start + (Size / EntSize)}; std::string Msg; if (!Context.empty()) Msg += Context + " has "; Msg += ("invalid " + SizePrintName + " (0x" + Twine::utohexstr(Size) + ")") .str(); if (!EntSizePrintName.empty()) Msg += (" or " + EntSizePrintName + " (0x" + Twine::utohexstr(EntSize) + ")") .str(); Dumper->reportUniqueWarning(Msg); return {Start, Start}; } }; struct GroupMember { StringRef Name; uint64_t Index; }; struct GroupSection { StringRef Name; std::string Signature; uint64_t ShName; uint64_t Index; uint32_t Link; uint32_t Info; uint32_t Type; std::vector Members; }; namespace { struct NoteType { uint32_t ID; StringRef Name; }; } // namespace template class Relocation { public: Relocation(const typename ELFT::Rel &R, bool IsMips64EL) : Type(R.getType(IsMips64EL)), Symbol(R.getSymbol(IsMips64EL)), Offset(R.r_offset), Info(R.r_info) {} Relocation(const typename ELFT::Rela &R, bool IsMips64EL) : Relocation((const typename ELFT::Rel &)R, IsMips64EL) { Addend = R.r_addend; } uint32_t Type; uint32_t Symbol; typename ELFT::uint Offset; typename ELFT::uint Info; std::optional Addend; }; template class MipsGOTParser; template class ELFDumper : public ObjDumper { LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) public: ELFDumper(const object::ELFObjectFile &ObjF, ScopedPrinter &Writer); void printUnwindInfo() override; void printNeededLibraries() override; void printHashTable() override; void printGnuHashTable() override; void printLoadName() override; void printVersionInfo() override; void printArchSpecificInfo() override; void printStackMap() const override; void printMemtag() override; ArrayRef getMemtagGlobalsSectionContents(uint64_t ExpectedAddr); // Hash histogram shows statistics of how efficient the hash was for the // dynamic symbol table. The table shows the number of hash buckets for // different lengths of chains as an absolute number and percentage of the // total buckets, and the cumulative coverage of symbols for each set of // buckets. void printHashHistograms() override; const object::ELFObjectFile &getElfObject() const { return ObjF; }; std::string describe(const Elf_Shdr &Sec) const; unsigned getHashTableEntSize() const { // EM_S390 and ELF::EM_ALPHA platforms use 8-bytes entries in SHT_HASH // sections. This violates the ELF specification. if (Obj.getHeader().e_machine == ELF::EM_S390 || Obj.getHeader().e_machine == ELF::EM_ALPHA) return 8; return 4; } std::vector> getOtherFlagsFromSymbol(const Elf_Ehdr &Header, const Elf_Sym &Symbol) const; Elf_Dyn_Range dynamic_table() const { // A valid .dynamic section contains an array of entries terminated // with a DT_NULL entry. 
However, sometimes the section content may // continue past the DT_NULL entry, so to dump the section correctly, // we first find the end of the entries by iterating over them. Elf_Dyn_Range Table = DynamicTable.template getAsArrayRef(); size_t Size = 0; while (Size < Table.size()) if (Table[Size++].getTag() == DT_NULL) break; return Table.slice(0, Size); } Elf_Sym_Range dynamic_symbols() const { if (!DynSymRegion) return Elf_Sym_Range(); return DynSymRegion->template getAsArrayRef(); } const Elf_Shdr *findSectionByName(StringRef Name) const; StringRef getDynamicStringTable() const { return DynamicStringTable; } protected: virtual void printVersionSymbolSection(const Elf_Shdr *Sec) = 0; virtual void printVersionDefinitionSection(const Elf_Shdr *Sec) = 0; virtual void printVersionDependencySection(const Elf_Shdr *Sec) = 0; void printDependentLibsHelper(function_ref OnSectionStart, function_ref OnLibEntry); virtual void printRelRelaReloc(const Relocation &R, const RelSymbol &RelSym) = 0; virtual void printRelrReloc(const Elf_Relr &R) = 0; virtual void printDynamicRelocHeader(unsigned Type, StringRef Name, const DynRegionInfo &Reg) {} void printReloc(const Relocation &R, unsigned RelIndex, const Elf_Shdr &Sec, const Elf_Shdr *SymTab); void printDynamicReloc(const Relocation &R); void printDynamicRelocationsHelper(); void printRelocationsHelper(const Elf_Shdr &Sec); void forEachRelocationDo( const Elf_Shdr &Sec, bool RawRelr, llvm::function_ref &, unsigned, const Elf_Shdr &, const Elf_Shdr *)> RelRelaFn, llvm::function_ref RelrFn); virtual void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset, bool NonVisibilityBitsUsed, bool ExtraSymInfo) const {}; virtual void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable, std::optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed, bool ExtraSymInfo) const = 0; virtual void printMipsABIFlags() = 0; virtual void printMipsGOT(const MipsGOTParser &Parser) = 0; virtual void printMipsPLT(const MipsGOTParser &Parser) = 0; virtual void printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, const ArrayRef> Descriptors) = 0; virtual void printHashHistogram(const Elf_Hash &HashTable) const; virtual void printGnuHashHistogram(const Elf_GnuHash &GnuHashTable) const; virtual void printHashHistogramStats(size_t NBucket, size_t MaxChain, size_t TotalSyms, ArrayRef Count, bool IsGnu) const = 0; Expected> getVersionTable(const Elf_Shdr &Sec, ArrayRef *SymTab, StringRef *StrTab, const Elf_Shdr **SymTabSec) const; StringRef getPrintableSectionName(const Elf_Shdr &Sec) const; std::vector getGroups(); // Returns the function symbol index for the given address. Matches the // symbol's section with FunctionSec when specified. // Returns std::nullopt if no function symbol can be found for the address or // in case it is not defined in the specified section. 
SmallVector getSymbolIndexesForFunctionAddress( uint64_t SymValue, std::optional FunctionSec); bool printFunctionStackSize(uint64_t SymValue, std::optional FunctionSec, const Elf_Shdr &StackSizeSec, DataExtractor Data, uint64_t *Offset); void printStackSize(const Relocation &R, const Elf_Shdr &RelocSec, unsigned Ndx, const Elf_Shdr *SymTab, const Elf_Shdr *FunctionSec, const Elf_Shdr &StackSizeSec, const RelocationResolver &Resolver, DataExtractor Data); virtual void printStackSizeEntry(uint64_t Size, ArrayRef FuncNames) = 0; void printRelocatableStackSizes(std::function PrintHeader); void printNonRelocatableStackSizes(std::function PrintHeader); const object::ELFObjectFile &ObjF; const ELFFile &Obj; StringRef FileName; Expected createDRI(uint64_t Offset, uint64_t Size, uint64_t EntSize) { if (Offset + Size < Offset || Offset + Size > Obj.getBufSize()) return createError("offset (0x" + Twine::utohexstr(Offset) + ") + size (0x" + Twine::utohexstr(Size) + ") is greater than the file size (0x" + Twine::utohexstr(Obj.getBufSize()) + ")"); return DynRegionInfo(ObjF, *this, Obj.base() + Offset, Size, EntSize); } void printAttributes(unsigned, std::unique_ptr, llvm::endianness); void printMipsReginfo(); void printMipsOptions(); std::pair findDynamic(); void loadDynamicTable(); void parseDynamicTable(); Expected getSymbolVersion(const Elf_Sym &Sym, bool &IsDefault) const; Expected, 0> *> getVersionMap() const; DynRegionInfo DynRelRegion; DynRegionInfo DynRelaRegion; DynRegionInfo DynRelrRegion; DynRegionInfo DynPLTRelRegion; std::optional DynSymRegion; DynRegionInfo DynSymTabShndxRegion; DynRegionInfo DynamicTable; StringRef DynamicStringTable; const Elf_Hash *HashTable = nullptr; const Elf_GnuHash *GnuHashTable = nullptr; const Elf_Shdr *DotSymtabSec = nullptr; const Elf_Shdr *DotDynsymSec = nullptr; const Elf_Shdr *DotAddrsigSec = nullptr; DenseMap> ShndxTables; std::optional SONameOffset; std::optional>> AddressToIndexMap; const Elf_Shdr *SymbolVersionSection = nullptr; // .gnu.version const Elf_Shdr *SymbolVersionNeedSection = nullptr; // .gnu.version_r const Elf_Shdr *SymbolVersionDefSection = nullptr; // .gnu.version_d std::string getFullSymbolName(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable, std::optional StrTable, bool IsDynamic) const; Expected getSymbolSectionIndex(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable) const; Expected getSymbolSectionName(const Elf_Sym &Symbol, unsigned SectionIndex) const; std::string getStaticSymbolName(uint32_t Index) const; StringRef getDynamicString(uint64_t Value) const; void printSymbolsHelper(bool IsDynamic, bool ExtraSymInfo) const; std::string getDynamicEntry(uint64_t Type, uint64_t Value) const; Expected> getRelocationTarget(const Relocation &R, const Elf_Shdr *SymTab) const; ArrayRef getShndxTable(const Elf_Shdr *Symtab) const; private: mutable SmallVector, 0> VersionMap; }; template std::string ELFDumper::describe(const Elf_Shdr &Sec) const { return ::describe(Obj, Sec); } namespace { template struct SymtabLink { typename ELFT::SymRange Symbols; StringRef StringTable; const typename ELFT::Shdr *SymTab; }; // Returns the linked symbol table, symbols and associated string table for a // given section. 
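// For example, the version-table reader below passes SHT_DYNSYM as
// ExpectedType, so a section whose sh_link does not point at a dynamic symbol
// table is reported as an error rather than silently misread.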
template Expected> getLinkAsSymtab(const ELFFile &Obj, const typename ELFT::Shdr &Sec, unsigned ExpectedType) { Expected SymtabOrErr = Obj.getSection(Sec.sh_link); if (!SymtabOrErr) return createError("invalid section linked to " + describe(Obj, Sec) + ": " + toString(SymtabOrErr.takeError())); if ((*SymtabOrErr)->sh_type != ExpectedType) return createError( "invalid section linked to " + describe(Obj, Sec) + ": expected " + object::getELFSectionTypeName(Obj.getHeader().e_machine, ExpectedType) + ", but got " + object::getELFSectionTypeName(Obj.getHeader().e_machine, (*SymtabOrErr)->sh_type)); Expected StrTabOrErr = Obj.getLinkAsStrtab(**SymtabOrErr); if (!StrTabOrErr) return createError( "can't get a string table for the symbol table linked to " + describe(Obj, Sec) + ": " + toString(StrTabOrErr.takeError())); Expected SymsOrErr = Obj.symbols(*SymtabOrErr); if (!SymsOrErr) return createError("unable to read symbols from the " + describe(Obj, Sec) + ": " + toString(SymsOrErr.takeError())); return SymtabLink{*SymsOrErr, *StrTabOrErr, *SymtabOrErr}; } } // namespace template Expected> ELFDumper::getVersionTable(const Elf_Shdr &Sec, ArrayRef *SymTab, StringRef *StrTab, const Elf_Shdr **SymTabSec) const { assert((!SymTab && !StrTab && !SymTabSec) || (SymTab && StrTab && SymTabSec)); if (reinterpret_cast(Obj.base() + Sec.sh_offset) % sizeof(uint16_t) != 0) return createError("the " + describe(Sec) + " is misaligned"); Expected> VersionsOrErr = Obj.template getSectionContentsAsArray(Sec); if (!VersionsOrErr) return createError("cannot read content of " + describe(Sec) + ": " + toString(VersionsOrErr.takeError())); Expected> SymTabOrErr = getLinkAsSymtab(Obj, Sec, SHT_DYNSYM); if (!SymTabOrErr) { reportUniqueWarning(SymTabOrErr.takeError()); return *VersionsOrErr; } if (SymTabOrErr->Symbols.size() != VersionsOrErr->size()) reportUniqueWarning(describe(Sec) + ": the number of entries (" + Twine(VersionsOrErr->size()) + ") does not match the number of symbols (" + Twine(SymTabOrErr->Symbols.size()) + ") in the symbol table with index " + Twine(Sec.sh_link)); if (SymTab) { *SymTab = SymTabOrErr->Symbols; *StrTab = SymTabOrErr->StringTable; *SymTabSec = SymTabOrErr->SymTab; } return *VersionsOrErr; } template void ELFDumper::printSymbolsHelper(bool IsDynamic, bool ExtraSymInfo) const { std::optional StrTable; size_t Entries = 0; Elf_Sym_Range Syms(nullptr, nullptr); const Elf_Shdr *SymtabSec = IsDynamic ? DotDynsymSec : DotSymtabSec; if (IsDynamic) { StrTable = DynamicStringTable; Syms = dynamic_symbols(); Entries = Syms.size(); } else if (DotSymtabSec) { if (Expected StrTableOrErr = Obj.getStringTableForSymtab(*DotSymtabSec)) StrTable = *StrTableOrErr; else reportUniqueWarning( "unable to get the string table for the SHT_SYMTAB section: " + toString(StrTableOrErr.takeError())); if (Expected SymsOrErr = Obj.symbols(DotSymtabSec)) Syms = *SymsOrErr; else reportUniqueWarning( "unable to read symbols from the SHT_SYMTAB section: " + toString(SymsOrErr.takeError())); Entries = DotSymtabSec->getEntityCount(); } if (Syms.empty()) return; // The st_other field has 2 logical parts. The first two bits hold the symbol // visibility (STV_*) and the remainder hold other platform-specific values. bool NonVisibilityBitsUsed = llvm::any_of(Syms, [](const Elf_Sym &S) { return S.st_other & ~0x3; }); DataRegion ShndxTable = IsDynamic ? 
DataRegion( (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->getElfObject().getELFFile().end()) : DataRegion(this->getShndxTable(SymtabSec)); printSymtabMessage(SymtabSec, Entries, NonVisibilityBitsUsed, ExtraSymInfo); for (const Elf_Sym &Sym : Syms) printSymbol(Sym, &Sym - Syms.begin(), ShndxTable, StrTable, IsDynamic, NonVisibilityBitsUsed, ExtraSymInfo); } template class GNUELFDumper : public ELFDumper { formatted_raw_ostream &OS; public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) GNUELFDumper(const object::ELFObjectFile &ObjF, ScopedPrinter &Writer) : ELFDumper(ObjF, Writer), OS(static_cast(Writer.getOStream())) { assert(&this->W.getOStream() == &llvm::fouts()); } void printFileSummary(StringRef FileStr, ObjectFile &Obj, ArrayRef InputFilenames, const Archive *A) override; void printFileHeaders() override; void printGroupSections() override; void printRelocations() override; void printSectionHeaders() override; void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols, bool ExtraSymInfo) override; void printHashSymbols() override; void printSectionDetails() override; void printDependentLibs() override; void printDynamicTable() override; void printDynamicRelocations() override; void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset, bool NonVisibilityBitsUsed, bool ExtraSymInfo) const override; void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; void printVersionSymbolSection(const Elf_Shdr *Sec) override; void printVersionDefinitionSection(const Elf_Shdr *Sec) override; void printVersionDependencySection(const Elf_Shdr *Sec) override; void printCGProfile() override; void printBBAddrMaps() override; void printAddrsig() override; void printNotes() override; void printELFLinkerOptions() override; void printStackSizes() override; void printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, const ArrayRef> Descriptors) override; void printHashHistogramStats(size_t NBucket, size_t MaxChain, size_t TotalSyms, ArrayRef Count, bool IsGnu) const override; private: void printHashTableSymbols(const Elf_Hash &HashTable); void printGnuHashTableSymbols(const Elf_GnuHash &GnuHashTable); struct Field { std::string Str; unsigned Column; Field(StringRef S, unsigned Col) : Str(std::string(S)), Column(Col) {} Field(unsigned Col) : Column(Col) {} }; template std::string printFlags(T Value, ArrayRef> EnumValues, TEnum EnumMask1 = {}, TEnum EnumMask2 = {}, TEnum EnumMask3 = {}) const { std::string Str; for (const EnumEntry &Flag : EnumValues) { if (Flag.Value == 0) continue; TEnum EnumMask{}; if (Flag.Value & EnumMask1) EnumMask = EnumMask1; else if (Flag.Value & EnumMask2) EnumMask = EnumMask2; else if (Flag.Value & EnumMask3) EnumMask = EnumMask3; bool IsEnum = (Flag.Value & EnumMask) != 0; if ((!IsEnum && (Value & Flag.Value) == Flag.Value) || (IsEnum && (Value & EnumMask) == Flag.Value)) { if (!Str.empty()) Str += ", "; Str += Flag.AltName; } } return Str; } formatted_raw_ostream &printField(struct Field F) const { if (F.Column != 0) OS.PadToColumn(F.Column); OS << F.Str; OS.flush(); return OS; } void printHashedSymbol(const Elf_Sym *Sym, unsigned SymIndex, DataRegion ShndxTable, StringRef StrTable, uint32_t Bucket); void printRelrReloc(const Elf_Relr &R) override; void printRelRelaReloc(const Relocation &R, const RelSymbol &RelSym) override; void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable, std::optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed, bool ExtraSymInfo) const override; void 
printDynamicRelocHeader(unsigned Type, StringRef Name, const DynRegionInfo &Reg) override; std::string getSymbolSectionNdx(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable, bool ExtraSymInfo = false) const; void printProgramHeaders() override; void printSectionMapping() override; void printGNUVersionSectionProlog(const typename ELFT::Shdr &Sec, const Twine &Label, unsigned EntriesNum); void printStackSizeEntry(uint64_t Size, ArrayRef FuncNames) override; void printMipsGOT(const MipsGOTParser &Parser) override; void printMipsPLT(const MipsGOTParser &Parser) override; void printMipsABIFlags() override; }; template class LLVMELFDumper : public ELFDumper { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) LLVMELFDumper(const object::ELFObjectFile &ObjF, ScopedPrinter &Writer) : ELFDumper(ObjF, Writer), W(Writer) {} void printFileHeaders() override; void printGroupSections() override; void printRelocations() override; void printSectionHeaders() override; void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols, bool ExtraSymInfo) override; void printDependentLibs() override; void printDynamicTable() override; void printDynamicRelocations() override; void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; void printVersionSymbolSection(const Elf_Shdr *Sec) override; void printVersionDefinitionSection(const Elf_Shdr *Sec) override; void printVersionDependencySection(const Elf_Shdr *Sec) override; void printCGProfile() override; void printBBAddrMaps() override; void printAddrsig() override; void printNotes() override; void printELFLinkerOptions() override; void printStackSizes() override; void printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, const ArrayRef> Descriptors) override; void printSymbolSection(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable) const; void printHashHistogramStats(size_t NBucket, size_t MaxChain, size_t TotalSyms, ArrayRef Count, bool IsGnu) const override; private: void printRelrReloc(const Elf_Relr &R) override; void printRelRelaReloc(const Relocation &R, const RelSymbol &RelSym) override; void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable, std::optional StrTable, bool IsDynamic, bool /*NonVisibilityBitsUsed*/, bool /*ExtraSymInfo*/) const override; void printProgramHeaders() override; void printSectionMapping() override {} void printStackSizeEntry(uint64_t Size, ArrayRef FuncNames) override; void printMipsGOT(const MipsGOTParser &Parser) override; void printMipsPLT(const MipsGOTParser &Parser) override; void printMipsABIFlags() override; virtual void printZeroSymbolOtherField(const Elf_Sym &Symbol) const; protected: virtual std::string getGroupSectionHeaderName() const; void printSymbolOtherField(const Elf_Sym &Symbol) const; virtual void printExpandedRelRelaReloc(const Relocation &R, StringRef SymbolName, StringRef RelocName); virtual void printDefaultRelRelaReloc(const Relocation &R, StringRef SymbolName, StringRef RelocName); virtual void printRelocationSectionInfo(const Elf_Shdr &Sec, StringRef Name, const unsigned SecNdx); virtual void printSectionGroupMembers(StringRef Name, uint64_t Idx) const; virtual void printEmptyGroupMessage() const; ScopedPrinter &W; }; // JSONELFDumper shares most of the same implementation as LLVMELFDumper except // it uses a JSONScopedPrinter. 
template class JSONELFDumper : public LLVMELFDumper { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) JSONELFDumper(const object::ELFObjectFile &ObjF, ScopedPrinter &Writer) : LLVMELFDumper(ObjF, Writer) {} std::string getGroupSectionHeaderName() const override; void printFileSummary(StringRef FileStr, ObjectFile &Obj, ArrayRef InputFilenames, const Archive *A) override; virtual void printZeroSymbolOtherField(const Elf_Sym &Symbol) const override; void printDefaultRelRelaReloc(const Relocation &R, StringRef SymbolName, StringRef RelocName) override; void printRelocationSectionInfo(const Elf_Shdr &Sec, StringRef Name, const unsigned SecNdx) override; void printSectionGroupMembers(StringRef Name, uint64_t Idx) const override; void printEmptyGroupMessage() const override; private: std::unique_ptr FileScope; }; } // end anonymous namespace namespace llvm { template static std::unique_ptr createELFDumper(const ELFObjectFile &Obj, ScopedPrinter &Writer) { if (opts::Output == opts::GNU) return std::make_unique>(Obj, Writer); else if (opts::Output == opts::JSON) return std::make_unique>(Obj, Writer); return std::make_unique>(Obj, Writer); } std::unique_ptr createELFDumper(const object::ELFObjectFileBase &Obj, ScopedPrinter &Writer) { // Little-endian 32-bit if (const ELF32LEObjectFile *ELFObj = dyn_cast(&Obj)) return createELFDumper(*ELFObj, Writer); // Big-endian 32-bit if (const ELF32BEObjectFile *ELFObj = dyn_cast(&Obj)) return createELFDumper(*ELFObj, Writer); // Little-endian 64-bit if (const ELF64LEObjectFile *ELFObj = dyn_cast(&Obj)) return createELFDumper(*ELFObj, Writer); // Big-endian 64-bit return createELFDumper(*cast(&Obj), Writer); } } // end namespace llvm template Expected, 0> *> ELFDumper::getVersionMap() const { // If the VersionMap has already been loaded or if there is no dynamic symtab // or version table, there is nothing to do. if (!VersionMap.empty() || !DynSymRegion || !SymbolVersionSection) return &VersionMap; Expected, 0>> MapOrErr = Obj.loadVersionMap(SymbolVersionNeedSection, SymbolVersionDefSection); if (MapOrErr) VersionMap = *MapOrErr; else return MapOrErr.takeError(); return &VersionMap; } template Expected ELFDumper::getSymbolVersion(const Elf_Sym &Sym, bool &IsDefault) const { // This is a dynamic symbol. Look in the GNU symbol version table. if (!SymbolVersionSection) { // No version table. IsDefault = false; return ""; } assert(DynSymRegion && "DynSymRegion has not been initialised"); // Determine the position in the symbol table of this entry. size_t EntryIndex = (reinterpret_cast(&Sym) - reinterpret_cast(DynSymRegion->Addr)) / sizeof(Elf_Sym); // Get the corresponding version index entry. 
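// Sketch of the lookup that follows: the entry at EntryIndex in the
// SHT_GNU_versym section holds a vs_index; VER_NDX_LOCAL and VER_NDX_GLOBAL
// mean "no version string", while any other index is resolved through the
// lazily loaded version map.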
Expected EntryOrErr = Obj.template getEntry(*SymbolVersionSection, EntryIndex); if (!EntryOrErr) return EntryOrErr.takeError(); unsigned Version = (*EntryOrErr)->vs_index; if (Version == VER_NDX_LOCAL || Version == VER_NDX_GLOBAL) { IsDefault = false; return ""; } Expected, 0> *> MapOrErr = getVersionMap(); if (!MapOrErr) return MapOrErr.takeError(); return Obj.getSymbolVersionByIndex(Version, IsDefault, **MapOrErr, Sym.st_shndx == ELF::SHN_UNDEF); } template Expected> ELFDumper::getRelocationTarget(const Relocation &R, const Elf_Shdr *SymTab) const { if (R.Symbol == 0) return RelSymbol(nullptr, ""); Expected SymOrErr = Obj.template getEntry(*SymTab, R.Symbol); if (!SymOrErr) return createError("unable to read an entry with index " + Twine(R.Symbol) + " from " + describe(*SymTab) + ": " + toString(SymOrErr.takeError())); const Elf_Sym *Sym = *SymOrErr; if (!Sym) return RelSymbol(nullptr, ""); Expected StrTableOrErr = Obj.getStringTableForSymtab(*SymTab); if (!StrTableOrErr) return StrTableOrErr.takeError(); const Elf_Sym *FirstSym = cantFail(Obj.template getEntry(*SymTab, 0)); std::string SymbolName = getFullSymbolName(*Sym, Sym - FirstSym, getShndxTable(SymTab), *StrTableOrErr, SymTab->sh_type == SHT_DYNSYM); return RelSymbol(Sym, SymbolName); } template ArrayRef ELFDumper::getShndxTable(const Elf_Shdr *Symtab) const { if (Symtab) { auto It = ShndxTables.find(Symtab); if (It != ShndxTables.end()) return It->second; } return {}; } static std::string maybeDemangle(StringRef Name) { return opts::Demangle ? demangle(Name) : Name.str(); } template std::string ELFDumper::getStaticSymbolName(uint32_t Index) const { auto Warn = [&](Error E) -> std::string { reportUniqueWarning("unable to read the name of symbol with index " + Twine(Index) + ": " + toString(std::move(E))); return ""; }; Expected SymOrErr = Obj.getSymbol(DotSymtabSec, Index); if (!SymOrErr) return Warn(SymOrErr.takeError()); Expected StrTabOrErr = Obj.getStringTableForSymtab(*DotSymtabSec); if (!StrTabOrErr) return Warn(StrTabOrErr.takeError()); Expected NameOrErr = (*SymOrErr)->getName(*StrTabOrErr); if (!NameOrErr) return Warn(NameOrErr.takeError()); return maybeDemangle(*NameOrErr); } template std::string ELFDumper::getFullSymbolName( const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable, std::optional StrTable, bool IsDynamic) const { if (!StrTable) return ""; std::string SymbolName; if (Expected NameOrErr = Symbol.getName(*StrTable)) { SymbolName = maybeDemangle(*NameOrErr); } else { reportUniqueWarning(NameOrErr.takeError()); return ""; } if (SymbolName.empty() && Symbol.getType() == ELF::STT_SECTION) { Expected SectionIndex = getSymbolSectionIndex(Symbol, SymIndex, ShndxTable); if (!SectionIndex) { reportUniqueWarning(SectionIndex.takeError()); return ""; } Expected NameOrErr = getSymbolSectionName(Symbol, *SectionIndex); if (!NameOrErr) { reportUniqueWarning(NameOrErr.takeError()); return ("
").str(); } return std::string(*NameOrErr); } if (!IsDynamic) return SymbolName; bool IsDefault; Expected VersionOrErr = getSymbolVersion(Symbol, IsDefault); if (!VersionOrErr) { reportUniqueWarning(VersionOrErr.takeError()); return SymbolName + "@"; } if (!VersionOrErr->empty()) { SymbolName += (IsDefault ? "@@" : "@"); SymbolName += *VersionOrErr; } return SymbolName; } template Expected ELFDumper::getSymbolSectionIndex(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable) const { unsigned Ndx = Symbol.st_shndx; if (Ndx == SHN_XINDEX) return object::getExtendedSymbolTableIndex(Symbol, SymIndex, ShndxTable); if (Ndx != SHN_UNDEF && Ndx < SHN_LORESERVE) return Ndx; auto CreateErr = [&](const Twine &Name, std::optional Offset = std::nullopt) { std::string Desc; if (Offset) Desc = (Name + "+0x" + Twine::utohexstr(*Offset)).str(); else Desc = Name.str(); return createError( "unable to get section index for symbol with st_shndx = 0x" + Twine::utohexstr(Ndx) + " (" + Desc + ")"); }; if (Ndx >= ELF::SHN_LOPROC && Ndx <= ELF::SHN_HIPROC) return CreateErr("SHN_LOPROC", Ndx - ELF::SHN_LOPROC); if (Ndx >= ELF::SHN_LOOS && Ndx <= ELF::SHN_HIOS) return CreateErr("SHN_LOOS", Ndx - ELF::SHN_LOOS); if (Ndx == ELF::SHN_UNDEF) return CreateErr("SHN_UNDEF"); if (Ndx == ELF::SHN_ABS) return CreateErr("SHN_ABS"); if (Ndx == ELF::SHN_COMMON) return CreateErr("SHN_COMMON"); return CreateErr("SHN_LORESERVE", Ndx - SHN_LORESERVE); } template Expected ELFDumper::getSymbolSectionName(const Elf_Sym &Symbol, unsigned SectionIndex) const { Expected SecOrErr = Obj.getSection(SectionIndex); if (!SecOrErr) return SecOrErr.takeError(); return Obj.getSectionName(**SecOrErr); } template static const typename ELFO::Elf_Shdr * findNotEmptySectionByAddress(const ELFO &Obj, StringRef FileName, uint64_t Addr) { for (const typename ELFO::Elf_Shdr &Shdr : cantFail(Obj.sections())) if (Shdr.sh_addr == Addr && Shdr.sh_size > 0) return &Shdr; return nullptr; } const EnumEntry ElfClass[] = { {"None", "none", ELF::ELFCLASSNONE}, {"32-bit", "ELF32", ELF::ELFCLASS32}, {"64-bit", "ELF64", ELF::ELFCLASS64}, }; const EnumEntry ElfDataEncoding[] = { {"None", "none", ELF::ELFDATANONE}, {"LittleEndian", "2's complement, little endian", ELF::ELFDATA2LSB}, {"BigEndian", "2's complement, big endian", ELF::ELFDATA2MSB}, }; const EnumEntry ElfObjectFileType[] = { {"None", "NONE (none)", ELF::ET_NONE}, {"Relocatable", "REL (Relocatable file)", ELF::ET_REL}, {"Executable", "EXEC (Executable file)", ELF::ET_EXEC}, {"SharedObject", "DYN (Shared object file)", ELF::ET_DYN}, {"Core", "CORE (Core file)", ELF::ET_CORE}, }; const EnumEntry ElfOSABI[] = { {"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE}, {"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX}, {"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD}, {"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX}, {"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD}, {"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS}, {"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX}, {"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX}, {"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD}, {"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64}, {"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO}, {"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD}, {"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS}, {"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK}, {"AROS", "AROS", ELF::ELFOSABI_AROS}, {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS}, {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI}, {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA}, {"Standalone", 
"Standalone App", ELF::ELFOSABI_STANDALONE} }; const EnumEntry AMDGPUElfOSABI[] = { {"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA}, {"AMDGPU_PAL", "AMDGPU - PAL", ELF::ELFOSABI_AMDGPU_PAL}, {"AMDGPU_MESA3D", "AMDGPU - MESA3D", ELF::ELFOSABI_AMDGPU_MESA3D} }; const EnumEntry ARMElfOSABI[] = { {"ARM", "ARM", ELF::ELFOSABI_ARM} }; const EnumEntry C6000ElfOSABI[] = { {"C6000_ELFABI", "Bare-metal C6000", ELF::ELFOSABI_C6000_ELFABI}, {"C6000_LINUX", "Linux C6000", ELF::ELFOSABI_C6000_LINUX} }; const EnumEntry ElfMachineType[] = { ENUM_ENT(EM_NONE, "None"), ENUM_ENT(EM_M32, "WE32100"), ENUM_ENT(EM_SPARC, "Sparc"), ENUM_ENT(EM_386, "Intel 80386"), ENUM_ENT(EM_68K, "MC68000"), ENUM_ENT(EM_88K, "MC88000"), ENUM_ENT(EM_IAMCU, "EM_IAMCU"), ENUM_ENT(EM_860, "Intel 80860"), ENUM_ENT(EM_MIPS, "MIPS R3000"), ENUM_ENT(EM_S370, "IBM System/370"), ENUM_ENT(EM_MIPS_RS3_LE, "MIPS R3000 little-endian"), ENUM_ENT(EM_PARISC, "HPPA"), ENUM_ENT(EM_VPP500, "Fujitsu VPP500"), ENUM_ENT(EM_SPARC32PLUS, "Sparc v8+"), ENUM_ENT(EM_960, "Intel 80960"), ENUM_ENT(EM_PPC, "PowerPC"), ENUM_ENT(EM_PPC64, "PowerPC64"), ENUM_ENT(EM_S390, "IBM S/390"), ENUM_ENT(EM_SPU, "SPU"), ENUM_ENT(EM_V800, "NEC V800 series"), ENUM_ENT(EM_FR20, "Fujistsu FR20"), ENUM_ENT(EM_RH32, "TRW RH-32"), ENUM_ENT(EM_RCE, "Motorola RCE"), ENUM_ENT(EM_ARM, "ARM"), ENUM_ENT(EM_ALPHA, "EM_ALPHA"), ENUM_ENT(EM_SH, "Hitachi SH"), ENUM_ENT(EM_SPARCV9, "Sparc v9"), ENUM_ENT(EM_TRICORE, "Siemens Tricore"), ENUM_ENT(EM_ARC, "ARC"), ENUM_ENT(EM_H8_300, "Hitachi H8/300"), ENUM_ENT(EM_H8_300H, "Hitachi H8/300H"), ENUM_ENT(EM_H8S, "Hitachi H8S"), ENUM_ENT(EM_H8_500, "Hitachi H8/500"), ENUM_ENT(EM_IA_64, "Intel IA-64"), ENUM_ENT(EM_MIPS_X, "Stanford MIPS-X"), ENUM_ENT(EM_COLDFIRE, "Motorola Coldfire"), ENUM_ENT(EM_68HC12, "Motorola MC68HC12 Microcontroller"), ENUM_ENT(EM_MMA, "Fujitsu Multimedia Accelerator"), ENUM_ENT(EM_PCP, "Siemens PCP"), ENUM_ENT(EM_NCPU, "Sony nCPU embedded RISC processor"), ENUM_ENT(EM_NDR1, "Denso NDR1 microprocesspr"), ENUM_ENT(EM_STARCORE, "Motorola Star*Core processor"), ENUM_ENT(EM_ME16, "Toyota ME16 processor"), ENUM_ENT(EM_ST100, "STMicroelectronics ST100 processor"), ENUM_ENT(EM_TINYJ, "Advanced Logic Corp. TinyJ embedded processor"), ENUM_ENT(EM_X86_64, "Advanced Micro Devices X86-64"), ENUM_ENT(EM_PDSP, "Sony DSP processor"), ENUM_ENT(EM_PDP10, "Digital Equipment Corp. PDP-10"), ENUM_ENT(EM_PDP11, "Digital Equipment Corp. 
PDP-11"), ENUM_ENT(EM_FX66, "Siemens FX66 microcontroller"), ENUM_ENT(EM_ST9PLUS, "STMicroelectronics ST9+ 8/16 bit microcontroller"), ENUM_ENT(EM_ST7, "STMicroelectronics ST7 8-bit microcontroller"), ENUM_ENT(EM_68HC16, "Motorola MC68HC16 Microcontroller"), ENUM_ENT(EM_68HC11, "Motorola MC68HC11 Microcontroller"), ENUM_ENT(EM_68HC08, "Motorola MC68HC08 Microcontroller"), ENUM_ENT(EM_68HC05, "Motorola MC68HC05 Microcontroller"), ENUM_ENT(EM_SVX, "Silicon Graphics SVx"), ENUM_ENT(EM_ST19, "STMicroelectronics ST19 8-bit microcontroller"), ENUM_ENT(EM_VAX, "Digital VAX"), ENUM_ENT(EM_CRIS, "Axis Communications 32-bit embedded processor"), ENUM_ENT(EM_JAVELIN, "Infineon Technologies 32-bit embedded cpu"), ENUM_ENT(EM_FIREPATH, "Element 14 64-bit DSP processor"), ENUM_ENT(EM_ZSP, "LSI Logic's 16-bit DSP processor"), ENUM_ENT(EM_MMIX, "Donald Knuth's educational 64-bit processor"), ENUM_ENT(EM_HUANY, "Harvard Universitys's machine-independent object format"), ENUM_ENT(EM_PRISM, "Vitesse Prism"), ENUM_ENT(EM_AVR, "Atmel AVR 8-bit microcontroller"), ENUM_ENT(EM_FR30, "Fujitsu FR30"), ENUM_ENT(EM_D10V, "Mitsubishi D10V"), ENUM_ENT(EM_D30V, "Mitsubishi D30V"), ENUM_ENT(EM_V850, "NEC v850"), ENUM_ENT(EM_M32R, "Renesas M32R (formerly Mitsubishi M32r)"), ENUM_ENT(EM_MN10300, "Matsushita MN10300"), ENUM_ENT(EM_MN10200, "Matsushita MN10200"), ENUM_ENT(EM_PJ, "picoJava"), ENUM_ENT(EM_OPENRISC, "OpenRISC 32-bit embedded processor"), ENUM_ENT(EM_ARC_COMPACT, "EM_ARC_COMPACT"), ENUM_ENT(EM_XTENSA, "Tensilica Xtensa Processor"), ENUM_ENT(EM_VIDEOCORE, "Alphamosaic VideoCore processor"), ENUM_ENT(EM_TMM_GPP, "Thompson Multimedia General Purpose Processor"), ENUM_ENT(EM_NS32K, "National Semiconductor 32000 series"), ENUM_ENT(EM_TPC, "Tenor Network TPC processor"), ENUM_ENT(EM_SNP1K, "EM_SNP1K"), ENUM_ENT(EM_ST200, "STMicroelectronics ST200 microcontroller"), ENUM_ENT(EM_IP2K, "Ubicom IP2xxx 8-bit microcontrollers"), ENUM_ENT(EM_MAX, "MAX Processor"), ENUM_ENT(EM_CR, "National Semiconductor CompactRISC"), ENUM_ENT(EM_F2MC16, "Fujitsu F2MC16"), ENUM_ENT(EM_MSP430, "Texas Instruments msp430 microcontroller"), ENUM_ENT(EM_BLACKFIN, "Analog Devices Blackfin"), ENUM_ENT(EM_SE_C33, "S1C33 Family of Seiko Epson processors"), ENUM_ENT(EM_SEP, "Sharp embedded microprocessor"), ENUM_ENT(EM_ARCA, "Arca RISC microprocessor"), ENUM_ENT(EM_UNICORE, "Unicore"), ENUM_ENT(EM_EXCESS, "eXcess 16/32/64-bit configurable embedded CPU"), ENUM_ENT(EM_DXP, "Icera Semiconductor Inc. 
Deep Execution Processor"), ENUM_ENT(EM_ALTERA_NIOS2, "Altera Nios"), ENUM_ENT(EM_CRX, "National Semiconductor CRX microprocessor"), ENUM_ENT(EM_XGATE, "Motorola XGATE embedded processor"), ENUM_ENT(EM_C166, "Infineon Technologies xc16x"), ENUM_ENT(EM_M16C, "Renesas M16C"), ENUM_ENT(EM_DSPIC30F, "Microchip Technology dsPIC30F Digital Signal Controller"), ENUM_ENT(EM_CE, "Freescale Communication Engine RISC core"), ENUM_ENT(EM_M32C, "Renesas M32C"), ENUM_ENT(EM_TSK3000, "Altium TSK3000 core"), ENUM_ENT(EM_RS08, "Freescale RS08 embedded processor"), ENUM_ENT(EM_SHARC, "EM_SHARC"), ENUM_ENT(EM_ECOG2, "Cyan Technology eCOG2 microprocessor"), ENUM_ENT(EM_SCORE7, "SUNPLUS S+Core"), ENUM_ENT(EM_DSP24, "New Japan Radio (NJR) 24-bit DSP Processor"), ENUM_ENT(EM_VIDEOCORE3, "Broadcom VideoCore III processor"), ENUM_ENT(EM_LATTICEMICO32, "Lattice Mico32"), ENUM_ENT(EM_SE_C17, "Seiko Epson C17 family"), ENUM_ENT(EM_TI_C6000, "Texas Instruments TMS320C6000 DSP family"), ENUM_ENT(EM_TI_C2000, "Texas Instruments TMS320C2000 DSP family"), ENUM_ENT(EM_TI_C5500, "Texas Instruments TMS320C55x DSP family"), ENUM_ENT(EM_MMDSP_PLUS, "STMicroelectronics 64bit VLIW Data Signal Processor"), ENUM_ENT(EM_CYPRESS_M8C, "Cypress M8C microprocessor"), ENUM_ENT(EM_R32C, "Renesas R32C series microprocessors"), ENUM_ENT(EM_TRIMEDIA, "NXP Semiconductors TriMedia architecture family"), ENUM_ENT(EM_HEXAGON, "Qualcomm Hexagon"), ENUM_ENT(EM_8051, "Intel 8051 and variants"), ENUM_ENT(EM_STXP7X, "STMicroelectronics STxP7x family"), ENUM_ENT(EM_NDS32, "Andes Technology compact code size embedded RISC processor family"), ENUM_ENT(EM_ECOG1, "Cyan Technology eCOG1 microprocessor"), // FIXME: Following EM_ECOG1X definitions is dead code since EM_ECOG1X has // an identical number to EM_ECOG1. ENUM_ENT(EM_ECOG1X, "Cyan Technology eCOG1X family"), ENUM_ENT(EM_MAXQ30, "Dallas Semiconductor MAXQ30 Core microcontrollers"), ENUM_ENT(EM_XIMO16, "New Japan Radio (NJR) 16-bit DSP Processor"), ENUM_ENT(EM_MANIK, "M2000 Reconfigurable RISC Microprocessor"), ENUM_ENT(EM_CRAYNV2, "Cray Inc. 
NV2 vector architecture"), ENUM_ENT(EM_RX, "Renesas RX"), ENUM_ENT(EM_METAG, "Imagination Technologies Meta processor architecture"), ENUM_ENT(EM_MCST_ELBRUS, "MCST Elbrus general purpose hardware architecture"), ENUM_ENT(EM_ECOG16, "Cyan Technology eCOG16 family"), ENUM_ENT(EM_CR16, "National Semiconductor CompactRISC 16-bit processor"), ENUM_ENT(EM_ETPU, "Freescale Extended Time Processing Unit"), ENUM_ENT(EM_SLE9X, "Infineon Technologies SLE9X core"), ENUM_ENT(EM_L10M, "EM_L10M"), ENUM_ENT(EM_K10M, "EM_K10M"), ENUM_ENT(EM_AARCH64, "AArch64"), ENUM_ENT(EM_AVR32, "Atmel Corporation 32-bit microprocessor family"), ENUM_ENT(EM_STM8, "STMicroeletronics STM8 8-bit microcontroller"), ENUM_ENT(EM_TILE64, "Tilera TILE64 multicore architecture family"), ENUM_ENT(EM_TILEPRO, "Tilera TILEPro multicore architecture family"), ENUM_ENT(EM_MICROBLAZE, "Xilinx MicroBlaze 32-bit RISC soft processor core"), ENUM_ENT(EM_CUDA, "NVIDIA CUDA architecture"), ENUM_ENT(EM_TILEGX, "Tilera TILE-Gx multicore architecture family"), ENUM_ENT(EM_CLOUDSHIELD, "EM_CLOUDSHIELD"), ENUM_ENT(EM_COREA_1ST, "EM_COREA_1ST"), ENUM_ENT(EM_COREA_2ND, "EM_COREA_2ND"), ENUM_ENT(EM_ARC_COMPACT2, "EM_ARC_COMPACT2"), ENUM_ENT(EM_OPEN8, "EM_OPEN8"), ENUM_ENT(EM_RL78, "Renesas RL78"), ENUM_ENT(EM_VIDEOCORE5, "Broadcom VideoCore V processor"), ENUM_ENT(EM_78KOR, "EM_78KOR"), ENUM_ENT(EM_56800EX, "EM_56800EX"), ENUM_ENT(EM_AMDGPU, "EM_AMDGPU"), ENUM_ENT(EM_RISCV, "RISC-V"), ENUM_ENT(EM_LANAI, "EM_LANAI"), ENUM_ENT(EM_BPF, "EM_BPF"), ENUM_ENT(EM_VE, "NEC SX-Aurora Vector Engine"), ENUM_ENT(EM_LOONGARCH, "LoongArch"), }; const EnumEntry ElfSymbolBindings[] = { {"Local", "LOCAL", ELF::STB_LOCAL}, {"Global", "GLOBAL", ELF::STB_GLOBAL}, {"Weak", "WEAK", ELF::STB_WEAK}, {"Unique", "UNIQUE", ELF::STB_GNU_UNIQUE}}; const EnumEntry ElfSymbolVisibilities[] = { {"DEFAULT", "DEFAULT", ELF::STV_DEFAULT}, {"INTERNAL", "INTERNAL", ELF::STV_INTERNAL}, {"HIDDEN", "HIDDEN", ELF::STV_HIDDEN}, {"PROTECTED", "PROTECTED", ELF::STV_PROTECTED}}; const EnumEntry AMDGPUSymbolTypes[] = { { "AMDGPU_HSA_KERNEL", ELF::STT_AMDGPU_HSA_KERNEL } }; static const char *getGroupType(uint32_t Flag) { if (Flag & ELF::GRP_COMDAT) return "COMDAT"; else return "(unknown)"; } const EnumEntry ElfSectionFlags[] = { ENUM_ENT(SHF_WRITE, "W"), ENUM_ENT(SHF_ALLOC, "A"), ENUM_ENT(SHF_EXECINSTR, "X"), ENUM_ENT(SHF_MERGE, "M"), ENUM_ENT(SHF_STRINGS, "S"), ENUM_ENT(SHF_INFO_LINK, "I"), ENUM_ENT(SHF_LINK_ORDER, "L"), ENUM_ENT(SHF_OS_NONCONFORMING, "O"), ENUM_ENT(SHF_GROUP, "G"), ENUM_ENT(SHF_TLS, "T"), ENUM_ENT(SHF_COMPRESSED, "C"), ENUM_ENT(SHF_EXCLUDE, "E"), }; const EnumEntry ElfGNUSectionFlags[] = { ENUM_ENT(SHF_GNU_RETAIN, "R") }; const EnumEntry ElfSolarisSectionFlags[] = { ENUM_ENT(SHF_SUNW_NODISCARD, "R") }; const EnumEntry ElfXCoreSectionFlags[] = { ENUM_ENT(XCORE_SHF_CP_SECTION, ""), ENUM_ENT(XCORE_SHF_DP_SECTION, "") }; const EnumEntry ElfARMSectionFlags[] = { ENUM_ENT(SHF_ARM_PURECODE, "y") }; const EnumEntry ElfHexagonSectionFlags[] = { ENUM_ENT(SHF_HEX_GPREL, "") }; const EnumEntry ElfMipsSectionFlags[] = { ENUM_ENT(SHF_MIPS_NODUPES, ""), ENUM_ENT(SHF_MIPS_NAMES, ""), ENUM_ENT(SHF_MIPS_LOCAL, ""), ENUM_ENT(SHF_MIPS_NOSTRIP, ""), ENUM_ENT(SHF_MIPS_GPREL, ""), ENUM_ENT(SHF_MIPS_MERGE, ""), ENUM_ENT(SHF_MIPS_ADDR, ""), ENUM_ENT(SHF_MIPS_STRING, "") }; const EnumEntry ElfX86_64SectionFlags[] = { ENUM_ENT(SHF_X86_64_LARGE, "l") }; static std::vector> getSectionFlagsForTarget(unsigned EOSAbi, unsigned EMachine) { std::vector> Ret(std::begin(ElfSectionFlags), 
std::end(ElfSectionFlags)); switch (EOSAbi) { case ELFOSABI_SOLARIS: Ret.insert(Ret.end(), std::begin(ElfSolarisSectionFlags), std::end(ElfSolarisSectionFlags)); break; default: Ret.insert(Ret.end(), std::begin(ElfGNUSectionFlags), std::end(ElfGNUSectionFlags)); break; } switch (EMachine) { case EM_ARM: Ret.insert(Ret.end(), std::begin(ElfARMSectionFlags), std::end(ElfARMSectionFlags)); break; case EM_HEXAGON: Ret.insert(Ret.end(), std::begin(ElfHexagonSectionFlags), std::end(ElfHexagonSectionFlags)); break; case EM_MIPS: Ret.insert(Ret.end(), std::begin(ElfMipsSectionFlags), std::end(ElfMipsSectionFlags)); break; case EM_X86_64: Ret.insert(Ret.end(), std::begin(ElfX86_64SectionFlags), std::end(ElfX86_64SectionFlags)); break; case EM_XCORE: Ret.insert(Ret.end(), std::begin(ElfXCoreSectionFlags), std::end(ElfXCoreSectionFlags)); break; default: break; } return Ret; } static std::string getGNUFlags(unsigned EOSAbi, unsigned EMachine, uint64_t Flags) { // Here we are trying to build the flags string in the same way as GNU does. // It is not that straightforward. Imagine we have sh_flags == 0x90000000. // SHF_EXCLUDE ("E") has a value of 0x80000000 and SHF_MASKPROC is 0xf0000000. // GNU readelf will not print "E" or "Ep" in this case, but will print just // "p". It only will print "E" when no other processor flag is set. std::string Str; bool HasUnknownFlag = false; bool HasOSFlag = false; bool HasProcFlag = false; std::vector> FlagsList = getSectionFlagsForTarget(EOSAbi, EMachine); while (Flags) { // Take the least significant bit as a flag. uint64_t Flag = Flags & -Flags; Flags -= Flag; // Find the flag in the known flags list. auto I = llvm::find_if(FlagsList, [=](const EnumEntry &E) { // Flags with empty names are not printed in GNU style output. return E.Value == Flag && !E.AltName.empty(); }); if (I != FlagsList.end()) { Str += I->AltName; continue; } // If we did not find a matching regular flag, then we deal with an OS // specific flag, processor specific flag or an unknown flag. if (Flag & ELF::SHF_MASKOS) { HasOSFlag = true; Flags &= ~ELF::SHF_MASKOS; } else if (Flag & ELF::SHF_MASKPROC) { HasProcFlag = true; // Mask off all the processor-specific bits. This removes the SHF_EXCLUDE // bit if set so that it doesn't also get printed. Flags &= ~ELF::SHF_MASKPROC; } else { HasUnknownFlag = true; } } // "o", "p" and "x" are printed last. if (HasOSFlag) Str += "o"; if (HasProcFlag) Str += "p"; if (HasUnknownFlag) Str += "x"; return Str; } static StringRef segmentTypeToString(unsigned Arch, unsigned Type) { // Check potentially overlapped processor-specific program header type. 
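// For example (hypothetical header values): on EM_ARM the processor-specific
// range is consulted first, so PT_ARM_EXIDX gets its own name, while a plain
// PT_LOAD falls through to the generic switch further down.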
switch (Arch) { case ELF::EM_ARM: switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX); } break; case ELF::EM_MIPS: case ELF::EM_MIPS_RS3_LE: switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_REGINFO); LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_RTPROC); LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_OPTIONS); LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_ABIFLAGS); } break; case ELF::EM_RISCV: switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_RISCV_ATTRIBUTES); } } switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_NULL); LLVM_READOBJ_ENUM_CASE(ELF, PT_LOAD); LLVM_READOBJ_ENUM_CASE(ELF, PT_DYNAMIC); LLVM_READOBJ_ENUM_CASE(ELF, PT_INTERP); LLVM_READOBJ_ENUM_CASE(ELF, PT_NOTE); LLVM_READOBJ_ENUM_CASE(ELF, PT_SHLIB); LLVM_READOBJ_ENUM_CASE(ELF, PT_PHDR); LLVM_READOBJ_ENUM_CASE(ELF, PT_TLS); LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_EH_FRAME); LLVM_READOBJ_ENUM_CASE(ELF, PT_SUNW_UNWIND); LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_STACK); LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_RELRO); LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_PROPERTY); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_MUTABLE); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_NOBTCFI); + LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_SYSCALLS); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA); default: return ""; } } static std::string getGNUPtType(unsigned Arch, unsigned Type) { StringRef Seg = segmentTypeToString(Arch, Type); if (Seg.empty()) return std::string(": ") + to_string(format_hex(Type, 1)); // E.g. "PT_ARM_EXIDX" -> "EXIDX". if (Seg.consume_front("PT_ARM_")) return Seg.str(); // E.g. "PT_MIPS_REGINFO" -> "REGINFO". if (Seg.consume_front("PT_MIPS_")) return Seg.str(); // E.g. "PT_RISCV_ATTRIBUTES" if (Seg.consume_front("PT_RISCV_")) return Seg.str(); // E.g. "PT_LOAD" -> "LOAD". 
assert(Seg.starts_with("PT_")); return Seg.drop_front(3).str(); } const EnumEntry ElfSegmentFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, PF_X), LLVM_READOBJ_ENUM_ENT(ELF, PF_W), LLVM_READOBJ_ENUM_ENT(ELF, PF_R) }; const EnumEntry ElfHeaderMipsFlags[] = { ENUM_ENT(EF_MIPS_NOREORDER, "noreorder"), ENUM_ENT(EF_MIPS_PIC, "pic"), ENUM_ENT(EF_MIPS_CPIC, "cpic"), ENUM_ENT(EF_MIPS_ABI2, "abi2"), ENUM_ENT(EF_MIPS_32BITMODE, "32bitmode"), ENUM_ENT(EF_MIPS_FP64, "fp64"), ENUM_ENT(EF_MIPS_NAN2008, "nan2008"), ENUM_ENT(EF_MIPS_ABI_O32, "o32"), ENUM_ENT(EF_MIPS_ABI_O64, "o64"), ENUM_ENT(EF_MIPS_ABI_EABI32, "eabi32"), ENUM_ENT(EF_MIPS_ABI_EABI64, "eabi64"), ENUM_ENT(EF_MIPS_MACH_3900, "3900"), ENUM_ENT(EF_MIPS_MACH_4010, "4010"), ENUM_ENT(EF_MIPS_MACH_4100, "4100"), ENUM_ENT(EF_MIPS_MACH_4650, "4650"), ENUM_ENT(EF_MIPS_MACH_4120, "4120"), ENUM_ENT(EF_MIPS_MACH_4111, "4111"), ENUM_ENT(EF_MIPS_MACH_SB1, "sb1"), ENUM_ENT(EF_MIPS_MACH_OCTEON, "octeon"), ENUM_ENT(EF_MIPS_MACH_XLR, "xlr"), ENUM_ENT(EF_MIPS_MACH_OCTEON2, "octeon2"), ENUM_ENT(EF_MIPS_MACH_OCTEON3, "octeon3"), ENUM_ENT(EF_MIPS_MACH_5400, "5400"), ENUM_ENT(EF_MIPS_MACH_5900, "5900"), ENUM_ENT(EF_MIPS_MACH_5500, "5500"), ENUM_ENT(EF_MIPS_MACH_9000, "9000"), ENUM_ENT(EF_MIPS_MACH_LS2E, "loongson-2e"), ENUM_ENT(EF_MIPS_MACH_LS2F, "loongson-2f"), ENUM_ENT(EF_MIPS_MACH_LS3A, "loongson-3a"), ENUM_ENT(EF_MIPS_MICROMIPS, "micromips"), ENUM_ENT(EF_MIPS_ARCH_ASE_M16, "mips16"), ENUM_ENT(EF_MIPS_ARCH_ASE_MDMX, "mdmx"), ENUM_ENT(EF_MIPS_ARCH_1, "mips1"), ENUM_ENT(EF_MIPS_ARCH_2, "mips2"), ENUM_ENT(EF_MIPS_ARCH_3, "mips3"), ENUM_ENT(EF_MIPS_ARCH_4, "mips4"), ENUM_ENT(EF_MIPS_ARCH_5, "mips5"), ENUM_ENT(EF_MIPS_ARCH_32, "mips32"), ENUM_ENT(EF_MIPS_ARCH_64, "mips64"), ENUM_ENT(EF_MIPS_ARCH_32R2, "mips32r2"), ENUM_ENT(EF_MIPS_ARCH_64R2, "mips64r2"), ENUM_ENT(EF_MIPS_ARCH_32R6, "mips32r6"), ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6") }; const EnumEntry ElfHeaderAMDGPUFlagsABIVersion3[] = { ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), 
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_V3, "xnack"), ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_V3, "sramecc"), }; const EnumEntry ElfHeaderAMDGPUFlagsABIVersion4[] = { ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), 
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ANY_V4, "xnack"), ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_OFF_V4, "xnack-"), ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ON_V4, "xnack+"), ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_ANY_V4, "sramecc"), ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_OFF_V4, "sramecc-"), ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_ON_V4, "sramecc+"), }; const EnumEntry ElfHeaderNVPTXFlags[] = { ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"), ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"), ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"), ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"), ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"), ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"), ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"), ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"), ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"), ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"), }; const EnumEntry ElfHeaderRISCVFlags[] = { ENUM_ENT(EF_RISCV_RVC, "RVC"), ENUM_ENT(EF_RISCV_FLOAT_ABI_SINGLE, "single-float ABI"), ENUM_ENT(EF_RISCV_FLOAT_ABI_DOUBLE, "double-float ABI"), ENUM_ENT(EF_RISCV_FLOAT_ABI_QUAD, "quad-float ABI"), ENUM_ENT(EF_RISCV_RVE, "RVE"), ENUM_ENT(EF_RISCV_TSO, "TSO"), }; const EnumEntry ElfHeaderAVRFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR1), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR2), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR25), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR3), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR31), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR35), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR4), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR5), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR51), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR6), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVRTINY), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA1), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA2), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA3), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA4), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA5), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA6), LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA7), ENUM_ENT(EF_AVR_LINKRELAX_PREPARED, "relaxable"), }; const EnumEntry ElfHeaderLoongArchFlags[] = { ENUM_ENT(EF_LOONGARCH_ABI_SOFT_FLOAT, "SOFT-FLOAT"), ENUM_ENT(EF_LOONGARCH_ABI_SINGLE_FLOAT, "SINGLE-FLOAT"), 
ENUM_ENT(EF_LOONGARCH_ABI_DOUBLE_FLOAT, "DOUBLE-FLOAT"), ENUM_ENT(EF_LOONGARCH_OBJABI_V0, "OBJ-v0"), ENUM_ENT(EF_LOONGARCH_OBJABI_V1, "OBJ-v1"), }; static const EnumEntry ElfHeaderXtensaFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, EF_XTENSA_MACH_NONE), LLVM_READOBJ_ENUM_ENT(ELF, EF_XTENSA_XT_INSN), LLVM_READOBJ_ENUM_ENT(ELF, EF_XTENSA_XT_LIT) }; const EnumEntry ElfSymOtherFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, STV_INTERNAL), LLVM_READOBJ_ENUM_ENT(ELF, STV_HIDDEN), LLVM_READOBJ_ENUM_ENT(ELF, STV_PROTECTED) }; const EnumEntry ElfMipsSymOtherFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_OPTIONAL), LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PLT), LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PIC), LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_MICROMIPS) }; const EnumEntry ElfAArch64SymOtherFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, STO_AARCH64_VARIANT_PCS) }; const EnumEntry ElfMips16SymOtherFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_OPTIONAL), LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PLT), LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_MIPS16) }; const EnumEntry ElfRISCVSymOtherFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, STO_RISCV_VARIANT_CC)}; static const char *getElfMipsOptionsOdkType(unsigned Odk) { switch (Odk) { LLVM_READOBJ_ENUM_CASE(ELF, ODK_NULL); LLVM_READOBJ_ENUM_CASE(ELF, ODK_REGINFO); LLVM_READOBJ_ENUM_CASE(ELF, ODK_EXCEPTIONS); LLVM_READOBJ_ENUM_CASE(ELF, ODK_PAD); LLVM_READOBJ_ENUM_CASE(ELF, ODK_HWPATCH); LLVM_READOBJ_ENUM_CASE(ELF, ODK_FILL); LLVM_READOBJ_ENUM_CASE(ELF, ODK_TAGS); LLVM_READOBJ_ENUM_CASE(ELF, ODK_HWAND); LLVM_READOBJ_ENUM_CASE(ELF, ODK_HWOR); LLVM_READOBJ_ENUM_CASE(ELF, ODK_GP_GROUP); LLVM_READOBJ_ENUM_CASE(ELF, ODK_IDENT); LLVM_READOBJ_ENUM_CASE(ELF, ODK_PAGESIZE); default: return "Unknown"; } } template std::pair ELFDumper::findDynamic() { // Try to locate the PT_DYNAMIC header. const Elf_Phdr *DynamicPhdr = nullptr; if (Expected> PhdrsOrErr = Obj.program_headers()) { for (const Elf_Phdr &Phdr : *PhdrsOrErr) { if (Phdr.p_type != ELF::PT_DYNAMIC) continue; DynamicPhdr = &Phdr; break; } } else { reportUniqueWarning( "unable to read program headers to locate the PT_DYNAMIC segment: " + toString(PhdrsOrErr.takeError())); } // Try to locate the .dynamic section in the sections header table. const Elf_Shdr *DynamicSec = nullptr; for (const Elf_Shdr &Sec : cantFail(Obj.sections())) { if (Sec.sh_type != ELF::SHT_DYNAMIC) continue; DynamicSec = &Sec; break; } if (DynamicPhdr && ((DynamicPhdr->p_offset + DynamicPhdr->p_filesz > ObjF.getMemoryBufferRef().getBufferSize()) || (DynamicPhdr->p_offset + DynamicPhdr->p_filesz < DynamicPhdr->p_offset))) { reportUniqueWarning( "PT_DYNAMIC segment offset (0x" + Twine::utohexstr(DynamicPhdr->p_offset) + ") + file size (0x" + Twine::utohexstr(DynamicPhdr->p_filesz) + ") exceeds the size of the file (0x" + Twine::utohexstr(ObjF.getMemoryBufferRef().getBufferSize()) + ")"); // Don't use the broken dynamic header. 
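// Both bounds checks above guard the PT_DYNAMIC contents: the first rejects a
// segment whose p_offset + p_filesz extends past the end of the file, and the
// second catches unsigned wrap-around of that sum (e.g. a huge p_filesz),
// which would otherwise let the first comparison pass.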
DynamicPhdr = nullptr; } if (DynamicPhdr && DynamicSec) { if (DynamicSec->sh_addr + DynamicSec->sh_size > DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz || DynamicSec->sh_addr < DynamicPhdr->p_vaddr) reportUniqueWarning(describe(*DynamicSec) + " is not contained within the " "PT_DYNAMIC segment"); if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr) reportUniqueWarning(describe(*DynamicSec) + " is not at the start of " "PT_DYNAMIC segment"); } return std::make_pair(DynamicPhdr, DynamicSec); } template void ELFDumper::loadDynamicTable() { const Elf_Phdr *DynamicPhdr; const Elf_Shdr *DynamicSec; std::tie(DynamicPhdr, DynamicSec) = findDynamic(); if (!DynamicPhdr && !DynamicSec) return; DynRegionInfo FromPhdr(ObjF, *this); bool IsPhdrTableValid = false; if (DynamicPhdr) { // Use cantFail(), because p_offset/p_filesz fields of a PT_DYNAMIC are // validated in findDynamic() and so createDRI() is not expected to fail. FromPhdr = cantFail(createDRI(DynamicPhdr->p_offset, DynamicPhdr->p_filesz, sizeof(Elf_Dyn))); FromPhdr.SizePrintName = "PT_DYNAMIC size"; FromPhdr.EntSizePrintName = ""; IsPhdrTableValid = !FromPhdr.template getAsArrayRef().empty(); } // Locate the dynamic table described in a section header. // Ignore sh_entsize and use the expected value for entry size explicitly. // This allows us to dump dynamic sections with a broken sh_entsize // field. DynRegionInfo FromSec(ObjF, *this); bool IsSecTableValid = false; if (DynamicSec) { Expected RegOrErr = createDRI(DynamicSec->sh_offset, DynamicSec->sh_size, sizeof(Elf_Dyn)); if (RegOrErr) { FromSec = *RegOrErr; FromSec.Context = describe(*DynamicSec); FromSec.EntSizePrintName = ""; IsSecTableValid = !FromSec.template getAsArrayRef().empty(); } else { reportUniqueWarning("unable to read the dynamic table from " + describe(*DynamicSec) + ": " + toString(RegOrErr.takeError())); } } // When we only have information from one of the SHT_DYNAMIC section header or // PT_DYNAMIC program header, just use that. if (!DynamicPhdr || !DynamicSec) { if ((DynamicPhdr && IsPhdrTableValid) || (DynamicSec && IsSecTableValid)) { DynamicTable = DynamicPhdr ? FromPhdr : FromSec; parseDynamicTable(); } else { reportUniqueWarning("no valid dynamic table was found"); } return; } // At this point we have tables found from the section header and from the // dynamic segment. Usually they match, but we have to do sanity checks to // verify that. if (FromPhdr.Addr != FromSec.Addr) reportUniqueWarning("SHT_DYNAMIC section header and PT_DYNAMIC " "program header disagree about " "the location of the dynamic table"); if (!IsPhdrTableValid && !IsSecTableValid) { reportUniqueWarning("no valid dynamic table was found"); return; } // Information in the PT_DYNAMIC program header has priority over the // information in a section header. 
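  // At run time the dynamic loader only sees the program headers (section
  // headers may be stripped entirely), so when both views are present and
  // valid the PT_DYNAMIC one is treated as authoritative.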
if (IsPhdrTableValid) { if (!IsSecTableValid) reportUniqueWarning( "SHT_DYNAMIC dynamic table is invalid: PT_DYNAMIC will be used"); DynamicTable = FromPhdr; } else { reportUniqueWarning( "PT_DYNAMIC dynamic table is invalid: SHT_DYNAMIC will be used"); DynamicTable = FromSec; } parseDynamicTable(); } template ELFDumper::ELFDumper(const object::ELFObjectFile &O, ScopedPrinter &Writer) : ObjDumper(Writer, O.getFileName()), ObjF(O), Obj(O.getELFFile()), FileName(O.getFileName()), DynRelRegion(O, *this), DynRelaRegion(O, *this), DynRelrRegion(O, *this), DynPLTRelRegion(O, *this), DynSymTabShndxRegion(O, *this), DynamicTable(O, *this) { if (!O.IsContentValid()) return; typename ELFT::ShdrRange Sections = cantFail(Obj.sections()); for (const Elf_Shdr &Sec : Sections) { switch (Sec.sh_type) { case ELF::SHT_SYMTAB: if (!DotSymtabSec) DotSymtabSec = &Sec; break; case ELF::SHT_DYNSYM: if (!DotDynsymSec) DotDynsymSec = &Sec; if (!DynSymRegion) { Expected RegOrErr = createDRI(Sec.sh_offset, Sec.sh_size, Sec.sh_entsize); if (RegOrErr) { DynSymRegion = *RegOrErr; DynSymRegion->Context = describe(Sec); if (Expected E = Obj.getStringTableForSymtab(Sec)) DynamicStringTable = *E; else reportUniqueWarning("unable to get the string table for the " + describe(Sec) + ": " + toString(E.takeError())); } else { reportUniqueWarning("unable to read dynamic symbols from " + describe(Sec) + ": " + toString(RegOrErr.takeError())); } } break; case ELF::SHT_SYMTAB_SHNDX: { uint32_t SymtabNdx = Sec.sh_link; if (SymtabNdx >= Sections.size()) { reportUniqueWarning( "unable to get the associated symbol table for " + describe(Sec) + ": sh_link (" + Twine(SymtabNdx) + ") is greater than or equal to the total number of sections (" + Twine(Sections.size()) + ")"); continue; } if (Expected> ShndxTableOrErr = Obj.getSHNDXTable(Sec)) { if (!ShndxTables.insert({&Sections[SymtabNdx], *ShndxTableOrErr}) .second) reportUniqueWarning( "multiple SHT_SYMTAB_SHNDX sections are linked to " + describe(Sec)); } else { reportUniqueWarning(ShndxTableOrErr.takeError()); } break; } case ELF::SHT_GNU_versym: if (!SymbolVersionSection) SymbolVersionSection = &Sec; break; case ELF::SHT_GNU_verdef: if (!SymbolVersionDefSection) SymbolVersionDefSection = &Sec; break; case ELF::SHT_GNU_verneed: if (!SymbolVersionNeedSection) SymbolVersionNeedSection = &Sec; break; case ELF::SHT_LLVM_ADDRSIG: if (!DotAddrsigSec) DotAddrsigSec = &Sec; break; } } loadDynamicTable(); } template void ELFDumper::parseDynamicTable() { auto toMappedAddr = [&](uint64_t Tag, uint64_t VAddr) -> const uint8_t * { auto MappedAddrOrError = Obj.toMappedAddr(VAddr, [&](const Twine &Msg) { this->reportUniqueWarning(Msg); return Error::success(); }); if (!MappedAddrOrError) { this->reportUniqueWarning("unable to parse DT_" + Obj.getDynamicTagAsString(Tag) + ": " + llvm::toString(MappedAddrOrError.takeError())); return nullptr; } return MappedAddrOrError.get(); }; const char *StringTableBegin = nullptr; uint64_t StringTableSize = 0; std::optional DynSymFromTable; for (const Elf_Dyn &Dyn : dynamic_table()) { if (Obj.getHeader().e_machine == EM_AARCH64) { switch (Dyn.d_tag) { case ELF::DT_AARCH64_AUTH_RELRSZ: DynRelrRegion.Size = Dyn.getVal(); DynRelrRegion.SizePrintName = "DT_AARCH64_AUTH_RELRSZ value"; continue; case ELF::DT_AARCH64_AUTH_RELRENT: DynRelrRegion.EntSize = Dyn.getVal(); DynRelrRegion.EntSizePrintName = "DT_AARCH64_AUTH_RELRENT value"; continue; } } switch (Dyn.d_tag) { case ELF::DT_HASH: HashTable = reinterpret_cast( toMappedAddr(Dyn.getTag(), Dyn.getPtr())); break; case 
ELF::DT_GNU_HASH: GnuHashTable = reinterpret_cast( toMappedAddr(Dyn.getTag(), Dyn.getPtr())); break; case ELF::DT_STRTAB: StringTableBegin = reinterpret_cast( toMappedAddr(Dyn.getTag(), Dyn.getPtr())); break; case ELF::DT_STRSZ: StringTableSize = Dyn.getVal(); break; case ELF::DT_SYMTAB: { // If we can't map the DT_SYMTAB value to an address (e.g. when there are // no program headers), we ignore its value. if (const uint8_t *VA = toMappedAddr(Dyn.getTag(), Dyn.getPtr())) { DynSymFromTable.emplace(ObjF, *this); DynSymFromTable->Addr = VA; DynSymFromTable->EntSize = sizeof(Elf_Sym); DynSymFromTable->EntSizePrintName = ""; } break; } case ELF::DT_SYMENT: { uint64_t Val = Dyn.getVal(); if (Val != sizeof(Elf_Sym)) this->reportUniqueWarning("DT_SYMENT value of 0x" + Twine::utohexstr(Val) + " is not the size of a symbol (0x" + Twine::utohexstr(sizeof(Elf_Sym)) + ")"); break; } case ELF::DT_RELA: DynRelaRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); break; case ELF::DT_RELASZ: DynRelaRegion.Size = Dyn.getVal(); DynRelaRegion.SizePrintName = "DT_RELASZ value"; break; case ELF::DT_RELAENT: DynRelaRegion.EntSize = Dyn.getVal(); DynRelaRegion.EntSizePrintName = "DT_RELAENT value"; break; case ELF::DT_SONAME: SONameOffset = Dyn.getVal(); break; case ELF::DT_REL: DynRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); break; case ELF::DT_RELSZ: DynRelRegion.Size = Dyn.getVal(); DynRelRegion.SizePrintName = "DT_RELSZ value"; break; case ELF::DT_RELENT: DynRelRegion.EntSize = Dyn.getVal(); DynRelRegion.EntSizePrintName = "DT_RELENT value"; break; case ELF::DT_RELR: case ELF::DT_ANDROID_RELR: case ELF::DT_AARCH64_AUTH_RELR: DynRelrRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); break; case ELF::DT_RELRSZ: case ELF::DT_ANDROID_RELRSZ: case ELF::DT_AARCH64_AUTH_RELRSZ: DynRelrRegion.Size = Dyn.getVal(); DynRelrRegion.SizePrintName = Dyn.d_tag == ELF::DT_RELRSZ ? "DT_RELRSZ value" : "DT_ANDROID_RELRSZ value"; break; case ELF::DT_RELRENT: case ELF::DT_ANDROID_RELRENT: case ELF::DT_AARCH64_AUTH_RELRENT: DynRelrRegion.EntSize = Dyn.getVal(); DynRelrRegion.EntSizePrintName = Dyn.d_tag == ELF::DT_RELRENT ? 
"DT_RELRENT value" : "DT_ANDROID_RELRENT value"; break; case ELF::DT_PLTREL: if (Dyn.getVal() == DT_REL) DynPLTRelRegion.EntSize = sizeof(Elf_Rel); else if (Dyn.getVal() == DT_RELA) DynPLTRelRegion.EntSize = sizeof(Elf_Rela); else reportUniqueWarning(Twine("unknown DT_PLTREL value of ") + Twine((uint64_t)Dyn.getVal())); DynPLTRelRegion.EntSizePrintName = "PLTREL entry size"; break; case ELF::DT_JMPREL: DynPLTRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); break; case ELF::DT_PLTRELSZ: DynPLTRelRegion.Size = Dyn.getVal(); DynPLTRelRegion.SizePrintName = "DT_PLTRELSZ value"; break; case ELF::DT_SYMTAB_SHNDX: DynSymTabShndxRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); DynSymTabShndxRegion.EntSize = sizeof(Elf_Word); break; } } if (StringTableBegin) { const uint64_t FileSize = Obj.getBufSize(); const uint64_t Offset = (const uint8_t *)StringTableBegin - Obj.base(); if (StringTableSize > FileSize - Offset) reportUniqueWarning( "the dynamic string table at 0x" + Twine::utohexstr(Offset) + " goes past the end of the file (0x" + Twine::utohexstr(FileSize) + ") with DT_STRSZ = 0x" + Twine::utohexstr(StringTableSize)); else DynamicStringTable = StringRef(StringTableBegin, StringTableSize); } const bool IsHashTableSupported = getHashTableEntSize() == 4; if (DynSymRegion) { // Often we find the information about the dynamic symbol table // location in the SHT_DYNSYM section header. However, the value in // DT_SYMTAB has priority, because it is used by dynamic loaders to // locate .dynsym at runtime. The location we find in the section header // and the location we find here should match. if (DynSymFromTable && DynSymFromTable->Addr != DynSymRegion->Addr) reportUniqueWarning( createError("SHT_DYNSYM section header and DT_SYMTAB disagree about " "the location of the dynamic symbol table")); // According to the ELF gABI: "The number of symbol table entries should // equal nchain". Check to see if the DT_HASH hash table nchain value // conflicts with the number of symbols in the dynamic symbol table // according to the section header. if (HashTable && IsHashTableSupported) { if (DynSymRegion->EntSize == 0) reportUniqueWarning("SHT_DYNSYM section has sh_entsize == 0"); else if (HashTable->nchain != DynSymRegion->Size / DynSymRegion->EntSize) reportUniqueWarning( "hash table nchain (" + Twine(HashTable->nchain) + ") differs from symbol count derived from SHT_DYNSYM section " "header (" + Twine(DynSymRegion->Size / DynSymRegion->EntSize) + ")"); } } // Delay the creation of the actual dynamic symbol table until now, so that // checks can always be made against the section header-based properties, // without worrying about tag order. if (DynSymFromTable) { if (!DynSymRegion) { DynSymRegion = DynSymFromTable; } else { DynSymRegion->Addr = DynSymFromTable->Addr; DynSymRegion->EntSize = DynSymFromTable->EntSize; DynSymRegion->EntSizePrintName = DynSymFromTable->EntSizePrintName; } } // Derive the dynamic symbol table size from the DT_HASH hash table, if // present. 
if (HashTable && IsHashTableSupported && DynSymRegion) { const uint64_t FileSize = Obj.getBufSize(); const uint64_t DerivedSize = (uint64_t)HashTable->nchain * DynSymRegion->EntSize; const uint64_t Offset = (const uint8_t *)DynSymRegion->Addr - Obj.base(); if (DerivedSize > FileSize - Offset) reportUniqueWarning( "the size (0x" + Twine::utohexstr(DerivedSize) + ") of the dynamic symbol table at 0x" + Twine::utohexstr(Offset) + ", derived from the hash table, goes past the end of the file (0x" + Twine::utohexstr(FileSize) + ") and will be ignored"); else DynSymRegion->Size = HashTable->nchain * DynSymRegion->EntSize; } } template void ELFDumper::printVersionInfo() { // Dump version symbol section. printVersionSymbolSection(SymbolVersionSection); // Dump version definition section. printVersionDefinitionSection(SymbolVersionDefSection); // Dump version dependency section. printVersionDependencySection(SymbolVersionNeedSection); } #define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \ { #enum, prefix##_##enum } const EnumEntry ElfDynamicDTFlags[] = { LLVM_READOBJ_DT_FLAG_ENT(DF, ORIGIN), LLVM_READOBJ_DT_FLAG_ENT(DF, SYMBOLIC), LLVM_READOBJ_DT_FLAG_ENT(DF, TEXTREL), LLVM_READOBJ_DT_FLAG_ENT(DF, BIND_NOW), LLVM_READOBJ_DT_FLAG_ENT(DF, STATIC_TLS) }; const EnumEntry ElfDynamicDTFlags1[] = { LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOW), LLVM_READOBJ_DT_FLAG_ENT(DF_1, GLOBAL), LLVM_READOBJ_DT_FLAG_ENT(DF_1, GROUP), LLVM_READOBJ_DT_FLAG_ENT(DF_1, NODELETE), LLVM_READOBJ_DT_FLAG_ENT(DF_1, LOADFLTR), LLVM_READOBJ_DT_FLAG_ENT(DF_1, INITFIRST), LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOOPEN), LLVM_READOBJ_DT_FLAG_ENT(DF_1, ORIGIN), LLVM_READOBJ_DT_FLAG_ENT(DF_1, DIRECT), LLVM_READOBJ_DT_FLAG_ENT(DF_1, TRANS), LLVM_READOBJ_DT_FLAG_ENT(DF_1, INTERPOSE), LLVM_READOBJ_DT_FLAG_ENT(DF_1, NODEFLIB), LLVM_READOBJ_DT_FLAG_ENT(DF_1, NODUMP), LLVM_READOBJ_DT_FLAG_ENT(DF_1, CONFALT), LLVM_READOBJ_DT_FLAG_ENT(DF_1, ENDFILTEE), LLVM_READOBJ_DT_FLAG_ENT(DF_1, DISPRELDNE), LLVM_READOBJ_DT_FLAG_ENT(DF_1, DISPRELPND), LLVM_READOBJ_DT_FLAG_ENT(DF_1, NODIRECT), LLVM_READOBJ_DT_FLAG_ENT(DF_1, IGNMULDEF), LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOKSYMS), LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOHDR), LLVM_READOBJ_DT_FLAG_ENT(DF_1, EDITED), LLVM_READOBJ_DT_FLAG_ENT(DF_1, NORELOC), LLVM_READOBJ_DT_FLAG_ENT(DF_1, SYMINTPOSE), LLVM_READOBJ_DT_FLAG_ENT(DF_1, GLOBAUDIT), LLVM_READOBJ_DT_FLAG_ENT(DF_1, SINGLETON), LLVM_READOBJ_DT_FLAG_ENT(DF_1, PIE), }; const EnumEntry ElfDynamicDTMipsFlags[] = { LLVM_READOBJ_DT_FLAG_ENT(RHF, NONE), LLVM_READOBJ_DT_FLAG_ENT(RHF, QUICKSTART), LLVM_READOBJ_DT_FLAG_ENT(RHF, NOTPOT), LLVM_READOBJ_DT_FLAG_ENT(RHS, NO_LIBRARY_REPLACEMENT), LLVM_READOBJ_DT_FLAG_ENT(RHF, NO_MOVE), LLVM_READOBJ_DT_FLAG_ENT(RHF, SGI_ONLY), LLVM_READOBJ_DT_FLAG_ENT(RHF, GUARANTEE_INIT), LLVM_READOBJ_DT_FLAG_ENT(RHF, DELTA_C_PLUS_PLUS), LLVM_READOBJ_DT_FLAG_ENT(RHF, GUARANTEE_START_INIT), LLVM_READOBJ_DT_FLAG_ENT(RHF, PIXIE), LLVM_READOBJ_DT_FLAG_ENT(RHF, DEFAULT_DELAY_LOAD), LLVM_READOBJ_DT_FLAG_ENT(RHF, REQUICKSTART), LLVM_READOBJ_DT_FLAG_ENT(RHF, REQUICKSTARTED), LLVM_READOBJ_DT_FLAG_ENT(RHF, CORD), LLVM_READOBJ_DT_FLAG_ENT(RHF, NO_UNRES_UNDEF), LLVM_READOBJ_DT_FLAG_ENT(RHF, RLD_ORDER_SAFE) }; #undef LLVM_READOBJ_DT_FLAG_ENT template void printFlags(T Value, ArrayRef> Flags, raw_ostream &OS) { SmallVector, 10> SetFlags; for (const EnumEntry &Flag : Flags) if (Flag.Value != 0 && (Value & Flag.Value) == Flag.Value) SetFlags.push_back(Flag); for (const EnumEntry &Flag : SetFlags) OS << Flag.Name << " "; } template const typename ELFT::Shdr * 
ELFDumper::findSectionByName(StringRef Name) const { for (const Elf_Shdr &Shdr : cantFail(Obj.sections())) { if (Expected NameOrErr = Obj.getSectionName(Shdr)) { if (*NameOrErr == Name) return &Shdr; } else { reportUniqueWarning("unable to read the name of " + describe(Shdr) + ": " + toString(NameOrErr.takeError())); } } return nullptr; } template std::string ELFDumper::getDynamicEntry(uint64_t Type, uint64_t Value) const { auto FormatHexValue = [](uint64_t V) { std::string Str; raw_string_ostream OS(Str); const char *ConvChar = (opts::Output == opts::GNU) ? "0x%" PRIx64 : "0x%" PRIX64; OS << format(ConvChar, V); return OS.str(); }; auto FormatFlags = [](uint64_t V, llvm::ArrayRef> Array) { std::string Str; raw_string_ostream OS(Str); printFlags(V, Array, OS); return OS.str(); }; // Handle custom printing of architecture specific tags switch (Obj.getHeader().e_machine) { case EM_AARCH64: switch (Type) { case DT_AARCH64_BTI_PLT: case DT_AARCH64_PAC_PLT: case DT_AARCH64_VARIANT_PCS: case DT_AARCH64_MEMTAG_GLOBALSSZ: return std::to_string(Value); case DT_AARCH64_MEMTAG_MODE: switch (Value) { case 0: return "Synchronous (0)"; case 1: return "Asynchronous (1)"; default: return (Twine("Unknown (") + Twine(Value) + ")").str(); } case DT_AARCH64_MEMTAG_HEAP: case DT_AARCH64_MEMTAG_STACK: switch (Value) { case 0: return "Disabled (0)"; case 1: return "Enabled (1)"; default: return (Twine("Unknown (") + Twine(Value) + ")").str(); } case DT_AARCH64_MEMTAG_GLOBALS: return (Twine("0x") + utohexstr(Value, /*LowerCase=*/true)).str(); default: break; } break; case EM_HEXAGON: switch (Type) { case DT_HEXAGON_VER: return std::to_string(Value); case DT_HEXAGON_SYMSZ: case DT_HEXAGON_PLT: return FormatHexValue(Value); default: break; } break; case EM_MIPS: switch (Type) { case DT_MIPS_RLD_VERSION: case DT_MIPS_LOCAL_GOTNO: case DT_MIPS_SYMTABNO: case DT_MIPS_UNREFEXTNO: return std::to_string(Value); case DT_MIPS_TIME_STAMP: case DT_MIPS_ICHECKSUM: case DT_MIPS_IVERSION: case DT_MIPS_BASE_ADDRESS: case DT_MIPS_MSYM: case DT_MIPS_CONFLICT: case DT_MIPS_LIBLIST: case DT_MIPS_CONFLICTNO: case DT_MIPS_LIBLISTNO: case DT_MIPS_GOTSYM: case DT_MIPS_HIPAGENO: case DT_MIPS_RLD_MAP: case DT_MIPS_DELTA_CLASS: case DT_MIPS_DELTA_CLASS_NO: case DT_MIPS_DELTA_INSTANCE: case DT_MIPS_DELTA_RELOC: case DT_MIPS_DELTA_RELOC_NO: case DT_MIPS_DELTA_SYM: case DT_MIPS_DELTA_SYM_NO: case DT_MIPS_DELTA_CLASSSYM: case DT_MIPS_DELTA_CLASSSYM_NO: case DT_MIPS_CXX_FLAGS: case DT_MIPS_PIXIE_INIT: case DT_MIPS_SYMBOL_LIB: case DT_MIPS_LOCALPAGE_GOTIDX: case DT_MIPS_LOCAL_GOTIDX: case DT_MIPS_HIDDEN_GOTIDX: case DT_MIPS_PROTECTED_GOTIDX: case DT_MIPS_OPTIONS: case DT_MIPS_INTERFACE: case DT_MIPS_DYNSTR_ALIGN: case DT_MIPS_INTERFACE_SIZE: case DT_MIPS_RLD_TEXT_RESOLVE_ADDR: case DT_MIPS_PERF_SUFFIX: case DT_MIPS_COMPACT_SIZE: case DT_MIPS_GP_VALUE: case DT_MIPS_AUX_DYNAMIC: case DT_MIPS_PLTGOT: case DT_MIPS_RWPLT: case DT_MIPS_RLD_MAP_REL: case DT_MIPS_XHASH: return FormatHexValue(Value); case DT_MIPS_FLAGS: return FormatFlags(Value, ArrayRef(ElfDynamicDTMipsFlags)); default: break; } break; default: break; } switch (Type) { case DT_PLTREL: if (Value == DT_REL) return "REL"; if (Value == DT_RELA) return "RELA"; [[fallthrough]]; case DT_PLTGOT: case DT_HASH: case DT_STRTAB: case DT_SYMTAB: case DT_RELA: case DT_INIT: case DT_FINI: case DT_REL: case DT_JMPREL: case DT_INIT_ARRAY: case DT_FINI_ARRAY: case DT_PREINIT_ARRAY: case DT_DEBUG: case DT_VERDEF: case DT_VERNEED: case DT_VERSYM: case DT_GNU_HASH: case DT_NULL: return 
FormatHexValue(Value); case DT_RELACOUNT: case DT_RELCOUNT: case DT_VERDEFNUM: case DT_VERNEEDNUM: return std::to_string(Value); case DT_PLTRELSZ: case DT_RELASZ: case DT_RELAENT: case DT_STRSZ: case DT_SYMENT: case DT_RELSZ: case DT_RELENT: case DT_INIT_ARRAYSZ: case DT_FINI_ARRAYSZ: case DT_PREINIT_ARRAYSZ: case DT_RELRSZ: case DT_RELRENT: case DT_AARCH64_AUTH_RELRSZ: case DT_AARCH64_AUTH_RELRENT: case DT_ANDROID_RELSZ: case DT_ANDROID_RELASZ: return std::to_string(Value) + " (bytes)"; case DT_NEEDED: case DT_SONAME: case DT_AUXILIARY: case DT_USED: case DT_FILTER: case DT_RPATH: case DT_RUNPATH: { const std::map TagNames = { {DT_NEEDED, "Shared library"}, {DT_SONAME, "Library soname"}, {DT_AUXILIARY, "Auxiliary library"}, {DT_USED, "Not needed object"}, {DT_FILTER, "Filter library"}, {DT_RPATH, "Library rpath"}, {DT_RUNPATH, "Library runpath"}, }; return (Twine(TagNames.at(Type)) + ": [" + getDynamicString(Value) + "]") .str(); } case DT_FLAGS: return FormatFlags(Value, ArrayRef(ElfDynamicDTFlags)); case DT_FLAGS_1: return FormatFlags(Value, ArrayRef(ElfDynamicDTFlags1)); default: return FormatHexValue(Value); } } template StringRef ELFDumper::getDynamicString(uint64_t Value) const { if (DynamicStringTable.empty() && !DynamicStringTable.data()) { reportUniqueWarning("string table was not found"); return ""; } auto WarnAndReturn = [this](const Twine &Msg, uint64_t Offset) { reportUniqueWarning("string table at offset 0x" + Twine::utohexstr(Offset) + Msg); return ""; }; const uint64_t FileSize = Obj.getBufSize(); const uint64_t Offset = (const uint8_t *)DynamicStringTable.data() - Obj.base(); if (DynamicStringTable.size() > FileSize - Offset) return WarnAndReturn(" with size 0x" + Twine::utohexstr(DynamicStringTable.size()) + " goes past the end of the file (0x" + Twine::utohexstr(FileSize) + ")", Offset); if (Value >= DynamicStringTable.size()) return WarnAndReturn( ": unable to read the string at 0x" + Twine::utohexstr(Offset + Value) + ": it goes past the end of the table (0x" + Twine::utohexstr(Offset + DynamicStringTable.size()) + ")", Offset); if (DynamicStringTable.back() != '\0') return WarnAndReturn(": unable to read the string at 0x" + Twine::utohexstr(Offset + Value) + ": the string table is not null-terminated", Offset); return DynamicStringTable.data() + Value; } template void ELFDumper::printUnwindInfo() { DwarfCFIEH::PrinterContext Ctx(W, ObjF); Ctx.printUnwindInformation(); } // The namespace is needed to fix the compilation with GCC older than 7.0+. 
namespace { template <> void ELFDumper::printUnwindInfo() { if (Obj.getHeader().e_machine == EM_ARM) { ARM::EHABI::PrinterContext Ctx(W, Obj, ObjF.getFileName(), DotSymtabSec); Ctx.PrintUnwindInformation(); } DwarfCFIEH::PrinterContext Ctx(W, ObjF); Ctx.printUnwindInformation(); } } // namespace template void ELFDumper::printNeededLibraries() { ListScope D(W, "NeededLibraries"); std::vector Libs; for (const auto &Entry : dynamic_table()) if (Entry.d_tag == ELF::DT_NEEDED) Libs.push_back(getDynamicString(Entry.d_un.d_val)); llvm::sort(Libs); for (StringRef L : Libs) W.printString(L); } template static Error checkHashTable(const ELFDumper &Dumper, const typename ELFT::Hash *H, bool *IsHeaderValid = nullptr) { const ELFFile &Obj = Dumper.getElfObject().getELFFile(); const uint64_t SecOffset = (const uint8_t *)H - Obj.base(); if (Dumper.getHashTableEntSize() == 8) { auto It = llvm::find_if(ElfMachineType, [&](const EnumEntry &E) { return E.Value == Obj.getHeader().e_machine; }); if (IsHeaderValid) *IsHeaderValid = false; return createError("the hash table at 0x" + Twine::utohexstr(SecOffset) + " is not supported: it contains non-standard 8 " "byte entries on " + It->AltName + " platform"); } auto MakeError = [&](const Twine &Msg = "") { return createError("the hash table at offset 0x" + Twine::utohexstr(SecOffset) + " goes past the end of the file (0x" + Twine::utohexstr(Obj.getBufSize()) + ")" + Msg); }; // Each SHT_HASH section starts from two 32-bit fields: nbucket and nchain. const unsigned HeaderSize = 2 * sizeof(typename ELFT::Word); if (IsHeaderValid) *IsHeaderValid = Obj.getBufSize() - SecOffset >= HeaderSize; if (Obj.getBufSize() - SecOffset < HeaderSize) return MakeError(); if (Obj.getBufSize() - SecOffset - HeaderSize < ((uint64_t)H->nbucket + H->nchain) * sizeof(typename ELFT::Word)) return MakeError(", nbucket = " + Twine(H->nbucket) + ", nchain = " + Twine(H->nchain)); return Error::success(); } template static Error checkGNUHashTable(const ELFFile &Obj, const typename ELFT::GnuHash *GnuHashTable, bool *IsHeaderValid = nullptr) { const uint8_t *TableData = reinterpret_cast(GnuHashTable); assert(TableData >= Obj.base() && TableData < Obj.base() + Obj.getBufSize() && "GnuHashTable must always point to a location inside the file"); uint64_t TableOffset = TableData - Obj.base(); if (IsHeaderValid) *IsHeaderValid = TableOffset + /*Header size:*/ 16 < Obj.getBufSize(); if (TableOffset + 16 + (uint64_t)GnuHashTable->nbuckets * 4 + (uint64_t)GnuHashTable->maskwords * sizeof(typename ELFT::Off) >= Obj.getBufSize()) return createError("unable to dump the SHT_GNU_HASH " "section at 0x" + Twine::utohexstr(TableOffset) + ": it goes past the end of the file"); return Error::success(); } template void ELFDumper::printHashTable() { DictScope D(W, "HashTable"); if (!HashTable) return; bool IsHeaderValid; Error Err = checkHashTable(*this, HashTable, &IsHeaderValid); if (IsHeaderValid) { W.printNumber("Num Buckets", HashTable->nbucket); W.printNumber("Num Chains", HashTable->nchain); } if (Err) { reportUniqueWarning(std::move(Err)); return; } W.printList("Buckets", HashTable->buckets()); W.printList("Chains", HashTable->chains()); } template static Expected> getGnuHashTableChains(std::optional DynSymRegion, const typename ELFT::GnuHash *GnuHashTable) { if (!DynSymRegion) return createError("no dynamic symbol table found"); ArrayRef DynSymTable = DynSymRegion->template getAsArrayRef(); size_t NumSyms = DynSymTable.size(); if (!NumSyms) return createError("the dynamic symbol table is empty"); if 
(GnuHashTable->symndx < NumSyms) return GnuHashTable->values(NumSyms); // A normal empty GNU hash table section produced by linker might have // symndx set to the number of dynamic symbols + 1 (for the zero symbol) // and have dummy null values in the Bloom filter and in the buckets // vector (or no values at all). It happens because the value of symndx is not // important for dynamic loaders when the GNU hash table is empty. They just // skip the whole object during symbol lookup. In such cases, the symndx value // is irrelevant and we should not report a warning. ArrayRef Buckets = GnuHashTable->buckets(); if (!llvm::all_of(Buckets, [](typename ELFT::Word V) { return V == 0; })) return createError( "the first hashed symbol index (" + Twine(GnuHashTable->symndx) + ") is greater than or equal to the number of dynamic symbols (" + Twine(NumSyms) + ")"); // There is no way to represent an array of (dynamic symbols count - symndx) // length. return ArrayRef(); } template void ELFDumper::printGnuHashTable() { DictScope D(W, "GnuHashTable"); if (!GnuHashTable) return; bool IsHeaderValid; Error Err = checkGNUHashTable(Obj, GnuHashTable, &IsHeaderValid); if (IsHeaderValid) { W.printNumber("Num Buckets", GnuHashTable->nbuckets); W.printNumber("First Hashed Symbol Index", GnuHashTable->symndx); W.printNumber("Num Mask Words", GnuHashTable->maskwords); W.printNumber("Shift Count", GnuHashTable->shift2); } if (Err) { reportUniqueWarning(std::move(Err)); return; } ArrayRef BloomFilter = GnuHashTable->filter(); W.printHexList("Bloom Filter", BloomFilter); ArrayRef Buckets = GnuHashTable->buckets(); W.printList("Buckets", Buckets); Expected> Chains = getGnuHashTableChains(DynSymRegion, GnuHashTable); if (!Chains) { reportUniqueWarning("unable to dump 'Values' for the SHT_GNU_HASH " "section: " + toString(Chains.takeError())); return; } W.printHexList("Values", *Chains); } template void ELFDumper::printHashHistograms() { // Print histogram for the .hash section. if (this->HashTable) { if (Error E = checkHashTable(*this, this->HashTable)) this->reportUniqueWarning(std::move(E)); else printHashHistogram(*this->HashTable); } // Print histogram for the .gnu.hash section. if (this->GnuHashTable) { if (Error E = checkGNUHashTable(this->Obj, this->GnuHashTable)) this->reportUniqueWarning(std::move(E)); else printGnuHashHistogram(*this->GnuHashTable); } } template void ELFDumper::printHashHistogram(const Elf_Hash &HashTable) const { size_t NBucket = HashTable.nbucket; size_t NChain = HashTable.nchain; ArrayRef Buckets = HashTable.buckets(); ArrayRef Chains = HashTable.chains(); size_t TotalSyms = 0; // If hash table is correct, we have at least chains with 0 length. size_t MaxChain = 1; if (NChain == 0 || NBucket == 0) return; std::vector ChainLen(NBucket, 0); // Go over all buckets and note chain lengths of each bucket (total // unique chain lengths). for (size_t B = 0; B < NBucket; ++B) { BitVector Visited(NChain); for (size_t C = Buckets[B]; C < NChain; C = Chains[C]) { if (C == ELF::STN_UNDEF) break; if (Visited[C]) { this->reportUniqueWarning( ".hash section is invalid: bucket " + Twine(C) + ": a cycle was detected in the linked chain"); break; } Visited[C] = true; if (MaxChain <= ++ChainLen[B]) ++MaxChain; } TotalSyms += ChainLen[B]; } if (!TotalSyms) return; std::vector Count(MaxChain, 0); // Count how long is the chain for each bucket. for (size_t B = 0; B < NBucket; B++) ++Count[ChainLen[B]]; // Print Number of buckets with each chain lengths and their cumulative // coverage of the symbols. 
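  // Count[N] is the number of buckets whose chain has length N; the stats
  // printer presumably reports, for each N, how many buckets fall in that bin
  // and the cumulative share of TotalSyms reachable within N chain steps.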
printHashHistogramStats(NBucket, MaxChain, TotalSyms, Count, /*IsGnu=*/false); } template void ELFDumper::printGnuHashHistogram( const Elf_GnuHash &GnuHashTable) const { Expected> ChainsOrErr = getGnuHashTableChains(this->DynSymRegion, &GnuHashTable); if (!ChainsOrErr) { this->reportUniqueWarning("unable to print the GNU hash table histogram: " + toString(ChainsOrErr.takeError())); return; } ArrayRef Chains = *ChainsOrErr; size_t Symndx = GnuHashTable.symndx; size_t TotalSyms = 0; size_t MaxChain = 1; size_t NBucket = GnuHashTable.nbuckets; if (Chains.empty() || NBucket == 0) return; ArrayRef Buckets = GnuHashTable.buckets(); std::vector ChainLen(NBucket, 0); for (size_t B = 0; B < NBucket; ++B) { if (!Buckets[B]) continue; size_t Len = 1; for (size_t C = Buckets[B] - Symndx; C < Chains.size() && (Chains[C] & 1) == 0; ++C) if (MaxChain < ++Len) ++MaxChain; ChainLen[B] = Len; TotalSyms += Len; } ++MaxChain; if (!TotalSyms) return; std::vector Count(MaxChain, 0); for (size_t B = 0; B < NBucket; ++B) ++Count[ChainLen[B]]; // Print Number of buckets with each chain lengths and their cumulative // coverage of the symbols. printHashHistogramStats(NBucket, MaxChain, TotalSyms, Count, /*IsGnu=*/true); } template void ELFDumper::printLoadName() { StringRef SOName = ""; if (SONameOffset) SOName = getDynamicString(*SONameOffset); W.printString("LoadName", SOName); } template void ELFDumper::printArchSpecificInfo() { switch (Obj.getHeader().e_machine) { case EM_ARM: if (Obj.isLE()) printAttributes(ELF::SHT_ARM_ATTRIBUTES, std::make_unique(&W), llvm::endianness::little); else reportUniqueWarning("attribute printing not implemented for big-endian " "ARM objects"); break; case EM_RISCV: if (Obj.isLE()) printAttributes(ELF::SHT_RISCV_ATTRIBUTES, std::make_unique(&W), llvm::endianness::little); else reportUniqueWarning("attribute printing not implemented for big-endian " "RISC-V objects"); break; case EM_MSP430: printAttributes(ELF::SHT_MSP430_ATTRIBUTES, std::make_unique(&W), llvm::endianness::little); break; case EM_MIPS: { printMipsABIFlags(); printMipsOptions(); printMipsReginfo(); MipsGOTParser Parser(*this); if (Error E = Parser.findGOT(dynamic_table(), dynamic_symbols())) reportUniqueWarning(std::move(E)); else if (!Parser.isGotEmpty()) printMipsGOT(Parser); if (Error E = Parser.findPLT(dynamic_table())) reportUniqueWarning(std::move(E)); else if (!Parser.isPltEmpty()) printMipsPLT(Parser); break; } default: break; } } template void ELFDumper::printAttributes( unsigned AttrShType, std::unique_ptr AttrParser, llvm::endianness Endianness) { assert((AttrShType != ELF::SHT_NULL) && AttrParser && "Incomplete ELF attribute implementation"); DictScope BA(W, "BuildAttributes"); for (const Elf_Shdr &Sec : cantFail(Obj.sections())) { if (Sec.sh_type != AttrShType) continue; ArrayRef Contents; if (Expected> ContentOrErr = Obj.getSectionContents(Sec)) { Contents = *ContentOrErr; if (Contents.empty()) { reportUniqueWarning("the " + describe(Sec) + " is empty"); continue; } } else { reportUniqueWarning("unable to read the content of the " + describe(Sec) + ": " + toString(ContentOrErr.takeError())); continue; } W.printHex("FormatVersion", Contents[0]); if (Error E = AttrParser->parse(Contents, Endianness)) reportUniqueWarning("unable to dump attributes from the " + describe(Sec) + ": " + toString(std::move(E))); } } namespace { template class MipsGOTParser { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) using Entry = typename ELFT::Addr; using Entries = ArrayRef; const bool IsStatic; const ELFFile &Obj; const 
ELFDumper &Dumper; MipsGOTParser(const ELFDumper &D); Error findGOT(Elf_Dyn_Range DynTable, Elf_Sym_Range DynSyms); Error findPLT(Elf_Dyn_Range DynTable); bool isGotEmpty() const { return GotEntries.empty(); } bool isPltEmpty() const { return PltEntries.empty(); } uint64_t getGp() const; const Entry *getGotLazyResolver() const; const Entry *getGotModulePointer() const; const Entry *getPltLazyResolver() const; const Entry *getPltModulePointer() const; Entries getLocalEntries() const; Entries getGlobalEntries() const; Entries getOtherEntries() const; Entries getPltEntries() const; uint64_t getGotAddress(const Entry * E) const; int64_t getGotOffset(const Entry * E) const; const Elf_Sym *getGotSym(const Entry *E) const; uint64_t getPltAddress(const Entry * E) const; const Elf_Sym *getPltSym(const Entry *E) const; StringRef getPltStrTable() const { return PltStrTable; } const Elf_Shdr *getPltSymTable() const { return PltSymTable; } private: const Elf_Shdr *GotSec; size_t LocalNum; size_t GlobalNum; const Elf_Shdr *PltSec; const Elf_Shdr *PltRelSec; const Elf_Shdr *PltSymTable; StringRef FileName; Elf_Sym_Range GotDynSyms; StringRef PltStrTable; Entries GotEntries; Entries PltEntries; }; } // end anonymous namespace template MipsGOTParser::MipsGOTParser(const ELFDumper &D) : IsStatic(D.dynamic_table().empty()), Obj(D.getElfObject().getELFFile()), Dumper(D), GotSec(nullptr), LocalNum(0), GlobalNum(0), PltSec(nullptr), PltRelSec(nullptr), PltSymTable(nullptr), FileName(D.getElfObject().getFileName()) {} template Error MipsGOTParser::findGOT(Elf_Dyn_Range DynTable, Elf_Sym_Range DynSyms) { // See "Global Offset Table" in Chapter 5 in the following document // for detailed GOT description. // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf // Find static GOT secton. if (IsStatic) { GotSec = Dumper.findSectionByName(".got"); if (!GotSec) return Error::success(); ArrayRef Content = unwrapOrError(FileName, Obj.getSectionContents(*GotSec)); GotEntries = Entries(reinterpret_cast(Content.data()), Content.size() / sizeof(Entry)); LocalNum = GotEntries.size(); return Error::success(); } // Lookup dynamic table tags which define the GOT layout. 
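  // Layout assumed here (standard MIPS ABI): GOT[0] holds the lazy-resolver
  // address, GOT[1] optionally holds the module pointer (most-significant bit
  // set), then DT_MIPS_LOCAL_GOTNO local entries in total, followed by one
  // global entry per dynamic symbol starting at index DT_MIPS_GOTSYM.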
std::optional DtPltGot; std::optional DtLocalGotNum; std::optional DtGotSym; for (const auto &Entry : DynTable) { switch (Entry.getTag()) { case ELF::DT_PLTGOT: DtPltGot = Entry.getVal(); break; case ELF::DT_MIPS_LOCAL_GOTNO: DtLocalGotNum = Entry.getVal(); break; case ELF::DT_MIPS_GOTSYM: DtGotSym = Entry.getVal(); break; } } if (!DtPltGot && !DtLocalGotNum && !DtGotSym) return Error::success(); if (!DtPltGot) return createError("cannot find PLTGOT dynamic tag"); if (!DtLocalGotNum) return createError("cannot find MIPS_LOCAL_GOTNO dynamic tag"); if (!DtGotSym) return createError("cannot find MIPS_GOTSYM dynamic tag"); size_t DynSymTotal = DynSyms.size(); if (*DtGotSym > DynSymTotal) return createError("DT_MIPS_GOTSYM value (" + Twine(*DtGotSym) + ") exceeds the number of dynamic symbols (" + Twine(DynSymTotal) + ")"); GotSec = findNotEmptySectionByAddress(Obj, FileName, *DtPltGot); if (!GotSec) return createError("there is no non-empty GOT section at 0x" + Twine::utohexstr(*DtPltGot)); LocalNum = *DtLocalGotNum; GlobalNum = DynSymTotal - *DtGotSym; ArrayRef Content = unwrapOrError(FileName, Obj.getSectionContents(*GotSec)); GotEntries = Entries(reinterpret_cast(Content.data()), Content.size() / sizeof(Entry)); GotDynSyms = DynSyms.drop_front(*DtGotSym); return Error::success(); } template Error MipsGOTParser::findPLT(Elf_Dyn_Range DynTable) { // Lookup dynamic table tags which define the PLT layout. std::optional DtMipsPltGot; std::optional DtJmpRel; for (const auto &Entry : DynTable) { switch (Entry.getTag()) { case ELF::DT_MIPS_PLTGOT: DtMipsPltGot = Entry.getVal(); break; case ELF::DT_JMPREL: DtJmpRel = Entry.getVal(); break; } } if (!DtMipsPltGot && !DtJmpRel) return Error::success(); // Find PLT section. if (!DtMipsPltGot) return createError("cannot find MIPS_PLTGOT dynamic tag"); if (!DtJmpRel) return createError("cannot find JMPREL dynamic tag"); PltSec = findNotEmptySectionByAddress(Obj, FileName, *DtMipsPltGot); if (!PltSec) return createError("there is no non-empty PLTGOT section at 0x" + Twine::utohexstr(*DtMipsPltGot)); PltRelSec = findNotEmptySectionByAddress(Obj, FileName, *DtJmpRel); if (!PltRelSec) return createError("there is no non-empty RELPLT section at 0x" + Twine::utohexstr(*DtJmpRel)); if (Expected> PltContentOrErr = Obj.getSectionContents(*PltSec)) PltEntries = Entries(reinterpret_cast(PltContentOrErr->data()), PltContentOrErr->size() / sizeof(Entry)); else return createError("unable to read PLTGOT section content: " + toString(PltContentOrErr.takeError())); if (Expected PltSymTableOrErr = Obj.getSection(PltRelSec->sh_link)) PltSymTable = *PltSymTableOrErr; else return createError("unable to get a symbol table linked to the " + describe(Obj, *PltRelSec) + ": " + toString(PltSymTableOrErr.takeError())); if (Expected StrTabOrErr = Obj.getStringTableForSymtab(*PltSymTable)) PltStrTable = *StrTabOrErr; else return createError("unable to get a string table for the " + describe(Obj, *PltSymTable) + ": " + toString(StrTabOrErr.takeError())); return Error::success(); } template uint64_t MipsGOTParser::getGp() const { return GotSec->sh_addr + 0x7ff0; } template const typename MipsGOTParser::Entry * MipsGOTParser::getGotLazyResolver() const { return LocalNum > 0 ? 
&GotEntries[0] : nullptr; } template const typename MipsGOTParser::Entry * MipsGOTParser::getGotModulePointer() const { if (LocalNum < 2) return nullptr; const Entry &E = GotEntries[1]; if ((E >> (sizeof(Entry) * 8 - 1)) == 0) return nullptr; return &E; } template typename MipsGOTParser::Entries MipsGOTParser::getLocalEntries() const { size_t Skip = getGotModulePointer() ? 2 : 1; if (LocalNum - Skip <= 0) return Entries(); return GotEntries.slice(Skip, LocalNum - Skip); } template typename MipsGOTParser::Entries MipsGOTParser::getGlobalEntries() const { if (GlobalNum == 0) return Entries(); return GotEntries.slice(LocalNum, GlobalNum); } template typename MipsGOTParser::Entries MipsGOTParser::getOtherEntries() const { size_t OtherNum = GotEntries.size() - LocalNum - GlobalNum; if (OtherNum == 0) return Entries(); return GotEntries.slice(LocalNum + GlobalNum, OtherNum); } template uint64_t MipsGOTParser::getGotAddress(const Entry *E) const { int64_t Offset = std::distance(GotEntries.data(), E) * sizeof(Entry); return GotSec->sh_addr + Offset; } template int64_t MipsGOTParser::getGotOffset(const Entry *E) const { int64_t Offset = std::distance(GotEntries.data(), E) * sizeof(Entry); return Offset - 0x7ff0; } template const typename MipsGOTParser::Elf_Sym * MipsGOTParser::getGotSym(const Entry *E) const { int64_t Offset = std::distance(GotEntries.data(), E); return &GotDynSyms[Offset - LocalNum]; } template const typename MipsGOTParser::Entry * MipsGOTParser::getPltLazyResolver() const { return PltEntries.empty() ? nullptr : &PltEntries[0]; } template const typename MipsGOTParser::Entry * MipsGOTParser::getPltModulePointer() const { return PltEntries.size() < 2 ? nullptr : &PltEntries[1]; } template typename MipsGOTParser::Entries MipsGOTParser::getPltEntries() const { if (PltEntries.size() <= 2) return Entries(); return PltEntries.slice(2, PltEntries.size() - 2); } template uint64_t MipsGOTParser::getPltAddress(const Entry *E) const { int64_t Offset = std::distance(PltEntries.data(), E) * sizeof(Entry); return PltSec->sh_addr + Offset; } template const typename MipsGOTParser::Elf_Sym * MipsGOTParser::getPltSym(const Entry *E) const { int64_t Offset = std::distance(getPltEntries().data(), E); if (PltRelSec->sh_type == ELF::SHT_REL) { Elf_Rel_Range Rels = unwrapOrError(FileName, Obj.rels(*PltRelSec)); return unwrapOrError(FileName, Obj.getRelocationSymbol(Rels[Offset], PltSymTable)); } else { Elf_Rela_Range Rels = unwrapOrError(FileName, Obj.relas(*PltRelSec)); return unwrapOrError(FileName, Obj.getRelocationSymbol(Rels[Offset], PltSymTable)); } } const EnumEntry ElfMipsISAExtType[] = { {"None", Mips::AFL_EXT_NONE}, {"Broadcom SB-1", Mips::AFL_EXT_SB1}, {"Cavium Networks Octeon", Mips::AFL_EXT_OCTEON}, {"Cavium Networks Octeon2", Mips::AFL_EXT_OCTEON2}, {"Cavium Networks OcteonP", Mips::AFL_EXT_OCTEONP}, {"Cavium Networks Octeon3", Mips::AFL_EXT_OCTEON3}, {"LSI R4010", Mips::AFL_EXT_4010}, {"Loongson 2E", Mips::AFL_EXT_LOONGSON_2E}, {"Loongson 2F", Mips::AFL_EXT_LOONGSON_2F}, {"Loongson 3A", Mips::AFL_EXT_LOONGSON_3A}, {"MIPS R4650", Mips::AFL_EXT_4650}, {"MIPS R5900", Mips::AFL_EXT_5900}, {"MIPS R10000", Mips::AFL_EXT_10000}, {"NEC VR4100", Mips::AFL_EXT_4100}, {"NEC VR4111/VR4181", Mips::AFL_EXT_4111}, {"NEC VR4120", Mips::AFL_EXT_4120}, {"NEC VR5400", Mips::AFL_EXT_5400}, {"NEC VR5500", Mips::AFL_EXT_5500}, {"RMI Xlr", Mips::AFL_EXT_XLR}, {"Toshiba R3900", Mips::AFL_EXT_3900} }; const EnumEntry ElfMipsASEFlags[] = { {"DSP", Mips::AFL_ASE_DSP}, {"DSPR2", Mips::AFL_ASE_DSPR2}, {"Enhanced VA 
Scheme", Mips::AFL_ASE_EVA}, {"MCU", Mips::AFL_ASE_MCU}, {"MDMX", Mips::AFL_ASE_MDMX}, {"MIPS-3D", Mips::AFL_ASE_MIPS3D}, {"MT", Mips::AFL_ASE_MT}, {"SmartMIPS", Mips::AFL_ASE_SMARTMIPS}, {"VZ", Mips::AFL_ASE_VIRT}, {"MSA", Mips::AFL_ASE_MSA}, {"MIPS16", Mips::AFL_ASE_MIPS16}, {"microMIPS", Mips::AFL_ASE_MICROMIPS}, {"XPA", Mips::AFL_ASE_XPA}, {"CRC", Mips::AFL_ASE_CRC}, {"GINV", Mips::AFL_ASE_GINV}, }; const EnumEntry ElfMipsFpABIType[] = { {"Hard or soft float", Mips::Val_GNU_MIPS_ABI_FP_ANY}, {"Hard float (double precision)", Mips::Val_GNU_MIPS_ABI_FP_DOUBLE}, {"Hard float (single precision)", Mips::Val_GNU_MIPS_ABI_FP_SINGLE}, {"Soft float", Mips::Val_GNU_MIPS_ABI_FP_SOFT}, {"Hard float (MIPS32r2 64-bit FPU 12 callee-saved)", Mips::Val_GNU_MIPS_ABI_FP_OLD_64}, {"Hard float (32-bit CPU, Any FPU)", Mips::Val_GNU_MIPS_ABI_FP_XX}, {"Hard float (32-bit CPU, 64-bit FPU)", Mips::Val_GNU_MIPS_ABI_FP_64}, {"Hard float compat (32-bit CPU, 64-bit FPU)", Mips::Val_GNU_MIPS_ABI_FP_64A} }; static const EnumEntry ElfMipsFlags1[] { {"ODDSPREG", Mips::AFL_FLAGS1_ODDSPREG}, }; static int getMipsRegisterSize(uint8_t Flag) { switch (Flag) { case Mips::AFL_REG_NONE: return 0; case Mips::AFL_REG_32: return 32; case Mips::AFL_REG_64: return 64; case Mips::AFL_REG_128: return 128; default: return -1; } } template static void printMipsReginfoData(ScopedPrinter &W, const Elf_Mips_RegInfo &Reginfo) { W.printHex("GP", Reginfo.ri_gp_value); W.printHex("General Mask", Reginfo.ri_gprmask); W.printHex("Co-Proc Mask0", Reginfo.ri_cprmask[0]); W.printHex("Co-Proc Mask1", Reginfo.ri_cprmask[1]); W.printHex("Co-Proc Mask2", Reginfo.ri_cprmask[2]); W.printHex("Co-Proc Mask3", Reginfo.ri_cprmask[3]); } template void ELFDumper::printMipsReginfo() { const Elf_Shdr *RegInfoSec = findSectionByName(".reginfo"); if (!RegInfoSec) { W.startLine() << "There is no .reginfo section in the file.\n"; return; } Expected> ContentsOrErr = Obj.getSectionContents(*RegInfoSec); if (!ContentsOrErr) { this->reportUniqueWarning( "unable to read the content of the .reginfo section (" + describe(*RegInfoSec) + "): " + toString(ContentsOrErr.takeError())); return; } if (ContentsOrErr->size() < sizeof(Elf_Mips_RegInfo)) { this->reportUniqueWarning("the .reginfo section has an invalid size (0x" + Twine::utohexstr(ContentsOrErr->size()) + ")"); return; } DictScope GS(W, "MIPS RegInfo"); printMipsReginfoData(W, *reinterpret_cast *>( ContentsOrErr->data())); } template static Expected *> readMipsOptions(const uint8_t *SecBegin, ArrayRef &SecData, bool &IsSupported) { if (SecData.size() < sizeof(Elf_Mips_Options)) return createError("the .MIPS.options section has an invalid size (0x" + Twine::utohexstr(SecData.size()) + ")"); const Elf_Mips_Options *O = reinterpret_cast *>(SecData.data()); const uint8_t Size = O->size; if (Size > SecData.size()) { const uint64_t Offset = SecData.data() - SecBegin; const uint64_t SecSize = Offset + SecData.size(); return createError("a descriptor of size 0x" + Twine::utohexstr(Size) + " at offset 0x" + Twine::utohexstr(Offset) + " goes past the end of the .MIPS.options " "section of size 0x" + Twine::utohexstr(SecSize)); } IsSupported = O->kind == ODK_REGINFO; const size_t ExpectedSize = sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo); if (IsSupported) if (Size < ExpectedSize) return createError( "a .MIPS.options entry of kind " + Twine(getElfMipsOptionsOdkType(O->kind)) + " has an invalid size (0x" + Twine::utohexstr(Size) + "), the expected size is 0x" + Twine::utohexstr(ExpectedSize)); SecData = 
SecData.drop_front(Size); return O; } template void ELFDumper::printMipsOptions() { const Elf_Shdr *MipsOpts = findSectionByName(".MIPS.options"); if (!MipsOpts) { W.startLine() << "There is no .MIPS.options section in the file.\n"; return; } DictScope GS(W, "MIPS Options"); ArrayRef Data = unwrapOrError(ObjF.getFileName(), Obj.getSectionContents(*MipsOpts)); const uint8_t *const SecBegin = Data.begin(); while (!Data.empty()) { bool IsSupported; Expected *> OptsOrErr = readMipsOptions(SecBegin, Data, IsSupported); if (!OptsOrErr) { reportUniqueWarning(OptsOrErr.takeError()); break; } unsigned Kind = (*OptsOrErr)->kind; const char *Type = getElfMipsOptionsOdkType(Kind); if (!IsSupported) { W.startLine() << "Unsupported MIPS options tag: " << Type << " (" << Kind << ")\n"; continue; } DictScope GS(W, Type); if (Kind == ODK_REGINFO) printMipsReginfoData(W, (*OptsOrErr)->getRegInfo()); else llvm_unreachable("unexpected .MIPS.options section descriptor kind"); } } template void ELFDumper::printStackMap() const { const Elf_Shdr *StackMapSection = findSectionByName(".llvm_stackmaps"); if (!StackMapSection) return; auto Warn = [&](Error &&E) { this->reportUniqueWarning("unable to read the stack map from " + describe(*StackMapSection) + ": " + toString(std::move(E))); }; Expected> ContentOrErr = Obj.getSectionContents(*StackMapSection); if (!ContentOrErr) { Warn(ContentOrErr.takeError()); return; } if (Error E = StackMapParser::validateHeader( *ContentOrErr)) { Warn(std::move(E)); return; } prettyPrintStackMap(W, StackMapParser(*ContentOrErr)); } template void ELFDumper::printReloc(const Relocation &R, unsigned RelIndex, const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { Expected> Target = getRelocationTarget(R, SymTab); if (!Target) reportUniqueWarning("unable to print relocation " + Twine(RelIndex) + " in " + describe(Sec) + ": " + toString(Target.takeError())); else printRelRelaReloc(R, *Target); } template std::vector> ELFDumper::getOtherFlagsFromSymbol(const Elf_Ehdr &Header, const Elf_Sym &Symbol) const { std::vector> SymOtherFlags(std::begin(ElfSymOtherFlags), std::end(ElfSymOtherFlags)); if (Header.e_machine == EM_MIPS) { // Someone in their infinite wisdom decided to make STO_MIPS_MIPS16 // flag overlap with other ST_MIPS_xxx flags. So consider both // cases separately. if ((Symbol.st_other & STO_MIPS_MIPS16) == STO_MIPS_MIPS16) SymOtherFlags.insert(SymOtherFlags.end(), std::begin(ElfMips16SymOtherFlags), std::end(ElfMips16SymOtherFlags)); else SymOtherFlags.insert(SymOtherFlags.end(), std::begin(ElfMipsSymOtherFlags), std::end(ElfMipsSymOtherFlags)); } else if (Header.e_machine == EM_AARCH64) { SymOtherFlags.insert(SymOtherFlags.end(), std::begin(ElfAArch64SymOtherFlags), std::end(ElfAArch64SymOtherFlags)); } else if (Header.e_machine == EM_RISCV) { SymOtherFlags.insert(SymOtherFlags.end(), std::begin(ElfRISCVSymOtherFlags), std::end(ElfRISCVSymOtherFlags)); } return SymOtherFlags; } static inline void printFields(formatted_raw_ostream &OS, StringRef Str1, StringRef Str2) { OS.PadToColumn(2u); OS << Str1; OS.PadToColumn(37u); OS << Str2 << "\n"; OS.flush(); } template static std::string getSectionHeadersNumString(const ELFFile &Obj, StringRef FileName) { const typename ELFT::Ehdr &ElfHeader = Obj.getHeader(); if (ElfHeader.e_shnum != 0) return to_string(ElfHeader.e_shnum); Expected> ArrOrErr = Obj.sections(); if (!ArrOrErr) { // In this case we can ignore an error, because we have already reported a // warning about the broken section header table earlier. 
consumeError(ArrOrErr.takeError()); return ""; } if (ArrOrErr->empty()) return "0"; return "0 (" + to_string((*ArrOrErr)[0].sh_size) + ")"; } template static std::string getSectionHeaderTableIndexString(const ELFFile &Obj, StringRef FileName) { const typename ELFT::Ehdr &ElfHeader = Obj.getHeader(); if (ElfHeader.e_shstrndx != SHN_XINDEX) return to_string(ElfHeader.e_shstrndx); Expected> ArrOrErr = Obj.sections(); if (!ArrOrErr) { // In this case we can ignore an error, because we have already reported a // warning about the broken section header table earlier. consumeError(ArrOrErr.takeError()); return ""; } if (ArrOrErr->empty()) return "65535 (corrupt: out of range)"; return to_string(ElfHeader.e_shstrndx) + " (" + to_string((*ArrOrErr)[0].sh_link) + ")"; } static const EnumEntry *getObjectFileEnumEntry(unsigned Type) { auto It = llvm::find_if(ElfObjectFileType, [&](const EnumEntry &E) { return E.Value == Type; }); if (It != ArrayRef(ElfObjectFileType).end()) return It; return nullptr; } template void GNUELFDumper::printFileSummary(StringRef FileStr, ObjectFile &Obj, ArrayRef InputFilenames, const Archive *A) { if (InputFilenames.size() > 1 || A) { this->W.startLine() << "\n"; this->W.printString("File", FileStr); } } template void GNUELFDumper::printFileHeaders() { const Elf_Ehdr &e = this->Obj.getHeader(); OS << "ELF Header:\n"; OS << " Magic: "; std::string Str; for (int i = 0; i < ELF::EI_NIDENT; i++) OS << format(" %02x", static_cast(e.e_ident[i])); OS << "\n"; Str = enumToString(e.e_ident[ELF::EI_CLASS], ArrayRef(ElfClass)); printFields(OS, "Class:", Str); Str = enumToString(e.e_ident[ELF::EI_DATA], ArrayRef(ElfDataEncoding)); printFields(OS, "Data:", Str); OS.PadToColumn(2u); OS << "Version:"; OS.PadToColumn(37u); OS << utohexstr(e.e_ident[ELF::EI_VERSION]); if (e.e_version == ELF::EV_CURRENT) OS << " (current)"; OS << "\n"; auto OSABI = ArrayRef(ElfOSABI); if (e.e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH && e.e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) { switch (e.e_machine) { case ELF::EM_AMDGPU: OSABI = ArrayRef(AMDGPUElfOSABI); break; default: break; } } Str = enumToString(e.e_ident[ELF::EI_OSABI], OSABI); printFields(OS, "OS/ABI:", Str); printFields(OS, "ABI Version:", std::to_string(e.e_ident[ELF::EI_ABIVERSION])); if (const EnumEntry *E = getObjectFileEnumEntry(e.e_type)) { Str = E->AltName.str(); } else { if (e.e_type >= ET_LOPROC) Str = "Processor Specific: (" + utohexstr(e.e_type, /*LowerCase=*/true) + ")"; else if (e.e_type >= ET_LOOS) Str = "OS Specific: (" + utohexstr(e.e_type, /*LowerCase=*/true) + ")"; else Str = ": " + utohexstr(e.e_type, /*LowerCase=*/true); } printFields(OS, "Type:", Str); Str = enumToString(e.e_machine, ArrayRef(ElfMachineType)); printFields(OS, "Machine:", Str); Str = "0x" + utohexstr(e.e_version); printFields(OS, "Version:", Str); Str = "0x" + utohexstr(e.e_entry); printFields(OS, "Entry point address:", Str); Str = to_string(e.e_phoff) + " (bytes into file)"; printFields(OS, "Start of program headers:", Str); Str = to_string(e.e_shoff) + " (bytes into file)"; printFields(OS, "Start of section headers:", Str); std::string ElfFlags; if (e.e_machine == EM_MIPS) ElfFlags = printFlags( e.e_flags, ArrayRef(ElfHeaderMipsFlags), unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI), unsigned(ELF::EF_MIPS_MACH)); else if (e.e_machine == EM_RISCV) ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderRISCVFlags)); else if (e.e_machine == EM_AVR) ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderAVRFlags), 
unsigned(ELF::EF_AVR_ARCH_MASK)); else if (e.e_machine == EM_LOONGARCH) ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderLoongArchFlags), unsigned(ELF::EF_LOONGARCH_ABI_MODIFIER_MASK), unsigned(ELF::EF_LOONGARCH_OBJABI_MASK)); else if (e.e_machine == EM_XTENSA) ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags), unsigned(ELF::EF_XTENSA_MACH)); else if (e.e_machine == EM_CUDA) ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags), unsigned(ELF::EF_CUDA_SM)); else if (e.e_machine == EM_AMDGPU) { switch (e.e_ident[ELF::EI_ABIVERSION]) { default: break; case 0: // ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags. [[fallthrough]]; case ELF::ELFABIVERSION_AMDGPU_HSA_V3: ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderAMDGPUFlagsABIVersion3), unsigned(ELF::EF_AMDGPU_MACH)); break; case ELF::ELFABIVERSION_AMDGPU_HSA_V4: case ELF::ELFABIVERSION_AMDGPU_HSA_V5: ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderAMDGPUFlagsABIVersion4), unsigned(ELF::EF_AMDGPU_MACH), unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4), unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4)); break; } } Str = "0x" + utohexstr(e.e_flags); if (!ElfFlags.empty()) Str = Str + ", " + ElfFlags; printFields(OS, "Flags:", Str); Str = to_string(e.e_ehsize) + " (bytes)"; printFields(OS, "Size of this header:", Str); Str = to_string(e.e_phentsize) + " (bytes)"; printFields(OS, "Size of program headers:", Str); Str = to_string(e.e_phnum); printFields(OS, "Number of program headers:", Str); Str = to_string(e.e_shentsize) + " (bytes)"; printFields(OS, "Size of section headers:", Str); Str = getSectionHeadersNumString(this->Obj, this->FileName); printFields(OS, "Number of section headers:", Str); Str = getSectionHeaderTableIndexString(this->Obj, this->FileName); printFields(OS, "Section header string table index:", Str); } template std::vector ELFDumper::getGroups() { auto GetSignature = [&](const Elf_Sym &Sym, unsigned SymNdx, const Elf_Shdr &Symtab) -> StringRef { Expected StrTableOrErr = Obj.getStringTableForSymtab(Symtab); if (!StrTableOrErr) { reportUniqueWarning("unable to get the string table for " + describe(Symtab) + ": " + toString(StrTableOrErr.takeError())); return ""; } StringRef Strings = *StrTableOrErr; if (Sym.st_name >= Strings.size()) { reportUniqueWarning("unable to get the name of the symbol with index " + Twine(SymNdx) + ": st_name (0x" + Twine::utohexstr(Sym.st_name) + ") is past the end of the string table of size 0x" + Twine::utohexstr(Strings.size())); return ""; } return StrTableOrErr->data() + Sym.st_name; }; std::vector Ret; uint64_t I = 0; for (const Elf_Shdr &Sec : cantFail(Obj.sections())) { ++I; if (Sec.sh_type != ELF::SHT_GROUP) continue; StringRef Signature = ""; if (Expected SymtabOrErr = Obj.getSection(Sec.sh_link)) { if (Expected SymOrErr = Obj.template getEntry(**SymtabOrErr, Sec.sh_info)) Signature = GetSignature(**SymOrErr, Sec.sh_info, **SymtabOrErr); else reportUniqueWarning("unable to get the signature symbol for " + describe(Sec) + ": " + toString(SymOrErr.takeError())); } else { reportUniqueWarning("unable to get the symbol table for " + describe(Sec) + ": " + toString(SymtabOrErr.takeError())); } ArrayRef Data; if (Expected> ContentsOrErr = Obj.template getSectionContentsAsArray(Sec)) { if (ContentsOrErr->empty()) reportUniqueWarning("unable to read the section group flag from the " + describe(Sec) + ": the section is empty"); else Data = *ContentsOrErr; } else { reportUniqueWarning("unable to get the content of the " + describe(Sec) + ": " + 
toString(ContentsOrErr.takeError())); } Ret.push_back({getPrintableSectionName(Sec), maybeDemangle(Signature), Sec.sh_name, I - 1, Sec.sh_link, Sec.sh_info, Data.empty() ? Elf_Word(0) : Data[0], {}}); if (Data.empty()) continue; std::vector &GM = Ret.back().Members; for (uint32_t Ndx : Data.slice(1)) { if (Expected SecOrErr = Obj.getSection(Ndx)) { GM.push_back({getPrintableSectionName(**SecOrErr), Ndx}); } else { reportUniqueWarning("unable to get the section with index " + Twine(Ndx) + " when dumping the " + describe(Sec) + ": " + toString(SecOrErr.takeError())); GM.push_back({"", Ndx}); } } } return Ret; } static DenseMap mapSectionsToGroups(ArrayRef Groups) { DenseMap Ret; for (const GroupSection &G : Groups) for (const GroupMember &GM : G.Members) Ret.insert({GM.Index, &G}); return Ret; } template void GNUELFDumper::printGroupSections() { std::vector V = this->getGroups(); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { OS << "\n" << getGroupType(G.Type) << " group section [" << format_decimal(G.Index, 5) << "] `" << G.Name << "' [" << G.Signature << "] contains " << G.Members.size() << " sections:\n" << " [Index] Name\n"; for (const GroupMember &GM : G.Members) { const GroupSection *MainGroup = Map[GM.Index]; if (MainGroup != &G) this->reportUniqueWarning( "section with index " + Twine(GM.Index) + ", included in the group section with index " + Twine(MainGroup->Index) + ", was also found in the group section with index " + Twine(G.Index)); OS << " [" << format_decimal(GM.Index, 5) << "] " << GM.Name << "\n"; } } if (V.empty()) OS << "There are no section groups in this file.\n"; } template void GNUELFDumper::printRelrReloc(const Elf_Relr &R) { OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 16 : 8)) << "\n"; } template void GNUELFDumper::printRelRelaReloc(const Relocation &R, const RelSymbol &RelSym) { // First two fields are bit width dependent. The rest of them are fixed width. unsigned Bias = ELFT::Is64Bits ? 8 : 0; Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias}; unsigned Width = ELFT::Is64Bits ? 16 : 8; Fields[0].Str = to_string(format_hex_no_prefix(R.Offset, Width)); Fields[1].Str = to_string(format_hex_no_prefix(R.Info, Width)); SmallString<32> RelocName; this->Obj.getRelocationTypeName(R.Type, RelocName); Fields[2].Str = RelocName.c_str(); if (RelSym.Sym) Fields[3].Str = to_string(format_hex_no_prefix(RelSym.Sym->getValue(), Width)); if (RelSym.Sym && RelSym.Name.empty()) Fields[4].Str = ""; else Fields[4].Str = std::string(RelSym.Name); for (const Field &F : Fields) printField(F); std::string Addend; if (std::optional A = R.Addend) { int64_t RelAddend = *A; if (!Fields[4].Str.empty()) { if (RelAddend < 0) { Addend = " - "; RelAddend = -static_cast(RelAddend); } else { Addend = " + "; } } Addend += utohexstr(RelAddend, /*LowerCase=*/true); } OS << Addend << "\n"; } template static void printRelocHeaderFields(formatted_raw_ostream &OS, unsigned SType, const typename ELFT::Ehdr &EHeader) { bool IsRela = SType == ELF::SHT_RELA || SType == ELF::SHT_ANDROID_RELA; bool IsRelr = SType == ELF::SHT_RELR || SType == ELF::SHT_ANDROID_RELR || (EHeader.e_machine == EM_AARCH64 && SType == ELF::SHT_AARCH64_AUTH_RELR); if (ELFT::Is64Bits) OS << " "; else OS << " "; if (IsRelr && opts::RawRelr) OS << "Data "; else OS << "Offset"; if (ELFT::Is64Bits) OS << " Info Type" << " Symbol's Value Symbol's Name"; else OS << " Info Type Sym. 
Value Symbol's Name"; if (IsRela) OS << " + Addend"; OS << "\n"; } template void GNUELFDumper::printDynamicRelocHeader(unsigned Type, StringRef Name, const DynRegionInfo &Reg) { uint64_t Offset = Reg.Addr - this->Obj.base(); OS << "\n'" << Name.str().c_str() << "' relocation section at offset 0x" << utohexstr(Offset, /*LowerCase=*/true) << " contains " << Reg.Size << " bytes:\n"; printRelocHeaderFields(OS, Type, this->Obj.getHeader()); } template static bool isRelocationSec(const typename ELFT::Shdr &Sec, const typename ELFT::Ehdr &EHeader) { return Sec.sh_type == ELF::SHT_REL || Sec.sh_type == ELF::SHT_RELA || Sec.sh_type == ELF::SHT_RELR || Sec.sh_type == ELF::SHT_ANDROID_REL || Sec.sh_type == ELF::SHT_ANDROID_RELA || Sec.sh_type == ELF::SHT_ANDROID_RELR || (EHeader.e_machine == EM_AARCH64 && Sec.sh_type == ELF::SHT_AARCH64_AUTH_RELR); } template void GNUELFDumper::printRelocations() { auto GetEntriesNum = [&](const Elf_Shdr &Sec) -> Expected { // Android's packed relocation section needs to be unpacked first // to get the actual number of entries. if (Sec.sh_type == ELF::SHT_ANDROID_REL || Sec.sh_type == ELF::SHT_ANDROID_RELA) { Expected> RelasOrErr = this->Obj.android_relas(Sec); if (!RelasOrErr) return RelasOrErr.takeError(); return RelasOrErr->size(); } if (!opts::RawRelr && (Sec.sh_type == ELF::SHT_RELR || Sec.sh_type == ELF::SHT_ANDROID_RELR || (this->Obj.getHeader().e_machine == EM_AARCH64 && Sec.sh_type == ELF::SHT_AARCH64_AUTH_RELR))) { Expected RelrsOrErr = this->Obj.relrs(Sec); if (!RelrsOrErr) return RelrsOrErr.takeError(); return this->Obj.decode_relrs(*RelrsOrErr).size(); } return Sec.getEntityCount(); }; bool HasRelocSections = false; for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (!isRelocationSec(Sec, this->Obj.getHeader())) continue; HasRelocSections = true; std::string EntriesNum = ""; if (Expected NumOrErr = GetEntriesNum(Sec)) EntriesNum = std::to_string(*NumOrErr); else this->reportUniqueWarning("unable to get the number of relocations in " + this->describe(Sec) + ": " + toString(NumOrErr.takeError())); uintX_t Offset = Sec.sh_offset; StringRef Name = this->getPrintableSectionName(Sec); OS << "\nRelocation section '" << Name << "' at offset 0x" << utohexstr(Offset, /*LowerCase=*/true) << " contains " << EntriesNum << " entries:\n"; printRelocHeaderFields(OS, Sec.sh_type, this->Obj.getHeader()); this->printRelocationsHelper(Sec); } if (!HasRelocSections) OS << "\nThere are no relocations in this file.\n"; } // Print the offset of a particular section from anyone of the ranges: // [SHT_LOOS, SHT_HIOS], [SHT_LOPROC, SHT_HIPROC], [SHT_LOUSER, SHT_HIUSER]. // If 'Type' does not fall within any of those ranges, then a string is // returned as '' followed by the type value. static std::string getSectionTypeOffsetString(unsigned Type) { if (Type >= SHT_LOOS && Type <= SHT_HIOS) return "LOOS+0x" + utohexstr(Type - SHT_LOOS); else if (Type >= SHT_LOPROC && Type <= SHT_HIPROC) return "LOPROC+0x" + utohexstr(Type - SHT_LOPROC); else if (Type >= SHT_LOUSER && Type <= SHT_HIUSER) return "LOUSER+0x" + utohexstr(Type - SHT_LOUSER); return "0x" + utohexstr(Type) + ": "; } static std::string getSectionTypeString(unsigned Machine, unsigned Type) { StringRef Name = getELFSectionTypeName(Machine, Type); // Handle SHT_GNU_* type names. if (Name.consume_front("SHT_GNU_")) { if (Name == "HASH") return "GNU_HASH"; // E.g. SHT_GNU_verneed -> VERNEED. 
return Name.upper(); } if (Name == "SHT_SYMTAB_SHNDX") return "SYMTAB SECTION INDICES"; if (Name.consume_front("SHT_")) return Name.str(); return getSectionTypeOffsetString(Type); } static void printSectionDescription(formatted_raw_ostream &OS, unsigned EMachine) { OS << "Key to Flags:\n"; OS << " W (write), A (alloc), X (execute), M (merge), S (strings), I " "(info),\n"; OS << " L (link order), O (extra OS processing required), G (group), T " "(TLS),\n"; OS << " C (compressed), x (unknown), o (OS specific), E (exclude),\n"; OS << " R (retain)"; if (EMachine == EM_X86_64) OS << ", l (large)"; else if (EMachine == EM_ARM) OS << ", y (purecode)"; OS << ", p (processor specific)\n"; } template void GNUELFDumper::printSectionHeaders() { ArrayRef Sections = cantFail(this->Obj.sections()); if (Sections.empty()) { OS << "\nThere are no sections in this file.\n"; Expected SecStrTableOrErr = this->Obj.getSectionStringTable(Sections, this->WarningHandler); if (!SecStrTableOrErr) this->reportUniqueWarning(SecStrTableOrErr.takeError()); return; } unsigned Bias = ELFT::Is64Bits ? 0 : 8; OS << "There are " << to_string(Sections.size()) << " section headers, starting at offset " << "0x" << utohexstr(this->Obj.getHeader().e_shoff, /*LowerCase=*/true) << ":\n\n"; OS << "Section Headers:\n"; Field Fields[11] = { {"[Nr]", 2}, {"Name", 7}, {"Type", 25}, {"Address", 41}, {"Off", 58 - Bias}, {"Size", 65 - Bias}, {"ES", 72 - Bias}, {"Flg", 75 - Bias}, {"Lk", 79 - Bias}, {"Inf", 82 - Bias}, {"Al", 86 - Bias}}; for (const Field &F : Fields) printField(F); OS << "\n"; StringRef SecStrTable; if (Expected SecStrTableOrErr = this->Obj.getSectionStringTable(Sections, this->WarningHandler)) SecStrTable = *SecStrTableOrErr; else this->reportUniqueWarning(SecStrTableOrErr.takeError()); size_t SectionIndex = 0; for (const Elf_Shdr &Sec : Sections) { Fields[0].Str = to_string(SectionIndex); if (SecStrTable.empty()) Fields[1].Str = ""; else Fields[1].Str = std::string(unwrapOrError( this->FileName, this->Obj.getSectionName(Sec, SecStrTable))); Fields[2].Str = getSectionTypeString(this->Obj.getHeader().e_machine, Sec.sh_type); Fields[3].Str = to_string(format_hex_no_prefix(Sec.sh_addr, ELFT::Is64Bits ? 
16 : 8)); Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6)); Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6)); Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2)); Fields[7].Str = getGNUFlags(this->Obj.getHeader().e_ident[ELF::EI_OSABI], this->Obj.getHeader().e_machine, Sec.sh_flags); Fields[8].Str = to_string(Sec.sh_link); Fields[9].Str = to_string(Sec.sh_info); Fields[10].Str = to_string(Sec.sh_addralign); OS.PadToColumn(Fields[0].Column); OS << "[" << right_justify(Fields[0].Str, 2) << "]"; for (int i = 1; i < 7; i++) printField(Fields[i]); OS.PadToColumn(Fields[7].Column); OS << right_justify(Fields[7].Str, 3); OS.PadToColumn(Fields[8].Column); OS << right_justify(Fields[8].Str, 2); OS.PadToColumn(Fields[9].Column); OS << right_justify(Fields[9].Str, 3); OS.PadToColumn(Fields[10].Column); OS << right_justify(Fields[10].Str, 2); OS << "\n"; ++SectionIndex; } printSectionDescription(OS, this->Obj.getHeader().e_machine); } template void GNUELFDumper::printSymtabMessage(const Elf_Shdr *Symtab, size_t Entries, bool NonVisibilityBitsUsed, bool ExtraSymInfo) const { StringRef Name; if (Symtab) Name = this->getPrintableSectionName(*Symtab); if (!Name.empty()) OS << "\nSymbol table '" << Name << "'"; else OS << "\nSymbol table for image"; OS << " contains " << Entries << " entries:\n"; if (ELFT::Is64Bits) { OS << " Num: Value Size Type Bind Vis"; if (ExtraSymInfo) OS << "+Other"; } else { OS << " Num: Value Size Type Bind Vis"; if (ExtraSymInfo) OS << "+Other"; } OS.PadToColumn((ELFT::Is64Bits ? 56 : 48) + (NonVisibilityBitsUsed ? 13 : 0)); if (ExtraSymInfo) OS << "Ndx(SecName) Name [+ Version Info]\n"; else OS << "Ndx Name\n"; } template std::string GNUELFDumper::getSymbolSectionNdx( const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable, bool ExtraSymInfo) const { unsigned SectionIndex = Symbol.st_shndx; switch (SectionIndex) { case ELF::SHN_UNDEF: return "UND"; case ELF::SHN_ABS: return "ABS"; case ELF::SHN_COMMON: return "COM"; case ELF::SHN_XINDEX: { Expected IndexOrErr = object::getExtendedSymbolTableIndex(Symbol, SymIndex, ShndxTable); if (!IndexOrErr) { assert(Symbol.st_shndx == SHN_XINDEX && "getExtendedSymbolTableIndex should only fail due to an invalid " "SHT_SYMTAB_SHNDX table/reference"); this->reportUniqueWarning(IndexOrErr.takeError()); return "RSV[0xffff]"; } SectionIndex = *IndexOrErr; break; } default: // Find if: // Processor specific if (SectionIndex >= ELF::SHN_LOPROC && SectionIndex <= ELF::SHN_HIPROC) return std::string("PRC[0x") + to_string(format_hex_no_prefix(SectionIndex, 4)) + "]"; // OS specific if (SectionIndex >= ELF::SHN_LOOS && SectionIndex <= ELF::SHN_HIOS) return std::string("OS[0x") + to_string(format_hex_no_prefix(SectionIndex, 4)) + "]"; // Architecture reserved: if (SectionIndex >= ELF::SHN_LORESERVE && SectionIndex <= ELF::SHN_HIRESERVE) return std::string("RSV[0x") + to_string(format_hex_no_prefix(SectionIndex, 4)) + "]"; break; } std::string Extra; if (ExtraSymInfo) { auto Sec = this->Obj.getSection(SectionIndex); if (!Sec) { this->reportUniqueWarning(Sec.takeError()); } else { auto SecName = this->Obj.getSectionName(**Sec); if (!SecName) this->reportUniqueWarning(SecName.takeError()); else Extra = Twine(" (" + *SecName + ")").str(); } } return to_string(format_decimal(SectionIndex, 3)) + Extra; } template void GNUELFDumper::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable, std::optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed, bool ExtraSymInfo) const { 
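// The column layout mirrors GNU readelf: every column after the value field
// shifts right by 8 on 64-bit targets because st_value is printed as 16 hex
// digits instead of 8.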
unsigned Bias = ELFT::Is64Bits ? 8 : 0; Field Fields[8] = {0, 8, 17 + Bias, 23 + Bias, 31 + Bias, 38 + Bias, 48 + Bias, 51 + Bias}; Fields[0].Str = to_string(format_decimal(SymIndex, 6)) + ":"; Fields[1].Str = to_string(format_hex_no_prefix(Symbol.st_value, ELFT::Is64Bits ? 16 : 8)); Fields[2].Str = to_string(format_decimal(Symbol.st_size, 5)); unsigned char SymbolType = Symbol.getType(); if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) Fields[3].Str = enumToString(SymbolType, ArrayRef(AMDGPUSymbolTypes)); else Fields[3].Str = enumToString(SymbolType, ArrayRef(ElfSymbolTypes)); Fields[4].Str = enumToString(Symbol.getBinding(), ArrayRef(ElfSymbolBindings)); Fields[5].Str = enumToString(Symbol.getVisibility(), ArrayRef(ElfSymbolVisibilities)); if (Symbol.st_other & ~0x3) { if (this->Obj.getHeader().e_machine == ELF::EM_AARCH64) { uint8_t Other = Symbol.st_other & ~0x3; if (Other & STO_AARCH64_VARIANT_PCS) { Other &= ~STO_AARCH64_VARIANT_PCS; Fields[5].Str += " [VARIANT_PCS"; if (Other != 0) Fields[5].Str.append(" | " + utohexstr(Other, /*LowerCase=*/true)); Fields[5].Str.append("]"); } } else if (this->Obj.getHeader().e_machine == ELF::EM_RISCV) { uint8_t Other = Symbol.st_other & ~0x3; if (Other & STO_RISCV_VARIANT_CC) { Other &= ~STO_RISCV_VARIANT_CC; Fields[5].Str += " [VARIANT_CC"; if (Other != 0) Fields[5].Str.append(" | " + utohexstr(Other, /*LowerCase=*/true)); Fields[5].Str.append("]"); } } else { Fields[5].Str += " []"; } } Fields[6].Column += NonVisibilityBitsUsed ? 13 : 0; Fields[6].Str = getSymbolSectionNdx(Symbol, SymIndex, ShndxTable, ExtraSymInfo); Fields[7].Column += ExtraSymInfo ? 10 : 0; Fields[7].Str = this->getFullSymbolName(Symbol, SymIndex, ShndxTable, StrTable, IsDynamic); for (const Field &Entry : Fields) printField(Entry); OS << "\n"; } template void GNUELFDumper::printHashedSymbol(const Elf_Sym *Symbol, unsigned SymIndex, DataRegion ShndxTable, StringRef StrTable, uint32_t Bucket) { unsigned Bias = ELFT::Is64Bits ? 8 : 0; Field Fields[9] = {0, 6, 11, 20 + Bias, 25 + Bias, 34 + Bias, 41 + Bias, 49 + Bias, 53 + Bias}; Fields[0].Str = to_string(format_decimal(SymIndex, 5)); Fields[1].Str = to_string(format_decimal(Bucket, 3)) + ":"; Fields[2].Str = to_string( format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8)); Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5)); unsigned char SymbolType = Symbol->getType(); if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) Fields[4].Str = enumToString(SymbolType, ArrayRef(AMDGPUSymbolTypes)); else Fields[4].Str = enumToString(SymbolType, ArrayRef(ElfSymbolTypes)); Fields[5].Str = enumToString(Symbol->getBinding(), ArrayRef(ElfSymbolBindings)); Fields[6].Str = enumToString(Symbol->getVisibility(), ArrayRef(ElfSymbolVisibilities)); Fields[7].Str = getSymbolSectionNdx(*Symbol, SymIndex, ShndxTable); Fields[8].Str = this->getFullSymbolName(*Symbol, SymIndex, ShndxTable, StrTable, true); for (const Field &Entry : Fields) printField(Entry); OS << "\n"; } template void GNUELFDumper::printSymbols(bool PrintSymbols, bool PrintDynamicSymbols, bool ExtraSymInfo) { if (!PrintSymbols && !PrintDynamicSymbols) return; // GNU readelf prints both the .dynsym and .symtab with --symbols. 
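// The first helper call dumps the dynamic symbol table; the second dumps the
// static symbol table and runs only when symbol printing was requested
// explicitly.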
this->printSymbolsHelper(true, ExtraSymInfo); if (PrintSymbols) this->printSymbolsHelper(false, ExtraSymInfo); } template void GNUELFDumper::printHashTableSymbols(const Elf_Hash &SysVHash) { if (this->DynamicStringTable.empty()) return; if (ELFT::Is64Bits) OS << " Num Buc: Value Size Type Bind Vis Ndx Name"; else OS << " Num Buc: Value Size Type Bind Vis Ndx Name"; OS << "\n"; Elf_Sym_Range DynSyms = this->dynamic_symbols(); const Elf_Sym *FirstSym = DynSyms.empty() ? nullptr : &DynSyms[0]; if (!FirstSym) { this->reportUniqueWarning( Twine("unable to print symbols for the .hash table: the " "dynamic symbol table ") + (this->DynSymRegion ? "is empty" : "was not found")); return; } DataRegion ShndxTable( (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end()); auto Buckets = SysVHash.buckets(); auto Chains = SysVHash.chains(); for (uint32_t Buc = 0; Buc < SysVHash.nbucket; Buc++) { if (Buckets[Buc] == ELF::STN_UNDEF) continue; BitVector Visited(SysVHash.nchain); for (uint32_t Ch = Buckets[Buc]; Ch < SysVHash.nchain; Ch = Chains[Ch]) { if (Ch == ELF::STN_UNDEF) break; if (Visited[Ch]) { this->reportUniqueWarning(".hash section is invalid: bucket " + Twine(Ch) + ": a cycle was detected in the linked chain"); break; } printHashedSymbol(FirstSym + Ch, Ch, ShndxTable, this->DynamicStringTable, Buc); Visited[Ch] = true; } } } template void GNUELFDumper::printGnuHashTableSymbols(const Elf_GnuHash &GnuHash) { if (this->DynamicStringTable.empty()) return; Elf_Sym_Range DynSyms = this->dynamic_symbols(); const Elf_Sym *FirstSym = DynSyms.empty() ? nullptr : &DynSyms[0]; if (!FirstSym) { this->reportUniqueWarning( Twine("unable to print symbols for the .gnu.hash table: the " "dynamic symbol table ") + (this->DynSymRegion ? "is empty" : "was not found")); return; } auto GetSymbol = [&](uint64_t SymIndex, uint64_t SymsTotal) -> const Elf_Sym * { if (SymIndex >= SymsTotal) { this->reportUniqueWarning( "unable to print hashed symbol with index " + Twine(SymIndex) + ", which is greater than or equal to the number of dynamic symbols " "(" + Twine::utohexstr(SymsTotal) + ")"); return nullptr; } return FirstSym + SymIndex; }; Expected> ValuesOrErr = getGnuHashTableChains(this->DynSymRegion, &GnuHash); ArrayRef Values; if (!ValuesOrErr) this->reportUniqueWarning("unable to get hash values for the SHT_GNU_HASH " "section: " + toString(ValuesOrErr.takeError())); else Values = *ValuesOrErr; DataRegion ShndxTable( (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end()); ArrayRef Buckets = GnuHash.buckets(); for (uint32_t Buc = 0; Buc < GnuHash.nbuckets; Buc++) { if (Buckets[Buc] == ELF::STN_UNDEF) continue; uint32_t Index = Buckets[Buc]; // Print whole chain. while (true) { uint32_t SymIndex = Index++; if (const Elf_Sym *Sym = GetSymbol(SymIndex, DynSyms.size())) printHashedSymbol(Sym, SymIndex, ShndxTable, this->DynamicStringTable, Buc); else break; if (SymIndex < GnuHash.symndx) { this->reportUniqueWarning( "unable to read the hash value for symbol with index " + Twine(SymIndex) + ", which is less than the index of the first hashed symbol (" + Twine(GnuHash.symndx) + ")"); break; } // Chain ends at symbol with stopper bit. if ((Values[SymIndex - GnuHash.symndx] & 1) == 1) break; } } } template void GNUELFDumper::printHashSymbols() { if (this->HashTable) { OS << "\n Symbol table of .hash for image:\n"; if (Error E = checkHashTable(*this, this->HashTable)) this->reportUniqueWarning(std::move(E)); else printHashTableSymbols(*this->HashTable); } // Try printing the .gnu.hash table. 
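// Unlike the SysV .hash table, the GNU hash table only covers dynamic symbols
// starting at symndx, and each chain is terminated by a hash value whose
// least-significant bit is set (the "stopper bit" checked in
// printGnuHashTableSymbols above).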
if (this->GnuHashTable) { OS << "\n Symbol table of .gnu.hash for image:\n"; if (ELFT::Is64Bits) OS << " Num Buc: Value Size Type Bind Vis Ndx Name"; else OS << " Num Buc: Value Size Type Bind Vis Ndx Name"; OS << "\n"; if (Error E = checkGNUHashTable(this->Obj, this->GnuHashTable)) this->reportUniqueWarning(std::move(E)); else printGnuHashTableSymbols(*this->GnuHashTable); } } template void GNUELFDumper::printSectionDetails() { ArrayRef Sections = cantFail(this->Obj.sections()); if (Sections.empty()) { OS << "\nThere are no sections in this file.\n"; Expected SecStrTableOrErr = this->Obj.getSectionStringTable(Sections, this->WarningHandler); if (!SecStrTableOrErr) this->reportUniqueWarning(SecStrTableOrErr.takeError()); return; } OS << "There are " << to_string(Sections.size()) << " section headers, starting at offset " << "0x" << utohexstr(this->Obj.getHeader().e_shoff, /*LowerCase=*/true) << ":\n\n"; OS << "Section Headers:\n"; auto PrintFields = [&](ArrayRef V) { for (const Field &F : V) printField(F); OS << "\n"; }; PrintFields({{"[Nr]", 2}, {"Name", 7}}); constexpr bool Is64 = ELFT::Is64Bits; PrintFields({{"Type", 7}, {Is64 ? "Address" : "Addr", 23}, {"Off", Is64 ? 40 : 32}, {"Size", Is64 ? 47 : 39}, {"ES", Is64 ? 54 : 46}, {"Lk", Is64 ? 59 : 51}, {"Inf", Is64 ? 62 : 54}, {"Al", Is64 ? 66 : 57}}); PrintFields({{"Flags", 7}}); StringRef SecStrTable; if (Expected SecStrTableOrErr = this->Obj.getSectionStringTable(Sections, this->WarningHandler)) SecStrTable = *SecStrTableOrErr; else this->reportUniqueWarning(SecStrTableOrErr.takeError()); size_t SectionIndex = 0; const unsigned AddrSize = Is64 ? 16 : 8; for (const Elf_Shdr &S : Sections) { StringRef Name = ""; if (Expected NameOrErr = this->Obj.getSectionName(S, SecStrTable)) Name = *NameOrErr; else this->reportUniqueWarning(NameOrErr.takeError()); OS.PadToColumn(2); OS << "[" << right_justify(to_string(SectionIndex), 2) << "]"; PrintFields({{Name, 7}}); PrintFields( {{getSectionTypeString(this->Obj.getHeader().e_machine, S.sh_type), 7}, {to_string(format_hex_no_prefix(S.sh_addr, AddrSize)), 23}, {to_string(format_hex_no_prefix(S.sh_offset, 6)), Is64 ? 39 : 32}, {to_string(format_hex_no_prefix(S.sh_size, 6)), Is64 ? 47 : 39}, {to_string(format_hex_no_prefix(S.sh_entsize, 2)), Is64 ? 54 : 46}, {to_string(S.sh_link), Is64 ? 59 : 51}, {to_string(S.sh_info), Is64 ? 63 : 55}, {to_string(S.sh_addralign), Is64 ? 66 : 58}}); OS.PadToColumn(7); OS << "[" << to_string(format_hex_no_prefix(S.sh_flags, AddrSize)) << "]: "; DenseMap FlagToName = { {SHF_WRITE, "WRITE"}, {SHF_ALLOC, "ALLOC"}, {SHF_EXECINSTR, "EXEC"}, {SHF_MERGE, "MERGE"}, {SHF_STRINGS, "STRINGS"}, {SHF_INFO_LINK, "INFO LINK"}, {SHF_LINK_ORDER, "LINK ORDER"}, {SHF_OS_NONCONFORMING, "OS NONCONF"}, {SHF_GROUP, "GROUP"}, {SHF_TLS, "TLS"}, {SHF_COMPRESSED, "COMPRESSED"}, {SHF_EXCLUDE, "EXCLUDE"}}; uint64_t Flags = S.sh_flags; uint64_t UnknownFlags = 0; ListSeparator LS; while (Flags) { // Take the least significant bit as a flag. 
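// Flags & -Flags isolates the lowest set bit: for Flags == 0x6
// (SHF_ALLOC | SHF_EXECINSTR) the first pass extracts 0x2 (SHF_ALLOC) and
// leaves 0x4 for the next iteration.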
uint64_t Flag = Flags & -Flags; Flags -= Flag; auto It = FlagToName.find(Flag); if (It != FlagToName.end()) OS << LS << It->second; else UnknownFlags |= Flag; } auto PrintUnknownFlags = [&](uint64_t Mask, StringRef Name) { uint64_t FlagsToPrint = UnknownFlags & Mask; if (!FlagsToPrint) return; OS << LS << Name << " (" << to_string(format_hex_no_prefix(FlagsToPrint, AddrSize)) << ")"; UnknownFlags &= ~Mask; }; PrintUnknownFlags(SHF_MASKOS, "OS"); PrintUnknownFlags(SHF_MASKPROC, "PROC"); PrintUnknownFlags(uint64_t(-1), "UNKNOWN"); OS << "\n"; ++SectionIndex; if (!(S.sh_flags & SHF_COMPRESSED)) continue; Expected> Data = this->Obj.getSectionContents(S); if (!Data || Data->size() < sizeof(Elf_Chdr)) { consumeError(Data.takeError()); reportWarning(createError("SHF_COMPRESSED section '" + Name + "' does not have an Elf_Chdr header"), this->FileName); OS.indent(7); OS << "[]"; } else { OS.indent(7); auto *Chdr = reinterpret_cast(Data->data()); if (Chdr->ch_type == ELFCOMPRESS_ZLIB) OS << "ZLIB"; else if (Chdr->ch_type == ELFCOMPRESS_ZSTD) OS << "ZSTD"; else OS << format("[: 0x%x]", unsigned(Chdr->ch_type)); OS << ", " << format_hex_no_prefix(Chdr->ch_size, ELFT::Is64Bits ? 16 : 8) << ", " << Chdr->ch_addralign; } OS << '\n'; } } static inline std::string printPhdrFlags(unsigned Flag) { std::string Str; Str = (Flag & PF_R) ? "R" : " "; Str += (Flag & PF_W) ? "W" : " "; Str += (Flag & PF_X) ? "E" : " "; return Str; } template static bool checkTLSSections(const typename ELFT::Phdr &Phdr, const typename ELFT::Shdr &Sec) { if (Sec.sh_flags & ELF::SHF_TLS) { // .tbss must only be shown in the PT_TLS segment. if (Sec.sh_type == ELF::SHT_NOBITS) return Phdr.p_type == ELF::PT_TLS; // SHF_TLS sections are only shown in PT_TLS, PT_LOAD or PT_GNU_RELRO // segments. return (Phdr.p_type == ELF::PT_TLS) || (Phdr.p_type == ELF::PT_LOAD) || (Phdr.p_type == ELF::PT_GNU_RELRO); } // PT_TLS must only have SHF_TLS sections. return Phdr.p_type != ELF::PT_TLS; } template static bool checkPTDynamic(const typename ELFT::Phdr &Phdr, const typename ELFT::Shdr &Sec) { if (Phdr.p_type != ELF::PT_DYNAMIC || Phdr.p_memsz == 0 || Sec.sh_size != 0) return true; // We get here when we have an empty section. Only non-empty sections can be // at the start or at the end of PT_DYNAMIC. // Is section within the phdr both based on offset and VMA? bool CheckOffset = (Sec.sh_type == ELF::SHT_NOBITS) || (Sec.sh_offset > Phdr.p_offset && Sec.sh_offset < Phdr.p_offset + Phdr.p_filesz); bool CheckVA = !(Sec.sh_flags & ELF::SHF_ALLOC) || (Sec.sh_addr > Phdr.p_vaddr && Sec.sh_addr < Phdr.p_memsz); return CheckOffset && CheckVA; } template void GNUELFDumper::printProgramHeaders( bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { const bool ShouldPrintSectionMapping = (PrintSectionMapping != cl::BOU_FALSE); // Exit early if no program header or section mapping details were requested. if (!PrintProgramHeaders && !ShouldPrintSectionMapping) return; if (PrintProgramHeaders) { const Elf_Ehdr &Header = this->Obj.getHeader(); if (Header.e_phnum == 0) { OS << "\nThere are no program headers in this file.\n"; } else { printProgramHeaders(); } } if (ShouldPrintSectionMapping) printSectionMapping(); } template void GNUELFDumper::printProgramHeaders() { unsigned Bias = ELFT::Is64Bits ? 
8 : 0; const Elf_Ehdr &Header = this->Obj.getHeader(); Field Fields[8] = {2, 17, 26, 37 + Bias, 48 + Bias, 56 + Bias, 64 + Bias, 68 + Bias}; OS << "\nElf file type is " << enumToString(Header.e_type, ArrayRef(ElfObjectFileType)) << "\n" << "Entry point " << format_hex(Header.e_entry, 3) << "\n" << "There are " << Header.e_phnum << " program headers," << " starting at offset " << Header.e_phoff << "\n\n" << "Program Headers:\n"; if (ELFT::Is64Bits) OS << " Type Offset VirtAddr PhysAddr " << " FileSiz MemSiz Flg Align\n"; else OS << " Type Offset VirtAddr PhysAddr FileSiz " << "MemSiz Flg Align\n"; unsigned Width = ELFT::Is64Bits ? 18 : 10; unsigned SizeWidth = ELFT::Is64Bits ? 8 : 7; Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning("unable to dump program headers: " + toString(PhdrsOrErr.takeError())); return; } for (const Elf_Phdr &Phdr : *PhdrsOrErr) { Fields[0].Str = getGNUPtType(Header.e_machine, Phdr.p_type); Fields[1].Str = to_string(format_hex(Phdr.p_offset, 8)); Fields[2].Str = to_string(format_hex(Phdr.p_vaddr, Width)); Fields[3].Str = to_string(format_hex(Phdr.p_paddr, Width)); Fields[4].Str = to_string(format_hex(Phdr.p_filesz, SizeWidth)); Fields[5].Str = to_string(format_hex(Phdr.p_memsz, SizeWidth)); Fields[6].Str = printPhdrFlags(Phdr.p_flags); Fields[7].Str = to_string(format_hex(Phdr.p_align, 1)); for (const Field &F : Fields) printField(F); if (Phdr.p_type == ELF::PT_INTERP) { OS << "\n"; auto ReportBadInterp = [&](const Twine &Msg) { this->reportUniqueWarning( "unable to read program interpreter name at offset 0x" + Twine::utohexstr(Phdr.p_offset) + ": " + Msg); }; if (Phdr.p_offset >= this->Obj.getBufSize()) { ReportBadInterp("it goes past the end of the file (0x" + Twine::utohexstr(this->Obj.getBufSize()) + ")"); continue; } const char *Data = reinterpret_cast(this->Obj.base()) + Phdr.p_offset; size_t MaxSize = this->Obj.getBufSize() - Phdr.p_offset; size_t Len = strnlen(Data, MaxSize); if (Len == MaxSize) { ReportBadInterp("it is not null-terminated"); continue; } OS << " [Requesting program interpreter: "; OS << StringRef(Data, Len) << "]"; } OS << "\n"; } } template void GNUELFDumper::printSectionMapping() { OS << "\n Section to Segment mapping:\n Segment Sections...\n"; DenseSet BelongsToSegment; int Phnum = 0; Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning( "can't read program headers to build section to segment mapping: " + toString(PhdrsOrErr.takeError())); return; } for (const Elf_Phdr &Phdr : *PhdrsOrErr) { std::string Sections; OS << format(" %2.2d ", Phnum++); // Check if each section is in a segment and then print mapping. for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (Sec.sh_type == ELF::SHT_NULL) continue; // readelf additionally makes sure it does not print zero sized sections // at end of segments and for PT_DYNAMIC both start and end of section // .tbss must only be shown in PT_TLS section. if (isSectionInSegment(Phdr, Sec) && checkTLSSections(Phdr, Sec) && checkPTDynamic(Phdr, Sec)) { Sections += unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)).str() + " "; BelongsToSegment.insert(&Sec); } } OS << Sections << "\n"; OS.flush(); } // Display sections that do not belong to a segment. 
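// These are typically non-allocated sections such as .symtab, .strtab and
// .shstrtab; they are listed below under the pseudo-segment "None".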
std::string Sections; for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (BelongsToSegment.find(&Sec) == BelongsToSegment.end()) Sections += unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)).str() + ' '; } if (!Sections.empty()) { OS << " None " << Sections << '\n'; OS.flush(); } } namespace { template RelSymbol getSymbolForReloc(const ELFDumper &Dumper, const Relocation &Reloc) { using Elf_Sym = typename ELFT::Sym; auto WarnAndReturn = [&](const Elf_Sym *Sym, const Twine &Reason) -> RelSymbol { Dumper.reportUniqueWarning( "unable to get name of the dynamic symbol with index " + Twine(Reloc.Symbol) + ": " + Reason); return {Sym, ""}; }; ArrayRef Symbols = Dumper.dynamic_symbols(); const Elf_Sym *FirstSym = Symbols.begin(); if (!FirstSym) return WarnAndReturn(nullptr, "no dynamic symbol table found"); // We might have an object without a section header. In this case the size of // Symbols is zero, because there is no way to know the size of the dynamic // table. We should allow this case and not print a warning. if (!Symbols.empty() && Reloc.Symbol >= Symbols.size()) return WarnAndReturn( nullptr, "index is greater than or equal to the number of dynamic symbols (" + Twine(Symbols.size()) + ")"); const ELFFile &Obj = Dumper.getElfObject().getELFFile(); const uint64_t FileSize = Obj.getBufSize(); const uint64_t SymOffset = ((const uint8_t *)FirstSym - Obj.base()) + (uint64_t)Reloc.Symbol * sizeof(Elf_Sym); if (SymOffset + sizeof(Elf_Sym) > FileSize) return WarnAndReturn(nullptr, "symbol at 0x" + Twine::utohexstr(SymOffset) + " goes past the end of the file (0x" + Twine::utohexstr(FileSize) + ")"); const Elf_Sym *Sym = FirstSym + Reloc.Symbol; Expected ErrOrName = Sym->getName(Dumper.getDynamicStringTable()); if (!ErrOrName) return WarnAndReturn(Sym, toString(ErrOrName.takeError())); return {Sym == FirstSym ? nullptr : Sym, maybeDemangle(*ErrOrName)}; } } // namespace template static size_t getMaxDynamicTagSize(const ELFFile &Obj, typename ELFT::DynRange Tags) { size_t Max = 0; for (const typename ELFT::Dyn &Dyn : Tags) Max = std::max(Max, Obj.getDynamicTagAsString(Dyn.d_tag).size()); return Max; } template void GNUELFDumper::printDynamicTable() { Elf_Dyn_Range Table = this->dynamic_table(); if (Table.empty()) return; OS << "Dynamic section at offset " << format_hex(reinterpret_cast(this->DynamicTable.Addr) - this->Obj.base(), 1) << " contains " << Table.size() << " entries:\n"; // The type name is surrounded with round brackets, hence add 2. size_t MaxTagSize = getMaxDynamicTagSize(this->Obj, Table) + 2; // The "Name/Value" column should be indented from the "Type" column by N // spaces, where N = MaxTagSize - length of "Type" (4) + trailing // space (1) = 3. OS << " Tag" + std::string(ELFT::Is64Bits ? 16 : 8, ' ') + "Type" << std::string(MaxTagSize - 3, ' ') << "Name/Value\n"; std::string ValueFmt = " %-" + std::to_string(MaxTagSize) + "s "; for (auto Entry : Table) { uintX_t Tag = Entry.getTag(); std::string Type = std::string("(") + this->Obj.getDynamicTagAsString(Tag) + ")"; std::string Value = this->getDynamicEntry(Tag, Entry.getVal()); OS << " " << format_hex(Tag, ELFT::Is64Bits ? 
18 : 10) << format(ValueFmt.c_str(), Type.c_str()) << Value << "\n"; } } template void GNUELFDumper::printDynamicRelocations() { this->printDynamicRelocationsHelper(); } template void ELFDumper::printDynamicReloc(const Relocation &R) { printRelRelaReloc(R, getSymbolForReloc(*this, R)); } template void ELFDumper::printRelocationsHelper(const Elf_Shdr &Sec) { this->forEachRelocationDo( Sec, opts::RawRelr, [&](const Relocation &R, unsigned Ndx, const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { printReloc(R, Ndx, Sec, SymTab); }, [&](const Elf_Relr &R) { printRelrReloc(R); }); } template void ELFDumper::printDynamicRelocationsHelper() { const bool IsMips64EL = this->Obj.isMips64EL(); if (this->DynRelaRegion.Size > 0) { printDynamicRelocHeader(ELF::SHT_RELA, "RELA", this->DynRelaRegion); for (const Elf_Rela &Rela : this->DynRelaRegion.template getAsArrayRef()) printDynamicReloc(Relocation(Rela, IsMips64EL)); } if (this->DynRelRegion.Size > 0) { printDynamicRelocHeader(ELF::SHT_REL, "REL", this->DynRelRegion); for (const Elf_Rel &Rel : this->DynRelRegion.template getAsArrayRef()) printDynamicReloc(Relocation(Rel, IsMips64EL)); } if (this->DynRelrRegion.Size > 0) { printDynamicRelocHeader(ELF::SHT_REL, "RELR", this->DynRelrRegion); Elf_Relr_Range Relrs = this->DynRelrRegion.template getAsArrayRef(); for (const Elf_Rel &Rel : Obj.decode_relrs(Relrs)) printDynamicReloc(Relocation(Rel, IsMips64EL)); } if (this->DynPLTRelRegion.Size) { if (this->DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) { printDynamicRelocHeader(ELF::SHT_RELA, "PLT", this->DynPLTRelRegion); for (const Elf_Rela &Rela : this->DynPLTRelRegion.template getAsArrayRef()) printDynamicReloc(Relocation(Rela, IsMips64EL)); } else { printDynamicRelocHeader(ELF::SHT_REL, "PLT", this->DynPLTRelRegion); for (const Elf_Rel &Rel : this->DynPLTRelRegion.template getAsArrayRef()) printDynamicReloc(Relocation(Rel, IsMips64EL)); } } } template void GNUELFDumper::printGNUVersionSectionProlog( const typename ELFT::Shdr &Sec, const Twine &Label, unsigned EntriesNum) { // Don't inline the SecName, because it might report a warning to stderr and // corrupt the output. StringRef SecName = this->getPrintableSectionName(Sec); OS << Label << " section '" << SecName << "' " << "contains " << EntriesNum << " entries:\n"; StringRef LinkedSecName = ""; if (Expected LinkedSecOrErr = this->Obj.getSection(Sec.sh_link)) LinkedSecName = this->getPrintableSectionName(**LinkedSecOrErr); else this->reportUniqueWarning("invalid section linked to " + this->describe(Sec) + ": " + toString(LinkedSecOrErr.takeError())); OS << " Addr: " << format_hex_no_prefix(Sec.sh_addr, 16) << " Offset: " << format_hex(Sec.sh_offset, 8) << " Link: " << Sec.sh_link << " (" << LinkedSecName << ")\n"; } template void GNUELFDumper::printVersionSymbolSection(const Elf_Shdr *Sec) { if (!Sec) return; printGNUVersionSectionProlog(*Sec, "Version symbols", Sec->sh_size / sizeof(Elf_Versym)); Expected> VerTableOrErr = this->getVersionTable(*Sec, /*SymTab=*/nullptr, /*StrTab=*/nullptr, /*SymTabSec=*/nullptr); if (!VerTableOrErr) { this->reportUniqueWarning(VerTableOrErr.takeError()); return; } SmallVector, 0> *VersionMap = nullptr; if (Expected, 0> *> MapOrErr = this->getVersionMap()) VersionMap = *MapOrErr; else this->reportUniqueWarning(MapOrErr.takeError()); ArrayRef VerTable = *VerTableOrErr; std::vector Versions; for (size_t I = 0, E = VerTable.size(); I < E; ++I) { unsigned Ndx = VerTable[I].vs_index; if (Ndx == VER_NDX_LOCAL || Ndx == VER_NDX_GLOBAL) { Versions.emplace_back(Ndx == VER_NDX_LOCAL ? 
"*local*" : "*global*"); continue; } if (!VersionMap) { Versions.emplace_back(""); continue; } bool IsDefault; Expected NameOrErr = this->Obj.getSymbolVersionByIndex( Ndx, IsDefault, *VersionMap, /*IsSymHidden=*/std::nullopt); if (!NameOrErr) { this->reportUniqueWarning("unable to get a version for entry " + Twine(I) + " of " + this->describe(*Sec) + ": " + toString(NameOrErr.takeError())); Versions.emplace_back(""); continue; } Versions.emplace_back(*NameOrErr); } // readelf prints 4 entries per line. uint64_t Entries = VerTable.size(); for (uint64_t VersymRow = 0; VersymRow < Entries; VersymRow += 4) { OS << " " << format_hex_no_prefix(VersymRow, 3) << ":"; for (uint64_t I = 0; (I < 4) && (I + VersymRow) < Entries; ++I) { unsigned Ndx = VerTable[VersymRow + I].vs_index; OS << format("%4x%c", Ndx & VERSYM_VERSION, Ndx & VERSYM_HIDDEN ? 'h' : ' '); OS << left_justify("(" + std::string(Versions[VersymRow + I]) + ")", 13); } OS << '\n'; } OS << '\n'; } static std::string versionFlagToString(unsigned Flags) { if (Flags == 0) return "none"; std::string Ret; auto AddFlag = [&Ret, &Flags](unsigned Flag, StringRef Name) { if (!(Flags & Flag)) return; if (!Ret.empty()) Ret += " | "; Ret += Name; Flags &= ~Flag; }; AddFlag(VER_FLG_BASE, "BASE"); AddFlag(VER_FLG_WEAK, "WEAK"); AddFlag(VER_FLG_INFO, "INFO"); AddFlag(~0, ""); return Ret; } template void GNUELFDumper::printVersionDefinitionSection(const Elf_Shdr *Sec) { if (!Sec) return; printGNUVersionSectionProlog(*Sec, "Version definition", Sec->sh_info); Expected> V = this->Obj.getVersionDefinitions(*Sec); if (!V) { this->reportUniqueWarning(V.takeError()); return; } for (const VerDef &Def : *V) { OS << format(" 0x%04x: Rev: %u Flags: %s Index: %u Cnt: %u Name: %s\n", Def.Offset, Def.Version, versionFlagToString(Def.Flags).c_str(), Def.Ndx, Def.Cnt, Def.Name.data()); unsigned I = 0; for (const VerdAux &Aux : Def.AuxV) OS << format(" 0x%04x: Parent %u: %s\n", Aux.Offset, ++I, Aux.Name.data()); } OS << '\n'; } template void GNUELFDumper::printVersionDependencySection(const Elf_Shdr *Sec) { if (!Sec) return; unsigned VerneedNum = Sec->sh_info; printGNUVersionSectionProlog(*Sec, "Version needs", VerneedNum); Expected> V = this->Obj.getVersionDependencies(*Sec, this->WarningHandler); if (!V) { this->reportUniqueWarning(V.takeError()); return; } for (const VerNeed &VN : *V) { OS << format(" 0x%04x: Version: %u File: %s Cnt: %u\n", VN.Offset, VN.Version, VN.File.data(), VN.Cnt); for (const VernAux &Aux : VN.AuxV) OS << format(" 0x%04x: Name: %s Flags: %s Version: %u\n", Aux.Offset, Aux.Name.data(), versionFlagToString(Aux.Flags).c_str(), Aux.Other); } OS << '\n'; } template void GNUELFDumper::printHashHistogramStats(size_t NBucket, size_t MaxChain, size_t TotalSyms, ArrayRef Count, bool IsGnu) const { size_t CumulativeNonZero = 0; OS << "Histogram for" << (IsGnu ? 
" `.gnu.hash'" : "") << " bucket list length (total of " << NBucket << " buckets)\n" << " Length Number % of total Coverage\n"; for (size_t I = 0; I < MaxChain; ++I) { CumulativeNonZero += Count[I] * I; OS << format("%7lu %-10lu (%5.1f%%) %5.1f%%\n", I, Count[I], (Count[I] * 100.0) / NBucket, (CumulativeNonZero * 100.0) / TotalSyms); } } template void GNUELFDumper::printCGProfile() { OS << "GNUStyle::printCGProfile not implemented\n"; } template void GNUELFDumper::printBBAddrMaps() { OS << "GNUStyle::printBBAddrMaps not implemented\n"; } static Expected> toULEB128Array(ArrayRef Data) { std::vector Ret; const uint8_t *Cur = Data.begin(); const uint8_t *End = Data.end(); while (Cur != End) { unsigned Size; const char *Err = nullptr; Ret.push_back(decodeULEB128(Cur, &Size, End, &Err)); if (Err) return createError(Err); Cur += Size; } return Ret; } template static Expected> decodeAddrsigSection(const ELFFile &Obj, const typename ELFT::Shdr &Sec) { Expected> ContentsOrErr = Obj.getSectionContents(Sec); if (!ContentsOrErr) return ContentsOrErr.takeError(); if (Expected> SymsOrErr = toULEB128Array(*ContentsOrErr)) return *SymsOrErr; else return createError("unable to decode " + describe(Obj, Sec) + ": " + toString(SymsOrErr.takeError())); } template void GNUELFDumper::printAddrsig() { if (!this->DotAddrsigSec) return; Expected> SymsOrErr = decodeAddrsigSection(this->Obj, *this->DotAddrsigSec); if (!SymsOrErr) { this->reportUniqueWarning(SymsOrErr.takeError()); return; } StringRef Name = this->getPrintableSectionName(*this->DotAddrsigSec); OS << "\nAddress-significant symbols section '" << Name << "'" << " contains " << SymsOrErr->size() << " entries:\n"; OS << " Num: Name\n"; Field Fields[2] = {0, 8}; size_t SymIndex = 0; for (uint64_t Sym : *SymsOrErr) { Fields[0].Str = to_string(format_decimal(++SymIndex, 6)) + ":"; Fields[1].Str = this->getStaticSymbolName(Sym); for (const Field &Entry : Fields) printField(Entry); OS << "\n"; } } template static std::string getGNUProperty(uint32_t Type, uint32_t DataSize, ArrayRef Data) { std::string str; raw_string_ostream OS(str); uint32_t PrData; auto DumpBit = [&](uint32_t Flag, StringRef Name) { if (PrData & Flag) { PrData &= ~Flag; OS << Name; if (PrData) OS << ", "; } }; switch (Type) { default: OS << format("", Type); return OS.str(); case GNU_PROPERTY_STACK_SIZE: { OS << "stack size: "; if (DataSize == sizeof(typename ELFT::uint)) OS << formatv("{0:x}", (uint64_t)(*(const typename ELFT::Addr *)Data.data())); else OS << format("", DataSize); return OS.str(); } case GNU_PROPERTY_NO_COPY_ON_PROTECTED: OS << "no copy on protected"; if (DataSize) OS << format(" ", DataSize); return OS.str(); case GNU_PROPERTY_AARCH64_FEATURE_1_AND: case GNU_PROPERTY_X86_FEATURE_1_AND: OS << ((Type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) ? "aarch64 feature: " : "x86 feature: "); if (DataSize != 4) { OS << format("", DataSize); return OS.str(); } PrData = support::endian::read32(Data.data()); if (PrData == 0) { OS << ""; return OS.str(); } if (Type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) { DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_BTI, "BTI"); DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_PAC, "PAC"); DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_GCS, "GCS"); } else { DumpBit(GNU_PROPERTY_X86_FEATURE_1_IBT, "IBT"); DumpBit(GNU_PROPERTY_X86_FEATURE_1_SHSTK, "SHSTK"); } if (PrData) OS << format("", PrData); return OS.str(); case GNU_PROPERTY_X86_FEATURE_2_NEEDED: case GNU_PROPERTY_X86_FEATURE_2_USED: OS << "x86 feature " << (Type == GNU_PROPERTY_X86_FEATURE_2_NEEDED ? 
"needed: " : "used: "); if (DataSize != 4) { OS << format("", DataSize); return OS.str(); } PrData = support::endian::read32(Data.data()); if (PrData == 0) { OS << ""; return OS.str(); } DumpBit(GNU_PROPERTY_X86_FEATURE_2_X86, "x86"); DumpBit(GNU_PROPERTY_X86_FEATURE_2_X87, "x87"); DumpBit(GNU_PROPERTY_X86_FEATURE_2_MMX, "MMX"); DumpBit(GNU_PROPERTY_X86_FEATURE_2_XMM, "XMM"); DumpBit(GNU_PROPERTY_X86_FEATURE_2_YMM, "YMM"); DumpBit(GNU_PROPERTY_X86_FEATURE_2_ZMM, "ZMM"); DumpBit(GNU_PROPERTY_X86_FEATURE_2_FXSR, "FXSR"); DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVE, "XSAVE"); DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT, "XSAVEOPT"); DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVEC, "XSAVEC"); if (PrData) OS << format("", PrData); return OS.str(); case GNU_PROPERTY_X86_ISA_1_NEEDED: case GNU_PROPERTY_X86_ISA_1_USED: OS << "x86 ISA " << (Type == GNU_PROPERTY_X86_ISA_1_NEEDED ? "needed: " : "used: "); if (DataSize != 4) { OS << format("", DataSize); return OS.str(); } PrData = support::endian::read32(Data.data()); if (PrData == 0) { OS << ""; return OS.str(); } DumpBit(GNU_PROPERTY_X86_ISA_1_BASELINE, "x86-64-baseline"); DumpBit(GNU_PROPERTY_X86_ISA_1_V2, "x86-64-v2"); DumpBit(GNU_PROPERTY_X86_ISA_1_V3, "x86-64-v3"); DumpBit(GNU_PROPERTY_X86_ISA_1_V4, "x86-64-v4"); if (PrData) OS << format("", PrData); return OS.str(); } } template static SmallVector getGNUPropertyList(ArrayRef Arr) { using Elf_Word = typename ELFT::Word; SmallVector Properties; while (Arr.size() >= 8) { uint32_t Type = *reinterpret_cast(Arr.data()); uint32_t DataSize = *reinterpret_cast(Arr.data() + 4); Arr = Arr.drop_front(8); // Take padding size into account if present. uint64_t PaddedSize = alignTo(DataSize, sizeof(typename ELFT::uint)); std::string str; raw_string_ostream OS(str); if (Arr.size() < PaddedSize) { OS << format("", Type, DataSize); Properties.push_back(OS.str()); break; } Properties.push_back( getGNUProperty(Type, DataSize, Arr.take_front(PaddedSize))); Arr = Arr.drop_front(PaddedSize); } if (!Arr.empty()) Properties.push_back(""); return Properties; } struct GNUAbiTag { std::string OSName; std::string ABI; bool IsValid; }; template static GNUAbiTag getGNUAbiTag(ArrayRef Desc) { typedef typename ELFT::Word Elf_Word; ArrayRef Words(reinterpret_cast(Desc.begin()), reinterpret_cast(Desc.end())); if (Words.size() < 4) return {"", "", /*IsValid=*/false}; static const char *OSNames[] = { "Linux", "Hurd", "Solaris", "FreeBSD", "NetBSD", "Syllable", "NaCl", }; StringRef OSName = "Unknown"; if (Words[0] < std::size(OSNames)) OSName = OSNames[Words[0]]; uint32_t Major = Words[1], Minor = Words[2], Patch = Words[3]; std::string str; raw_string_ostream ABI(str); ABI << Major << "." << Minor << "." << Patch; return {std::string(OSName), ABI.str(), /*IsValid=*/true}; } static std::string getGNUBuildId(ArrayRef Desc) { std::string str; raw_string_ostream OS(str); for (uint8_t B : Desc) OS << format_hex_no_prefix(B, 2); return OS.str(); } static StringRef getDescAsStringRef(ArrayRef Desc) { return StringRef(reinterpret_cast(Desc.data()), Desc.size()); } template static bool printGNUNote(raw_ostream &OS, uint32_t NoteType, ArrayRef Desc) { // Return true if we were able to pretty-print the note, false otherwise. 
switch (NoteType) { default: return false; case ELF::NT_GNU_ABI_TAG: { const GNUAbiTag &AbiTag = getGNUAbiTag(Desc); if (!AbiTag.IsValid) OS << " "; else OS << " OS: " << AbiTag.OSName << ", ABI: " << AbiTag.ABI; break; } case ELF::NT_GNU_BUILD_ID: { OS << " Build ID: " << getGNUBuildId(Desc); break; } case ELF::NT_GNU_GOLD_VERSION: OS << " Version: " << getDescAsStringRef(Desc); break; case ELF::NT_GNU_PROPERTY_TYPE_0: OS << " Properties:"; for (const std::string &Property : getGNUPropertyList(Desc)) OS << " " << Property << "\n"; break; } OS << '\n'; return true; } using AndroidNoteProperties = std::vector>; static AndroidNoteProperties getAndroidNoteProperties(uint32_t NoteType, ArrayRef Desc) { AndroidNoteProperties Props; switch (NoteType) { case ELF::NT_ANDROID_TYPE_MEMTAG: if (Desc.empty()) { Props.emplace_back("Invalid .note.android.memtag", ""); return Props; } switch (Desc[0] & NT_MEMTAG_LEVEL_MASK) { case NT_MEMTAG_LEVEL_NONE: Props.emplace_back("Tagging Mode", "NONE"); break; case NT_MEMTAG_LEVEL_ASYNC: Props.emplace_back("Tagging Mode", "ASYNC"); break; case NT_MEMTAG_LEVEL_SYNC: Props.emplace_back("Tagging Mode", "SYNC"); break; default: Props.emplace_back( "Tagging Mode", ("Unknown (" + Twine::utohexstr(Desc[0] & NT_MEMTAG_LEVEL_MASK) + ")") .str()); break; } Props.emplace_back("Heap", (Desc[0] & NT_MEMTAG_HEAP) ? "Enabled" : "Disabled"); Props.emplace_back("Stack", (Desc[0] & NT_MEMTAG_STACK) ? "Enabled" : "Disabled"); break; default: return Props; } return Props; } static bool printAndroidNote(raw_ostream &OS, uint32_t NoteType, ArrayRef Desc) { // Return true if we were able to pretty-print the note, false otherwise. AndroidNoteProperties Props = getAndroidNoteProperties(NoteType, Desc); if (Props.empty()) return false; for (const auto &KV : Props) OS << " " << KV.first << ": " << KV.second << '\n'; return true; } template static bool printAArch64Note(raw_ostream &OS, uint32_t NoteType, ArrayRef Desc) { if (NoteType != NT_ARM_TYPE_PAUTH_ABI_TAG) return false; OS << " AArch64 PAuth ABI tag: "; if (Desc.size() < 16) { OS << format("", Desc.size()); return false; } uint64_t Platform = support::endian::read64(Desc.data() + 0); uint64_t Version = support::endian::read64(Desc.data() + 8); OS << format("platform 0x%" PRIx64 ", version 0x%" PRIx64, Platform, Version); if (Desc.size() > 16) OS << ", additional info 0x" << toHex(ArrayRef(Desc.data() + 16, Desc.size() - 16)); return true; } template void GNUELFDumper::printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, const ArrayRef> Descriptors) { OS << "Memtag Dynamic Entries:\n"; if (DynamicEntries.empty()) OS << " < none found >\n"; for (const auto &DynamicEntryKV : DynamicEntries) OS << " " << DynamicEntryKV.first << ": " << DynamicEntryKV.second << "\n"; if (!AndroidNoteDesc.empty()) { OS << "Memtag Android Note:\n"; printAndroidNote(OS, ELF::NT_ANDROID_TYPE_MEMTAG, AndroidNoteDesc); } if (Descriptors.empty()) return; OS << "Memtag Global Descriptors:\n"; for (const auto &[Addr, BytesToTag] : Descriptors) { OS << " 0x" << utohexstr(Addr, /*LowerCase=*/true) << ": 0x" << utohexstr(BytesToTag, /*LowerCase=*/true) << "\n"; } } template static bool printLLVMOMPOFFLOADNote(raw_ostream &OS, uint32_t NoteType, ArrayRef Desc) { switch (NoteType) { default: return false; case ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION: OS << " Version: " << getDescAsStringRef(Desc); break; case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER: OS << " Producer: " << getDescAsStringRef(Desc); break; case 
ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION: OS << " Producer version: " << getDescAsStringRef(Desc); break; } OS << '\n'; return true; } const EnumEntry FreeBSDFeatureCtlFlags[] = { {"ASLR_DISABLE", NT_FREEBSD_FCTL_ASLR_DISABLE}, {"PROTMAX_DISABLE", NT_FREEBSD_FCTL_PROTMAX_DISABLE}, {"STKGAP_DISABLE", NT_FREEBSD_FCTL_STKGAP_DISABLE}, {"WXNEEDED", NT_FREEBSD_FCTL_WXNEEDED}, {"LA48", NT_FREEBSD_FCTL_LA48}, {"ASG_DISABLE", NT_FREEBSD_FCTL_ASG_DISABLE}, }; struct FreeBSDNote { std::string Type; std::string Value; }; template static std::optional getFreeBSDNote(uint32_t NoteType, ArrayRef Desc, bool IsCore) { if (IsCore) return std::nullopt; // No pretty-printing yet. switch (NoteType) { case ELF::NT_FREEBSD_ABI_TAG: if (Desc.size() != 4) return std::nullopt; return FreeBSDNote{ "ABI tag", utostr(support::endian::read32(Desc.data()))}; case ELF::NT_FREEBSD_ARCH_TAG: return FreeBSDNote{"Arch tag", toStringRef(Desc).str()}; case ELF::NT_FREEBSD_FEATURE_CTL: { if (Desc.size() != 4) return std::nullopt; unsigned Value = support::endian::read32(Desc.data()); std::string FlagsStr; raw_string_ostream OS(FlagsStr); printFlags(Value, ArrayRef(FreeBSDFeatureCtlFlags), OS); if (OS.str().empty()) OS << "0x" << utohexstr(Value); else OS << "(0x" << utohexstr(Value) << ")"; return FreeBSDNote{"Feature flags", OS.str()}; } default: return std::nullopt; } } struct AMDNote { std::string Type; std::string Value; }; template static AMDNote getAMDNote(uint32_t NoteType, ArrayRef Desc) { switch (NoteType) { default: return {"", ""}; case ELF::NT_AMD_HSA_CODE_OBJECT_VERSION: { struct CodeObjectVersion { support::aligned_ulittle32_t MajorVersion; support::aligned_ulittle32_t MinorVersion; }; if (Desc.size() != sizeof(CodeObjectVersion)) return {"AMD HSA Code Object Version", "Invalid AMD HSA Code Object Version"}; std::string VersionString; raw_string_ostream StrOS(VersionString); auto Version = reinterpret_cast(Desc.data()); StrOS << "[Major: " << Version->MajorVersion << ", Minor: " << Version->MinorVersion << "]"; return {"AMD HSA Code Object Version", VersionString}; } case ELF::NT_AMD_HSA_HSAIL: { struct HSAILProperties { support::aligned_ulittle32_t HSAILMajorVersion; support::aligned_ulittle32_t HSAILMinorVersion; uint8_t Profile; uint8_t MachineModel; uint8_t DefaultFloatRound; }; if (Desc.size() != sizeof(HSAILProperties)) return {"AMD HSA HSAIL Properties", "Invalid AMD HSA HSAIL Properties"}; auto Properties = reinterpret_cast(Desc.data()); std::string HSAILPropetiesString; raw_string_ostream StrOS(HSAILPropetiesString); StrOS << "[HSAIL Major: " << Properties->HSAILMajorVersion << ", HSAIL Minor: " << Properties->HSAILMinorVersion << ", Profile: " << uint32_t(Properties->Profile) << ", Machine Model: " << uint32_t(Properties->MachineModel) << ", Default Float Round: " << uint32_t(Properties->DefaultFloatRound) << "]"; return {"AMD HSA HSAIL Properties", HSAILPropetiesString}; } case ELF::NT_AMD_HSA_ISA_VERSION: { struct IsaVersion { support::aligned_ulittle16_t VendorNameSize; support::aligned_ulittle16_t ArchitectureNameSize; support::aligned_ulittle32_t Major; support::aligned_ulittle32_t Minor; support::aligned_ulittle32_t Stepping; }; if (Desc.size() < sizeof(IsaVersion)) return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"}; auto Isa = reinterpret_cast(Desc.data()); if (Desc.size() < sizeof(IsaVersion) + Isa->VendorNameSize + Isa->ArchitectureNameSize || Isa->VendorNameSize == 0 || Isa->ArchitectureNameSize == 0) return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"}; std::string 
IsaString; raw_string_ostream StrOS(IsaString); StrOS << "[Vendor: " << StringRef((const char*)Desc.data() + sizeof(IsaVersion), Isa->VendorNameSize - 1) << ", Architecture: " << StringRef((const char*)Desc.data() + sizeof(IsaVersion) + Isa->VendorNameSize, Isa->ArchitectureNameSize - 1) << ", Major: " << Isa->Major << ", Minor: " << Isa->Minor << ", Stepping: " << Isa->Stepping << "]"; return {"AMD HSA ISA Version", IsaString}; } case ELF::NT_AMD_HSA_METADATA: { if (Desc.size() == 0) return {"AMD HSA Metadata", ""}; return { "AMD HSA Metadata", std::string(reinterpret_cast(Desc.data()), Desc.size() - 1)}; } case ELF::NT_AMD_HSA_ISA_NAME: { if (Desc.size() == 0) return {"AMD HSA ISA Name", ""}; return { "AMD HSA ISA Name", std::string(reinterpret_cast(Desc.data()), Desc.size())}; } case ELF::NT_AMD_PAL_METADATA: { struct PALMetadata { support::aligned_ulittle32_t Key; support::aligned_ulittle32_t Value; }; if (Desc.size() % sizeof(PALMetadata) != 0) return {"AMD PAL Metadata", "Invalid AMD PAL Metadata"}; auto Isa = reinterpret_cast(Desc.data()); std::string MetadataString; raw_string_ostream StrOS(MetadataString); for (size_t I = 0, E = Desc.size() / sizeof(PALMetadata); I < E; ++I) { StrOS << "[" << Isa[I].Key << ": " << Isa[I].Value << "]"; } return {"AMD PAL Metadata", MetadataString}; } } } struct AMDGPUNote { std::string Type; std::string Value; }; template static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef Desc) { switch (NoteType) { default: return {"", ""}; case ELF::NT_AMDGPU_METADATA: { StringRef MsgPackString = StringRef(reinterpret_cast(Desc.data()), Desc.size()); msgpack::Document MsgPackDoc; if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false)) return {"", ""}; std::string MetadataString; // FIXME: Metadata Verifier only works with AMDHSA. // This is an ugly workaround to avoid the verifier for other MD // formats (e.g. amdpal) if (MsgPackString.contains("amdhsa.")) { AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); if (!Verifier.verify(MsgPackDoc.getRoot())) MetadataString = "Invalid AMDGPU Metadata\n"; } raw_string_ostream StrOS(MetadataString); if (MsgPackDoc.getRoot().isScalar()) { // TODO: passing a scalar root to toYAML() asserts: // (PolymorphicTraits::getKind(Val) != NodeKind::Scalar && // "plain scalar documents are not supported") // To avoid this crash we print the raw data instead. return {"", ""}; } MsgPackDoc.toYAML(StrOS); return {"AMDGPU Metadata", StrOS.str()}; } } } struct CoreFileMapping { uint64_t Start, End, Offset; StringRef Filename; }; struct CoreNote { uint64_t PageSize; std::vector Mappings; }; static Expected readCoreNote(DataExtractor Desc) { // Expected format of the NT_FILE note description: // 1. # of file mappings (call it N) // 2. Page size // 3. N (start, end, offset) triples // 4. N packed filenames (null delimited) // Each field is an Elf_Addr, except for filenames which are char* strings. 
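  // Illustrative layout (hypothetical values, not part of the original note):
  // a 64-bit little-endian core with two mappings would carry, in order,
  //   0x0000000000000002                - N, the number of mappings
  //   0x0000000000001000                - the page size
  //   start0, end0, offset0             - first (start, end, offset) triple
  //   start1, end1, offset1             - second triple
  //   "/usr/bin/app\0/lib/libc.so.7\0"  - the N packed, NUL-terminated names
  // The code below walks the triples with Desc.getAddress() and reads the
  // packed filenames through a second DataExtractor positioned just past the
  // last triple.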
  CoreNote Ret;
  const int Bytes = Desc.getAddressSize();

  if (!Desc.isValidOffsetForAddress(2))
    return createError("the note of size 0x" + Twine::utohexstr(Desc.size()) +
                       " is too short, expected at least 0x" +
                       Twine::utohexstr(Bytes * 2));
  if (Desc.getData().back() != 0)
    return createError("the note is not NUL terminated");

  uint64_t DescOffset = 0;
  uint64_t FileCount = Desc.getAddress(&DescOffset);
  Ret.PageSize = Desc.getAddress(&DescOffset);

  if (!Desc.isValidOffsetForAddress(3 * FileCount * Bytes))
    return createError("unable to read file mappings (found " +
                       Twine(FileCount) + "): the note of size 0x" +
                       Twine::utohexstr(Desc.size()) + " is too short");

  uint64_t FilenamesOffset = 0;
  DataExtractor Filenames(
      Desc.getData().drop_front(DescOffset + 3 * FileCount * Bytes),
      Desc.isLittleEndian(), Desc.getAddressSize());

  Ret.Mappings.resize(FileCount);
  size_t I = 0;
  for (CoreFileMapping &Mapping : Ret.Mappings) {
    ++I;
    if (!Filenames.isValidOffsetForDataOfSize(FilenamesOffset, 1))
      return createError(
          "unable to read the file name for the mapping with index " +
          Twine(I) + ": the note of size 0x" + Twine::utohexstr(Desc.size()) +
          " is truncated");
    Mapping.Start = Desc.getAddress(&DescOffset);
    Mapping.End = Desc.getAddress(&DescOffset);
    Mapping.Offset = Desc.getAddress(&DescOffset);
    Mapping.Filename = Filenames.getCStrRef(&FilenamesOffset);
  }

  return Ret;
}

template <class ELFT>
static void printCoreNote(raw_ostream &OS, const CoreNote &Note) {
  // Length of "0x<address>
" string. const int FieldWidth = ELFT::Is64Bits ? 18 : 10; OS << " Page size: " << format_decimal(Note.PageSize, 0) << '\n'; OS << " " << right_justify("Start", FieldWidth) << " " << right_justify("End", FieldWidth) << " " << right_justify("Page Offset", FieldWidth) << '\n'; for (const CoreFileMapping &Mapping : Note.Mappings) { OS << " " << format_hex(Mapping.Start, FieldWidth) << " " << format_hex(Mapping.End, FieldWidth) << " " << format_hex(Mapping.Offset, FieldWidth) << "\n " << Mapping.Filename << '\n'; } } const NoteType GenericNoteTypes[] = { {ELF::NT_VERSION, "NT_VERSION (version)"}, {ELF::NT_ARCH, "NT_ARCH (architecture)"}, {ELF::NT_GNU_BUILD_ATTRIBUTE_OPEN, "OPEN"}, {ELF::NT_GNU_BUILD_ATTRIBUTE_FUNC, "func"}, }; const NoteType GNUNoteTypes[] = { {ELF::NT_GNU_ABI_TAG, "NT_GNU_ABI_TAG (ABI version tag)"}, {ELF::NT_GNU_HWCAP, "NT_GNU_HWCAP (DSO-supplied software HWCAP info)"}, {ELF::NT_GNU_BUILD_ID, "NT_GNU_BUILD_ID (unique build ID bitstring)"}, {ELF::NT_GNU_GOLD_VERSION, "NT_GNU_GOLD_VERSION (gold version)"}, {ELF::NT_GNU_PROPERTY_TYPE_0, "NT_GNU_PROPERTY_TYPE_0 (property note)"}, }; const NoteType FreeBSDCoreNoteTypes[] = { {ELF::NT_FREEBSD_THRMISC, "NT_THRMISC (thrmisc structure)"}, {ELF::NT_FREEBSD_PROCSTAT_PROC, "NT_PROCSTAT_PROC (proc data)"}, {ELF::NT_FREEBSD_PROCSTAT_FILES, "NT_PROCSTAT_FILES (files data)"}, {ELF::NT_FREEBSD_PROCSTAT_VMMAP, "NT_PROCSTAT_VMMAP (vmmap data)"}, {ELF::NT_FREEBSD_PROCSTAT_GROUPS, "NT_PROCSTAT_GROUPS (groups data)"}, {ELF::NT_FREEBSD_PROCSTAT_UMASK, "NT_PROCSTAT_UMASK (umask data)"}, {ELF::NT_FREEBSD_PROCSTAT_RLIMIT, "NT_PROCSTAT_RLIMIT (rlimit data)"}, {ELF::NT_FREEBSD_PROCSTAT_OSREL, "NT_PROCSTAT_OSREL (osreldate data)"}, {ELF::NT_FREEBSD_PROCSTAT_PSSTRINGS, "NT_PROCSTAT_PSSTRINGS (ps_strings data)"}, {ELF::NT_FREEBSD_PROCSTAT_AUXV, "NT_PROCSTAT_AUXV (auxv data)"}, }; const NoteType FreeBSDNoteTypes[] = { {ELF::NT_FREEBSD_ABI_TAG, "NT_FREEBSD_ABI_TAG (ABI version tag)"}, {ELF::NT_FREEBSD_NOINIT_TAG, "NT_FREEBSD_NOINIT_TAG (no .init tag)"}, {ELF::NT_FREEBSD_ARCH_TAG, "NT_FREEBSD_ARCH_TAG (architecture tag)"}, {ELF::NT_FREEBSD_FEATURE_CTL, "NT_FREEBSD_FEATURE_CTL (FreeBSD feature control)"}, }; const NoteType NetBSDCoreNoteTypes[] = { {ELF::NT_NETBSDCORE_PROCINFO, "NT_NETBSDCORE_PROCINFO (procinfo structure)"}, {ELF::NT_NETBSDCORE_AUXV, "NT_NETBSDCORE_AUXV (ELF auxiliary vector data)"}, {ELF::NT_NETBSDCORE_LWPSTATUS, "PT_LWPSTATUS (ptrace_lwpstatus structure)"}, }; const NoteType OpenBSDCoreNoteTypes[] = { {ELF::NT_OPENBSD_PROCINFO, "NT_OPENBSD_PROCINFO (procinfo structure)"}, {ELF::NT_OPENBSD_AUXV, "NT_OPENBSD_AUXV (ELF auxiliary vector data)"}, {ELF::NT_OPENBSD_REGS, "NT_OPENBSD_REGS (regular registers)"}, {ELF::NT_OPENBSD_FPREGS, "NT_OPENBSD_FPREGS (floating point registers)"}, {ELF::NT_OPENBSD_WCOOKIE, "NT_OPENBSD_WCOOKIE (window cookie)"}, }; const NoteType AMDNoteTypes[] = { {ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, "NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)"}, {ELF::NT_AMD_HSA_HSAIL, "NT_AMD_HSA_HSAIL (AMD HSA HSAIL Properties)"}, {ELF::NT_AMD_HSA_ISA_VERSION, "NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version)"}, {ELF::NT_AMD_HSA_METADATA, "NT_AMD_HSA_METADATA (AMD HSA Metadata)"}, {ELF::NT_AMD_HSA_ISA_NAME, "NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)"}, {ELF::NT_AMD_PAL_METADATA, "NT_AMD_PAL_METADATA (AMD PAL Metadata)"}, }; const NoteType AMDGPUNoteTypes[] = { {ELF::NT_AMDGPU_METADATA, "NT_AMDGPU_METADATA (AMDGPU Metadata)"}, }; const NoteType LLVMOMPOFFLOADNoteTypes[] = { {ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION, 
"NT_LLVM_OPENMP_OFFLOAD_VERSION (image format version)"}, {ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER, "NT_LLVM_OPENMP_OFFLOAD_PRODUCER (producing toolchain)"}, {ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION, "NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION (producing toolchain version)"}, }; const NoteType AndroidNoteTypes[] = { {ELF::NT_ANDROID_TYPE_IDENT, "NT_ANDROID_TYPE_IDENT"}, {ELF::NT_ANDROID_TYPE_KUSER, "NT_ANDROID_TYPE_KUSER"}, {ELF::NT_ANDROID_TYPE_MEMTAG, "NT_ANDROID_TYPE_MEMTAG (Android memory tagging information)"}, }; const NoteType ARMNoteTypes[] = { {ELF::NT_ARM_TYPE_PAUTH_ABI_TAG, "NT_ARM_TYPE_PAUTH_ABI_TAG"}, }; const NoteType CoreNoteTypes[] = { {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"}, {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"}, {ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"}, {ELF::NT_TASKSTRUCT, "NT_TASKSTRUCT (task structure)"}, {ELF::NT_AUXV, "NT_AUXV (auxiliary vector)"}, {ELF::NT_PSTATUS, "NT_PSTATUS (pstatus structure)"}, {ELF::NT_FPREGS, "NT_FPREGS (floating point registers)"}, {ELF::NT_PSINFO, "NT_PSINFO (psinfo structure)"}, {ELF::NT_LWPSTATUS, "NT_LWPSTATUS (lwpstatus_t structure)"}, {ELF::NT_LWPSINFO, "NT_LWPSINFO (lwpsinfo_t structure)"}, {ELF::NT_WIN32PSTATUS, "NT_WIN32PSTATUS (win32_pstatus structure)"}, {ELF::NT_PPC_VMX, "NT_PPC_VMX (ppc Altivec registers)"}, {ELF::NT_PPC_VSX, "NT_PPC_VSX (ppc VSX registers)"}, {ELF::NT_PPC_TAR, "NT_PPC_TAR (ppc TAR register)"}, {ELF::NT_PPC_PPR, "NT_PPC_PPR (ppc PPR register)"}, {ELF::NT_PPC_DSCR, "NT_PPC_DSCR (ppc DSCR register)"}, {ELF::NT_PPC_EBB, "NT_PPC_EBB (ppc EBB registers)"}, {ELF::NT_PPC_PMU, "NT_PPC_PMU (ppc PMU registers)"}, {ELF::NT_PPC_TM_CGPR, "NT_PPC_TM_CGPR (ppc checkpointed GPR registers)"}, {ELF::NT_PPC_TM_CFPR, "NT_PPC_TM_CFPR (ppc checkpointed floating point registers)"}, {ELF::NT_PPC_TM_CVMX, "NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)"}, {ELF::NT_PPC_TM_CVSX, "NT_PPC_TM_CVSX (ppc checkpointed VSX registers)"}, {ELF::NT_PPC_TM_SPR, "NT_PPC_TM_SPR (ppc TM special purpose registers)"}, {ELF::NT_PPC_TM_CTAR, "NT_PPC_TM_CTAR (ppc checkpointed TAR register)"}, {ELF::NT_PPC_TM_CPPR, "NT_PPC_TM_CPPR (ppc checkpointed PPR register)"}, {ELF::NT_PPC_TM_CDSCR, "NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)"}, {ELF::NT_386_TLS, "NT_386_TLS (x86 TLS information)"}, {ELF::NT_386_IOPERM, "NT_386_IOPERM (x86 I/O permissions)"}, {ELF::NT_X86_XSTATE, "NT_X86_XSTATE (x86 XSAVE extended state)"}, {ELF::NT_S390_HIGH_GPRS, "NT_S390_HIGH_GPRS (s390 upper register halves)"}, {ELF::NT_S390_TIMER, "NT_S390_TIMER (s390 timer register)"}, {ELF::NT_S390_TODCMP, "NT_S390_TODCMP (s390 TOD comparator register)"}, {ELF::NT_S390_TODPREG, "NT_S390_TODPREG (s390 TOD programmable register)"}, {ELF::NT_S390_CTRS, "NT_S390_CTRS (s390 control registers)"}, {ELF::NT_S390_PREFIX, "NT_S390_PREFIX (s390 prefix register)"}, {ELF::NT_S390_LAST_BREAK, "NT_S390_LAST_BREAK (s390 last breaking event address)"}, {ELF::NT_S390_SYSTEM_CALL, "NT_S390_SYSTEM_CALL (s390 system call restart data)"}, {ELF::NT_S390_TDB, "NT_S390_TDB (s390 transaction diagnostic block)"}, {ELF::NT_S390_VXRS_LOW, "NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)"}, {ELF::NT_S390_VXRS_HIGH, "NT_S390_VXRS_HIGH (s390 vector registers 16-31)"}, {ELF::NT_S390_GS_CB, "NT_S390_GS_CB (s390 guarded-storage registers)"}, {ELF::NT_S390_GS_BC, "NT_S390_GS_BC (s390 guarded-storage broadcast control)"}, {ELF::NT_ARM_VFP, "NT_ARM_VFP (arm VFP registers)"}, {ELF::NT_ARM_TLS, "NT_ARM_TLS (AArch TLS registers)"}, {ELF::NT_ARM_HW_BREAK, 
"NT_ARM_HW_BREAK (AArch hardware breakpoint registers)"}, {ELF::NT_ARM_HW_WATCH, "NT_ARM_HW_WATCH (AArch hardware watchpoint registers)"}, {ELF::NT_ARM_SVE, "NT_ARM_SVE (AArch64 SVE registers)"}, {ELF::NT_ARM_PAC_MASK, "NT_ARM_PAC_MASK (AArch64 Pointer Authentication code masks)"}, {ELF::NT_ARM_TAGGED_ADDR_CTRL, "NT_ARM_TAGGED_ADDR_CTRL (AArch64 Tagged Address Control)"}, {ELF::NT_ARM_SSVE, "NT_ARM_SSVE (AArch64 Streaming SVE registers)"}, {ELF::NT_ARM_ZA, "NT_ARM_ZA (AArch64 SME ZA registers)"}, {ELF::NT_ARM_ZT, "NT_ARM_ZT (AArch64 SME ZT registers)"}, {ELF::NT_FILE, "NT_FILE (mapped files)"}, {ELF::NT_PRXFPREG, "NT_PRXFPREG (user_xfpregs structure)"}, {ELF::NT_SIGINFO, "NT_SIGINFO (siginfo_t data)"}, }; template StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) { uint32_t Type = Note.getType(); auto FindNote = [&](ArrayRef V) -> StringRef { for (const NoteType &N : V) if (N.ID == Type) return N.Name; return ""; }; StringRef Name = Note.getName(); if (Name == "GNU") return FindNote(GNUNoteTypes); if (Name == "FreeBSD") { if (ELFType == ELF::ET_CORE) { // FreeBSD also places the generic core notes in the FreeBSD namespace. StringRef Result = FindNote(FreeBSDCoreNoteTypes); if (!Result.empty()) return Result; return FindNote(CoreNoteTypes); } else { return FindNote(FreeBSDNoteTypes); } } if (ELFType == ELF::ET_CORE && Name.starts_with("NetBSD-CORE")) { StringRef Result = FindNote(NetBSDCoreNoteTypes); if (!Result.empty()) return Result; return FindNote(CoreNoteTypes); } if (ELFType == ELF::ET_CORE && Name.starts_with("OpenBSD")) { // OpenBSD also places the generic core notes in the OpenBSD namespace. StringRef Result = FindNote(OpenBSDCoreNoteTypes); if (!Result.empty()) return Result; return FindNote(CoreNoteTypes); } if (Name == "AMD") return FindNote(AMDNoteTypes); if (Name == "AMDGPU") return FindNote(AMDGPUNoteTypes); if (Name == "LLVMOMPOFFLOAD") return FindNote(LLVMOMPOFFLOADNoteTypes); if (Name == "Android") return FindNote(AndroidNoteTypes); if (Name == "ARM") return FindNote(ARMNoteTypes); if (ELFType == ELF::ET_CORE) return FindNote(CoreNoteTypes); return FindNote(GenericNoteTypes); } template static void processNotesHelper( const ELFDumper &Dumper, llvm::function_ref, typename ELFT::Off, typename ELFT::Addr, size_t)> StartNotesFn, llvm::function_ref ProcessNoteFn, llvm::function_ref FinishNotesFn) { const ELFFile &Obj = Dumper.getElfObject().getELFFile(); bool IsCoreFile = Obj.getHeader().e_type == ELF::ET_CORE; ArrayRef Sections = cantFail(Obj.sections()); if (!IsCoreFile && !Sections.empty()) { for (const typename ELFT::Shdr &S : Sections) { if (S.sh_type != SHT_NOTE) continue; StartNotesFn(expectedToStdOptional(Obj.getSectionName(S)), S.sh_offset, S.sh_size, S.sh_addralign); Error Err = Error::success(); size_t I = 0; for (const typename ELFT::Note Note : Obj.notes(S, Err)) { if (Error E = ProcessNoteFn(Note, IsCoreFile)) Dumper.reportUniqueWarning( "unable to read note with index " + Twine(I) + " from the " + describe(Obj, S) + ": " + toString(std::move(E))); ++I; } if (Err) Dumper.reportUniqueWarning("unable to read notes from the " + describe(Obj, S) + ": " + toString(std::move(Err))); FinishNotesFn(); } return; } Expected> PhdrsOrErr = Obj.program_headers(); if (!PhdrsOrErr) { Dumper.reportUniqueWarning( "unable to read program headers to locate the PT_NOTE segment: " + toString(PhdrsOrErr.takeError())); return; } for (size_t I = 0, E = (*PhdrsOrErr).size(); I != E; ++I) { const typename ELFT::Phdr &P = (*PhdrsOrErr)[I]; if (P.p_type != 
PT_NOTE) continue; StartNotesFn(/*SecName=*/std::nullopt, P.p_offset, P.p_filesz, P.p_align); Error Err = Error::success(); size_t Index = 0; for (const typename ELFT::Note Note : Obj.notes(P, Err)) { if (Error E = ProcessNoteFn(Note, IsCoreFile)) Dumper.reportUniqueWarning("unable to read note with index " + Twine(Index) + " from the PT_NOTE segment with index " + Twine(I) + ": " + toString(std::move(E))); ++Index; } if (Err) Dumper.reportUniqueWarning( "unable to read notes from the PT_NOTE segment with index " + Twine(I) + ": " + toString(std::move(Err))); FinishNotesFn(); } } template void GNUELFDumper::printNotes() { size_t Align = 0; bool IsFirstHeader = true; auto PrintHeader = [&](std::optional SecName, const typename ELFT::Off Offset, const typename ELFT::Addr Size, size_t Al) { Align = std::max(Al, 4); // Print a newline between notes sections to match GNU readelf. if (!IsFirstHeader) { OS << '\n'; } else { IsFirstHeader = false; } OS << "Displaying notes found "; if (SecName) OS << "in: " << *SecName << "\n"; else OS << "at file offset " << format_hex(Offset, 10) << " with length " << format_hex(Size, 10) << ":\n"; OS << " Owner Data size \tDescription\n"; }; auto ProcessNote = [&](const Elf_Note &Note, bool IsCore) -> Error { StringRef Name = Note.getName(); ArrayRef Descriptor = Note.getDesc(Align); Elf_Word Type = Note.getType(); // Print the note owner/type. OS << " " << left_justify(Name, 20) << ' ' << format_hex(Descriptor.size(), 10) << '\t'; StringRef NoteType = getNoteTypeName(Note, this->Obj.getHeader().e_type); if (!NoteType.empty()) OS << NoteType << '\n'; else OS << "Unknown note type: (" << format_hex(Type, 10) << ")\n"; // Print the description, or fallback to printing raw bytes for unknown // owners/if we fail to pretty-print the contents. 
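    // Each owner-specific helper tried below either pretty-prints the payload
    // and lets us return early, or declines (returns false or an empty
    // result), in which case control falls through to the generic
    // "description data" hex dump at the end of this lambda. The NT_FILE
    // core-note path is the one exception: a malformed descriptor is reported
    // as an error instead.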
if (Name == "GNU") { if (printGNUNote(OS, Type, Descriptor)) return Error::success(); } else if (Name == "FreeBSD") { if (std::optional N = getFreeBSDNote(Type, Descriptor, IsCore)) { OS << " " << N->Type << ": " << N->Value << '\n'; return Error::success(); } } else if (Name == "AMD") { const AMDNote N = getAMDNote(Type, Descriptor); if (!N.Type.empty()) { OS << " " << N.Type << ":\n " << N.Value << '\n'; return Error::success(); } } else if (Name == "AMDGPU") { const AMDGPUNote N = getAMDGPUNote(Type, Descriptor); if (!N.Type.empty()) { OS << " " << N.Type << ":\n " << N.Value << '\n'; return Error::success(); } } else if (Name == "LLVMOMPOFFLOAD") { if (printLLVMOMPOFFLOADNote(OS, Type, Descriptor)) return Error::success(); } else if (Name == "CORE") { if (Type == ELF::NT_FILE) { DataExtractor DescExtractor( Descriptor, ELFT::TargetEndianness == llvm::endianness::little, sizeof(Elf_Addr)); if (Expected NoteOrErr = readCoreNote(DescExtractor)) { printCoreNote(OS, *NoteOrErr); return Error::success(); } else { return NoteOrErr.takeError(); } } } else if (Name == "Android") { if (printAndroidNote(OS, Type, Descriptor)) return Error::success(); } else if (Name == "ARM") { if (printAArch64Note(OS, Type, Descriptor)) return Error::success(); } if (!Descriptor.empty()) { OS << " description data:"; for (uint8_t B : Descriptor) OS << " " << format("%02x", B); OS << '\n'; } return Error::success(); }; processNotesHelper(*this, /*StartNotesFn=*/PrintHeader, /*ProcessNoteFn=*/ProcessNote, /*FinishNotesFn=*/[]() {}); } template ArrayRef ELFDumper::getMemtagGlobalsSectionContents(uint64_t ExpectedAddr) { for (const typename ELFT::Shdr &Sec : cantFail(Obj.sections())) { if (Sec.sh_type != SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC) continue; if (Sec.sh_addr != ExpectedAddr) { reportUniqueWarning( "SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC section was unexpectedly at 0x" + Twine::utohexstr(Sec.sh_addr) + ", when DT_AARCH64_MEMTAG_GLOBALS says it should be at 0x" + Twine::utohexstr(ExpectedAddr)); return ArrayRef(); } Expected> Contents = Obj.getSectionContents(Sec); if (auto E = Contents.takeError()) { reportUniqueWarning( "couldn't get SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC section contents: " + toString(std::move(E))); return ArrayRef(); } return Contents.get(); } return ArrayRef(); } // Reserve the lower three bits of the first byte of the step distance when // encoding the memtag descriptors. Found to be the best overall size tradeoff // when compiling Android T with full MTE globals enabled. 
constexpr uint64_t MemtagStepVarintReservedBits = 3; constexpr uint64_t MemtagGranuleSize = 16; template void ELFDumper::printMemtag() { if (Obj.getHeader().e_machine != EM_AARCH64) return; std::vector> DynamicEntries; uint64_t MemtagGlobalsSz = 0; uint64_t MemtagGlobals = 0; for (const typename ELFT::Dyn &Entry : dynamic_table()) { uintX_t Tag = Entry.getTag(); switch (Tag) { case DT_AARCH64_MEMTAG_GLOBALSSZ: MemtagGlobalsSz = Entry.getVal(); DynamicEntries.emplace_back(Obj.getDynamicTagAsString(Tag), getDynamicEntry(Tag, Entry.getVal())); break; case DT_AARCH64_MEMTAG_GLOBALS: MemtagGlobals = Entry.getVal(); DynamicEntries.emplace_back(Obj.getDynamicTagAsString(Tag), getDynamicEntry(Tag, Entry.getVal())); break; case DT_AARCH64_MEMTAG_MODE: case DT_AARCH64_MEMTAG_HEAP: case DT_AARCH64_MEMTAG_STACK: DynamicEntries.emplace_back(Obj.getDynamicTagAsString(Tag), getDynamicEntry(Tag, Entry.getVal())); break; } } ArrayRef AndroidNoteDesc; auto FindAndroidNote = [&](const Elf_Note &Note, bool IsCore) -> Error { if (Note.getName() == "Android" && Note.getType() == ELF::NT_ANDROID_TYPE_MEMTAG) AndroidNoteDesc = Note.getDesc(4); return Error::success(); }; processNotesHelper( *this, /*StartNotesFn=*/ [](std::optional, const typename ELFT::Off, const typename ELFT::Addr, size_t) {}, /*ProcessNoteFn=*/FindAndroidNote, /*FinishNotesFn=*/[]() {}); ArrayRef Contents = getMemtagGlobalsSectionContents(MemtagGlobals); if (Contents.size() != MemtagGlobalsSz) { reportUniqueWarning( "mismatch between DT_AARCH64_MEMTAG_GLOBALSSZ (0x" + Twine::utohexstr(MemtagGlobalsSz) + ") and SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC section size (0x" + Twine::utohexstr(Contents.size()) + ")"); Contents = ArrayRef(); } std::vector> GlobalDescriptors; uint64_t Address = 0; // See the AArch64 MemtagABI document for a description of encoding scheme: // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#83encoding-of-sht_aarch64_memtag_globals_dynamic for (size_t I = 0; I < Contents.size();) { const char *Error = nullptr; unsigned DecodedBytes = 0; uint64_t Value = decodeULEB128(Contents.data() + I, &DecodedBytes, Contents.end(), &Error); I += DecodedBytes; if (Error) { reportUniqueWarning( "error decoding distance uleb, " + Twine(DecodedBytes) + " byte(s) into SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC: " + Twine(Error)); GlobalDescriptors.clear(); break; } uint64_t Distance = Value >> MemtagStepVarintReservedBits; uint64_t GranulesToTag = Value & ((1 << MemtagStepVarintReservedBits) - 1); if (GranulesToTag == 0) { GranulesToTag = decodeULEB128(Contents.data() + I, &DecodedBytes, Contents.end(), &Error) + 1; I += DecodedBytes; if (Error) { reportUniqueWarning( "error decoding size-only uleb, " + Twine(DecodedBytes) + " byte(s) into SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC: " + Twine(Error)); GlobalDescriptors.clear(); break; } } Address += Distance * MemtagGranuleSize; GlobalDescriptors.emplace_back(Address, GranulesToTag * MemtagGranuleSize); Address += GranulesToTag * MemtagGranuleSize; } printMemtag(DynamicEntries, AndroidNoteDesc, GlobalDescriptors); } template void GNUELFDumper::printELFLinkerOptions() { OS << "printELFLinkerOptions not implemented!\n"; } template void ELFDumper::printDependentLibsHelper( function_ref OnSectionStart, function_ref OnLibEntry) { auto Warn = [this](unsigned SecNdx, StringRef Msg) { this->reportUniqueWarning("SHT_LLVM_DEPENDENT_LIBRARIES section at index " + Twine(SecNdx) + " is broken: " + Msg); }; unsigned I = -1; for (const Elf_Shdr &Shdr : cantFail(Obj.sections())) { ++I; if 
(Shdr.sh_type != ELF::SHT_LLVM_DEPENDENT_LIBRARIES) continue; OnSectionStart(Shdr); Expected> ContentsOrErr = Obj.getSectionContents(Shdr); if (!ContentsOrErr) { Warn(I, toString(ContentsOrErr.takeError())); continue; } ArrayRef Contents = *ContentsOrErr; if (!Contents.empty() && Contents.back() != 0) { Warn(I, "the content is not null-terminated"); continue; } for (const uint8_t *I = Contents.begin(), *E = Contents.end(); I < E;) { StringRef Lib((const char *)I); OnLibEntry(Lib, I - Contents.begin()); I += Lib.size() + 1; } } } template void ELFDumper::forEachRelocationDo( const Elf_Shdr &Sec, bool RawRelr, llvm::function_ref &, unsigned, const Elf_Shdr &, const Elf_Shdr *)> RelRelaFn, llvm::function_ref RelrFn) { auto Warn = [&](Error &&E, const Twine &Prefix = "unable to read relocations from") { this->reportUniqueWarning(Prefix + " " + describe(Sec) + ": " + toString(std::move(E))); }; // SHT_RELR/SHT_ANDROID_RELR/SHT_AARCH64_AUTH_RELR sections do not have an // associated symbol table. For them we should not treat the value of the // sh_link field as an index of a symbol table. const Elf_Shdr *SymTab; if (Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_RELR && !(Obj.getHeader().e_machine == EM_AARCH64 && Sec.sh_type == ELF::SHT_AARCH64_AUTH_RELR)) { Expected SymTabOrErr = Obj.getSection(Sec.sh_link); if (!SymTabOrErr) { Warn(SymTabOrErr.takeError(), "unable to locate a symbol table for"); return; } SymTab = *SymTabOrErr; } unsigned RelNdx = 0; const bool IsMips64EL = this->Obj.isMips64EL(); switch (Sec.sh_type) { case ELF::SHT_REL: if (Expected RangeOrErr = Obj.rels(Sec)) { for (const Elf_Rel &R : *RangeOrErr) RelRelaFn(Relocation(R, IsMips64EL), RelNdx++, Sec, SymTab); } else { Warn(RangeOrErr.takeError()); } break; case ELF::SHT_RELA: if (Expected RangeOrErr = Obj.relas(Sec)) { for (const Elf_Rela &R : *RangeOrErr) RelRelaFn(Relocation(R, IsMips64EL), RelNdx++, Sec, SymTab); } else { Warn(RangeOrErr.takeError()); } break; case ELF::SHT_AARCH64_AUTH_RELR: if (Obj.getHeader().e_machine != EM_AARCH64) break; [[fallthrough]]; case ELF::SHT_RELR: case ELF::SHT_ANDROID_RELR: { Expected RangeOrErr = Obj.relrs(Sec); if (!RangeOrErr) { Warn(RangeOrErr.takeError()); break; } if (RawRelr) { for (const Elf_Relr &R : *RangeOrErr) RelrFn(R); break; } for (const Elf_Rel &R : Obj.decode_relrs(*RangeOrErr)) RelRelaFn(Relocation(R, IsMips64EL), RelNdx++, Sec, /*SymTab=*/nullptr); break; } case ELF::SHT_ANDROID_REL: case ELF::SHT_ANDROID_RELA: if (Expected> RelasOrErr = Obj.android_relas(Sec)) { for (const Elf_Rela &R : *RelasOrErr) RelRelaFn(Relocation(R, IsMips64EL), RelNdx++, Sec, SymTab); } else { Warn(RelasOrErr.takeError()); } break; } } template StringRef ELFDumper::getPrintableSectionName(const Elf_Shdr &Sec) const { StringRef Name = ""; if (Expected SecNameOrErr = Obj.getSectionName(Sec, this->WarningHandler)) Name = *SecNameOrErr; else this->reportUniqueWarning("unable to get the name of " + describe(Sec) + ": " + toString(SecNameOrErr.takeError())); return Name; } template void GNUELFDumper::printDependentLibs() { bool SectionStarted = false; struct NameOffset { StringRef Name; uint64_t Offset; }; std::vector SecEntries; NameOffset Current; auto PrintSection = [&]() { OS << "Dependent libraries section " << Current.Name << " at offset " << format_hex(Current.Offset, 1) << " contains " << SecEntries.size() << " entries:\n"; for (NameOffset Entry : SecEntries) OS << " [" << format("%6" PRIx64, Entry.Offset) << "] " << Entry.Name << "\n"; OS << "\n"; SecEntries.clear(); }; auto 
OnSectionStart = [&](const Elf_Shdr &Shdr) { if (SectionStarted) PrintSection(); SectionStarted = true; Current.Offset = Shdr.sh_offset; Current.Name = this->getPrintableSectionName(Shdr); }; auto OnLibEntry = [&](StringRef Lib, uint64_t Offset) { SecEntries.push_back(NameOffset{Lib, Offset}); }; this->printDependentLibsHelper(OnSectionStart, OnLibEntry); if (SectionStarted) PrintSection(); } template SmallVector ELFDumper::getSymbolIndexesForFunctionAddress( uint64_t SymValue, std::optional FunctionSec) { SmallVector SymbolIndexes; if (!this->AddressToIndexMap) { // Populate the address to index map upon the first invocation of this // function. this->AddressToIndexMap.emplace(); if (this->DotSymtabSec) { if (Expected SymsOrError = Obj.symbols(this->DotSymtabSec)) { uint32_t Index = (uint32_t)-1; for (const Elf_Sym &Sym : *SymsOrError) { ++Index; if (Sym.st_shndx == ELF::SHN_UNDEF || Sym.getType() != ELF::STT_FUNC) continue; Expected SymAddrOrErr = ObjF.toSymbolRef(this->DotSymtabSec, Index).getAddress(); if (!SymAddrOrErr) { std::string Name = this->getStaticSymbolName(Index); reportUniqueWarning("unable to get address of symbol '" + Name + "': " + toString(SymAddrOrErr.takeError())); return SymbolIndexes; } (*this->AddressToIndexMap)[*SymAddrOrErr].push_back(Index); } } else { reportUniqueWarning("unable to read the symbol table: " + toString(SymsOrError.takeError())); } } } auto Symbols = this->AddressToIndexMap->find(SymValue); if (Symbols == this->AddressToIndexMap->end()) return SymbolIndexes; for (uint32_t Index : Symbols->second) { // Check if the symbol is in the right section. FunctionSec == None // means "any section". if (FunctionSec) { const Elf_Sym &Sym = *cantFail(Obj.getSymbol(this->DotSymtabSec, Index)); if (Expected SecOrErr = Obj.getSection(Sym, this->DotSymtabSec, this->getShndxTable(this->DotSymtabSec))) { if (*FunctionSec != *SecOrErr) continue; } else { std::string Name = this->getStaticSymbolName(Index); // Note: it is impossible to trigger this error currently, it is // untested. reportUniqueWarning("unable to get section of symbol '" + Name + "': " + toString(SecOrErr.takeError())); return SymbolIndexes; } } SymbolIndexes.push_back(Index); } return SymbolIndexes; } template bool ELFDumper::printFunctionStackSize( uint64_t SymValue, std::optional FunctionSec, const Elf_Shdr &StackSizeSec, DataExtractor Data, uint64_t *Offset) { SmallVector FuncSymIndexes = this->getSymbolIndexesForFunctionAddress(SymValue, FunctionSec); if (FuncSymIndexes.empty()) reportUniqueWarning( "could not identify function symbol for stack size entry in " + describe(StackSizeSec)); // Extract the size. The expectation is that Offset is pointing to the right // place, i.e. past the function address. 
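  // The stack size itself is stored as a ULEB128, so small frames cost a
  // single byte in .stack_sizes. For example (hypothetical entry), the bytes
  // 0xd0 0x01 decode to (0xd0 & 0x7f) | (0x01 << 7) = 208, i.e. a 208-byte
  // frame.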
Error Err = Error::success(); uint64_t StackSize = Data.getULEB128(Offset, &Err); if (Err) { reportUniqueWarning("could not extract a valid stack size from " + describe(StackSizeSec) + ": " + toString(std::move(Err))); return false; } if (FuncSymIndexes.empty()) { printStackSizeEntry(StackSize, {"?"}); } else { SmallVector FuncSymNames; for (uint32_t Index : FuncSymIndexes) FuncSymNames.push_back(this->getStaticSymbolName(Index)); printStackSizeEntry(StackSize, FuncSymNames); } return true; } template void GNUELFDumper::printStackSizeEntry(uint64_t Size, ArrayRef FuncNames) { OS.PadToColumn(2); OS << format_decimal(Size, 11); OS.PadToColumn(18); OS << join(FuncNames.begin(), FuncNames.end(), ", ") << "\n"; } template void ELFDumper::printStackSize(const Relocation &R, const Elf_Shdr &RelocSec, unsigned Ndx, const Elf_Shdr *SymTab, const Elf_Shdr *FunctionSec, const Elf_Shdr &StackSizeSec, const RelocationResolver &Resolver, DataExtractor Data) { // This function ignores potentially erroneous input, unless it is directly // related to stack size reporting. const Elf_Sym *Sym = nullptr; Expected> TargetOrErr = this->getRelocationTarget(R, SymTab); if (!TargetOrErr) reportUniqueWarning("unable to get the target of relocation with index " + Twine(Ndx) + " in " + describe(RelocSec) + ": " + toString(TargetOrErr.takeError())); else Sym = TargetOrErr->Sym; uint64_t RelocSymValue = 0; if (Sym) { Expected SectionOrErr = this->Obj.getSection(*Sym, SymTab, this->getShndxTable(SymTab)); if (!SectionOrErr) { reportUniqueWarning( "cannot identify the section for relocation symbol '" + (*TargetOrErr).Name + "': " + toString(SectionOrErr.takeError())); } else if (*SectionOrErr != FunctionSec) { reportUniqueWarning("relocation symbol '" + (*TargetOrErr).Name + "' is not in the expected section"); // Pretend that the symbol is in the correct section and report its // stack size anyway. FunctionSec = *SectionOrErr; } RelocSymValue = Sym->st_value; } uint64_t Offset = R.Offset; if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) { reportUniqueWarning("found invalid relocation offset (0x" + Twine::utohexstr(Offset) + ") into " + describe(StackSizeSec) + " while trying to extract a stack size entry"); return; } uint64_t SymValue = Resolver(R.Type, Offset, RelocSymValue, Data.getAddress(&Offset), R.Addend.value_or(0)); this->printFunctionStackSize(SymValue, FunctionSec, StackSizeSec, Data, &Offset); } template void ELFDumper::printNonRelocatableStackSizes( std::function PrintHeader) { // This function ignores potentially erroneous input, unless it is directly // related to stack size reporting. for (const Elf_Shdr &Sec : cantFail(Obj.sections())) { if (this->getPrintableSectionName(Sec) != ".stack_sizes") continue; PrintHeader(); ArrayRef Contents = unwrapOrError(this->FileName, Obj.getSectionContents(Sec)); DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr)); uint64_t Offset = 0; while (Offset < Contents.size()) { // The function address is followed by a ULEB representing the stack // size. Check for an extra byte before we try to process the entry. 
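      // Each .stack_sizes entry is therefore at least sizeof(Elf_Addr) + 1
      // bytes long: a target-sized function address followed by at least one
      // ULEB128 byte, which is exactly what the bounds check below requires
      // before decoding.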
if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) { reportUniqueWarning( describe(Sec) + " ended while trying to extract a stack size entry"); break; } uint64_t SymValue = Data.getAddress(&Offset); if (!printFunctionStackSize(SymValue, /*FunctionSec=*/std::nullopt, Sec, Data, &Offset)) break; } } } template void ELFDumper::printRelocatableStackSizes( std::function PrintHeader) { // Build a map between stack size sections and their corresponding relocation // sections. auto IsMatch = [&](const Elf_Shdr &Sec) -> bool { StringRef SectionName; if (Expected NameOrErr = Obj.getSectionName(Sec)) SectionName = *NameOrErr; else consumeError(NameOrErr.takeError()); return SectionName == ".stack_sizes"; }; Expected> StackSizeRelocMapOrErr = Obj.getSectionAndRelocations(IsMatch); if (!StackSizeRelocMapOrErr) { reportUniqueWarning("unable to get stack size map section(s): " + toString(StackSizeRelocMapOrErr.takeError())); return; } for (const auto &StackSizeMapEntry : *StackSizeRelocMapOrErr) { PrintHeader(); const Elf_Shdr *StackSizesELFSec = StackSizeMapEntry.first; const Elf_Shdr *RelocSec = StackSizeMapEntry.second; // Warn about stack size sections without a relocation section. if (!RelocSec) { reportWarning(createError(".stack_sizes (" + describe(*StackSizesELFSec) + ") does not have a corresponding " "relocation section"), FileName); continue; } // A .stack_sizes section header's sh_link field is supposed to point // to the section that contains the functions whose stack sizes are // described in it. const Elf_Shdr *FunctionSec = unwrapOrError( this->FileName, Obj.getSection(StackSizesELFSec->sh_link)); SupportsRelocation IsSupportedFn; RelocationResolver Resolver; std::tie(IsSupportedFn, Resolver) = getRelocationResolver(this->ObjF); ArrayRef Contents = unwrapOrError(this->FileName, Obj.getSectionContents(*StackSizesELFSec)); DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr)); forEachRelocationDo( *RelocSec, /*RawRelr=*/false, [&](const Relocation &R, unsigned Ndx, const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { if (!IsSupportedFn || !IsSupportedFn(R.Type)) { reportUniqueWarning( describe(*RelocSec) + " contains an unsupported relocation with index " + Twine(Ndx) + ": " + Obj.getRelocationTypeName(R.Type)); return; } this->printStackSize(R, *RelocSec, Ndx, SymTab, FunctionSec, *StackSizesELFSec, Resolver, Data); }, [](const Elf_Relr &) { llvm_unreachable("can't get here, because we only support " "SHT_REL/SHT_RELA sections"); }); } } template void GNUELFDumper::printStackSizes() { bool HeaderHasBeenPrinted = false; auto PrintHeader = [&]() { if (HeaderHasBeenPrinted) return; OS << "\nStack Sizes:\n"; OS.PadToColumn(9); OS << "Size"; OS.PadToColumn(18); OS << "Functions\n"; HeaderHasBeenPrinted = true; }; // For non-relocatable objects, look directly for sections whose name starts // with .stack_sizes and process the contents. if (this->Obj.getHeader().e_type == ELF::ET_REL) this->printRelocatableStackSizes(PrintHeader); else this->printNonRelocatableStackSizes(PrintHeader); } template void GNUELFDumper::printMipsGOT(const MipsGOTParser &Parser) { size_t Bias = ELFT::Is64Bits ? 8 : 0; auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) { OS.PadToColumn(2); OS << format_hex_no_prefix(Parser.getGotAddress(E), 8 + Bias); OS.PadToColumn(11 + Bias); OS << format_decimal(Parser.getGotOffset(E), 6) << "(gp)"; OS.PadToColumn(22 + Bias); OS << format_hex_no_prefix(*E, 8 + Bias); OS.PadToColumn(31 + 2 * Bias); OS << Purpose << "\n"; }; OS << (Parser.IsStatic ? 
"Static GOT:\n" : "Primary GOT:\n"); OS << " Canonical gp value: " << format_hex_no_prefix(Parser.getGp(), 8 + Bias) << "\n\n"; OS << " Reserved entries:\n"; if (ELFT::Is64Bits) OS << " Address Access Initial Purpose\n"; else OS << " Address Access Initial Purpose\n"; PrintEntry(Parser.getGotLazyResolver(), "Lazy resolver"); if (Parser.getGotModulePointer()) PrintEntry(Parser.getGotModulePointer(), "Module pointer (GNU extension)"); if (!Parser.getLocalEntries().empty()) { OS << "\n"; OS << " Local entries:\n"; if (ELFT::Is64Bits) OS << " Address Access Initial\n"; else OS << " Address Access Initial\n"; for (auto &E : Parser.getLocalEntries()) PrintEntry(&E, ""); } if (Parser.IsStatic) return; if (!Parser.getGlobalEntries().empty()) { OS << "\n"; OS << " Global entries:\n"; if (ELFT::Is64Bits) OS << " Address Access Initial Sym.Val." << " Type Ndx Name\n"; else OS << " Address Access Initial Sym.Val. Type Ndx Name\n"; DataRegion ShndxTable( (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end()); for (auto &E : Parser.getGlobalEntries()) { const Elf_Sym &Sym = *Parser.getGotSym(&E); const Elf_Sym &FirstSym = this->dynamic_symbols()[0]; std::string SymName = this->getFullSymbolName( Sym, &Sym - &FirstSym, ShndxTable, this->DynamicStringTable, false); OS.PadToColumn(2); OS << to_string(format_hex_no_prefix(Parser.getGotAddress(&E), 8 + Bias)); OS.PadToColumn(11 + Bias); OS << to_string(format_decimal(Parser.getGotOffset(&E), 6)) + "(gp)"; OS.PadToColumn(22 + Bias); OS << to_string(format_hex_no_prefix(E, 8 + Bias)); OS.PadToColumn(31 + 2 * Bias); OS << to_string(format_hex_no_prefix(Sym.st_value, 8 + Bias)); OS.PadToColumn(40 + 3 * Bias); OS << enumToString(Sym.getType(), ArrayRef(ElfSymbolTypes)); OS.PadToColumn(48 + 3 * Bias); OS << getSymbolSectionNdx(Sym, &Sym - this->dynamic_symbols().begin(), ShndxTable); OS.PadToColumn(52 + 3 * Bias); OS << SymName << "\n"; } } if (!Parser.getOtherEntries().empty()) OS << "\n Number of TLS and multi-GOT entries " << Parser.getOtherEntries().size() << "\n"; } template void GNUELFDumper::printMipsPLT(const MipsGOTParser &Parser) { size_t Bias = ELFT::Is64Bits ? 8 : 0; auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) { OS.PadToColumn(2); OS << format_hex_no_prefix(Parser.getPltAddress(E), 8 + Bias); OS.PadToColumn(11 + Bias); OS << format_hex_no_prefix(*E, 8 + Bias); OS.PadToColumn(20 + 2 * Bias); OS << Purpose << "\n"; }; OS << "PLT GOT:\n\n"; OS << " Reserved entries:\n"; OS << " Address Initial Purpose\n"; PrintEntry(Parser.getPltLazyResolver(), "PLT lazy resolver"); if (Parser.getPltModulePointer()) PrintEntry(Parser.getPltModulePointer(), "Module pointer"); if (!Parser.getPltEntries().empty()) { OS << "\n"; OS << " Entries:\n"; OS << " Address Initial Sym.Val. 
Type Ndx Name\n"; DataRegion ShndxTable( (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end()); for (auto &E : Parser.getPltEntries()) { const Elf_Sym &Sym = *Parser.getPltSym(&E); const Elf_Sym &FirstSym = *cantFail( this->Obj.template getEntry(*Parser.getPltSymTable(), 0)); std::string SymName = this->getFullSymbolName( Sym, &Sym - &FirstSym, ShndxTable, this->DynamicStringTable, false); OS.PadToColumn(2); OS << to_string(format_hex_no_prefix(Parser.getPltAddress(&E), 8 + Bias)); OS.PadToColumn(11 + Bias); OS << to_string(format_hex_no_prefix(E, 8 + Bias)); OS.PadToColumn(20 + 2 * Bias); OS << to_string(format_hex_no_prefix(Sym.st_value, 8 + Bias)); OS.PadToColumn(29 + 3 * Bias); OS << enumToString(Sym.getType(), ArrayRef(ElfSymbolTypes)); OS.PadToColumn(37 + 3 * Bias); OS << getSymbolSectionNdx(Sym, &Sym - this->dynamic_symbols().begin(), ShndxTable); OS.PadToColumn(41 + 3 * Bias); OS << SymName << "\n"; } } } template Expected *> getMipsAbiFlagsSection(const ELFDumper &Dumper) { const typename ELFT::Shdr *Sec = Dumper.findSectionByName(".MIPS.abiflags"); if (Sec == nullptr) return nullptr; constexpr StringRef ErrPrefix = "unable to read the .MIPS.abiflags section: "; Expected> DataOrErr = Dumper.getElfObject().getELFFile().getSectionContents(*Sec); if (!DataOrErr) return createError(ErrPrefix + toString(DataOrErr.takeError())); if (DataOrErr->size() != sizeof(Elf_Mips_ABIFlags)) return createError(ErrPrefix + "it has a wrong size (" + Twine(DataOrErr->size()) + ")"); return reinterpret_cast *>(DataOrErr->data()); } template void GNUELFDumper::printMipsABIFlags() { const Elf_Mips_ABIFlags *Flags = nullptr; if (Expected *> SecOrErr = getMipsAbiFlagsSection(*this)) Flags = *SecOrErr; else this->reportUniqueWarning(SecOrErr.takeError()); if (!Flags) return; OS << "MIPS ABI Flags Version: " << Flags->version << "\n\n"; OS << "ISA: MIPS" << int(Flags->isa_level); if (Flags->isa_rev > 1) OS << "r" << int(Flags->isa_rev); OS << "\n"; OS << "GPR size: " << getMipsRegisterSize(Flags->gpr_size) << "\n"; OS << "CPR1 size: " << getMipsRegisterSize(Flags->cpr1_size) << "\n"; OS << "CPR2 size: " << getMipsRegisterSize(Flags->cpr2_size) << "\n"; OS << "FP ABI: " << enumToString(Flags->fp_abi, ArrayRef(ElfMipsFpABIType)) << "\n"; OS << "ISA Extension: " << enumToString(Flags->isa_ext, ArrayRef(ElfMipsISAExtType)) << "\n"; if (Flags->ases == 0) OS << "ASEs: None\n"; else // FIXME: Print each flag on a separate line. 
OS << "ASEs: " << printFlags(Flags->ases, ArrayRef(ElfMipsASEFlags)) << "\n"; OS << "FLAGS 1: " << format_hex_no_prefix(Flags->flags1, 8, false) << "\n"; OS << "FLAGS 2: " << format_hex_no_prefix(Flags->flags2, 8, false) << "\n"; OS << "\n"; } template void LLVMELFDumper::printFileHeaders() { const Elf_Ehdr &E = this->Obj.getHeader(); { DictScope D(W, "ElfHeader"); { DictScope D(W, "Ident"); W.printBinary("Magic", ArrayRef(E.e_ident).slice(ELF::EI_MAG0, 4)); W.printEnum("Class", E.e_ident[ELF::EI_CLASS], ArrayRef(ElfClass)); W.printEnum("DataEncoding", E.e_ident[ELF::EI_DATA], ArrayRef(ElfDataEncoding)); W.printNumber("FileVersion", E.e_ident[ELF::EI_VERSION]); auto OSABI = ArrayRef(ElfOSABI); if (E.e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH && E.e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) { switch (E.e_machine) { case ELF::EM_AMDGPU: OSABI = ArrayRef(AMDGPUElfOSABI); break; case ELF::EM_ARM: OSABI = ArrayRef(ARMElfOSABI); break; case ELF::EM_TI_C6000: OSABI = ArrayRef(C6000ElfOSABI); break; } } W.printEnum("OS/ABI", E.e_ident[ELF::EI_OSABI], OSABI); W.printNumber("ABIVersion", E.e_ident[ELF::EI_ABIVERSION]); W.printBinary("Unused", ArrayRef(E.e_ident).slice(ELF::EI_PAD)); } std::string TypeStr; if (const EnumEntry *Ent = getObjectFileEnumEntry(E.e_type)) { TypeStr = Ent->Name.str(); } else { if (E.e_type >= ET_LOPROC) TypeStr = "Processor Specific"; else if (E.e_type >= ET_LOOS) TypeStr = "OS Specific"; else TypeStr = "Unknown"; } W.printString("Type", TypeStr + " (0x" + utohexstr(E.e_type) + ")"); W.printEnum("Machine", E.e_machine, ArrayRef(ElfMachineType)); W.printNumber("Version", E.e_version); W.printHex("Entry", E.e_entry); W.printHex("ProgramHeaderOffset", E.e_phoff); W.printHex("SectionHeaderOffset", E.e_shoff); if (E.e_machine == EM_MIPS) W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderMipsFlags), unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI), unsigned(ELF::EF_MIPS_MACH)); else if (E.e_machine == EM_AMDGPU) { switch (E.e_ident[ELF::EI_ABIVERSION]) { default: W.printHex("Flags", E.e_flags); break; case 0: // ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags. 
[[fallthrough]]; case ELF::ELFABIVERSION_AMDGPU_HSA_V3: W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderAMDGPUFlagsABIVersion3), unsigned(ELF::EF_AMDGPU_MACH)); break; case ELF::ELFABIVERSION_AMDGPU_HSA_V4: case ELF::ELFABIVERSION_AMDGPU_HSA_V5: W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderAMDGPUFlagsABIVersion4), unsigned(ELF::EF_AMDGPU_MACH), unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4), unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4)); break; } } else if (E.e_machine == EM_RISCV) W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderRISCVFlags)); else if (E.e_machine == EM_AVR) W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderAVRFlags), unsigned(ELF::EF_AVR_ARCH_MASK)); else if (E.e_machine == EM_LOONGARCH) W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderLoongArchFlags), unsigned(ELF::EF_LOONGARCH_ABI_MODIFIER_MASK), unsigned(ELF::EF_LOONGARCH_OBJABI_MASK)); else if (E.e_machine == EM_XTENSA) W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderXtensaFlags), unsigned(ELF::EF_XTENSA_MACH)); else if (E.e_machine == EM_CUDA) W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderNVPTXFlags), unsigned(ELF::EF_CUDA_SM)); else W.printFlags("Flags", E.e_flags); W.printNumber("HeaderSize", E.e_ehsize); W.printNumber("ProgramHeaderEntrySize", E.e_phentsize); W.printNumber("ProgramHeaderCount", E.e_phnum); W.printNumber("SectionHeaderEntrySize", E.e_shentsize); W.printString("SectionHeaderCount", getSectionHeadersNumString(this->Obj, this->FileName)); W.printString("StringTableSectionIndex", getSectionHeaderTableIndexString(this->Obj, this->FileName)); } } template void LLVMELFDumper::printGroupSections() { DictScope Lists(W, "Groups"); std::vector V = this->getGroups(); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { DictScope D(W, "Group"); W.printNumber("Name", G.Name, G.ShName); W.printNumber("Index", G.Index); W.printNumber("Link", G.Link); W.printNumber("Info", G.Info); W.printHex("Type", getGroupType(G.Type), G.Type); W.printString("Signature", G.Signature); ListScope L(W, getGroupSectionHeaderName()); for (const GroupMember &GM : G.Members) { const GroupSection *MainGroup = Map[GM.Index]; if (MainGroup != &G) this->reportUniqueWarning( "section with index " + Twine(GM.Index) + ", included in the group section with index " + Twine(MainGroup->Index) + ", was also found in the group section with index " + Twine(G.Index)); printSectionGroupMembers(GM.Name, GM.Index); } } if (V.empty()) printEmptyGroupMessage(); } template std::string LLVMELFDumper::getGroupSectionHeaderName() const { return "Section(s) in group"; } template void LLVMELFDumper::printSectionGroupMembers(StringRef Name, uint64_t Idx) const { W.startLine() << Name << " (" << Idx << ")\n"; } template void LLVMELFDumper::printRelocations() { ListScope D(W, "Relocations"); for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (!isRelocationSec(Sec, this->Obj.getHeader())) continue; StringRef Name = this->getPrintableSectionName(Sec); unsigned SecNdx = &Sec - &cantFail(this->Obj.sections()).front(); printRelocationSectionInfo(Sec, Name, SecNdx); } } template void LLVMELFDumper::printRelrReloc(const Elf_Relr &R) { W.startLine() << W.hex(R) << "\n"; } template void LLVMELFDumper::printExpandedRelRelaReloc(const Relocation &R, StringRef SymbolName, StringRef RelocName) { DictScope Group(W, "Relocation"); W.printHex("Offset", R.Offset); W.printNumber("Type", RelocName, R.Type); W.printNumber("Symbol", !SymbolName.empty() ? 
SymbolName : "-", R.Symbol); if (R.Addend) W.printHex("Addend", (uintX_t)*R.Addend); } template void LLVMELFDumper::printDefaultRelRelaReloc(const Relocation &R, StringRef SymbolName, StringRef RelocName) { raw_ostream &OS = W.startLine(); OS << W.hex(R.Offset) << " " << RelocName << " " << (!SymbolName.empty() ? SymbolName : "-"); if (R.Addend) OS << " " << W.hex((uintX_t)*R.Addend); OS << "\n"; } template void LLVMELFDumper::printRelocationSectionInfo(const Elf_Shdr &Sec, StringRef Name, const unsigned SecNdx) { DictScope D(W, (Twine("Section (") + Twine(SecNdx) + ") " + Name).str()); this->printRelocationsHelper(Sec); } template void LLVMELFDumper::printEmptyGroupMessage() const { W.startLine() << "There are no group sections in the file.\n"; } template void LLVMELFDumper::printRelRelaReloc(const Relocation &R, const RelSymbol &RelSym) { StringRef SymbolName = RelSym.Name; if (RelSym.Sym && RelSym.Name.empty()) SymbolName = ""; SmallString<32> RelocName; this->Obj.getRelocationTypeName(R.Type, RelocName); if (opts::ExpandRelocs) { printExpandedRelRelaReloc(R, SymbolName, RelocName); } else { printDefaultRelRelaReloc(R, SymbolName, RelocName); } } template void LLVMELFDumper::printSectionHeaders() { ListScope SectionsD(W, "Sections"); int SectionIndex = -1; std::vector> FlagsList = getSectionFlagsForTarget(this->Obj.getHeader().e_ident[ELF::EI_OSABI], this->Obj.getHeader().e_machine); for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { DictScope SectionD(W, "Section"); W.printNumber("Index", ++SectionIndex); W.printNumber("Name", this->getPrintableSectionName(Sec), Sec.sh_name); W.printHex("Type", object::getELFSectionTypeName(this->Obj.getHeader().e_machine, Sec.sh_type), Sec.sh_type); W.printFlags("Flags", Sec.sh_flags, ArrayRef(FlagsList)); W.printHex("Address", Sec.sh_addr); W.printHex("Offset", Sec.sh_offset); W.printNumber("Size", Sec.sh_size); W.printNumber("Link", Sec.sh_link); W.printNumber("Info", Sec.sh_info); W.printNumber("AddressAlignment", Sec.sh_addralign); W.printNumber("EntrySize", Sec.sh_entsize); if (opts::SectionRelocations) { ListScope D(W, "Relocations"); this->printRelocationsHelper(Sec); } if (opts::SectionSymbols) { ListScope D(W, "Symbols"); if (this->DotSymtabSec) { StringRef StrTable = unwrapOrError( this->FileName, this->Obj.getStringTableForSymtab(*this->DotSymtabSec)); ArrayRef ShndxTable = this->getShndxTable(this->DotSymtabSec); typename ELFT::SymRange Symbols = unwrapOrError( this->FileName, this->Obj.symbols(this->DotSymtabSec)); for (const Elf_Sym &Sym : Symbols) { const Elf_Shdr *SymSec = unwrapOrError( this->FileName, this->Obj.getSection(Sym, this->DotSymtabSec, ShndxTable)); if (SymSec == &Sec) printSymbol(Sym, &Sym - &Symbols[0], ShndxTable, StrTable, false, /*NonVisibilityBitsUsed=*/false, /*ExtraSymInfo=*/false); } } } if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) { ArrayRef Data = unwrapOrError(this->FileName, this->Obj.getSectionContents(Sec)); W.printBinaryBlock( "SectionData", StringRef(reinterpret_cast(Data.data()), Data.size())); } } } template void LLVMELFDumper::printSymbolSection( const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable) const { auto GetSectionSpecialType = [&]() -> std::optional { if (Symbol.isUndefined()) return StringRef("Undefined"); if (Symbol.isProcessorSpecific()) return StringRef("Processor Specific"); if (Symbol.isOSSpecific()) return StringRef("Operating System Specific"); if (Symbol.isAbsolute()) return StringRef("Absolute"); if (Symbol.isCommon()) return StringRef("Common"); if 
(Symbol.isReserved() && Symbol.st_shndx != SHN_XINDEX) return StringRef("Reserved"); return std::nullopt; }; if (std::optional Type = GetSectionSpecialType()) { W.printHex("Section", *Type, Symbol.st_shndx); return; } Expected SectionIndex = this->getSymbolSectionIndex(Symbol, SymIndex, ShndxTable); if (!SectionIndex) { assert(Symbol.st_shndx == SHN_XINDEX && "getSymbolSectionIndex should only fail due to an invalid " "SHT_SYMTAB_SHNDX table/reference"); this->reportUniqueWarning(SectionIndex.takeError()); W.printHex("Section", "Reserved", SHN_XINDEX); return; } Expected SectionName = this->getSymbolSectionName(Symbol, *SectionIndex); if (!SectionName) { // Don't report an invalid section name if the section headers are missing. // In such situations, all sections will be "invalid". if (!this->ObjF.sections().empty()) this->reportUniqueWarning(SectionName.takeError()); else consumeError(SectionName.takeError()); W.printHex("Section", "", *SectionIndex); } else { W.printHex("Section", *SectionName, *SectionIndex); } } template void LLVMELFDumper::printSymbolOtherField(const Elf_Sym &Symbol) const { std::vector> SymOtherFlags = this->getOtherFlagsFromSymbol(this->Obj.getHeader(), Symbol); W.printFlags("Other", Symbol.st_other, ArrayRef(SymOtherFlags), 0x3u); } template void LLVMELFDumper::printZeroSymbolOtherField( const Elf_Sym &Symbol) const { assert(Symbol.st_other == 0 && "non-zero Other Field"); // Usually st_other flag is zero. Do not pollute the output // by flags enumeration in that case. W.printNumber("Other", 0); } template void LLVMELFDumper::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable, std::optional StrTable, bool IsDynamic, bool /*NonVisibilityBitsUsed*/, bool /*ExtraSymInfo*/) const { std::string FullSymbolName = this->getFullSymbolName( Symbol, SymIndex, ShndxTable, StrTable, IsDynamic); unsigned char SymbolType = Symbol.getType(); DictScope D(W, "Symbol"); W.printNumber("Name", FullSymbolName, Symbol.st_name); W.printHex("Value", Symbol.st_value); W.printNumber("Size", Symbol.st_size); W.printEnum("Binding", Symbol.getBinding(), ArrayRef(ElfSymbolBindings)); if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) W.printEnum("Type", SymbolType, ArrayRef(AMDGPUSymbolTypes)); else W.printEnum("Type", SymbolType, ArrayRef(ElfSymbolTypes)); if (Symbol.st_other == 0) printZeroSymbolOtherField(Symbol); else printSymbolOtherField(Symbol); printSymbolSection(Symbol, SymIndex, ShndxTable); } template void LLVMELFDumper::printSymbols(bool PrintSymbols, bool PrintDynamicSymbols, bool ExtraSymInfo) { if (PrintSymbols) { ListScope Group(W, "Symbols"); this->printSymbolsHelper(false, ExtraSymInfo); } if (PrintDynamicSymbols) { ListScope Group(W, "DynamicSymbols"); this->printSymbolsHelper(true, ExtraSymInfo); } } template void LLVMELFDumper::printDynamicTable() { Elf_Dyn_Range Table = this->dynamic_table(); if (Table.empty()) return; W.startLine() << "DynamicSection [ (" << Table.size() << " entries)\n"; size_t MaxTagSize = getMaxDynamicTagSize(this->Obj, Table); // The "Name/Value" column should be indented from the "Type" column by N // spaces, where N = MaxTagSize - length of "Type" (4) + trailing // space (1) = -3. W.startLine() << " Tag" << std::string(ELFT::Is64Bits ? 
16 : 8, ' ') << "Type" << std::string(MaxTagSize - 3, ' ') << "Name/Value\n"; std::string ValueFmt = "%-" + std::to_string(MaxTagSize) + "s "; for (auto Entry : Table) { uintX_t Tag = Entry.getTag(); std::string Value = this->getDynamicEntry(Tag, Entry.getVal()); W.startLine() << " " << format_hex(Tag, ELFT::Is64Bits ? 18 : 10, true) << " " << format(ValueFmt.c_str(), this->Obj.getDynamicTagAsString(Tag).c_str()) << Value << "\n"; } W.startLine() << "]\n"; } template void LLVMELFDumper::printDynamicRelocations() { W.startLine() << "Dynamic Relocations {\n"; W.indent(); this->printDynamicRelocationsHelper(); W.unindent(); W.startLine() << "}\n"; } template void LLVMELFDumper::printProgramHeaders( bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { if (PrintProgramHeaders) printProgramHeaders(); if (PrintSectionMapping == cl::BOU_TRUE) printSectionMapping(); } template void LLVMELFDumper::printProgramHeaders() { ListScope L(W, "ProgramHeaders"); Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning("unable to dump program headers: " + toString(PhdrsOrErr.takeError())); return; } for (const Elf_Phdr &Phdr : *PhdrsOrErr) { DictScope P(W, "ProgramHeader"); StringRef Type = segmentTypeToString(this->Obj.getHeader().e_machine, Phdr.p_type); W.printHex("Type", Type.empty() ? "Unknown" : Type, Phdr.p_type); W.printHex("Offset", Phdr.p_offset); W.printHex("VirtualAddress", Phdr.p_vaddr); W.printHex("PhysicalAddress", Phdr.p_paddr); W.printNumber("FileSize", Phdr.p_filesz); W.printNumber("MemSize", Phdr.p_memsz); W.printFlags("Flags", Phdr.p_flags, ArrayRef(ElfSegmentFlags)); W.printNumber("Alignment", Phdr.p_align); } } template void LLVMELFDumper::printVersionSymbolSection(const Elf_Shdr *Sec) { ListScope SS(W, "VersionSymbols"); if (!Sec) return; StringRef StrTable; ArrayRef Syms; const Elf_Shdr *SymTabSec; Expected> VerTableOrErr = this->getVersionTable(*Sec, &Syms, &StrTable, &SymTabSec); if (!VerTableOrErr) { this->reportUniqueWarning(VerTableOrErr.takeError()); return; } if (StrTable.empty() || Syms.empty() || Syms.size() != VerTableOrErr->size()) return; ArrayRef ShNdxTable = this->getShndxTable(SymTabSec); for (size_t I = 0, E = Syms.size(); I < E; ++I) { DictScope S(W, "Symbol"); W.printNumber("Version", (*VerTableOrErr)[I].vs_index & VERSYM_VERSION); W.printString("Name", this->getFullSymbolName(Syms[I], I, ShNdxTable, StrTable, /*IsDynamic=*/true)); } } const EnumEntry SymVersionFlags[] = { {"Base", "BASE", VER_FLG_BASE}, {"Weak", "WEAK", VER_FLG_WEAK}, {"Info", "INFO", VER_FLG_INFO}}; template void LLVMELFDumper::printVersionDefinitionSection(const Elf_Shdr *Sec) { ListScope SD(W, "VersionDefinitions"); if (!Sec) return; Expected> V = this->Obj.getVersionDefinitions(*Sec); if (!V) { this->reportUniqueWarning(V.takeError()); return; } for (const VerDef &D : *V) { DictScope Def(W, "Definition"); W.printNumber("Version", D.Version); W.printFlags("Flags", D.Flags, ArrayRef(SymVersionFlags)); W.printNumber("Index", D.Ndx); W.printNumber("Hash", D.Hash); W.printString("Name", D.Name.c_str()); W.printList( "Predecessors", D.AuxV, [](raw_ostream &OS, const VerdAux &Aux) { OS << Aux.Name.c_str(); }); } } template void LLVMELFDumper::printVersionDependencySection(const Elf_Shdr *Sec) { ListScope SD(W, "VersionRequirements"); if (!Sec) return; Expected> V = this->Obj.getVersionDependencies(*Sec, this->WarningHandler); if (!V) { this->reportUniqueWarning(V.takeError()); return; } for (const VerNeed &VN : *V) { DictScope Entry(W, 
"Dependency"); W.printNumber("Version", VN.Version); W.printNumber("Count", VN.Cnt); W.printString("FileName", VN.File.c_str()); ListScope L(W, "Entries"); for (const VernAux &Aux : VN.AuxV) { DictScope Entry(W, "Entry"); W.printNumber("Hash", Aux.Hash); W.printFlags("Flags", Aux.Flags, ArrayRef(SymVersionFlags)); W.printNumber("Index", Aux.Other); W.printString("Name", Aux.Name.c_str()); } } } template void LLVMELFDumper::printHashHistogramStats(size_t NBucket, size_t MaxChain, size_t TotalSyms, ArrayRef Count, bool IsGnu) const { StringRef HistName = IsGnu ? "GnuHashHistogram" : "HashHistogram"; StringRef BucketName = IsGnu ? "Bucket" : "Chain"; StringRef ListName = IsGnu ? "Buckets" : "Chains"; DictScope Outer(W, HistName); W.printNumber("TotalBuckets", NBucket); ListScope Buckets(W, ListName); size_t CumulativeNonZero = 0; for (size_t I = 0; I < MaxChain; ++I) { CumulativeNonZero += Count[I] * I; DictScope Bucket(W, BucketName); W.printNumber("Length", I); W.printNumber("Count", Count[I]); W.printNumber("Percentage", (float)(Count[I] * 100.0) / NBucket); W.printNumber("Coverage", (float)(CumulativeNonZero * 100.0) / TotalSyms); } } // Returns true if rel/rela section exists, and populates SymbolIndices. // Otherwise returns false. template static bool getSymbolIndices(const typename ELFT::Shdr *CGRelSection, const ELFFile &Obj, const LLVMELFDumper *Dumper, SmallVector &SymbolIndices) { if (!CGRelSection) { Dumper->reportUniqueWarning( "relocation section for a call graph section doesn't exist"); return false; } if (CGRelSection->sh_type == SHT_REL) { typename ELFT::RelRange CGProfileRel; Expected CGProfileRelOrError = Obj.rels(*CGRelSection); if (!CGProfileRelOrError) { Dumper->reportUniqueWarning("unable to load relocations for " "SHT_LLVM_CALL_GRAPH_PROFILE section: " + toString(CGProfileRelOrError.takeError())); return false; } CGProfileRel = *CGProfileRelOrError; for (const typename ELFT::Rel &Rel : CGProfileRel) SymbolIndices.push_back(Rel.getSymbol(Obj.isMips64EL())); } else { // MC unconditionally produces SHT_REL, but GNU strip/objcopy may convert // the format to SHT_RELA // (https://sourceware.org/bugzilla/show_bug.cgi?id=28035) typename ELFT::RelaRange CGProfileRela; Expected CGProfileRelaOrError = Obj.relas(*CGRelSection); if (!CGProfileRelaOrError) { Dumper->reportUniqueWarning("unable to load relocations for " "SHT_LLVM_CALL_GRAPH_PROFILE section: " + toString(CGProfileRelaOrError.takeError())); return false; } CGProfileRela = *CGProfileRelaOrError; for (const typename ELFT::Rela &Rela : CGProfileRela) SymbolIndices.push_back(Rela.getSymbol(Obj.isMips64EL())); } return true; } template void LLVMELFDumper::printCGProfile() { auto IsMatch = [](const Elf_Shdr &Sec) -> bool { return Sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE; }; Expected> SecToRelocMapOrErr = this->Obj.getSectionAndRelocations(IsMatch); if (!SecToRelocMapOrErr) { this->reportUniqueWarning("unable to get CG Profile section(s): " + toString(SecToRelocMapOrErr.takeError())); return; } for (const auto &CGMapEntry : *SecToRelocMapOrErr) { const Elf_Shdr *CGSection = CGMapEntry.first; const Elf_Shdr *CGRelSection = CGMapEntry.second; Expected> CGProfileOrErr = this->Obj.template getSectionContentsAsArray(*CGSection); if (!CGProfileOrErr) { this->reportUniqueWarning( "unable to load the SHT_LLVM_CALL_GRAPH_PROFILE section: " + toString(CGProfileOrErr.takeError())); return; } SmallVector SymbolIndices; bool UseReloc = getSymbolIndices(CGRelSection, this->Obj, this, SymbolIndices); if (UseReloc && 
SymbolIndices.size() != CGProfileOrErr->size() * 2) { this->reportUniqueWarning( "number of from/to pairs does not match number of frequencies"); UseReloc = false; } ListScope L(W, "CGProfile"); for (uint32_t I = 0, Size = CGProfileOrErr->size(); I != Size; ++I) { const Elf_CGProfile &CGPE = (*CGProfileOrErr)[I]; DictScope D(W, "CGProfileEntry"); if (UseReloc) { uint32_t From = SymbolIndices[I * 2]; uint32_t To = SymbolIndices[I * 2 + 1]; W.printNumber("From", this->getStaticSymbolName(From), From); W.printNumber("To", this->getStaticSymbolName(To), To); } W.printNumber("Weight", CGPE.cgp_weight); } } } template void LLVMELFDumper::printBBAddrMaps() { bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL; using Elf_Shdr = typename ELFT::Shdr; auto IsMatch = [](const Elf_Shdr &Sec) -> bool { return Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP || Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP_V0; }; Expected> SecRelocMapOrErr = this->Obj.getSectionAndRelocations(IsMatch); if (!SecRelocMapOrErr) { this->reportUniqueWarning( "failed to get SHT_LLVM_BB_ADDR_MAP section(s): " + toString(SecRelocMapOrErr.takeError())); return; } for (auto const &[Sec, RelocSec] : *SecRelocMapOrErr) { std::optional FunctionSec; if (IsRelocatable) FunctionSec = unwrapOrError(this->FileName, this->Obj.getSection(Sec->sh_link)); ListScope L(W, "BBAddrMap"); if (IsRelocatable && !RelocSec) { this->reportUniqueWarning("unable to get relocation section for " + this->describe(*Sec)); continue; } Expected> BBAddrMapOrErr = this->Obj.decodeBBAddrMap(*Sec, RelocSec); if (!BBAddrMapOrErr) { this->reportUniqueWarning("unable to dump " + this->describe(*Sec) + ": " + toString(BBAddrMapOrErr.takeError())); continue; } for (const BBAddrMap &AM : *BBAddrMapOrErr) { DictScope D(W, "Function"); W.printHex("At", AM.Addr); SmallVector FuncSymIndex = this->getSymbolIndexesForFunctionAddress(AM.Addr, FunctionSec); std::string FuncName = ""; if (FuncSymIndex.empty()) this->reportUniqueWarning( "could not identify function symbol for address (0x" + Twine::utohexstr(AM.Addr) + ") in " + this->describe(*Sec)); else FuncName = this->getStaticSymbolName(FuncSymIndex.front()); W.printString("Name", FuncName); ListScope L(W, "BB entries"); for (const BBAddrMap::BBEntry &BBE : AM.BBEntries) { DictScope L(W); W.printNumber("ID", BBE.ID); W.printHex("Offset", BBE.Offset); W.printHex("Size", BBE.Size); W.printBoolean("HasReturn", BBE.hasReturn()); W.printBoolean("HasTailCall", BBE.hasTailCall()); W.printBoolean("IsEHPad", BBE.isEHPad()); W.printBoolean("CanFallThrough", BBE.canFallThrough()); W.printBoolean("HasIndirectBranch", BBE.hasIndirectBranch()); } } } } template void LLVMELFDumper::printAddrsig() { ListScope L(W, "Addrsig"); if (!this->DotAddrsigSec) return; Expected> SymsOrErr = decodeAddrsigSection(this->Obj, *this->DotAddrsigSec); if (!SymsOrErr) { this->reportUniqueWarning(SymsOrErr.takeError()); return; } for (uint64_t Sym : *SymsOrErr) W.printNumber("Sym", this->getStaticSymbolName(Sym), Sym); } template static bool printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, ScopedPrinter &W) { // Return true if we were able to pretty-print the note, false otherwise. 
switch (NoteType) { default: return false; case ELF::NT_GNU_ABI_TAG: { const GNUAbiTag &AbiTag = getGNUAbiTag(Desc); if (!AbiTag.IsValid) { W.printString("ABI", ""); return false; } else { W.printString("OS", AbiTag.OSName); W.printString("ABI", AbiTag.ABI); } break; } case ELF::NT_GNU_BUILD_ID: { W.printString("Build ID", getGNUBuildId(Desc)); break; } case ELF::NT_GNU_GOLD_VERSION: W.printString("Version", getDescAsStringRef(Desc)); break; case ELF::NT_GNU_PROPERTY_TYPE_0: ListScope D(W, "Property"); for (const std::string &Property : getGNUPropertyList(Desc)) W.printString(Property); break; } return true; } static bool printAndroidNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, ScopedPrinter &W) { // Return true if we were able to pretty-print the note, false otherwise. AndroidNoteProperties Props = getAndroidNoteProperties(NoteType, Desc); if (Props.empty()) return false; for (const auto &KV : Props) W.printString(KV.first, KV.second); return true; } template static bool printAarch64NoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, ScopedPrinter &W) { if (NoteType != NT_ARM_TYPE_PAUTH_ABI_TAG) return false; if (Desc.size() < 16) return false; uint64_t platform = support::endian::read64(Desc.data() + 0); uint64_t version = support::endian::read64(Desc.data() + 8); W.printNumber("Platform", platform); W.printNumber("Version", version); if (Desc.size() > 16) W.printString("Additional info", toHex(ArrayRef(Desc.data() + 16, Desc.size() - 16))); return true; } template void LLVMELFDumper::printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, const ArrayRef> Descriptors) { { ListScope L(W, "Memtag Dynamic Entries:"); if (DynamicEntries.empty()) W.printString("< none found >"); for (const auto &DynamicEntryKV : DynamicEntries) W.printString(DynamicEntryKV.first, DynamicEntryKV.second); } if (!AndroidNoteDesc.empty()) { ListScope L(W, "Memtag Android Note:"); printAndroidNoteLLVMStyle(ELF::NT_ANDROID_TYPE_MEMTAG, AndroidNoteDesc, W); } if (Descriptors.empty()) return; { ListScope L(W, "Memtag Global Descriptors:"); for (const auto &[Addr, BytesToTag] : Descriptors) { W.printHex("0x" + utohexstr(Addr), BytesToTag); } } } template static bool printLLVMOMPOFFLOADNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, ScopedPrinter &W) { switch (NoteType) { default: return false; case ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION: W.printString("Version", getDescAsStringRef(Desc)); break; case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER: W.printString("Producer", getDescAsStringRef(Desc)); break; case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION: W.printString("Producer version", getDescAsStringRef(Desc)); break; } return true; } static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) { W.printNumber("Page Size", Note.PageSize); for (const CoreFileMapping &Mapping : Note.Mappings) { ListScope D(W, "Mapping"); W.printHex("Start", Mapping.Start); W.printHex("End", Mapping.End); W.printHex("Offset", Mapping.Offset); W.printString("Filename", Mapping.Filename); } } template void LLVMELFDumper::printNotes() { ListScope L(W, "Notes"); std::unique_ptr NoteScope; size_t Align = 0; auto StartNotes = [&](std::optional SecName, const typename ELFT::Off Offset, const typename ELFT::Addr Size, size_t Al) { Align = std::max(Al, 4); NoteScope = std::make_unique(W, "NoteSection"); W.printString("Name", SecName ? 
*SecName : ""); W.printHex("Offset", Offset); W.printHex("Size", Size); }; auto EndNotes = [&] { NoteScope.reset(); }; auto ProcessNote = [&](const Elf_Note &Note, bool IsCore) -> Error { DictScope D2(W, "Note"); StringRef Name = Note.getName(); ArrayRef Descriptor = Note.getDesc(Align); Elf_Word Type = Note.getType(); // Print the note owner/type. W.printString("Owner", Name); W.printHex("Data size", Descriptor.size()); StringRef NoteType = getNoteTypeName(Note, this->Obj.getHeader().e_type); if (!NoteType.empty()) W.printString("Type", NoteType); else W.printString("Type", "Unknown (" + to_string(format_hex(Type, 10)) + ")"); // Print the description, or fallback to printing raw bytes for unknown // owners/if we fail to pretty-print the contents. if (Name == "GNU") { if (printGNUNoteLLVMStyle(Type, Descriptor, W)) return Error::success(); } else if (Name == "FreeBSD") { if (std::optional N = getFreeBSDNote(Type, Descriptor, IsCore)) { W.printString(N->Type, N->Value); return Error::success(); } } else if (Name == "AMD") { const AMDNote N = getAMDNote(Type, Descriptor); if (!N.Type.empty()) { W.printString(N.Type, N.Value); return Error::success(); } } else if (Name == "AMDGPU") { const AMDGPUNote N = getAMDGPUNote(Type, Descriptor); if (!N.Type.empty()) { W.printString(N.Type, N.Value); return Error::success(); } } else if (Name == "LLVMOMPOFFLOAD") { if (printLLVMOMPOFFLOADNoteLLVMStyle(Type, Descriptor, W)) return Error::success(); } else if (Name == "CORE") { if (Type == ELF::NT_FILE) { DataExtractor DescExtractor( Descriptor, ELFT::TargetEndianness == llvm::endianness::little, sizeof(Elf_Addr)); if (Expected N = readCoreNote(DescExtractor)) { printCoreNoteLLVMStyle(*N, W); return Error::success(); } else { return N.takeError(); } } } else if (Name == "Android") { if (printAndroidNoteLLVMStyle(Type, Descriptor, W)) return Error::success(); } else if (Name == "ARM") { if (printAarch64NoteLLVMStyle(Type, Descriptor, W)) return Error::success(); } if (!Descriptor.empty()) { W.printBinaryBlock("Description data", Descriptor); } return Error::success(); }; processNotesHelper(*this, /*StartNotesFn=*/StartNotes, /*ProcessNoteFn=*/ProcessNote, /*FinishNotesFn=*/EndNotes); } template void LLVMELFDumper::printELFLinkerOptions() { ListScope L(W, "LinkerOptions"); unsigned I = -1; for (const Elf_Shdr &Shdr : cantFail(this->Obj.sections())) { ++I; if (Shdr.sh_type != ELF::SHT_LLVM_LINKER_OPTIONS) continue; Expected> ContentsOrErr = this->Obj.getSectionContents(Shdr); if (!ContentsOrErr) { this->reportUniqueWarning("unable to read the content of the " "SHT_LLVM_LINKER_OPTIONS section: " + toString(ContentsOrErr.takeError())); continue; } if (ContentsOrErr->empty()) continue; if (ContentsOrErr->back() != 0) { this->reportUniqueWarning("SHT_LLVM_LINKER_OPTIONS section at index " + Twine(I) + " is broken: the " "content is not null-terminated"); continue; } SmallVector Strings; toStringRef(ContentsOrErr->drop_back()).split(Strings, '\0'); if (Strings.size() % 2 != 0) { this->reportUniqueWarning( "SHT_LLVM_LINKER_OPTIONS section at index " + Twine(I) + " is broken: an incomplete " "key-value pair was found. 
The last possible key was: \"" + Strings.back() + "\""); continue; } for (size_t I = 0; I < Strings.size(); I += 2) W.printString(Strings[I], Strings[I + 1]); } } template void LLVMELFDumper::printDependentLibs() { ListScope L(W, "DependentLibs"); this->printDependentLibsHelper( [](const Elf_Shdr &) {}, [this](StringRef Lib, uint64_t) { W.printString(Lib); }); } template void LLVMELFDumper::printStackSizes() { ListScope L(W, "StackSizes"); if (this->Obj.getHeader().e_type == ELF::ET_REL) this->printRelocatableStackSizes([]() {}); else this->printNonRelocatableStackSizes([]() {}); } template void LLVMELFDumper::printStackSizeEntry(uint64_t Size, ArrayRef FuncNames) { DictScope D(W, "Entry"); W.printList("Functions", FuncNames); W.printHex("Size", Size); } template void LLVMELFDumper::printMipsGOT(const MipsGOTParser &Parser) { auto PrintEntry = [&](const Elf_Addr *E) { W.printHex("Address", Parser.getGotAddress(E)); W.printNumber("Access", Parser.getGotOffset(E)); W.printHex("Initial", *E); }; DictScope GS(W, Parser.IsStatic ? "Static GOT" : "Primary GOT"); W.printHex("Canonical gp value", Parser.getGp()); { ListScope RS(W, "Reserved entries"); { DictScope D(W, "Entry"); PrintEntry(Parser.getGotLazyResolver()); W.printString("Purpose", StringRef("Lazy resolver")); } if (Parser.getGotModulePointer()) { DictScope D(W, "Entry"); PrintEntry(Parser.getGotModulePointer()); W.printString("Purpose", StringRef("Module pointer (GNU extension)")); } } { ListScope LS(W, "Local entries"); for (auto &E : Parser.getLocalEntries()) { DictScope D(W, "Entry"); PrintEntry(&E); } } if (Parser.IsStatic) return; { ListScope GS(W, "Global entries"); for (auto &E : Parser.getGlobalEntries()) { DictScope D(W, "Entry"); PrintEntry(&E); const Elf_Sym &Sym = *Parser.getGotSym(&E); W.printHex("Value", Sym.st_value); W.printEnum("Type", Sym.getType(), ArrayRef(ElfSymbolTypes)); const unsigned SymIndex = &Sym - this->dynamic_symbols().begin(); DataRegion ShndxTable( (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end()); printSymbolSection(Sym, SymIndex, ShndxTable); std::string SymName = this->getFullSymbolName( Sym, SymIndex, ShndxTable, this->DynamicStringTable, true); W.printNumber("Name", SymName, Sym.st_name); } } W.printNumber("Number of TLS and multi-GOT entries", uint64_t(Parser.getOtherEntries().size())); } template void LLVMELFDumper::printMipsPLT(const MipsGOTParser &Parser) { auto PrintEntry = [&](const Elf_Addr *E) { W.printHex("Address", Parser.getPltAddress(E)); W.printHex("Initial", *E); }; DictScope GS(W, "PLT GOT"); { ListScope RS(W, "Reserved entries"); { DictScope D(W, "Entry"); PrintEntry(Parser.getPltLazyResolver()); W.printString("Purpose", StringRef("PLT lazy resolver")); } if (auto E = Parser.getPltModulePointer()) { DictScope D(W, "Entry"); PrintEntry(E); W.printString("Purpose", StringRef("Module pointer")); } } { ListScope LS(W, "Entries"); DataRegion ShndxTable( (const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end()); for (auto &E : Parser.getPltEntries()) { DictScope D(W, "Entry"); PrintEntry(&E); const Elf_Sym &Sym = *Parser.getPltSym(&E); W.printHex("Value", Sym.st_value); W.printEnum("Type", Sym.getType(), ArrayRef(ElfSymbolTypes)); printSymbolSection(Sym, &Sym - this->dynamic_symbols().begin(), ShndxTable); const Elf_Sym *FirstSym = cantFail( this->Obj.template getEntry(*Parser.getPltSymTable(), 0)); std::string SymName = this->getFullSymbolName( Sym, &Sym - FirstSym, ShndxTable, Parser.getPltStrTable(), true); W.printNumber("Name", SymName, Sym.st_name); } } } 
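The LLVM-style routines above all emit their output through llvm::ScopedPrinter: DictScope opens a brace-delimited, indented block, ListScope a bracket-delimited one, and each closes itself when the scope object is destroyed, which is why none of the printers close their blocks explicitly. A minimal standalone sketch of that nesting, assuming only the public ScopedPrinter API (the values and the surrounding main() are invented for illustration and are not part of this change):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::ScopedPrinter W(llvm::outs());
  // Prints "PLT GOT {" and indents; "}" is emitted when GS goes out of scope.
  llvm::DictScope GS(W, "PLT GOT");
  {
    // Prints "Reserved entries [" ... "]" around the nested dictionary.
    llvm::ListScope RS(W, "Reserved entries");
    llvm::DictScope D(W, "Entry");
    W.printHex("Address", 0x10000000u); // illustrative value
    W.printHex("Initial", 0u);          // illustrative value
    W.printString("Purpose", llvm::StringRef("PLT lazy resolver"));
  }
  return 0;
}

Running this prints the same indented tree shape that llvm-readobj produces with --elf-output-style=LLVM.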
template void LLVMELFDumper::printMipsABIFlags() { const Elf_Mips_ABIFlags *Flags; if (Expected *> SecOrErr = getMipsAbiFlagsSection(*this)) { Flags = *SecOrErr; if (!Flags) { W.startLine() << "There is no .MIPS.abiflags section in the file.\n"; return; } } else { this->reportUniqueWarning(SecOrErr.takeError()); return; } raw_ostream &OS = W.getOStream(); DictScope GS(W, "MIPS ABI Flags"); W.printNumber("Version", Flags->version); W.startLine() << "ISA: "; if (Flags->isa_rev <= 1) OS << format("MIPS%u", Flags->isa_level); else OS << format("MIPS%ur%u", Flags->isa_level, Flags->isa_rev); OS << "\n"; W.printEnum("ISA Extension", Flags->isa_ext, ArrayRef(ElfMipsISAExtType)); W.printFlags("ASEs", Flags->ases, ArrayRef(ElfMipsASEFlags)); W.printEnum("FP ABI", Flags->fp_abi, ArrayRef(ElfMipsFpABIType)); W.printNumber("GPR size", getMipsRegisterSize(Flags->gpr_size)); W.printNumber("CPR1 size", getMipsRegisterSize(Flags->cpr1_size)); W.printNumber("CPR2 size", getMipsRegisterSize(Flags->cpr2_size)); W.printFlags("Flags 1", Flags->flags1, ArrayRef(ElfMipsFlags1)); W.printHex("Flags 2", Flags->flags2); } template void JSONELFDumper::printFileSummary(StringRef FileStr, ObjectFile &Obj, ArrayRef InputFilenames, const Archive *A) { FileScope = std::make_unique(this->W); DictScope D(this->W, "FileSummary"); this->W.printString("File", FileStr); this->W.printString("Format", Obj.getFileFormatName()); this->W.printString("Arch", Triple::getArchTypeName(Obj.getArch())); this->W.printString( "AddressSize", std::string(formatv("{0}bit", 8 * Obj.getBytesInAddress()))); this->printLoadName(); } template void JSONELFDumper::printZeroSymbolOtherField( const Elf_Sym &Symbol) const { // We want the JSON format to be uniform, since it is machine readable, so // always print the `Other` field the same way. this->printSymbolOtherField(Symbol); } template void JSONELFDumper::printDefaultRelRelaReloc(const Relocation &R, StringRef SymbolName, StringRef RelocName) { this->printExpandedRelRelaReloc(R, SymbolName, RelocName); } template void JSONELFDumper::printRelocationSectionInfo(const Elf_Shdr &Sec, StringRef Name, const unsigned SecNdx) { DictScope Group(this->W); this->W.printNumber("SectionIndex", SecNdx); ListScope D(this->W, "Relocs"); this->printRelocationsHelper(Sec); } template std::string JSONELFDumper::getGroupSectionHeaderName() const { return "GroupSections"; } template void JSONELFDumper::printSectionGroupMembers(StringRef Name, uint64_t Idx) const { DictScope Grp(this->W); this->W.printString("Name", Name); this->W.printNumber("Index", Idx); } template void JSONELFDumper::printEmptyGroupMessage() const { // JSON output does not need to print anything for empty groups } diff --git a/contrib/llvm-project/openmp/runtime/src/z_AIX_asm.S b/contrib/llvm-project/openmp/runtime/src/z_AIX_asm.S new file mode 100644 index 000000000000..d711fcb7a785 --- /dev/null +++ b/contrib/llvm-project/openmp/runtime/src/z_AIX_asm.S @@ -0,0 +1,410 @@ +// z_AIX_asm.S: - microtasking routines specifically +// written for Power platforms running AIX OS + +// +////===----------------------------------------------------------------------===// +//// +//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +//// See https://llvm.org/LICENSE.txt for license information. 
+//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+////
+////===----------------------------------------------------------------------===//
+//
+
+// -----------------------------------------------------------------------
+// macros
+// -----------------------------------------------------------------------
+
+#include "kmp_config.h"
+
+#if KMP_OS_AIX
+//------------------------------------------------------------------------
+// int
+// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
+// int gtid, int tid,
+// int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+// ,
+// void **exit_frame_ptr
+// #endif
+// ) {
+// #if OMPT_SUPPORT
+// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+// (*pkfn)( & gtid, & tid, p_argv[0], ... );
+//
+// // FIXME: This is done at call-site and can be removed here.
+// #if OMPT_SUPPORT
+// *exit_frame_ptr = 0;
+// #endif
+//
+// return 1;
+// }
+//
+// parameters:
+// r3: pkfn
+// r4: gtid
+// r5: tid
+// r6: argc
+// r7: p_argv
+// r8: &exit_frame
+//
+// return: r3 (always 1/TRUE)
+//
+
+#if KMP_ARCH_PPC64_XCOFF
+
+ .globl __kmp_invoke_microtask[DS]
+ .globl .__kmp_invoke_microtask
+ .align 4
+ .csect __kmp_invoke_microtask[DS],3
+ .vbyte 8, .__kmp_invoke_microtask
+ .vbyte 8, TOC[TC0]
+ .vbyte 8, 0
+ .csect .text[PR],2
+ .machine "pwr7"
+.__kmp_invoke_microtask:
+
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+
+// We need to allocate a stack frame large enough to hold all of the parameters
+// on the stack for the microtask plus what this function needs. That's 48
+// bytes under the XCOFF64 ABI, plus max(64, 8*(2 + argc)) for
+// the parameters to the microtask (gtid, tid, argc elements of p_argv),
+// plus 8 bytes to store the values of r4 and r5, and 8 bytes to store r31.
+// With OMP-T support, we need an additional 8 bytes to save r30 to hold
+// a copy of r8.
+// Stack offsets relative to stack pointer:
+// r31: -8, r30: -16, gtid: -20, tid: -24
+
+ mflr 0
+ std 31, -8(1) # Save r31 to the stack
+ std 0, 16(1) # Save LR to the linkage area
+
+// This is unusual because normally we'd set r31 equal to r1 after the stack
+// frame is established. In this case, however, we need to dynamically compute
+// the stack frame size, and so we keep a direct copy of r1 to access our
+// register save areas and restore the r1 value before returning.
+ mr 31, 1
+
+// Compute the size of the "argc" portion of the parameter save area.
+// The parameter save area is always at least 64 bytes long (i.e. 8 regs)
+// The microtask has (2 + argc) parameters, so if argc <= 6, we need to
+// to allocate 8*6 bytes, not 8*argc.
+ li 0, 6
+ cmpwi 0, 6, 6
+ iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6
+ sldi 0, 0, 3 # r0 = 8 * max(argc, 6)
+
+// Compute the size necessary for the local stack frame.
+// 88 = 48 + 4 (for r4) + 4 (for r5) + 8 (for r31) + 8 (for OMP-T r30) +
+// 8 (parameter gtid) + 8 (parameter tid)
+ li 12, 88
+ add 12, 0, 12
+ neg 12, 12
+
+// We need to make sure that the stack frame stays aligned (to 16 bytes).
+ li 0, -16
+ and 12, 0, 12
+
+// Establish the local stack frame.
+ stdux 1, 1, 12
+
+#if OMPT_SUPPORT
+ std 30, -16(31) # Save r30 to the stack
+ std 1, 0(8)
+ mr 30, 8
+#endif
+
+// Store gtid and tid to the stack because they're passed by reference to the microtask.
+ stw 4, -20(31) # Save gtid to the stack
+ stw 5, -24(31) # Save tid to the stack
+
+ mr 12, 6 # r12 = argc
+ mr 4, 7 # r4 = p_argv
+
+ cmpwi 0, 12, 1
+ blt 0, .Lcall # if (argc < 1) goto .Lcall
+
+ ld 5, 0(4) # r5 = p_argv[0]
+
+ cmpwi 0, 12, 2
+ blt 0, .Lcall # if (argc < 2) goto .Lcall
+
+ ld 6, 8(4) # r6 = p_argv[1]
+
+ cmpwi 0, 12, 3
+ blt 0, .Lcall # if (argc < 3) goto .Lcall
+
+ ld 7, 16(4) # r7 = p_argv[2]
+
+ cmpwi 0, 12, 4
+ blt 0, .Lcall # if (argc < 4) goto .Lcall
+
+ ld 8, 24(4) # r8 = p_argv[3]
+
+ cmpwi 0, 12, 5
+ blt 0, .Lcall # if (argc < 5) goto .Lcall
+
+ ld 9, 32(4) # r9 = p_argv[4]
+
+ cmpwi 0, 12, 6
+ blt 0, .Lcall # if (argc < 6) goto .Lcall
+
+ ld 10, 40(4) # r10 = p_argv[5]
+
+ cmpwi 0, 12, 7
+ blt 0, .Lcall # if (argc < 7) goto .Lcall
+
+// There are more than 6 microtask parameters, so we need to store the
+// remainder to the stack.
+ addi 12, 12, -6 # argc -= 6
+ mtctr 12
+
+// These are set to 8 bytes before the first desired store address (we're using
+// pre-increment loads and stores in the loop below). The parameter save area
+// for the microtask begins 48 + 8*8 == 112 bytes above r1 for XCOFF64.
+ addi 4, 4, 40 # p_argv = p_argv + 5
+ # (i.e. skip the 5 elements we already processed)
+ addi 12, 1, 104 # r12 = stack offset (112 - 8)
+
+.Lnext:
+ ldu 0, 8(4)
+ stdu 0, 8(12)
+ bdnz .Lnext
+
+.Lcall:
+ std 2, 40(1) # Save the TOC pointer to the linkage area
+// Load the actual function address from the function descriptor.
+ ld 12, 0(3) # Function address
+ ld 2, 8(3) # TOC pointer
+ ld 11, 16(3) # Environment pointer
+
+ addi 3, 31, -20 # r3 = &gtid
+ addi 4, 31, -24 # r4 = &tid
+
+ mtctr 12 # CTR = function address
+ bctrl # Branch to CTR
+ ld 2, 40(1) # Restore TOC pointer from linkage area
+
+#if OMPT_SUPPORT
+ li 3, 0
+ std 3, 0(30)
+#endif
+
+ li 3, 1
+
+#if OMPT_SUPPORT
+ ld 30, -16(31) # Restore r30 from the saved value on the stack
+#endif
+
+ mr 1, 31
+ ld 31, -8(1) # Restore r31 from the saved value on the stack
+ ld 0, 16(1)
+ mtlr 0 # Restore LR from the linkage area
+ blr # Branch to LR
+
+#else // KMP_ARCH_PPC_XCOFF
+
+ .globl __kmp_invoke_microtask[DS]
+ .globl .__kmp_invoke_microtask
+ .align 4
+ .csect __kmp_invoke_microtask[DS],2
+ .vbyte 4, .__kmp_invoke_microtask
+ .vbyte 4, TOC[TC0]
+ .vbyte 4, 0
+ .csect .text[PR],2
+ .machine "pwr7"
+.__kmp_invoke_microtask:
+
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+
+// We need to allocate a stack frame large enough to hold all of the parameters
+// on the stack for the microtask plus what this function needs. That's 24
+// bytes under the XCOFF ABI, plus max(32, 8*(2 + argc)) for
+// the parameters to the microtask (gtid, tid, argc elements of p_argv),
+// plus 8 bytes to store the values of r4 and r5, and 4 bytes to store r31.
+// With OMP-T support, we need an additional 4 bytes to save r30 to hold
+// a copy of r8.
+// Stack offsets relative to stack pointer:
+// r31: -4, r30: -8, gtid: -12, tid: -16
+
+ mflr 0
+ stw 31, -4(1) # Save r31 to the stack
+ stw 0, 8(1) # Save LR to the linkage area
+
+// This is unusual because normally we'd set r31 equal to r1 after the stack
+// frame is established. In this case, however, we need to dynamically compute
+// the stack frame size, and so we keep a direct copy of r1 to access our
+// register save areas and restore the r1 value before returning.
+ mr 31, 1
+
+// Compute the size of the "argc" portion of the parameter save area.
+// The parameter save area is always at least 32 bytes long (i.e. 8 regs)
+// The microtask has (2 + argc) parameters, so if argc <= 6, we need to
+// to allocate 4*6 bytes, not 4*argc.
+ li 0, 6
+ cmpwi 0, 6, 6
+ iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6
+ slwi 0, 0, 2 # r0 = 4 * max(argc, 6)
+
+// Compute the size necessary for the local stack frame.
+// 56 = 32 + 4 (for r4) + 4 (for r5) + 4 (for r31) + 4 (for OMP-T r30) +
+// 4 (parameter gtid) + 4 (parameter tid)
+ li 12, 56
+ add 12, 0, 12
+ neg 12, 12
+
+// We need to make sure that the stack frame stays aligned (to 16 bytes).
+ li 0, -16
+ and 12, 0, 12
+
+// Establish the local stack frame.
+ stwux 1, 1, 12
+
+#if OMPT_SUPPORT
+ stw 30, -8(31) # Save r30 to the stack
+ stw 1, 0(8)
+ mr 30, 8
+#endif
+
+// Store gtid and tid to the stack because they're passed by reference to the microtask.
+ stw 4, -12(31) # Save gtid to the stack
+ stw 5, -16(31) # Save tid to the stack
+
+ mr 12, 6 # r12 = argc
+ mr 4, 7 # r4 = p_argv
+
+ cmpwi 0, 12, 1
+ blt 0, .Lcall # if (argc < 1) goto .Lcall
+
+ lwz 5, 0(4) # r5 = p_argv[0]
+
+ cmpwi 0, 12, 2
+ blt 0, .Lcall # if (argc < 2) goto .Lcall
+
+ lwz 6, 4(4) # r6 = p_argv[1]
+
+ cmpwi 0, 12, 3
+ blt 0, .Lcall # if (argc < 3) goto .Lcall
+
+ lwz 7, 8(4) # r7 = p_argv[2]
+
+ cmpwi 0, 12, 4
+ blt 0, .Lcall # if (argc < 4) goto .Lcall
+
+ lwz 8, 12(4) # r8 = p_argv[3]
+
+ cmpwi 0, 12, 5
+ blt 0, .Lcall # if (argc < 5) goto .Lcall
+
+ lwz 9, 16(4) # r9 = p_argv[4]
+
+ cmpwi 0, 12, 6
+ blt 0, .Lcall # if (argc < 6) goto .Lcall
+
+ lwz 10, 20(4) # r10 = p_argv[5]
+
+ cmpwi 0, 12, 7
+ blt 0, .Lcall # if (argc < 7) goto .Lcall
+
+// There are more than 6 microtask parameters, so we need to store the
+// remainder to the stack.
+ addi 12, 12, -6 # argc -= 6
+ mtctr 12
+
+// These are set to 4 bytes before the first desired store address (we're using
+// pre-increment loads and stores in the loop below). The parameter save area
+// for the microtask begins 24 + 4*8 == 56 bytes above r1 for XCOFF.
+ addi 4, 4, 20 # p_argv = p_argv + 5
+ # (i.e. skip the 5 elements we already processed)
+ addi 12, 1, 52 # r12 = stack offset (56 - 4)
+
+.Lnext:
+ lwzu 0, 4(4)
+ stwu 0, 4(12)
+ bdnz .Lnext
+
+.Lcall:
+ stw 2, 20(1) # Save the TOC pointer to the linkage area
+// Load the actual function address from the function descriptor.
+ lwz 12, 0(3) # Function address
+ lwz 2, 4(3) # TOC pointer
+ lwz 11, 8(3) # Environment pointer
+
+ addi 3, 31, -12 # r3 = &gtid
+ addi 4, 31, -16 # r4 = &tid
+
+ mtctr 12 # CTR = function address
+ bctrl # Branch to CTR
+ lwz 2, 20(1) # Restore TOC pointer from linkage area
+
+#if OMPT_SUPPORT
+ li 3, 0
+ stw 3, 0(30)
+#endif
+
+ li 3, 1
+
+#if OMPT_SUPPORT
+ lwz 30, -8(31) # Restore r30 from the saved value on the stack
+#endif
+
+ mr 1, 31
+ lwz 31, -4(1) # Restore r31 from the saved value on the stack
+ lwz 0, 8(1)
+ mtlr 0 # Restore LR from the linkage area
+ blr # Branch to LR
+
+#endif // KMP_ARCH_PPC64_XCOFF
+
+.Lfunc_end0:
+ .vbyte 4, 0x00000000 # Traceback table begin
+ .byte 0x00 # Version = 0
+ .byte 0x09 # Language = CPlusPlus
+ .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue
+ # +HasTraceBackTableOffset, -IsInternalProcedure
+ # -HasControlledStorage, -IsTOCless
+ # -IsFloatingPointPresent
+ # -IsFloatingPointOperationLogOrAbortEnabled
+ .byte 0x61 # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed
+ # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved
+ .byte 0x80 # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0
+#if OMPT_SUPPORT
+ .byte 0x02 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 2
+ .byte 0x06 # NumberOfFixedParms = 6
+#else
+ .byte 0x01 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1
+ .byte 0x05 # NumberOfFixedParms = 5
+#endif
+ .byte 0x01 # NumberOfFPParms = 0, +HasParmsOnStack
+ .vbyte 4, 0x00000000 # Parameter type = i, i, i, i, i
+ .vbyte 4, .Lfunc_end0-.__kmp_invoke_microtask # Function size
+ .vbyte 2, 0x0016 # Function name len = 22
+ .byte "__kmp_invoke_microtask" # Function Name
+ .byte 0x1f # AllocaRegister = 31
+ # -- End function
+
+// -- End __kmp_invoke_microtask
+
+// Support for unnamed common blocks.
+
+ .comm .gomp_critical_user_, 32, 3
+#if KMP_ARCH_PPC64_XCOFF
+ .csect __kmp_unnamed_critical_addr[RW],3
+#else
+ .csect __kmp_unnamed_critical_addr[RW],2
+#endif
+ .globl __kmp_unnamed_critical_addr[RW]
+ .ptr .gomp_critical_user_
+
+// -- End unnamed common block
+
+ .toc
+
+#endif // KMP_OS_AIX
diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc
index fd0a56bce1b7..1dd3c861dbb2 100644
--- a/lib/clang/include/VCSVersion.inc
+++ b/lib/clang/include/VCSVersion.inc
@@ -1,8 +1,8 @@
-#define LLVM_REVISION "llvmorg-18.1.0-rc2-53-gc7b0a6ecd442"
+#define LLVM_REVISION "llvmorg-18.1.0-rc3-0-g6c90f8dd5463"
 #define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"
-#define CLANG_REVISION "llvmorg-18.1.0-rc2-53-gc7b0a6ecd442"
+#define CLANG_REVISION "llvmorg-18.1.0-rc3-0-g6c90f8dd5463"
 #define CLANG_REPOSITORY "https://github.com/llvm/llvm-project.git"
-#define LLDB_REVISION "llvmorg-18.1.0-rc2-53-gc7b0a6ecd442"
+#define LLDB_REVISION "llvmorg-18.1.0-rc3-0-g6c90f8dd5463"
 #define LLDB_REPOSITORY "https://github.com/llvm/llvm-project.git"
diff --git a/lib/clang/include/lld/Common/Version.inc b/lib/clang/include/lld/Common/Version.inc
index 153e86106f12..002ec1eecc00 100644
--- a/lib/clang/include/lld/Common/Version.inc
+++ b/lib/clang/include/lld/Common/Version.inc
@@ -1,4 +1,4 @@
 // Local identifier in __FreeBSD_version style
 #define LLD_FREEBSD_VERSION 1400006
-#define LLD_VERSION_STRING "18.1.0 (FreeBSD llvmorg-18.1.0-rc2-53-gc7b0a6ecd442-" __XSTRING(LLD_FREEBSD_VERSION) ")"
+#define LLD_VERSION_STRING "18.1.0 (FreeBSD llvmorg-18.1.0-rc3-0-g6c90f8dd5463-" __XSTRING(LLD_FREEBSD_VERSION) ")"
diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h
index d06ac507357e..6950d566ff2f 100644
--- a/lib/clang/include/llvm/Support/VCSRevision.h
+++ b/lib/clang/include/llvm/Support/VCSRevision.h
@@ -1,2 +1,2 @@
-#define LLVM_REVISION "llvmorg-18.1.0-rc2-53-gc7b0a6ecd442"
+#define LLVM_REVISION "llvmorg-18.1.0-rc3-0-g6c90f8dd5463"
 #define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"
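As context for the z_AIX_asm.S routine added above: __kmp_invoke_microtask builds a call frame, passes gtid and tid by reference, forwards up to six p_argv entries in registers (spilling any remainder into the callee's parameter save area via the .Lnext loop), and always returns 1 in r3. A rough C-level sketch of that behaviour for the small-argc case (illustrative only; the helper name and typedef below are invented, and the real routine is written in assembly precisely because arbitrary-argc forwarding cannot be expressed portably in C):

// Illustrative sketch only: mirrors the register setup the assembly performs
// for argc <= 6. The real routine also handles larger argc via the stack-copy
// loop and, under OMPT_SUPPORT, records and clears *exit_frame_ptr.
typedef void (*microtask_t)(int *gtid, int *tid, ...);

static int invoke_microtask_sketch(microtask_t pkfn, int gtid, int tid,
                                   int argc, void *p_argv[]) {
  switch (argc) {
  case 0: (*pkfn)(&gtid, &tid); break;
  case 1: (*pkfn)(&gtid, &tid, p_argv[0]); break;
  case 2: (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]); break;
  case 3: (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]); break;
  case 4: (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]); break;
  case 5: (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
                  p_argv[4]); break;
  default: // argc >= 6; the assembly copies any further entries to the stack.
    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
            p_argv[4], p_argv[5]);
    break;
  }
  return 1; // the assembly always returns 1 (TRUE) in r3
}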