diff --git a/contrib/llvm-project/clang/lib/AST/ASTContext.cpp b/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
index fdba204fbe7f..0e163f3161a3 100644
--- a/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
+++ b/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
@@ -1,11801 +1,11809 @@
//===- ASTContext.cpp - Context to hold long-lived AST nodes --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ASTContext interface.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
#include "CXXABI.h"
#include "Interp/Context.h"
#include "clang/AST/APValue.h"
#include "clang/AST/ASTConcept.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/ASTTypeTraits.h"
#include "clang/AST/Attr.h"
#include "clang/AST/AttrIterator.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Comment.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclContextInternals.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/DependenceFlags.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprConcepts.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/MangleNumberingContext.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/ParentMapContext.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/UnresolvedSet.h"
#include "clang/AST/VTableBuilder.h"
#include "clang/Basic/AddressSpaces.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CommentOptions.h"
#include "clang/Basic/ExceptionSpecificationType.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Linkage.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/NoSanitizeList.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetCXXABI.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/XRayLists.h"
#include "llvm/ADT/APFixedPoint.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
using namespace clang;
enum FloatingRank {
BFloat16Rank, Float16Rank, HalfRank, FloatRank, DoubleRank, LongDoubleRank, Float128Rank
};
/// \returns location that is relevant when searching for Doc comments related
/// to \p D.
static SourceLocation getDeclLocForCommentSearch(const Decl *D,
SourceManager &SourceMgr) {
assert(D);
// User can not attach documentation to implicit declarations.
if (D->isImplicit())
return {};
// User can not attach documentation to implicit instantiations.
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (VD->isStaticDataMember() &&
VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *CRD = dyn_cast<CXXRecordDecl>(D)) {
if (CRD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
TemplateSpecializationKind TSK = CTSD->getSpecializationKind();
if (TSK == TSK_ImplicitInstantiation ||
TSK == TSK_Undeclared)
return {};
}
if (const auto *ED = dyn_cast<EnumDecl>(D)) {
if (ED->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *TD = dyn_cast<TagDecl>(D)) {
// When a tag declaration (but not a definition!) is part of the
// decl-specifier-seq of some other declaration, it doesn't get a comment.
if (TD->isEmbeddedInDeclarator() && !TD->isCompleteDefinition())
return {};
}
// TODO: handle comments for function parameters properly.
if (isa<ParmVarDecl>(D))
return {};
// TODO: we could look up template parameter documentation in the template
// documentation.
if (isa<TemplateTypeParmDecl>(D) ||
isa<NonTypeTemplateParmDecl>(D) ||
isa<TemplateTemplateParmDecl>(D))
return {};
// Find declaration location.
// For Objective-C declarations we generally don't expect to have multiple
// declarators, thus use declaration starting location as the "declaration
// location".
// For all other declarations multiple declarators are used quite frequently,
// so we use the location of the identifier as the "declaration location".
if (isa<ObjCMethodDecl>(D) || isa<ObjCContainerDecl>(D) ||
isa<ObjCPropertyDecl>(D) ||
isa<RedeclarableTemplateDecl>(D) ||
isa<ClassTemplateSpecializationDecl>(D) ||
// Allow association with Y across {} in `typedef struct X {} Y`.
isa<TypedefDecl>(D))
return D->getBeginLoc();
else {
const SourceLocation DeclLoc = D->getLocation();
if (DeclLoc.isMacroID()) {
if (isa<TypedefDecl>(D)) {
// If the location of the typedef name is in a macro, it is because it is
// being declared via a macro. Try using the declaration's starting location
// as the "declaration location".
return D->getBeginLoc();
} else if (const auto *TD = dyn_cast<TagDecl>(D)) {
// If location of the tag decl is inside a macro, but the spelling of
// the tag name comes from a macro argument, it looks like a special
// macro like NS_ENUM is being used to define the tag decl. In that
// case, adjust the source location to the expansion loc so that we can
// attach the comment to the tag decl.
if (SourceMgr.isMacroArgExpansion(DeclLoc) &&
TD->isCompleteDefinition())
return SourceMgr.getExpansionLoc(DeclLoc);
}
}
return DeclLoc;
}
return {};
}
RawComment *ASTContext::getRawCommentForDeclNoCacheImpl(
const Decl *D, const SourceLocation RepresentativeLocForDecl,
const std::map<unsigned, RawComment *> &CommentsInTheFile) const {
// If the declaration doesn't map directly to a location in a file, we
// can't find the comment.
if (RepresentativeLocForDecl.isInvalid() ||
!RepresentativeLocForDecl.isFileID())
return nullptr;
// If there are no comments anywhere, we won't find anything.
if (CommentsInTheFile.empty())
return nullptr;
// Decompose the location for the declaration and find the beginning of the
// file buffer.
const std::pair<FileID, unsigned> DeclLocDecomp =
SourceMgr.getDecomposedLoc(RepresentativeLocForDecl);
// Slow path.
auto OffsetCommentBehindDecl =
CommentsInTheFile.lower_bound(DeclLocDecomp.second);
// First check whether we have a trailing comment.
if (OffsetCommentBehindDecl != CommentsInTheFile.end()) {
RawComment *CommentBehindDecl = OffsetCommentBehindDecl->second;
if ((CommentBehindDecl->isDocumentation() ||
LangOpts.CommentOpts.ParseAllComments) &&
CommentBehindDecl->isTrailingComment() &&
(isa<FieldDecl>(D) || isa<EnumConstantDecl>(D) || isa<VarDecl>(D) ||
isa<ObjCMethodDecl>(D) || isa<ObjCPropertyDecl>(D))) {
// Check that Doxygen trailing comment comes after the declaration, starts
// on the same line and in the same file as the declaration.
if (SourceMgr.getLineNumber(DeclLocDecomp.first, DeclLocDecomp.second) ==
Comments.getCommentBeginLine(CommentBehindDecl, DeclLocDecomp.first,
OffsetCommentBehindDecl->first)) {
return CommentBehindDecl;
}
}
}
// The comment just after the declaration was not a trailing comment.
// Let's look at the previous comment.
if (OffsetCommentBehindDecl == CommentsInTheFile.begin())
return nullptr;
auto OffsetCommentBeforeDecl = --OffsetCommentBehindDecl;
RawComment *CommentBeforeDecl = OffsetCommentBeforeDecl->second;
// Check that we actually have a non-member Doxygen comment.
if (!(CommentBeforeDecl->isDocumentation() ||
LangOpts.CommentOpts.ParseAllComments) ||
CommentBeforeDecl->isTrailingComment())
return nullptr;
// Decompose the end of the comment.
const unsigned CommentEndOffset =
Comments.getCommentEndOffset(CommentBeforeDecl);
// Get the corresponding buffer.
bool Invalid = false;
const char *Buffer = SourceMgr.getBufferData(DeclLocDecomp.first,
&Invalid).data();
if (Invalid)
return nullptr;
// Extract text between the comment and declaration.
StringRef Text(Buffer + CommentEndOffset,
DeclLocDecomp.second - CommentEndOffset);
// There should be no other declarations or preprocessor directives between
// comment and declaration.
if (Text.find_first_of(";{}#@") != StringRef::npos)
return nullptr;
return CommentBeforeDecl;
}
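// Illustrative aside, not from the upstream file: the lookup above keys
// comments by file offset and uses std::map::lower_bound to find the first
// comment at or after the declaration; the entry just before it, if any, is
// the closest comment preceding the declaration. A minimal stand-in for that
// technique with plain offsets, using only <map> (already included above);
// all names below are made up for illustration.
namespace comment_offset_lookup_sketch {
inline const char *nearestCommentBefore(
    const std::map<unsigned, const char *> &CommentsByOffset,
    unsigned DeclOffset) {
  // First comment starting at or after the declaration (candidate trailer).
  auto Behind = CommentsByOffset.lower_bound(DeclOffset);
  if (Behind == CommentsByOffset.begin())
    return nullptr; // nothing precedes the declaration
  --Behind;         // closest comment that starts before the declaration
  return Behind->second;
}
} // namespace comment_offset_lookup_sketch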
RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const {
const SourceLocation DeclLoc = getDeclLocForCommentSearch(D, SourceMgr);
// If the declaration doesn't map directly to a location in a file, we
// can't find the comment.
if (DeclLoc.isInvalid() || !DeclLoc.isFileID())
return nullptr;
if (ExternalSource && !CommentsLoaded) {
ExternalSource->ReadComments();
CommentsLoaded = true;
}
if (Comments.empty())
return nullptr;
const FileID File = SourceMgr.getDecomposedLoc(DeclLoc).first;
const auto CommentsInThisFile = Comments.getCommentsInFile(File);
if (!CommentsInThisFile || CommentsInThisFile->empty())
return nullptr;
return getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile);
}
void ASTContext::addComment(const RawComment &RC) {
assert(LangOpts.RetainCommentsFromSystemHeaders ||
!SourceMgr.isInSystemHeader(RC.getSourceRange().getBegin()));
Comments.addComment(RC, LangOpts.CommentOpts, BumpAlloc);
}
/// If we have a 'templated' declaration for a template, adjust 'D' to
/// refer to the actual template.
/// If we have an implicit instantiation, adjust 'D' to refer to the template.
static const Decl &adjustDeclToTemplate(const Decl &D) {
if (const auto *FD = dyn_cast<FunctionDecl>(&D)) {
// Is this function declaration part of a function template?
if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate())
return *FTD;
// Nothing to do if function is not an implicit instantiation.
if (FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation)
return D;
// Function is an implicit instantiation of a function template?
if (const FunctionTemplateDecl *FTD = FD->getPrimaryTemplate())
return *FTD;
// Function is instantiated from a member definition of a class template?
if (const FunctionDecl *MemberDecl =
FD->getInstantiatedFromMemberFunction())
return *MemberDecl;
return D;
}
if (const auto *VD = dyn_cast<VarDecl>(&D)) {
// Static data member is instantiated from a member definition of a class
// template?
if (VD->isStaticDataMember())
if (const VarDecl *MemberDecl = VD->getInstantiatedFromStaticDataMember())
return *MemberDecl;
return D;
}
if (const auto *CRD = dyn_cast<CXXRecordDecl>(&D)) {
// Is this class declaration part of a class template?
if (const ClassTemplateDecl *CTD = CRD->getDescribedClassTemplate())
return *CTD;
// Class is an implicit instantiation of a class template or partial
// specialization?
if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(CRD)) {
if (CTSD->getSpecializationKind() != TSK_ImplicitInstantiation)
return D;
llvm::PointerUnion<ClassTemplateDecl *,
ClassTemplatePartialSpecializationDecl *>
PU = CTSD->getSpecializedTemplateOrPartial();
return PU.is<ClassTemplateDecl *>()
? *static_cast<const Decl *>(PU.get<ClassTemplateDecl *>())
: *static_cast<const Decl *>(
PU.get<ClassTemplatePartialSpecializationDecl *>());
}
// Class is instantiated from a member definition of a class template?
if (const MemberSpecializationInfo *Info =
CRD->getMemberSpecializationInfo())
return *Info->getInstantiatedFrom();
return D;
}
if (const auto *ED = dyn_cast<EnumDecl>(&D)) {
// Enum is instantiated from a member definition of a class template?
if (const EnumDecl *MemberDecl = ED->getInstantiatedFromMemberEnum())
return *MemberDecl;
return D;
}
// FIXME: Adjust alias templates?
return D;
}
const RawComment *ASTContext::getRawCommentForAnyRedecl(
const Decl *D,
const Decl **OriginalDecl) const {
if (!D) {
if (OriginalDecl)
*OriginalDecl = nullptr;
return nullptr;
}
D = &adjustDeclToTemplate(*D);
// Any comment directly attached to D?
{
auto DeclComment = DeclRawComments.find(D);
if (DeclComment != DeclRawComments.end()) {
if (OriginalDecl)
*OriginalDecl = D;
return DeclComment->second;
}
}
// Any comment attached to any redeclaration of D?
const Decl *CanonicalD = D->getCanonicalDecl();
if (!CanonicalD)
return nullptr;
{
auto RedeclComment = RedeclChainComments.find(CanonicalD);
if (RedeclComment != RedeclChainComments.end()) {
if (OriginalDecl)
*OriginalDecl = RedeclComment->second;
auto CommentAtRedecl = DeclRawComments.find(RedeclComment->second);
assert(CommentAtRedecl != DeclRawComments.end() &&
"This decl is supposed to have comment attached.");
return CommentAtRedecl->second;
}
}
// Any redeclarations of D that we haven't checked for comments yet?
// We can't use a DenseMap::iterator directly, since it could be invalidated.
auto LastCheckedRedecl = [this, CanonicalD]() -> const Decl * {
auto LookupRes = CommentlessRedeclChains.find(CanonicalD);
if (LookupRes != CommentlessRedeclChains.end())
return LookupRes->second;
return nullptr;
}();
for (const auto Redecl : D->redecls()) {
assert(Redecl);
// Skip all redeclarations that have been checked previously.
if (LastCheckedRedecl) {
if (LastCheckedRedecl == Redecl) {
LastCheckedRedecl = nullptr;
}
continue;
}
const RawComment *RedeclComment = getRawCommentForDeclNoCache(Redecl);
if (RedeclComment) {
cacheRawCommentForDecl(*Redecl, *RedeclComment);
if (OriginalDecl)
*OriginalDecl = Redecl;
return RedeclComment;
}
CommentlessRedeclChains[CanonicalD] = Redecl;
}
if (OriginalDecl)
*OriginalDecl = nullptr;
return nullptr;
}
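// Illustrative aside, not from the upstream file: the loop above resumes
// scanning the redeclaration chain where an earlier query stopped, recording
// progress per canonical declaration so redeclarations are never re-examined.
// A simplified sketch of that resumable-scan idea over plain integers, using
// only headers already included here (ArrayRef, <map>); names are made up.
namespace resumable_scan_sketch {
template <typename Pred>
const int *findSkippingChecked(llvm::ArrayRef<int> Chain,
                               std::map<int, std::size_t> &NextUnchecked,
                               int CanonicalId, Pred Matches) {
  std::size_t &Start = NextUnchecked[CanonicalId]; // 0 on the first query
  for (std::size_t I = Start; I != Chain.size(); ++I) {
    Start = I + 1; // remember progress even when nothing matches
    if (Matches(Chain[I]))
      return &Chain[I];
  }
  return nullptr;
}
} // namespace resumable_scan_sketch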
void ASTContext::cacheRawCommentForDecl(const Decl &OriginalD,
const RawComment &Comment) const {
assert(Comment.isDocumentation() || LangOpts.CommentOpts.ParseAllComments);
DeclRawComments.try_emplace(&OriginalD, &Comment);
const Decl *const CanonicalDecl = OriginalD.getCanonicalDecl();
RedeclChainComments.try_emplace(CanonicalDecl, &OriginalD);
CommentlessRedeclChains.erase(CanonicalDecl);
}
static void addRedeclaredMethods(const ObjCMethodDecl *ObjCMethod,
SmallVectorImpl<const NamedDecl *> &Redeclared) {
const DeclContext *DC = ObjCMethod->getDeclContext();
if (const auto *IMD = dyn_cast<ObjCImplDecl>(DC)) {
const ObjCInterfaceDecl *ID = IMD->getClassInterface();
if (!ID)
return;
// Add redeclared method here.
for (const auto *Ext : ID->known_extensions()) {
if (ObjCMethodDecl *RedeclaredMethod =
Ext->getMethod(ObjCMethod->getSelector(),
ObjCMethod->isInstanceMethod()))
Redeclared.push_back(RedeclaredMethod);
}
}
}
void ASTContext::attachCommentsToJustParsedDecls(ArrayRef<Decl *> Decls,
const Preprocessor *PP) {
if (Comments.empty() || Decls.empty())
return;
FileID File;
for (Decl *D : Decls) {
SourceLocation Loc = D->getLocation();
if (Loc.isValid()) {
// See if there are any new comments that are not attached to a decl.
// The location doesn't have to be precise - we care only about the file.
File = SourceMgr.getDecomposedLoc(Loc).first;
break;
}
}
if (File.isInvalid())
return;
auto CommentsInThisFile = Comments.getCommentsInFile(File);
if (!CommentsInThisFile || CommentsInThisFile->empty() ||
CommentsInThisFile->rbegin()->second->isAttached())
return;
// There is at least one comment not attached to a decl.
// Maybe it should be attached to one of Decls?
//
// Note that this way we pick up not only comments that precede the
// declaration, but also comments that *follow* the declaration -- thanks to
// the lookahead in the lexer: we've consumed the semicolon and looked
// ahead through comments.
for (const Decl *D : Decls) {
assert(D);
if (D->isInvalidDecl())
continue;
D = &adjustDeclToTemplate(*D);
const SourceLocation DeclLoc = getDeclLocForCommentSearch(D, SourceMgr);
if (DeclLoc.isInvalid() || !DeclLoc.isFileID())
continue;
if (DeclRawComments.count(D) > 0)
continue;
if (RawComment *const DocComment =
getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile)) {
cacheRawCommentForDecl(*D, *DocComment);
comments::FullComment *FC = DocComment->parse(*this, PP, D);
ParsedComments[D->getCanonicalDecl()] = FC;
}
}
}
comments::FullComment *ASTContext::cloneFullComment(comments::FullComment *FC,
const Decl *D) const {
auto *ThisDeclInfo = new (*this) comments::DeclInfo;
ThisDeclInfo->CommentDecl = D;
ThisDeclInfo->IsFilled = false;
ThisDeclInfo->fill();
ThisDeclInfo->CommentDecl = FC->getDecl();
if (!ThisDeclInfo->TemplateParameters)
ThisDeclInfo->TemplateParameters = FC->getDeclInfo()->TemplateParameters;
comments::FullComment *CFC =
new (*this) comments::FullComment(FC->getBlocks(),
ThisDeclInfo);
return CFC;
}
comments::FullComment *ASTContext::getLocalCommentForDeclUncached(const Decl *D) const {
const RawComment *RC = getRawCommentForDeclNoCache(D);
return RC ? RC->parse(*this, nullptr, D) : nullptr;
}
comments::FullComment *ASTContext::getCommentForDecl(
const Decl *D,
const Preprocessor *PP) const {
if (!D || D->isInvalidDecl())
return nullptr;
D = &adjustDeclToTemplate(*D);
const Decl *Canonical = D->getCanonicalDecl();
llvm::DenseMap<const Decl *, comments::FullComment *>::iterator Pos =
ParsedComments.find(Canonical);
if (Pos != ParsedComments.end()) {
if (Canonical != D) {
comments::FullComment *FC = Pos->second;
comments::FullComment *CFC = cloneFullComment(FC, D);
return CFC;
}
return Pos->second;
}
const Decl *OriginalDecl = nullptr;
const RawComment *RC = getRawCommentForAnyRedecl(D, &OriginalDecl);
if (!RC) {
if (isa<ObjCMethodDecl>(D) || isa<FunctionDecl>(D)) {
SmallVector<const NamedDecl*, 8> Overridden;
const auto *OMD = dyn_cast<ObjCMethodDecl>(D);
if (OMD && OMD->isPropertyAccessor())
if (const ObjCPropertyDecl *PDecl = OMD->findPropertyDecl())
if (comments::FullComment *FC = getCommentForDecl(PDecl, PP))
return cloneFullComment(FC, D);
if (OMD)
addRedeclaredMethods(OMD, Overridden);
getOverriddenMethods(dyn_cast<NamedDecl>(D), Overridden);
for (unsigned i = 0, e = Overridden.size(); i < e; i++)
if (comments::FullComment *FC = getCommentForDecl(Overridden[i], PP))
return cloneFullComment(FC, D);
}
else if (const auto *TD = dyn_cast<TypedefNameDecl>(D)) {
// Attach any tag type's documentation to its typedef if the latter
// does not have one of its own.
QualType QT = TD->getUnderlyingType();
if (const auto *TT = QT->getAs<TagType>())
if (const Decl *TD = TT->getDecl())
if (comments::FullComment *FC = getCommentForDecl(TD, PP))
return cloneFullComment(FC, D);
}
else if (const auto *IC = dyn_cast<ObjCInterfaceDecl>(D)) {
while (IC->getSuperClass()) {
IC = IC->getSuperClass();
if (comments::FullComment *FC = getCommentForDecl(IC, PP))
return cloneFullComment(FC, D);
}
}
else if (const auto *CD = dyn_cast<ObjCCategoryDecl>(D)) {
if (const ObjCInterfaceDecl *IC = CD->getClassInterface())
if (comments::FullComment *FC = getCommentForDecl(IC, PP))
return cloneFullComment(FC, D);
}
else if (const auto *RD = dyn_cast<CXXRecordDecl>(D)) {
if (!(RD = RD->getDefinition()))
return nullptr;
// Check non-virtual bases.
for (const auto &I : RD->bases()) {
if (I.isVirtual() || (I.getAccessSpecifier() != AS_public))
continue;
QualType Ty = I.getType();
if (Ty.isNull())
continue;
if (const CXXRecordDecl *NonVirtualBase = Ty->getAsCXXRecordDecl()) {
if (!(NonVirtualBase = NonVirtualBase->getDefinition()))
continue;
if (comments::FullComment *FC = getCommentForDecl((NonVirtualBase), PP))
return cloneFullComment(FC, D);
}
}
// Check virtual bases.
for (const auto &I : RD->vbases()) {
if (I.getAccessSpecifier() != AS_public)
continue;
QualType Ty = I.getType();
if (Ty.isNull())
continue;
if (const CXXRecordDecl *VirtualBase = Ty->getAsCXXRecordDecl()) {
if (!(VirtualBase = VirtualBase->getDefinition()))
continue;
if (comments::FullComment *FC = getCommentForDecl((VirtualBase), PP))
return cloneFullComment(FC, D);
}
}
}
return nullptr;
}
// If the RawComment was attached to another redeclaration of this Decl, we
// should parse the comment in context of that other Decl. This is important
// because comments can contain references to parameter names which can be
// different across redeclarations.
if (D != OriginalDecl && OriginalDecl)
return getCommentForDecl(OriginalDecl, PP);
comments::FullComment *FC = RC->parse(*this, PP, D);
ParsedComments[Canonical] = FC;
return FC;
}
void
ASTContext::CanonicalTemplateTemplateParm::Profile(llvm::FoldingSetNodeID &ID,
const ASTContext &C,
TemplateTemplateParmDecl *Parm) {
ID.AddInteger(Parm->getDepth());
ID.AddInteger(Parm->getPosition());
ID.AddBoolean(Parm->isParameterPack());
TemplateParameterList *Params = Parm->getTemplateParameters();
ID.AddInteger(Params->size());
for (TemplateParameterList::const_iterator P = Params->begin(),
PEnd = Params->end();
P != PEnd; ++P) {
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(*P)) {
ID.AddInteger(0);
ID.AddBoolean(TTP->isParameterPack());
const TypeConstraint *TC = TTP->getTypeConstraint();
ID.AddBoolean(TC != nullptr);
if (TC)
TC->getImmediatelyDeclaredConstraint()->Profile(ID, C,
/*Canonical=*/true);
if (TTP->isExpandedParameterPack()) {
ID.AddBoolean(true);
ID.AddInteger(TTP->getNumExpansionParameters());
} else
ID.AddBoolean(false);
continue;
}
if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(*P)) {
ID.AddInteger(1);
ID.AddBoolean(NTTP->isParameterPack());
ID.AddPointer(NTTP->getType().getCanonicalType().getAsOpaquePtr());
if (NTTP->isExpandedParameterPack()) {
ID.AddBoolean(true);
ID.AddInteger(NTTP->getNumExpansionTypes());
for (unsigned I = 0, N = NTTP->getNumExpansionTypes(); I != N; ++I) {
QualType T = NTTP->getExpansionType(I);
ID.AddPointer(T.getCanonicalType().getAsOpaquePtr());
}
} else
ID.AddBoolean(false);
continue;
}
auto *TTP = cast<TemplateTemplateParmDecl>(*P);
ID.AddInteger(2);
Profile(ID, C, TTP);
}
Expr *RequiresClause = Parm->getTemplateParameters()->getRequiresClause();
ID.AddBoolean(RequiresClause != nullptr);
if (RequiresClause)
RequiresClause->Profile(ID, C, /*Canonical=*/true);
}
static Expr *
canonicalizeImmediatelyDeclaredConstraint(const ASTContext &C, Expr *IDC,
QualType ConstrainedType) {
// This is a bit ugly - we need to form a new immediately-declared
// constraint that references the new parameter; this would ideally
// require semantic analysis (e.g. template<C T> struct S {}; - the
// converted arguments of C<T> could be an argument pack if C is
// declared as template<typename... T> concept C = ...).
// We don't have semantic analysis here so we dig deep into the
// ready-made constraint expr and change the thing manually.
ConceptSpecializationExpr *CSE;
if (const auto *Fold = dyn_cast<CXXFoldExpr>(IDC))
CSE = cast<ConceptSpecializationExpr>(Fold->getLHS());
else
CSE = cast<ConceptSpecializationExpr>(IDC);
ArrayRef<TemplateArgument> OldConverted = CSE->getTemplateArguments();
SmallVector<TemplateArgument, 3> NewConverted;
NewConverted.reserve(OldConverted.size());
if (OldConverted.front().getKind() == TemplateArgument::Pack) {
// The case:
// template<typename... T> concept C = true;
// template<C<int> T> struct S; -> constraint is C<{T, int}>
NewConverted.push_back(ConstrainedType);
for (auto &Arg : OldConverted.front().pack_elements().drop_front(1))
NewConverted.push_back(Arg);
TemplateArgument NewPack(NewConverted);
NewConverted.clear();
NewConverted.push_back(NewPack);
assert(OldConverted.size() == 1 &&
"Template parameter pack should be the last parameter");
} else {
assert(OldConverted.front().getKind() == TemplateArgument::Type &&
"Unexpected first argument kind for immediately-declared "
"constraint");
NewConverted.push_back(ConstrainedType);
for (auto &Arg : OldConverted.drop_front(1))
NewConverted.push_back(Arg);
}
Expr *NewIDC = ConceptSpecializationExpr::Create(
C, CSE->getNamedConcept(), NewConverted, nullptr,
CSE->isInstantiationDependent(), CSE->containsUnexpandedParameterPack());
if (auto *OrigFold = dyn_cast<CXXFoldExpr>(IDC))
NewIDC = new (C) CXXFoldExpr(
OrigFold->getType(), /*Callee*/nullptr, SourceLocation(), NewIDC,
BinaryOperatorKind::BO_LAnd, SourceLocation(), /*RHS=*/nullptr,
SourceLocation(), /*NumExpansions=*/None);
return NewIDC;
}
TemplateTemplateParmDecl *
ASTContext::getCanonicalTemplateTemplateParmDecl(
TemplateTemplateParmDecl *TTP) const {
// Check if we already have a canonical template template parameter.
llvm::FoldingSetNodeID ID;
CanonicalTemplateTemplateParm::Profile(ID, *this, TTP);
void *InsertPos = nullptr;
CanonicalTemplateTemplateParm *Canonical
= CanonTemplateTemplateParms.FindNodeOrInsertPos(ID, InsertPos);
if (Canonical)
return Canonical->getParam();
// Build a canonical template parameter list.
TemplateParameterList *Params = TTP->getTemplateParameters();
SmallVector<NamedDecl *, 4> CanonParams;
CanonParams.reserve(Params->size());
for (TemplateParameterList::const_iterator P = Params->begin(),
PEnd = Params->end();
P != PEnd; ++P) {
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(*P)) {
TemplateTypeParmDecl *NewTTP = TemplateTypeParmDecl::Create(*this,
getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
TTP->getDepth(), TTP->getIndex(), nullptr, false,
TTP->isParameterPack(), TTP->hasTypeConstraint(),
TTP->isExpandedParameterPack() ?
llvm::Optional<unsigned>(TTP->getNumExpansionParameters()) : None);
if (const auto *TC = TTP->getTypeConstraint()) {
QualType ParamAsArgument(NewTTP->getTypeForDecl(), 0);
Expr *NewIDC = canonicalizeImmediatelyDeclaredConstraint(
*this, TC->getImmediatelyDeclaredConstraint(),
ParamAsArgument);
TemplateArgumentListInfo CanonArgsAsWritten;
if (auto *Args = TC->getTemplateArgsAsWritten())
for (const auto &ArgLoc : Args->arguments())
CanonArgsAsWritten.addArgument(
TemplateArgumentLoc(ArgLoc.getArgument(),
TemplateArgumentLocInfo()));
NewTTP->setTypeConstraint(
NestedNameSpecifierLoc(),
DeclarationNameInfo(TC->getNamedConcept()->getDeclName(),
SourceLocation()), /*FoundDecl=*/nullptr,
// Actually canonicalizing a TemplateArgumentLoc is difficult so we
// simply omit the ArgsAsWritten
TC->getNamedConcept(), /*ArgsAsWritten=*/nullptr, NewIDC);
}
CanonParams.push_back(NewTTP);
} else if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(*P)) {
QualType T = getCanonicalType(NTTP->getType());
TypeSourceInfo *TInfo = getTrivialTypeSourceInfo(T);
NonTypeTemplateParmDecl *Param;
if (NTTP->isExpandedParameterPack()) {
SmallVector<QualType, 2> ExpandedTypes;
SmallVector<TypeSourceInfo *, 2> ExpandedTInfos;
for (unsigned I = 0, N = NTTP->getNumExpansionTypes(); I != N; ++I) {
ExpandedTypes.push_back(getCanonicalType(NTTP->getExpansionType(I)));
ExpandedTInfos.push_back(
getTrivialTypeSourceInfo(ExpandedTypes.back()));
}
Param = NonTypeTemplateParmDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(),
SourceLocation(),
NTTP->getDepth(),
NTTP->getPosition(), nullptr,
T,
TInfo,
ExpandedTypes,
ExpandedTInfos);
} else {
Param = NonTypeTemplateParmDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(),
SourceLocation(),
NTTP->getDepth(),
NTTP->getPosition(), nullptr,
T,
NTTP->isParameterPack(),
TInfo);
}
if (AutoType *AT = T->getContainedAutoType()) {
if (AT->isConstrained()) {
Param->setPlaceholderTypeConstraint(
canonicalizeImmediatelyDeclaredConstraint(
*this, NTTP->getPlaceholderTypeConstraint(), T));
}
}
CanonParams.push_back(Param);
} else
CanonParams.push_back(getCanonicalTemplateTemplateParmDecl(
cast<TemplateTemplateParmDecl>(*P)));
}
Expr *CanonRequiresClause = nullptr;
if (Expr *RequiresClause = TTP->getTemplateParameters()->getRequiresClause())
CanonRequiresClause = RequiresClause;
TemplateTemplateParmDecl *CanonTTP
= TemplateTemplateParmDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(), TTP->getDepth(),
TTP->getPosition(),
TTP->isParameterPack(),
nullptr,
TemplateParameterList::Create(*this, SourceLocation(),
SourceLocation(),
CanonParams,
SourceLocation(),
CanonRequiresClause));
// Get the new insert position for the node we care about.
Canonical = CanonTemplateTemplateParms.FindNodeOrInsertPos(ID, InsertPos);
assert(!Canonical && "Shouldn't be in the map!");
(void)Canonical;
// Create the canonical template template parameter entry.
Canonical = new (*this) CanonicalTemplateTemplateParm(CanonTTP);
CanonTemplateTemplateParms.InsertNode(Canonical, InsertPos);
return CanonTTP;
}
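// Illustrative aside, not from the upstream file: the function above uses the
// llvm::FoldingSet "profile, then find-or-insert" pattern to hand out exactly
// one canonical node per distinct key. A minimal sketch of that pattern with a
// trivial node type, relying only on llvm/ADT/FoldingSet.h (included above);
// the node and function names are made up, and ownership is left to the
// caller (the real code allocates from the ASTContext arena).
namespace folding_set_sketch {
struct CanonicalPair : llvm::FoldingSetNode {
  unsigned Depth, Index;
  CanonicalPair(unsigned D, unsigned I) : Depth(D), Index(I) {}
  void Profile(llvm::FoldingSetNodeID &ID) {
    ID.AddInteger(Depth);
    ID.AddInteger(Index);
  }
};
inline CanonicalPair *getOrCreate(llvm::FoldingSet<CanonicalPair> &Set,
                                  unsigned Depth, unsigned Index) {
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(Depth);
  ID.AddInteger(Index);
  void *InsertPos = nullptr;
  if (CanonicalPair *Existing = Set.FindNodeOrInsertPos(ID, InsertPos))
    return Existing; // already canonicalized
  auto *Fresh = new CanonicalPair(Depth, Index);
  Set.InsertNode(Fresh, InsertPos);
  return Fresh;
}
} // namespace folding_set_sketch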
TargetCXXABI::Kind ASTContext::getCXXABIKind() const {
auto Kind = getTargetInfo().getCXXABI().getKind();
return getLangOpts().CXXABI.getValueOr(Kind);
}
CXXABI *ASTContext::createCXXABI(const TargetInfo &T) {
if (!LangOpts.CPlusPlus) return nullptr;
switch (getCXXABIKind()) {
case TargetCXXABI::AppleARM64:
case TargetCXXABI::Fuchsia:
case TargetCXXABI::GenericARM: // Same as Itanium at this level
case TargetCXXABI::iOS:
case TargetCXXABI::WatchOS:
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::WebAssembly:
case TargetCXXABI::XL:
return CreateItaniumCXXABI(*this);
case TargetCXXABI::Microsoft:
return CreateMicrosoftCXXABI(*this);
}
llvm_unreachable("Invalid CXXABI type!");
}
interp::Context &ASTContext::getInterpContext() {
if (!InterpContext) {
InterpContext.reset(new interp::Context(*this));
}
return *InterpContext.get();
}
ParentMapContext &ASTContext::getParentMapContext() {
if (!ParentMapCtx)
ParentMapCtx.reset(new ParentMapContext(*this));
return *ParentMapCtx.get();
}
static const LangASMap *getAddressSpaceMap(const TargetInfo &T,
const LangOptions &LOpts) {
if (LOpts.FakeAddressSpaceMap) {
// The fake address space map must have a distinct entry for each
// language-specific address space.
static const unsigned FakeAddrSpaceMap[] = {
0, // Default
1, // opencl_global
3, // opencl_local
2, // opencl_constant
0, // opencl_private
4, // opencl_generic
5, // opencl_global_device
6, // opencl_global_host
7, // cuda_device
8, // cuda_constant
9, // cuda_shared
1, // sycl_global
5, // sycl_global_device
6, // sycl_global_host
3, // sycl_local
0, // sycl_private
10, // ptr32_sptr
11, // ptr32_uptr
12 // ptr64
};
return &FakeAddrSpaceMap;
} else {
return &T.getAddressSpaceMap();
}
}
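// Illustrative aside, not from the upstream file: a LangASMap is simply a
// table indexed by the language-level address space, yielding the target
// address-space number used for pointer qualification and mangling. A toy
// stand-in showing the indexing, with made-up enumerators and values:
namespace addr_space_map_sketch {
enum class ToyLangAS : unsigned { Default = 0, Global = 1, Local = 2, Constant = 3 };
static const unsigned ToyTargetASMap[] = {0, 1, 3, 2}; // one slot per ToyLangAS
inline unsigned toTargetAS(ToyLangAS AS) {
  return ToyTargetASMap[static_cast<unsigned>(AS)];
}
} // namespace addr_space_map_sketch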
static bool isAddrSpaceMapManglingEnabled(const TargetInfo &TI,
const LangOptions &LangOpts) {
switch (LangOpts.getAddressSpaceMapMangling()) {
case LangOptions::ASMM_Target:
return TI.useAddressSpaceMapMangling();
case LangOptions::ASMM_On:
return true;
case LangOptions::ASMM_Off:
return false;
}
llvm_unreachable("getAddressSpaceMapMangling() doesn't cover anything.");
}
ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM,
IdentifierTable &idents, SelectorTable &sels,
Builtin::Context &builtins, TranslationUnitKind TUKind)
: ConstantArrayTypes(this_()), FunctionProtoTypes(this_()),
TemplateSpecializationTypes(this_()),
DependentTemplateSpecializationTypes(this_()), AutoTypes(this_()),
SubstTemplateTemplateParmPacks(this_()),
CanonTemplateTemplateParms(this_()), SourceMgr(SM), LangOpts(LOpts),
NoSanitizeL(new NoSanitizeList(LangOpts.NoSanitizeFiles, SM)),
XRayFilter(new XRayFunctionFilter(LangOpts.XRayAlwaysInstrumentFiles,
LangOpts.XRayNeverInstrumentFiles,
LangOpts.XRayAttrListFiles, SM)),
ProfList(new ProfileList(LangOpts.ProfileListFiles, SM)),
PrintingPolicy(LOpts), Idents(idents), Selectors(sels),
BuiltinInfo(builtins), TUKind(TUKind), DeclarationNames(*this),
Comments(SM), CommentCommandTraits(BumpAlloc, LOpts.CommentOpts),
CompCategories(this_()), LastSDM(nullptr, 0) {
addTranslationUnitDecl();
}
ASTContext::~ASTContext() {
// Release the DenseMaps associated with DeclContext objects.
// FIXME: Is this the ideal solution?
ReleaseDeclContextMaps();
// Call all of the deallocation functions on all of their targets.
for (auto &Pair : Deallocations)
(Pair.first)(Pair.second);
// ASTRecordLayout objects in ASTRecordLayouts must always be destroyed
// because they can contain DenseMaps.
for (llvm::DenseMap<const ObjCContainerDecl*,
const ASTRecordLayout*>::iterator
I = ObjCLayouts.begin(), E = ObjCLayouts.end(); I != E; )
// Increment in loop to prevent using deallocated memory.
if (auto *R = const_cast<ASTRecordLayout *>((I++)->second))
R->Destroy(*this);
for (llvm::DenseMap<const RecordDecl*, const ASTRecordLayout*>::iterator
I = ASTRecordLayouts.begin(), E = ASTRecordLayouts.end(); I != E; ) {
// Increment in loop to prevent using deallocated memory.
if (auto *R = const_cast<ASTRecordLayout *>((I++)->second))
R->Destroy(*this);
}
for (llvm::DenseMap<const Decl*, AttrVec*>::iterator A = DeclAttrs.begin(),
AEnd = DeclAttrs.end();
A != AEnd; ++A)
A->second->~AttrVec();
for (const auto &Value : ModuleInitializers)
Value.second->~PerModuleInitializers();
}
void ASTContext::setTraversalScope(const std::vector<Decl *> &TopLevelDecls) {
TraversalScope = TopLevelDecls;
getParentMapContext().clear();
}
void ASTContext::AddDeallocation(void (*Callback)(void *), void *Data) const {
Deallocations.push_back({Callback, Data});
}
void
ASTContext::setExternalSource(IntrusiveRefCntPtr<ExternalASTSource> Source) {
ExternalSource = std::move(Source);
}
void ASTContext::PrintStats() const {
llvm::errs() << "\n*** AST Context Stats:\n";
llvm::errs() << " " << Types.size() << " types total.\n";
unsigned counts[] = {
#define TYPE(Name, Parent) 0,
#define ABSTRACT_TYPE(Name, Parent)
#include "clang/AST/TypeNodes.inc"
0 // Extra
};
for (unsigned i = 0, e = Types.size(); i != e; ++i) {
Type *T = Types[i];
counts[(unsigned)T->getTypeClass()]++;
}
unsigned Idx = 0;
unsigned TotalBytes = 0;
#define TYPE(Name, Parent) \
if (counts[Idx]) \
llvm::errs() << " " << counts[Idx] << " " << #Name \
<< " types, " << sizeof(Name##Type) << " each " \
<< "(" << counts[Idx] * sizeof(Name##Type) \
<< " bytes)\n"; \
TotalBytes += counts[Idx] * sizeof(Name##Type); \
++Idx;
#define ABSTRACT_TYPE(Name, Parent)
#include "clang/AST/TypeNodes.inc"
llvm::errs() << "Total bytes = " << TotalBytes << "\n";
// Implicit special member functions.
llvm::errs() << NumImplicitDefaultConstructorsDeclared << "/"
<< NumImplicitDefaultConstructors
<< " implicit default constructors created\n";
llvm::errs() << NumImplicitCopyConstructorsDeclared << "/"
<< NumImplicitCopyConstructors
<< " implicit copy constructors created\n";
if (getLangOpts().CPlusPlus)
llvm::errs() << NumImplicitMoveConstructorsDeclared << "/"
<< NumImplicitMoveConstructors
<< " implicit move constructors created\n";
llvm::errs() << NumImplicitCopyAssignmentOperatorsDeclared << "/"
<< NumImplicitCopyAssignmentOperators
<< " implicit copy assignment operators created\n";
if (getLangOpts().CPlusPlus)
llvm::errs() << NumImplicitMoveAssignmentOperatorsDeclared << "/"
<< NumImplicitMoveAssignmentOperators
<< " implicit move assignment operators created\n";
llvm::errs() << NumImplicitDestructorsDeclared << "/"
<< NumImplicitDestructors
<< " implicit destructors created\n";
if (ExternalSource) {
llvm::errs() << "\n";
ExternalSource->PrintStats();
}
BumpAlloc.PrintStats();
}
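// Illustrative aside, not from the upstream file: the statistics above are
// generated with the X-macro pattern -- TypeNodes.inc expands one TYPE(...)
// entry per type class, and redefining TYPE before each #include turns the
// same list first into array initializers and then into printing code. A
// self-contained miniature of the pattern with a made-up node list:
#define TOY_NODE_LIST(X) X(Builtin) X(Pointer) X(Record)
namespace x_macro_sketch {
enum ToyKind {
#define TOY_NODE(Name) Toy##Name,
  TOY_NODE_LIST(TOY_NODE)
#undef TOY_NODE
  ToyKindCount
};
static const char *const ToyKindNames[] = {
#define TOY_NODE(Name) #Name,
  TOY_NODE_LIST(TOY_NODE)
#undef TOY_NODE
};
inline const char *toyKindName(ToyKind K) { return ToyKindNames[K]; }
} // namespace x_macro_sketch
#undef TOY_NODE_LIST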
void ASTContext::mergeDefinitionIntoModule(NamedDecl *ND, Module *M,
bool NotifyListeners) {
if (NotifyListeners)
if (auto *Listener = getASTMutationListener())
Listener->RedefinedHiddenDefinition(ND, M);
MergedDefModules[cast<NamedDecl>(ND->getCanonicalDecl())].push_back(M);
}
void ASTContext::deduplicateMergedDefinitonsFor(NamedDecl *ND) {
auto It = MergedDefModules.find(cast<NamedDecl>(ND->getCanonicalDecl()));
if (It == MergedDefModules.end())
return;
auto &Merged = It->second;
llvm::DenseSet<Module*> Found;
for (Module *&M : Merged)
if (!Found.insert(M).second)
M = nullptr;
Merged.erase(std::remove(Merged.begin(), Merged.end(), nullptr), Merged.end());
}
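// Illustrative aside, not from the upstream file: the function above removes
// duplicates while preserving the first occurrence of each module by nulling
// repeats and then compacting with the erase(remove(...)) idiom. The same
// order-preserving dedupe over plain pointers, using containers already
// included in this file (SmallVector, DenseSet, <algorithm>); the name below
// is made up.
namespace dedupe_sketch {
inline void dedupePreservingOrder(llvm::SmallVectorImpl<int *> &Items) {
  llvm::DenseSet<int *> Seen;
  for (int *&P : Items)
    if (!Seen.insert(P).second)
      P = nullptr; // mark later duplicates
  Items.erase(std::remove(Items.begin(), Items.end(), nullptr), Items.end());
}
} // namespace dedupe_sketch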
ArrayRef<Module *>
ASTContext::getModulesWithMergedDefinition(const NamedDecl *Def) {
auto MergedIt =
MergedDefModules.find(cast<NamedDecl>(Def->getCanonicalDecl()));
if (MergedIt == MergedDefModules.end())
return None;
return MergedIt->second;
}
void ASTContext::PerModuleInitializers::resolve(ASTContext &Ctx) {
if (LazyInitializers.empty())
return;
auto *Source = Ctx.getExternalSource();
assert(Source && "lazy initializers but no external source");
auto LazyInits = std::move(LazyInitializers);
LazyInitializers.clear();
for (auto ID : LazyInits)
Initializers.push_back(Source->GetExternalDecl(ID));
assert(LazyInitializers.empty() &&
"GetExternalDecl for lazy module initializer added more inits");
}
void ASTContext::addModuleInitializer(Module *M, Decl *D) {
// One special case: if we add a module initializer that imports another
// module, and that module's only initializer is an ImportDecl, simplify.
if (const auto *ID = dyn_cast<ImportDecl>(D)) {
auto It = ModuleInitializers.find(ID->getImportedModule());
// Maybe the ImportDecl does nothing at all. (Common case.)
if (It == ModuleInitializers.end())
return;
// Maybe the ImportDecl only imports another ImportDecl.
auto &Imported = *It->second;
if (Imported.Initializers.size() + Imported.LazyInitializers.size() == 1) {
Imported.resolve(*this);
auto *OnlyDecl = Imported.Initializers.front();
if (isa<ImportDecl>(OnlyDecl))
D = OnlyDecl;
}
}
auto *&Inits = ModuleInitializers[M];
if (!Inits)
Inits = new (*this) PerModuleInitializers;
Inits->Initializers.push_back(D);
}
void ASTContext::addLazyModuleInitializers(Module *M, ArrayRef<uint32_t> IDs) {
auto *&Inits = ModuleInitializers[M];
if (!Inits)
Inits = new (*this) PerModuleInitializers;
Inits->LazyInitializers.insert(Inits->LazyInitializers.end(),
IDs.begin(), IDs.end());
}
ArrayRef<Decl *> ASTContext::getModuleInitializers(Module *M) {
auto It = ModuleInitializers.find(M);
if (It == ModuleInitializers.end())
return None;
auto *Inits = It->second;
Inits->resolve(*this);
return Inits->Initializers;
}
ExternCContextDecl *ASTContext::getExternCContextDecl() const {
if (!ExternCContext)
ExternCContext = ExternCContextDecl::Create(*this, getTranslationUnitDecl());
return ExternCContext;
}
BuiltinTemplateDecl *
ASTContext::buildBuiltinTemplateDecl(BuiltinTemplateKind BTK,
const IdentifierInfo *II) const {
auto *BuiltinTemplate =
BuiltinTemplateDecl::Create(*this, getTranslationUnitDecl(), II, BTK);
BuiltinTemplate->setImplicit();
getTranslationUnitDecl()->addDecl(BuiltinTemplate);
return BuiltinTemplate;
}
BuiltinTemplateDecl *
ASTContext::getMakeIntegerSeqDecl() const {
if (!MakeIntegerSeqDecl)
MakeIntegerSeqDecl = buildBuiltinTemplateDecl(BTK__make_integer_seq,
getMakeIntegerSeqName());
return MakeIntegerSeqDecl;
}
BuiltinTemplateDecl *
ASTContext::getTypePackElementDecl() const {
if (!TypePackElementDecl)
TypePackElementDecl = buildBuiltinTemplateDecl(BTK__type_pack_element,
getTypePackElementName());
return TypePackElementDecl;
}
RecordDecl *ASTContext::buildImplicitRecord(StringRef Name,
RecordDecl::TagKind TK) const {
SourceLocation Loc;
RecordDecl *NewDecl;
if (getLangOpts().CPlusPlus)
NewDecl = CXXRecordDecl::Create(*this, TK, getTranslationUnitDecl(), Loc,
Loc, &Idents.get(Name));
else
NewDecl = RecordDecl::Create(*this, TK, getTranslationUnitDecl(), Loc, Loc,
&Idents.get(Name));
NewDecl->setImplicit();
NewDecl->addAttr(TypeVisibilityAttr::CreateImplicit(
const_cast<ASTContext &>(*this), TypeVisibilityAttr::Default));
return NewDecl;
}
TypedefDecl *ASTContext::buildImplicitTypedef(QualType T,
StringRef Name) const {
TypeSourceInfo *TInfo = getTrivialTypeSourceInfo(T);
TypedefDecl *NewDecl = TypedefDecl::Create(
const_cast<ASTContext &>(*this), getTranslationUnitDecl(),
SourceLocation(), SourceLocation(), &Idents.get(Name), TInfo);
NewDecl->setImplicit();
return NewDecl;
}
TypedefDecl *ASTContext::getInt128Decl() const {
if (!Int128Decl)
Int128Decl = buildImplicitTypedef(Int128Ty, "__int128_t");
return Int128Decl;
}
TypedefDecl *ASTContext::getUInt128Decl() const {
if (!UInt128Decl)
UInt128Decl = buildImplicitTypedef(UnsignedInt128Ty, "__uint128_t");
return UInt128Decl;
}
void ASTContext::InitBuiltinType(CanQualType &R, BuiltinType::Kind K) {
auto *Ty = new (*this, TypeAlignment) BuiltinType(K);
R = CanQualType::CreateUnsafe(QualType(Ty, 0));
Types.push_back(Ty);
}
void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
const TargetInfo *AuxTarget) {
assert((!this->Target || this->Target == &Target) &&
"Incorrect target reinitialization");
assert(VoidTy.isNull() && "Context reinitialized?");
this->Target = &Target;
this->AuxTarget = AuxTarget;
ABI.reset(createCXXABI(Target));
AddrSpaceMap = getAddressSpaceMap(Target, LangOpts);
AddrSpaceMapMangling = isAddrSpaceMapManglingEnabled(Target, LangOpts);
// C99 6.2.5p19.
InitBuiltinType(VoidTy, BuiltinType::Void);
// C99 6.2.5p2.
InitBuiltinType(BoolTy, BuiltinType::Bool);
// C99 6.2.5p3.
if (LangOpts.CharIsSigned)
InitBuiltinType(CharTy, BuiltinType::Char_S);
else
InitBuiltinType(CharTy, BuiltinType::Char_U);
// C99 6.2.5p4.
InitBuiltinType(SignedCharTy, BuiltinType::SChar);
InitBuiltinType(ShortTy, BuiltinType::Short);
InitBuiltinType(IntTy, BuiltinType::Int);
InitBuiltinType(LongTy, BuiltinType::Long);
InitBuiltinType(LongLongTy, BuiltinType::LongLong);
// C99 6.2.5p6.
InitBuiltinType(UnsignedCharTy, BuiltinType::UChar);
InitBuiltinType(UnsignedShortTy, BuiltinType::UShort);
InitBuiltinType(UnsignedIntTy, BuiltinType::UInt);
InitBuiltinType(UnsignedLongTy, BuiltinType::ULong);
InitBuiltinType(UnsignedLongLongTy, BuiltinType::ULongLong);
// C99 6.2.5p10.
InitBuiltinType(FloatTy, BuiltinType::Float);
InitBuiltinType(DoubleTy, BuiltinType::Double);
InitBuiltinType(LongDoubleTy, BuiltinType::LongDouble);
// GNU extension, __float128 for IEEE quadruple precision
InitBuiltinType(Float128Ty, BuiltinType::Float128);
// C11 extension ISO/IEC TS 18661-3
InitBuiltinType(Float16Ty, BuiltinType::Float16);
// ISO/IEC JTC1 SC22 WG14 N1169 Extension
InitBuiltinType(ShortAccumTy, BuiltinType::ShortAccum);
InitBuiltinType(AccumTy, BuiltinType::Accum);
InitBuiltinType(LongAccumTy, BuiltinType::LongAccum);
InitBuiltinType(UnsignedShortAccumTy, BuiltinType::UShortAccum);
InitBuiltinType(UnsignedAccumTy, BuiltinType::UAccum);
InitBuiltinType(UnsignedLongAccumTy, BuiltinType::ULongAccum);
InitBuiltinType(ShortFractTy, BuiltinType::ShortFract);
InitBuiltinType(FractTy, BuiltinType::Fract);
InitBuiltinType(LongFractTy, BuiltinType::LongFract);
InitBuiltinType(UnsignedShortFractTy, BuiltinType::UShortFract);
InitBuiltinType(UnsignedFractTy, BuiltinType::UFract);
InitBuiltinType(UnsignedLongFractTy, BuiltinType::ULongFract);
InitBuiltinType(SatShortAccumTy, BuiltinType::SatShortAccum);
InitBuiltinType(SatAccumTy, BuiltinType::SatAccum);
InitBuiltinType(SatLongAccumTy, BuiltinType::SatLongAccum);
InitBuiltinType(SatUnsignedShortAccumTy, BuiltinType::SatUShortAccum);
InitBuiltinType(SatUnsignedAccumTy, BuiltinType::SatUAccum);
InitBuiltinType(SatUnsignedLongAccumTy, BuiltinType::SatULongAccum);
InitBuiltinType(SatShortFractTy, BuiltinType::SatShortFract);
InitBuiltinType(SatFractTy, BuiltinType::SatFract);
InitBuiltinType(SatLongFractTy, BuiltinType::SatLongFract);
InitBuiltinType(SatUnsignedShortFractTy, BuiltinType::SatUShortFract);
InitBuiltinType(SatUnsignedFractTy, BuiltinType::SatUFract);
InitBuiltinType(SatUnsignedLongFractTy, BuiltinType::SatULongFract);
// GNU extension, 128-bit integers.
InitBuiltinType(Int128Ty, BuiltinType::Int128);
InitBuiltinType(UnsignedInt128Ty, BuiltinType::UInt128);
// C++ 3.9.1p5
if (TargetInfo::isTypeSigned(Target.getWCharType()))
InitBuiltinType(WCharTy, BuiltinType::WChar_S);
else // -fshort-wchar makes wchar_t be unsigned.
InitBuiltinType(WCharTy, BuiltinType::WChar_U);
if (LangOpts.CPlusPlus && LangOpts.WChar)
WideCharTy = WCharTy;
else {
// C99 (or C++ using -fno-wchar).
WideCharTy = getFromTargetType(Target.getWCharType());
}
WIntTy = getFromTargetType(Target.getWIntType());
// C++20 (proposed)
InitBuiltinType(Char8Ty, BuiltinType::Char8);
if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++
InitBuiltinType(Char16Ty, BuiltinType::Char16);
else // C99
Char16Ty = getFromTargetType(Target.getChar16Type());
if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++
InitBuiltinType(Char32Ty, BuiltinType::Char32);
else // C99
Char32Ty = getFromTargetType(Target.getChar32Type());
// Placeholder type for type-dependent expressions whose type is
// completely unknown. No code should ever check a type against
// DependentTy and users should never see it; however, it is here to
// help diagnose failures to properly check for type-dependent
// expressions.
InitBuiltinType(DependentTy, BuiltinType::Dependent);
// Placeholder type for functions.
InitBuiltinType(OverloadTy, BuiltinType::Overload);
// Placeholder type for bound members.
InitBuiltinType(BoundMemberTy, BuiltinType::BoundMember);
// Placeholder type for pseudo-objects.
InitBuiltinType(PseudoObjectTy, BuiltinType::PseudoObject);
// "any" type; useful for debugger-like clients.
InitBuiltinType(UnknownAnyTy, BuiltinType::UnknownAny);
// Placeholder type for unbridged ARC casts.
InitBuiltinType(ARCUnbridgedCastTy, BuiltinType::ARCUnbridgedCast);
// Placeholder type for builtin functions.
InitBuiltinType(BuiltinFnTy, BuiltinType::BuiltinFn);
// Placeholder type for OMP array sections.
if (LangOpts.OpenMP) {
InitBuiltinType(OMPArraySectionTy, BuiltinType::OMPArraySection);
InitBuiltinType(OMPArrayShapingTy, BuiltinType::OMPArrayShaping);
InitBuiltinType(OMPIteratorTy, BuiltinType::OMPIterator);
}
if (LangOpts.MatrixTypes)
InitBuiltinType(IncompleteMatrixIdxTy, BuiltinType::IncompleteMatrixIdx);
// C99 6.2.5p11.
FloatComplexTy = getComplexType(FloatTy);
DoubleComplexTy = getComplexType(DoubleTy);
LongDoubleComplexTy = getComplexType(LongDoubleTy);
Float128ComplexTy = getComplexType(Float128Ty);
// Builtin types for 'id', 'Class', and 'SEL'.
InitBuiltinType(ObjCBuiltinIdTy, BuiltinType::ObjCId);
InitBuiltinType(ObjCBuiltinClassTy, BuiltinType::ObjCClass);
InitBuiltinType(ObjCBuiltinSelTy, BuiltinType::ObjCSel);
if (LangOpts.OpenCL) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
InitBuiltinType(SingletonId, BuiltinType::Id);
#include "clang/Basic/OpenCLImageTypes.def"
InitBuiltinType(OCLSamplerTy, BuiltinType::OCLSampler);
InitBuiltinType(OCLEventTy, BuiltinType::OCLEvent);
InitBuiltinType(OCLClkEventTy, BuiltinType::OCLClkEvent);
InitBuiltinType(OCLQueueTy, BuiltinType::OCLQueue);
InitBuiltinType(OCLReserveIDTy, BuiltinType::OCLReserveID);
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/OpenCLExtensionTypes.def"
}
if (Target.hasAArch64SVETypes()) {
#define SVE_TYPE(Name, Id, SingletonId) \
InitBuiltinType(SingletonId, BuiltinType::Id);
#include "clang/Basic/AArch64SVEACLETypes.def"
}
if (Target.getTriple().isPPC64() &&
Target.hasFeature("paired-vector-memops")) {
if (Target.hasFeature("mma")) {
#define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/PPCTypes.def"
}
#define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/PPCTypes.def"
}
if (Target.hasRISCVVTypes()) {
#define RVV_TYPE(Name, Id, SingletonId) \
InitBuiltinType(SingletonId, BuiltinType::Id);
#include "clang/Basic/RISCVVTypes.def"
}
// Builtin type for __objc_yes and __objc_no
ObjCBuiltinBoolTy = (Target.useSignedCharForObjCBool() ?
SignedCharTy : BoolTy);
ObjCConstantStringType = QualType();
ObjCSuperType = QualType();
// void * type
if (LangOpts.OpenCLGenericAddressSpace) {
auto Q = VoidTy.getQualifiers();
Q.setAddressSpace(LangAS::opencl_generic);
VoidPtrTy = getPointerType(getCanonicalType(
getQualifiedType(VoidTy.getUnqualifiedType(), Q)));
} else {
VoidPtrTy = getPointerType(VoidTy);
}
// nullptr type (C++0x 2.14.7)
InitBuiltinType(NullPtrTy, BuiltinType::NullPtr);
// half type (OpenCL 6.1.1.1) / ARM NEON __fp16
InitBuiltinType(HalfTy, BuiltinType::Half);
InitBuiltinType(BFloat16Ty, BuiltinType::BFloat16);
// Builtin type used to help define __builtin_va_list.
VaListTagDecl = nullptr;
// MSVC predeclares struct _GUID, and we need it to create MSGuidDecls.
if (LangOpts.MicrosoftExt || LangOpts.Borland) {
MSGuidTagDecl = buildImplicitRecord("_GUID");
getTranslationUnitDecl()->addDecl(MSGuidTagDecl);
}
}
DiagnosticsEngine &ASTContext::getDiagnostics() const {
return SourceMgr.getDiagnostics();
}
AttrVec& ASTContext::getDeclAttrs(const Decl *D) {
AttrVec *&Result = DeclAttrs[D];
if (!Result) {
void *Mem = Allocate(sizeof(AttrVec));
Result = new (Mem) AttrVec;
}
return *Result;
}
/// Erase the attributes corresponding to the given declaration.
void ASTContext::eraseDeclAttrs(const Decl *D) {
llvm::DenseMap<const Decl*, AttrVec*>::iterator Pos = DeclAttrs.find(D);
if (Pos != DeclAttrs.end()) {
Pos->second->~AttrVec();
DeclAttrs.erase(Pos);
}
}
// FIXME: Remove ?
MemberSpecializationInfo *
ASTContext::getInstantiatedFromStaticDataMember(const VarDecl *Var) {
assert(Var->isStaticDataMember() && "Not a static data member");
return getTemplateOrSpecializationInfo(Var)
.dyn_cast<MemberSpecializationInfo *>();
}
ASTContext::TemplateOrSpecializationInfo
ASTContext::getTemplateOrSpecializationInfo(const VarDecl *Var) {
llvm::DenseMap<const VarDecl *, TemplateOrSpecializationInfo>::iterator Pos =
TemplateOrInstantiation.find(Var);
if (Pos == TemplateOrInstantiation.end())
return {};
return Pos->second;
}
void
ASTContext::setInstantiatedFromStaticDataMember(VarDecl *Inst, VarDecl *Tmpl,
TemplateSpecializationKind TSK,
SourceLocation PointOfInstantiation) {
assert(Inst->isStaticDataMember() && "Not a static data member");
assert(Tmpl->isStaticDataMember() && "Not a static data member");
setTemplateOrSpecializationInfo(Inst, new (*this) MemberSpecializationInfo(
Tmpl, TSK, PointOfInstantiation));
}
void
ASTContext::setTemplateOrSpecializationInfo(VarDecl *Inst,
TemplateOrSpecializationInfo TSI) {
assert(!TemplateOrInstantiation[Inst] &&
"Already noted what the variable was instantiated from");
TemplateOrInstantiation[Inst] = TSI;
}
NamedDecl *
ASTContext::getInstantiatedFromUsingDecl(NamedDecl *UUD) {
auto Pos = InstantiatedFromUsingDecl.find(UUD);
if (Pos == InstantiatedFromUsingDecl.end())
return nullptr;
return Pos->second;
}
void
ASTContext::setInstantiatedFromUsingDecl(NamedDecl *Inst, NamedDecl *Pattern) {
assert((isa<UsingDecl>(Pattern) ||
isa<UnresolvedUsingValueDecl>(Pattern) ||
isa<UnresolvedUsingTypenameDecl>(Pattern)) &&
"pattern decl is not a using decl");
assert((isa<UsingDecl>(Inst) ||
isa<UnresolvedUsingValueDecl>(Inst) ||
isa<UnresolvedUsingTypenameDecl>(Inst)) &&
"instantiation did not produce a using decl");
assert(!InstantiatedFromUsingDecl[Inst] && "pattern already exists");
InstantiatedFromUsingDecl[Inst] = Pattern;
}
UsingEnumDecl *
ASTContext::getInstantiatedFromUsingEnumDecl(UsingEnumDecl *UUD) {
auto Pos = InstantiatedFromUsingEnumDecl.find(UUD);
if (Pos == InstantiatedFromUsingEnumDecl.end())
return nullptr;
return Pos->second;
}
void ASTContext::setInstantiatedFromUsingEnumDecl(UsingEnumDecl *Inst,
UsingEnumDecl *Pattern) {
assert(!InstantiatedFromUsingEnumDecl[Inst] && "pattern already exists");
InstantiatedFromUsingEnumDecl[Inst] = Pattern;
}
UsingShadowDecl *
ASTContext::getInstantiatedFromUsingShadowDecl(UsingShadowDecl *Inst) {
llvm::DenseMap<UsingShadowDecl*, UsingShadowDecl*>::const_iterator Pos
= InstantiatedFromUsingShadowDecl.find(Inst);
if (Pos == InstantiatedFromUsingShadowDecl.end())
return nullptr;
return Pos->second;
}
void
ASTContext::setInstantiatedFromUsingShadowDecl(UsingShadowDecl *Inst,
UsingShadowDecl *Pattern) {
assert(!InstantiatedFromUsingShadowDecl[Inst] && "pattern already exists");
InstantiatedFromUsingShadowDecl[Inst] = Pattern;
}
FieldDecl *ASTContext::getInstantiatedFromUnnamedFieldDecl(FieldDecl *Field) {
llvm::DenseMap<FieldDecl *, FieldDecl *>::iterator Pos
= InstantiatedFromUnnamedFieldDecl.find(Field);
if (Pos == InstantiatedFromUnnamedFieldDecl.end())
return nullptr;
return Pos->second;
}
void ASTContext::setInstantiatedFromUnnamedFieldDecl(FieldDecl *Inst,
FieldDecl *Tmpl) {
assert(!Inst->getDeclName() && "Instantiated field decl is not unnamed");
assert(!Tmpl->getDeclName() && "Template field decl is not unnamed");
assert(!InstantiatedFromUnnamedFieldDecl[Inst] &&
"Already noted what unnamed field was instantiated from");
InstantiatedFromUnnamedFieldDecl[Inst] = Tmpl;
}
ASTContext::overridden_cxx_method_iterator
ASTContext::overridden_methods_begin(const CXXMethodDecl *Method) const {
return overridden_methods(Method).begin();
}
ASTContext::overridden_cxx_method_iterator
ASTContext::overridden_methods_end(const CXXMethodDecl *Method) const {
return overridden_methods(Method).end();
}
unsigned
ASTContext::overridden_methods_size(const CXXMethodDecl *Method) const {
auto Range = overridden_methods(Method);
return Range.end() - Range.begin();
}
ASTContext::overridden_method_range
ASTContext::overridden_methods(const CXXMethodDecl *Method) const {
llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos =
OverriddenMethods.find(Method->getCanonicalDecl());
if (Pos == OverriddenMethods.end())
return overridden_method_range(nullptr, nullptr);
return overridden_method_range(Pos->second.begin(), Pos->second.end());
}
void ASTContext::addOverriddenMethod(const CXXMethodDecl *Method,
const CXXMethodDecl *Overridden) {
assert(Method->isCanonicalDecl() && Overridden->isCanonicalDecl());
OverriddenMethods[Method].push_back(Overridden);
}
void ASTContext::getOverriddenMethods(
const NamedDecl *D,
SmallVectorImpl<const NamedDecl *> &Overridden) const {
assert(D);
if (const auto *CXXMethod = dyn_cast<CXXMethodDecl>(D)) {
Overridden.append(overridden_methods_begin(CXXMethod),
overridden_methods_end(CXXMethod));
return;
}
const auto *Method = dyn_cast<ObjCMethodDecl>(D);
if (!Method)
return;
SmallVector<const ObjCMethodDecl *, 8> OverDecls;
Method->getOverriddenMethods(OverDecls);
Overridden.append(OverDecls.begin(), OverDecls.end());
}
void ASTContext::addedLocalImportDecl(ImportDecl *Import) {
assert(!Import->getNextLocalImport() &&
"Import declaration already in the chain");
assert(!Import->isFromASTFile() && "Non-local import declaration");
if (!FirstLocalImport) {
FirstLocalImport = Import;
LastLocalImport = Import;
return;
}
LastLocalImport->setNextLocalImport(Import);
LastLocalImport = Import;
}
//===----------------------------------------------------------------------===//
// Type Sizing and Analysis
//===----------------------------------------------------------------------===//
/// getFloatTypeSemantics - Return the APFloat 'semantics' for the specified
/// scalar floating point type.
const llvm::fltSemantics &ASTContext::getFloatTypeSemantics(QualType T) const {
switch (T->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a floating point type!");
case BuiltinType::BFloat16:
return Target->getBFloat16Format();
case BuiltinType::Float16:
case BuiltinType::Half:
return Target->getHalfFormat();
case BuiltinType::Float: return Target->getFloatFormat();
case BuiltinType::Double: return Target->getDoubleFormat();
case BuiltinType::LongDouble:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)
return AuxTarget->getLongDoubleFormat();
return Target->getLongDoubleFormat();
case BuiltinType::Float128:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)
return AuxTarget->getFloat128Format();
return Target->getFloat128Format();
}
}
CharUnits ASTContext::getDeclAlign(const Decl *D, bool ForAlignof) const {
unsigned Align = Target->getCharWidth();
bool UseAlignAttrOnly = false;
if (unsigned AlignFromAttr = D->getMaxAlignment()) {
Align = AlignFromAttr;
// __attribute__((aligned)) can increase or decrease alignment
// *except* on a struct or struct member, where it only increases
// alignment unless 'packed' is also specified.
//
// It is an error for alignas to decrease alignment, so we can
// ignore that possibility; Sema should diagnose it.
if (isa<FieldDecl>(D)) {
UseAlignAttrOnly = D->hasAttr<PackedAttr>() ||
cast<FieldDecl>(D)->getParent()->hasAttr<PackedAttr>();
} else {
UseAlignAttrOnly = true;
}
}
else if (isa<FieldDecl>(D))
UseAlignAttrOnly =
D->hasAttr<PackedAttr>() ||
cast<FieldDecl>(D)->getParent()->hasAttr<PackedAttr>();
// If we're using the align attribute only, just ignore everything
// else about the declaration and its type.
if (UseAlignAttrOnly) {
// do nothing
} else if (const auto *VD = dyn_cast<ValueDecl>(D)) {
QualType T = VD->getType();
if (const auto *RT = T->getAs<ReferenceType>()) {
if (ForAlignof)
T = RT->getPointeeType();
else
T = getPointerType(RT->getPointeeType());
}
QualType BaseT = getBaseElementType(T);
if (T->isFunctionType())
Align = getTypeInfoImpl(T.getTypePtr()).Align;
else if (!BaseT->isIncompleteType()) {
// Adjust alignments of declarations with array type by the
// large-array alignment on the target.
if (const ArrayType *arrayType = getAsArrayType(T)) {
unsigned MinWidth = Target->getLargeArrayMinWidth();
if (!ForAlignof && MinWidth) {
if (isa<VariableArrayType>(arrayType))
Align = std::max(Align, Target->getLargeArrayAlign());
else if (isa<ConstantArrayType>(arrayType) &&
MinWidth <= getTypeSize(cast<ConstantArrayType>(arrayType)))
Align = std::max(Align, Target->getLargeArrayAlign());
}
}
Align = std::max(Align, getPreferredTypeAlign(T.getTypePtr()));
if (BaseT.getQualifiers().hasUnaligned())
Align = Target->getCharWidth();
if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (VD->hasGlobalStorage() && !ForAlignof) {
uint64_t TypeSize = getTypeSize(T.getTypePtr());
Align = std::max(Align, getTargetInfo().getMinGlobalAlign(TypeSize));
}
}
}
// Fields can be subject to extra alignment constraints, like if
// the field is packed, the struct is packed, or the struct has a
// max-field-alignment constraint (#pragma pack). So calculate
// the actual alignment of the field within the struct, and then
// (as we're expected to) constrain that by the alignment of the type.
if (const auto *Field = dyn_cast<FieldDecl>(VD)) {
const RecordDecl *Parent = Field->getParent();
// We can only produce a sensible answer if the record is valid.
if (!Parent->isInvalidDecl()) {
const ASTRecordLayout &Layout = getASTRecordLayout(Parent);
// Start with the record's overall alignment.
unsigned FieldAlign = toBits(Layout.getAlignment());
// Use the GCD of that and the offset within the record.
uint64_t Offset = Layout.getFieldOffset(Field->getFieldIndex());
if (Offset > 0) {
// Alignment is always a power of 2, so the GCD will be a power of 2,
// which means we can simply take the lowest set bit of the offset
// instead of running Euclid's algorithm.
uint64_t LowBitOfOffset = Offset & (~Offset + 1);
if (LowBitOfOffset < FieldAlign)
FieldAlign = static_cast<unsigned>(LowBitOfOffset);
}
Align = std::min(Align, FieldAlign);
}
}
}
// Some targets have hard limitation on the maximum requestable alignment in
// aligned attribute for static variables.
const unsigned MaxAlignedAttr = getTargetInfo().getMaxAlignedAttribute();
const auto *VD = dyn_cast<VarDecl>(D);
if (MaxAlignedAttr && VD && VD->getStorageClass() == SC_Static)
Align = std::min(Align, MaxAlignedAttr);
return toCharUnitsFromBits(Align);
}
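// For illustration of the field-offset clamp above, assuming an 8-bit char:
// a field at bit offset 24 inside a record whose overall alignment is 64
// bits has Offset & (~Offset + 1) == 8 (the lowest set bit of 24), so only
// 8-bit alignment can be assumed for the field even though its type and the
// record are aligned more strictly.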
CharUnits ASTContext::getExnObjectAlignment() const {
return toCharUnitsFromBits(Target->getExnObjectAlignment());
}
// getTypeInfoDataSizeInChars - Return the size of a type, in
// chars. If the type is a record, its data size is returned. This is
// the size of the memcpy that's performed when assigning this type
// using a trivial copy/move assignment operator.
TypeInfoChars ASTContext::getTypeInfoDataSizeInChars(QualType T) const {
TypeInfoChars Info = getTypeInfoInChars(T);
// In C++, objects can sometimes be allocated into the tail padding
// of a base-class subobject. We decide whether that's possible
// during class layout, so here we can just trust the layout results.
if (getLangOpts().CPlusPlus) {
if (const auto *RT = T->getAs<RecordType>()) {
const ASTRecordLayout &layout = getASTRecordLayout(RT->getDecl());
Info.Width = layout.getDataSize();
}
}
return Info;
}
/// getConstantArrayInfoInChars - Performing the computation in CharUnits
/// instead of in bits prevents overflowing the uint64_t for some large arrays.
TypeInfoChars
static getConstantArrayInfoInChars(const ASTContext &Context,
const ConstantArrayType *CAT) {
TypeInfoChars EltInfo = Context.getTypeInfoInChars(CAT->getElementType());
uint64_t Size = CAT->getSize().getZExtValue();
assert((Size == 0 || static_cast<uint64_t>(EltInfo.Width.getQuantity()) <=
(uint64_t)(-1)/Size) &&
"Overflow in array type char size evaluation");
uint64_t Width = EltInfo.Width.getQuantity() * Size;
unsigned Align = EltInfo.Align.getQuantity();
if (!Context.getTargetInfo().getCXXABI().isMicrosoft() ||
Context.getTargetInfo().getPointerWidth(0) == 64)
Width = llvm::alignTo(Width, Align);
return TypeInfoChars(CharUnits::fromQuantity(Width),
CharUnits::fromQuantity(Align),
EltInfo.AlignIsRequired);
}
TypeInfoChars ASTContext::getTypeInfoInChars(const Type *T) const {
if (const auto *CAT = dyn_cast<ConstantArrayType>(T))
return getConstantArrayInfoInChars(*this, CAT);
TypeInfo Info = getTypeInfo(T);
return TypeInfoChars(toCharUnitsFromBits(Info.Width),
toCharUnitsFromBits(Info.Align),
Info.AlignIsRequired);
}
TypeInfoChars ASTContext::getTypeInfoInChars(QualType T) const {
return getTypeInfoInChars(T.getTypePtr());
}
bool ASTContext::isAlignmentRequired(const Type *T) const {
return getTypeInfo(T).AlignIsRequired;
}
bool ASTContext::isAlignmentRequired(QualType T) const {
return isAlignmentRequired(T.getTypePtr());
}
unsigned ASTContext::getTypeAlignIfKnown(QualType T,
bool NeedsPreferredAlignment) const {
// An alignment on a typedef overrides anything else.
if (const auto *TT = T->getAs<TypedefType>())
if (unsigned Align = TT->getDecl()->getMaxAlignment())
return Align;
// If we have an (array of) complete type, we're done.
T = getBaseElementType(T);
if (!T->isIncompleteType())
return NeedsPreferredAlignment ? getPreferredTypeAlign(T) : getTypeAlign(T);
// If we had an array type, its element type might be a typedef
// type with an alignment attribute.
if (const auto *TT = T->getAs<TypedefType>())
if (unsigned Align = TT->getDecl()->getMaxAlignment())
return Align;
// Otherwise, see if the declaration of the type had an attribute.
if (const auto *TT = T->getAs<TagType>())
return TT->getDecl()->getMaxAlignment();
return 0;
}
TypeInfo ASTContext::getTypeInfo(const Type *T) const {
TypeInfoMap::iterator I = MemoizedTypeInfo.find(T);
if (I != MemoizedTypeInfo.end())
return I->second;
// This call can invalidate MemoizedTypeInfo[T], so we need a second lookup.
TypeInfo TI = getTypeInfoImpl(T);
MemoizedTypeInfo[T] = TI;
return TI;
}
/// getTypeInfoImpl - Return the size of the specified type, in bits. This
/// method does not work on incomplete types.
///
/// FIXME: Pointers into different addr spaces could have different sizes and
/// alignment requirements: getPointerInfo should take an AddrSpace, this
/// should take a QualType, &c.
TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
uint64_t Width = 0;
unsigned Align = 8;
bool AlignIsRequired = false;
unsigned AS = 0;
switch (T->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) \
case Type::Class: \
assert(!T->isDependentType() && "should not see dependent types here"); \
return getTypeInfo(cast<Class##Type>(T)->desugar().getTypePtr());
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("Should not see dependent types");
case Type::FunctionNoProto:
case Type::FunctionProto:
// GCC extension: alignof(function) = 32 bits
Width = 0;
Align = 32;
break;
case Type::IncompleteArray:
case Type::VariableArray:
case Type::ConstantArray: {
// Model non-constant sized arrays as size zero, but track the alignment.
uint64_t Size = 0;
if (const auto *CAT = dyn_cast<ConstantArrayType>(T))
Size = CAT->getSize().getZExtValue();
TypeInfo EltInfo = getTypeInfo(cast<ArrayType>(T)->getElementType());
assert((Size == 0 || EltInfo.Width <= (uint64_t)(-1) / Size) &&
"Overflow in array type bit size evaluation");
Width = EltInfo.Width * Size;
Align = EltInfo.Align;
AlignIsRequired = EltInfo.AlignIsRequired;
if (!getTargetInfo().getCXXABI().isMicrosoft() ||
getTargetInfo().getPointerWidth(0) == 64)
Width = llvm::alignTo(Width, Align);
break;
}
case Type::ExtVector:
case Type::Vector: {
const auto *VT = cast<VectorType>(T);
TypeInfo EltInfo = getTypeInfo(VT->getElementType());
Width = EltInfo.Width * VT->getNumElements();
Align = Width;
// If the alignment is not a power of 2, round up to the next power of 2.
// This happens for non-power-of-2 length vectors.
if (Align & (Align-1)) {
Align = llvm::NextPowerOf2(Align);
Width = llvm::alignTo(Width, Align);
}
// Adjust the alignment based on the target max.
uint64_t TargetVectorAlign = Target->getMaxVectorAlign();
if (TargetVectorAlign && TargetVectorAlign < Align)
Align = TargetVectorAlign;
if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector)
// Adjust the alignment for fixed-length SVE vectors. This is important
// for non-power-of-2 vector lengths.
Align = 128;
else if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
// Adjust the alignment for fixed-length SVE predicates.
Align = 16;
break;
}
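// For illustration, assuming the target imposes no smaller maximum vector
// alignment: a vector of 3 x 32-bit floats starts with Width = 96 and
// Align = 96; since 96 is not a power of 2, Align is rounded up to 128 and
// Width is padded to 128.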
case Type::ConstantMatrix: {
const auto *MT = cast<ConstantMatrixType>(T);
TypeInfo ElementInfo = getTypeInfo(MT->getElementType());
// The internal layout of a matrix value is implementation defined.
// Initially be ABI compatible with arrays with respect to alignment and
// size.
Width = ElementInfo.Width * MT->getNumRows() * MT->getNumColumns();
Align = ElementInfo.Align;
break;
}
case Type::Builtin:
switch (cast<BuiltinType>(T)->getKind()) {
default: llvm_unreachable("Unknown builtin type!");
case BuiltinType::Void:
// GCC extension: alignof(void) = 8 bits.
Width = 0;
Align = 8;
break;
case BuiltinType::Bool:
Width = Target->getBoolWidth();
Align = Target->getBoolAlign();
break;
case BuiltinType::Char_S:
case BuiltinType::Char_U:
case BuiltinType::UChar:
case BuiltinType::SChar:
case BuiltinType::Char8:
Width = Target->getCharWidth();
Align = Target->getCharAlign();
break;
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
Width = Target->getWCharWidth();
Align = Target->getWCharAlign();
break;
case BuiltinType::Char16:
Width = Target->getChar16Width();
Align = Target->getChar16Align();
break;
case BuiltinType::Char32:
Width = Target->getChar32Width();
Align = Target->getChar32Align();
break;
case BuiltinType::UShort:
case BuiltinType::Short:
Width = Target->getShortWidth();
Align = Target->getShortAlign();
break;
case BuiltinType::UInt:
case BuiltinType::Int:
Width = Target->getIntWidth();
Align = Target->getIntAlign();
break;
case BuiltinType::ULong:
case BuiltinType::Long:
Width = Target->getLongWidth();
Align = Target->getLongAlign();
break;
case BuiltinType::ULongLong:
case BuiltinType::LongLong:
Width = Target->getLongLongWidth();
Align = Target->getLongLongAlign();
break;
case BuiltinType::Int128:
case BuiltinType::UInt128:
Width = 128;
Align = 128; // int128_t is 128-bit aligned on all targets.
break;
case BuiltinType::ShortAccum:
case BuiltinType::UShortAccum:
case BuiltinType::SatShortAccum:
case BuiltinType::SatUShortAccum:
Width = Target->getShortAccumWidth();
Align = Target->getShortAccumAlign();
break;
case BuiltinType::Accum:
case BuiltinType::UAccum:
case BuiltinType::SatAccum:
case BuiltinType::SatUAccum:
Width = Target->getAccumWidth();
Align = Target->getAccumAlign();
break;
case BuiltinType::LongAccum:
case BuiltinType::ULongAccum:
case BuiltinType::SatLongAccum:
case BuiltinType::SatULongAccum:
Width = Target->getLongAccumWidth();
Align = Target->getLongAccumAlign();
break;
case BuiltinType::ShortFract:
case BuiltinType::UShortFract:
case BuiltinType::SatShortFract:
case BuiltinType::SatUShortFract:
Width = Target->getShortFractWidth();
Align = Target->getShortFractAlign();
break;
case BuiltinType::Fract:
case BuiltinType::UFract:
case BuiltinType::SatFract:
case BuiltinType::SatUFract:
Width = Target->getFractWidth();
Align = Target->getFractAlign();
break;
case BuiltinType::LongFract:
case BuiltinType::ULongFract:
case BuiltinType::SatLongFract:
case BuiltinType::SatULongFract:
Width = Target->getLongFractWidth();
Align = Target->getLongFractAlign();
break;
case BuiltinType::BFloat16:
Width = Target->getBFloat16Width();
Align = Target->getBFloat16Align();
break;
case BuiltinType::Float16:
case BuiltinType::Half:
if (Target->hasFloat16Type() || !getLangOpts().OpenMP ||
!getLangOpts().OpenMPIsDevice) {
Width = Target->getHalfWidth();
Align = Target->getHalfAlign();
} else {
assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
"Expected OpenMP device compilation.");
Width = AuxTarget->getHalfWidth();
Align = AuxTarget->getHalfAlign();
}
break;
case BuiltinType::Float:
Width = Target->getFloatWidth();
Align = Target->getFloatAlign();
break;
case BuiltinType::Double:
Width = Target->getDoubleWidth();
Align = Target->getDoubleAlign();
break;
case BuiltinType::LongDouble:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
(Target->getLongDoubleWidth() != AuxTarget->getLongDoubleWidth() ||
Target->getLongDoubleAlign() != AuxTarget->getLongDoubleAlign())) {
Width = AuxTarget->getLongDoubleWidth();
Align = AuxTarget->getLongDoubleAlign();
} else {
Width = Target->getLongDoubleWidth();
Align = Target->getLongDoubleAlign();
}
break;
case BuiltinType::Float128:
if (Target->hasFloat128Type() || !getLangOpts().OpenMP ||
!getLangOpts().OpenMPIsDevice) {
Width = Target->getFloat128Width();
Align = Target->getFloat128Align();
} else {
assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
"Expected OpenMP device compilation.");
Width = AuxTarget->getFloat128Width();
Align = AuxTarget->getFloat128Align();
}
break;
case BuiltinType::NullPtr:
Width = Target->getPointerWidth(0); // C++ 3.9.1p11: sizeof(nullptr_t)
Align = Target->getPointerAlign(0); // == sizeof(void*)
break;
case BuiltinType::ObjCId:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCSel:
Width = Target->getPointerWidth(0);
Align = Target->getPointerAlign(0);
break;
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLReserveID:
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLExtensionTypes.def"
AS = getTargetAddressSpace(
Target->getOpenCLTypeAddrSpace(getOpenCLTypeKind(T)));
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
// The SVE types are effectively target-specific. The length of an
// SVE_VECTOR_TYPE is only known at runtime, but it is always a multiple
// of 128 bits. There is one predicate bit for each vector byte, so the
// length of an SVE_PREDICATE_TYPE is always a multiple of 16 bits.
//
// Because the length is only known at runtime, we use a dummy value
// of 0 for the static length. The alignment values are those defined
// by the Procedure Call Standard for the Arm Architecture.
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \
IsSigned, IsFP, IsBF) \
case BuiltinType::Id: \
Width = 0; \
Align = 128; \
break;
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls) \
case BuiltinType::Id: \
Width = 0; \
Align = 16; \
break;
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id: \
Width = Size; \
Align = Size; \
break;
#include "clang/Basic/PPCTypes.def"
#define RVV_VECTOR_TYPE(Name, Id, SingletonId, ElKind, ElBits, NF, IsSigned, \
IsFP) \
case BuiltinType::Id: \
Width = 0; \
Align = ElBits; \
break;
#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, ElKind) \
case BuiltinType::Id: \
Width = 0; \
Align = 8; \
break;
#include "clang/Basic/RISCVVTypes.def"
}
break;
case Type::ObjCObjectPointer:
Width = Target->getPointerWidth(0);
Align = Target->getPointerAlign(0);
break;
case Type::BlockPointer:
AS = getTargetAddressSpace(cast<BlockPointerType>(T)->getPointeeType());
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
case Type::LValueReference:
case Type::RValueReference:
// alignof and sizeof should never enter this code path here, so we go
// the pointer route.
AS = getTargetAddressSpace(cast<ReferenceType>(T)->getPointeeType());
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
case Type::Pointer:
AS = getTargetAddressSpace(cast<PointerType>(T)->getPointeeType());
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
case Type::MemberPointer: {
const auto *MPT = cast<MemberPointerType>(T);
CXXABI::MemberPointerInfo MPI = ABI->getMemberPointerInfo(MPT);
Width = MPI.Width;
Align = MPI.Align;
break;
}
case Type::Complex: {
// Complex types have the same alignment as their elements, but twice the
// size.
TypeInfo EltInfo = getTypeInfo(cast<ComplexType>(T)->getElementType());
Width = EltInfo.Width * 2;
Align = EltInfo.Align;
break;
}
case Type::ObjCObject:
return getTypeInfo(cast<ObjCObjectType>(T)->getBaseType().getTypePtr());
case Type::Adjusted:
case Type::Decayed:
return getTypeInfo(cast<AdjustedType>(T)->getAdjustedType().getTypePtr());
case Type::ObjCInterface: {
const auto *ObjCI = cast<ObjCInterfaceType>(T);
if (ObjCI->getDecl()->isInvalidDecl()) {
Width = 8;
Align = 8;
break;
}
const ASTRecordLayout &Layout = getASTObjCInterfaceLayout(ObjCI->getDecl());
Width = toBits(Layout.getSize());
Align = toBits(Layout.getAlignment());
break;
}
case Type::ExtInt: {
const auto *EIT = cast<ExtIntType>(T);
Align =
std::min(static_cast<unsigned>(std::max(
getCharWidth(), llvm::PowerOf2Ceil(EIT->getNumBits()))),
Target->getLongLongAlign());
Width = llvm::alignTo(EIT->getNumBits(), Align);
break;
}
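// For illustration, assuming an 8-bit char and a 64-bit-aligned long long:
// _ExtInt(24) gets Align = min(max(8, PowerOf2Ceil(24)), 64) = 32 and
// Width = alignTo(24, 32) = 32, while _ExtInt(129) gets
// Align = min(256, 64) = 64 and Width = alignTo(129, 64) = 192.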
case Type::Record:
case Type::Enum: {
const auto *TT = cast<TagType>(T);
if (TT->getDecl()->isInvalidDecl()) {
Width = 8;
Align = 8;
break;
}
if (const auto *ET = dyn_cast<EnumType>(TT)) {
const EnumDecl *ED = ET->getDecl();
TypeInfo Info =
getTypeInfo(ED->getIntegerType()->getUnqualifiedDesugaredType());
if (unsigned AttrAlign = ED->getMaxAlignment()) {
Info.Align = AttrAlign;
Info.AlignIsRequired = true;
}
return Info;
}
const auto *RT = cast<RecordType>(TT);
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = getASTRecordLayout(RD);
Width = toBits(Layout.getSize());
Align = toBits(Layout.getAlignment());
AlignIsRequired = RD->hasAttr<AlignedAttr>();
break;
}
case Type::SubstTemplateTypeParm:
return getTypeInfo(cast<SubstTemplateTypeParmType>(T)->
getReplacementType().getTypePtr());
case Type::Auto:
case Type::DeducedTemplateSpecialization: {
const auto *A = cast<DeducedType>(T);
assert(!A->getDeducedType().isNull() &&
"cannot request the size of an undeduced or dependent auto type");
return getTypeInfo(A->getDeducedType().getTypePtr());
}
case Type::Paren:
return getTypeInfo(cast<ParenType>(T)->getInnerType().getTypePtr());
case Type::MacroQualified:
return getTypeInfo(
cast<MacroQualifiedType>(T)->getUnderlyingType().getTypePtr());
case Type::ObjCTypeParam:
return getTypeInfo(cast<ObjCTypeParamType>(T)->desugar().getTypePtr());
case Type::Typedef: {
const TypedefNameDecl *Typedef = cast<TypedefType>(T)->getDecl();
TypeInfo Info = getTypeInfo(Typedef->getUnderlyingType().getTypePtr());
// If the typedef has an aligned attribute on it, it overrides any computed
// alignment we have. This violates the GCC documentation (which says that
// attribute(aligned) can only round up) but matches its implementation.
if (unsigned AttrAlign = Typedef->getMaxAlignment()) {
Align = AttrAlign;
AlignIsRequired = true;
} else {
Align = Info.Align;
AlignIsRequired = Info.AlignIsRequired;
}
Width = Info.Width;
break;
}
case Type::Elaborated:
return getTypeInfo(cast<ElaboratedType>(T)->getNamedType().getTypePtr());
case Type::Attributed:
return getTypeInfo(
cast<AttributedType>(T)->getEquivalentType().getTypePtr());
case Type::Atomic: {
// Start with the base type information.
TypeInfo Info = getTypeInfo(cast<AtomicType>(T)->getValueType());
Width = Info.Width;
Align = Info.Align;
if (!Width) {
// An otherwise zero-sized type should still generate an
// atomic operation.
Width = Target->getCharWidth();
assert(Align);
} else if (Width <= Target->getMaxAtomicPromoteWidth()) {
// If the size of the type doesn't exceed the platform's max
// atomic promotion width, make the size and alignment more
// favorable to atomic operations:
// Round the size up to a power of 2.
if (!llvm::isPowerOf2_64(Width))
Width = llvm::NextPowerOf2(Width);
// Set the alignment equal to the size.
Align = static_cast<unsigned>(Width);
}
}
break;
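// For illustration, assuming a maximum atomic promote width of 64 bits: an
// _Atomic of a 3-byte struct starts with Width = 24, which is not a power
// of 2, so it is promoted to Width = 32 with Align = 32; a 128-bit payload
// exceeds the promote width and keeps its original size and alignment.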
case Type::Pipe:
Width = Target->getPointerWidth(getTargetAddressSpace(LangAS::opencl_global));
Align = Target->getPointerAlign(getTargetAddressSpace(LangAS::opencl_global));
break;
}
assert(llvm::isPowerOf2_32(Align) && "Alignment must be power of 2");
return TypeInfo(Width, Align, AlignIsRequired);
}
unsigned ASTContext::getTypeUnadjustedAlign(const Type *T) const {
UnadjustedAlignMap::iterator I = MemoizedUnadjustedAlign.find(T);
if (I != MemoizedUnadjustedAlign.end())
return I->second;
unsigned UnadjustedAlign;
if (const auto *RT = T->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = getASTRecordLayout(RD);
UnadjustedAlign = toBits(Layout.getUnadjustedAlignment());
} else if (const auto *ObjCI = T->getAs<ObjCInterfaceType>()) {
const ASTRecordLayout &Layout = getASTObjCInterfaceLayout(ObjCI->getDecl());
UnadjustedAlign = toBits(Layout.getUnadjustedAlignment());
} else {
UnadjustedAlign = getTypeAlign(T->getUnqualifiedDesugaredType());
}
MemoizedUnadjustedAlign[T] = UnadjustedAlign;
return UnadjustedAlign;
}
unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const {
unsigned SimdAlign = getTargetInfo().getSimdDefaultAlign();
return SimdAlign;
}
/// toCharUnitsFromBits - Convert a size in bits to a size in characters.
CharUnits ASTContext::toCharUnitsFromBits(int64_t BitSize) const {
return CharUnits::fromQuantity(BitSize / getCharWidth());
}
/// toBits - Convert a size in characters to a size in bits.
int64_t ASTContext::toBits(CharUnits CharSize) const {
return CharSize.getQuantity() * getCharWidth();
}
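// For illustration, on a target with an 8-bit char, toCharUnitsFromBits(32)
// yields 4 CharUnits and toBits(CharUnits::fromQuantity(4)) yields 32.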
/// getTypeSizeInChars - Return the size of the specified type, in characters.
/// This method does not work on incomplete types.
CharUnits ASTContext::getTypeSizeInChars(QualType T) const {
return getTypeInfoInChars(T).Width;
}
CharUnits ASTContext::getTypeSizeInChars(const Type *T) const {
return getTypeInfoInChars(T).Width;
}
/// getTypeAlignInChars - Return the ABI-specified alignment of a type, in
/// characters. This method does not work on incomplete types.
CharUnits ASTContext::getTypeAlignInChars(QualType T) const {
return toCharUnitsFromBits(getTypeAlign(T));
}
CharUnits ASTContext::getTypeAlignInChars(const Type *T) const {
return toCharUnitsFromBits(getTypeAlign(T));
}
/// getTypeUnadjustedAlignInChars - Return the ABI-specified alignment of a
/// type, in characters, before alignment adjustments. This method does
/// not work on incomplete types.
CharUnits ASTContext::getTypeUnadjustedAlignInChars(QualType T) const {
return toCharUnitsFromBits(getTypeUnadjustedAlign(T));
}
CharUnits ASTContext::getTypeUnadjustedAlignInChars(const Type *T) const {
return toCharUnitsFromBits(getTypeUnadjustedAlign(T));
}
/// getPreferredTypeAlign - Return the "preferred" alignment of the specified
/// type for the current target, in bits. This can differ from the ABI
/// alignment when overaligning a data type is beneficial for performance
/// or for preserving backwards compatibility. (Note: despite the name,
/// the preferred alignment is ABI-impacting, and not an optimization.)
unsigned ASTContext::getPreferredTypeAlign(const Type *T) const {
TypeInfo TI = getTypeInfo(T);
unsigned ABIAlign = TI.Align;
T = T->getBaseElementTypeUnsafe();
// The preferred alignment of member pointers is that of a pointer.
if (T->isMemberPointerType())
return getPreferredTypeAlign(getPointerDiffType().getTypePtr());
if (!Target->allowsLargerPreferedTypeAlignment())
return ABIAlign;
if (const auto *RT = T->getAs<RecordType>()) {
if (TI.AlignIsRequired || RT->getDecl()->isInvalidDecl())
return ABIAlign;
unsigned PreferredAlign = static_cast<unsigned>(
toBits(getASTRecordLayout(RT->getDecl()).PreferredAlignment));
assert(PreferredAlign >= ABIAlign &&
"PreferredAlign should be at least as large as ABIAlign.");
return PreferredAlign;
}
// Double (and, for targets supporting AIX `power` alignment, long double) and
// long long should be naturally aligned (despite requiring less alignment) if
// possible.
if (const auto *CT = T->getAs<ComplexType>())
T = CT->getElementType().getTypePtr();
if (const auto *ET = T->getAs<EnumType>())
T = ET->getDecl()->getIntegerType().getTypePtr();
if (T->isSpecificBuiltinType(BuiltinType::Double) ||
T->isSpecificBuiltinType(BuiltinType::LongLong) ||
T->isSpecificBuiltinType(BuiltinType::ULongLong) ||
(T->isSpecificBuiltinType(BuiltinType::LongDouble) &&
Target->defaultsToAIXPowerAlignment()))
// Don't increase the alignment if an alignment attribute was specified on a
// typedef declaration.
if (!TI.AlignIsRequired)
return std::max(ABIAlign, (unsigned)getTypeSize(T));
return ABIAlign;
}
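// For illustration, assuming a typical i386 System V target: double has a
// 32-bit ABI alignment but a 64-bit size, so its preferred alignment is
// raised to 64 bits here unless an alignment attribute made the ABI value
// required.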
/// getTargetDefaultAlignForAttributeAligned - Return the default alignment
/// for __attribute__((aligned)) on this target, to be used if no alignment
/// value is specified.
unsigned ASTContext::getTargetDefaultAlignForAttributeAligned() const {
return getTargetInfo().getDefaultAlignForAttributeAligned();
}
/// getAlignOfGlobalVar - Return the alignment in bits that should be given
/// to a global variable of the specified type.
unsigned ASTContext::getAlignOfGlobalVar(QualType T) const {
uint64_t TypeSize = getTypeSize(T.getTypePtr());
return std::max(getPreferredTypeAlign(T),
getTargetInfo().getMinGlobalAlign(TypeSize));
}
/// getAlignOfGlobalVarInChars - Return the alignment in characters that
/// should be given to a global variable of the specified type.
CharUnits ASTContext::getAlignOfGlobalVarInChars(QualType T) const {
return toCharUnitsFromBits(getAlignOfGlobalVar(T));
}
CharUnits ASTContext::getOffsetOfBaseWithVBPtr(const CXXRecordDecl *RD) const {
CharUnits Offset = CharUnits::Zero();
const ASTRecordLayout *Layout = &getASTRecordLayout(RD);
while (const CXXRecordDecl *Base = Layout->getBaseSharingVBPtr()) {
Offset += Layout->getBaseClassOffset(Base);
Layout = &getASTRecordLayout(Base);
}
return Offset;
}
CharUnits ASTContext::getMemberPointerPathAdjustment(const APValue &MP) const {
const ValueDecl *MPD = MP.getMemberPointerDecl();
CharUnits ThisAdjustment = CharUnits::Zero();
ArrayRef<const CXXRecordDecl*> Path = MP.getMemberPointerPath();
bool DerivedMember = MP.isMemberPointerToDerivedMember();
const CXXRecordDecl *RD = cast<CXXRecordDecl>(MPD->getDeclContext());
for (unsigned I = 0, N = Path.size(); I != N; ++I) {
const CXXRecordDecl *Base = RD;
const CXXRecordDecl *Derived = Path[I];
if (DerivedMember)
std::swap(Base, Derived);
ThisAdjustment += getASTRecordLayout(Derived).getBaseClassOffset(Base);
RD = Path[I];
}
if (DerivedMember)
ThisAdjustment = -ThisAdjustment;
return ThisAdjustment;
}
/// DeepCollectObjCIvars -
/// This routine first collects all declared, but not synthesized, ivars in
/// the super class and then collects all ivars, including those synthesized,
/// for the current class. It is used for the implementation of the current
/// class, where all ivars, declared and synthesized, are known.
void ASTContext::DeepCollectObjCIvars(const ObjCInterfaceDecl *OI,
bool leafClass,
SmallVectorImpl<const ObjCIvarDecl*> &Ivars) const {
if (const ObjCInterfaceDecl *SuperClass = OI->getSuperClass())
DeepCollectObjCIvars(SuperClass, false, Ivars);
if (!leafClass) {
for (const auto *I : OI->ivars())
Ivars.push_back(I);
} else {
auto *IDecl = const_cast<ObjCInterfaceDecl *>(OI);
for (const ObjCIvarDecl *Iv = IDecl->all_declared_ivar_begin(); Iv;
Iv= Iv->getNextIvar())
Ivars.push_back(Iv);
}
}
/// CollectInheritedProtocols - Collect all protocols in current class and
/// those inherited by it.
void ASTContext::CollectInheritedProtocols(const Decl *CDecl,
llvm::SmallPtrSet<ObjCProtocolDecl*, 8> &Protocols) {
if (const auto *OI = dyn_cast<ObjCInterfaceDecl>(CDecl)) {
// We can use protocol_iterator here instead of
// all_referenced_protocol_iterator since we are walking all categories.
for (auto *Proto : OI->all_referenced_protocols()) {
CollectInheritedProtocols(Proto, Protocols);
}
// Categories of this Interface.
for (const auto *Cat : OI->visible_categories())
CollectInheritedProtocols(Cat, Protocols);
if (ObjCInterfaceDecl *SD = OI->getSuperClass())
while (SD) {
CollectInheritedProtocols(SD, Protocols);
SD = SD->getSuperClass();
}
} else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(CDecl)) {
for (auto *Proto : OC->protocols()) {
CollectInheritedProtocols(Proto, Protocols);
}
} else if (const auto *OP = dyn_cast<ObjCProtocolDecl>(CDecl)) {
// Insert the protocol.
if (!Protocols.insert(
const_cast<ObjCProtocolDecl *>(OP->getCanonicalDecl())).second)
return;
for (auto *Proto : OP->protocols())
CollectInheritedProtocols(Proto, Protocols);
}
}
static bool unionHasUniqueObjectRepresentations(const ASTContext &Context,
const RecordDecl *RD) {
assert(RD->isUnion() && "Must be union type");
CharUnits UnionSize = Context.getTypeSizeInChars(RD->getTypeForDecl());
for (const auto *Field : RD->fields()) {
if (!Context.hasUniqueObjectRepresentations(Field->getType()))
return false;
CharUnits FieldSize = Context.getTypeSizeInChars(Field->getType());
if (FieldSize != UnionSize)
return false;
}
return !RD->field_empty();
}
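// For illustration: 'union U { int a; char b; };' fails this check because
// the char member is smaller than the union, leaving bytes unspecified,
// whereas 'union V { int a; unsigned b; };' passes (assuming int itself has
// no padding bits).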
static bool isStructEmpty(QualType Ty) {
const RecordDecl *RD = Ty->castAs<RecordType>()->getDecl();
if (!RD->field_empty())
return false;
if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RD))
return ClassDecl->isEmpty();
return true;
}
static llvm::Optional<int64_t>
structHasUniqueObjectRepresentations(const ASTContext &Context,
const RecordDecl *RD) {
assert(!RD->isUnion() && "Must be struct/class type");
const auto &Layout = Context.getASTRecordLayout(RD);
int64_t CurOffsetInBits = 0;
if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RD)) {
if (ClassDecl->isDynamicClass())
return llvm::None;
SmallVector<std::pair<QualType, int64_t>, 4> Bases;
for (const auto &Base : ClassDecl->bases()) {
// Empty types can be inherited from, and non-empty types can potentially
// have tail padding, so just make sure there isn't an error.
if (!isStructEmpty(Base.getType())) {
llvm::Optional<int64_t> Size = structHasUniqueObjectRepresentations(
Context, Base.getType()->castAs<RecordType>()->getDecl());
if (!Size)
return llvm::None;
Bases.emplace_back(Base.getType(), Size.getValue());
}
}
llvm::sort(Bases, [&](const std::pair<QualType, int64_t> &L,
const std::pair<QualType, int64_t> &R) {
return Layout.getBaseClassOffset(L.first->getAsCXXRecordDecl()) <
Layout.getBaseClassOffset(R.first->getAsCXXRecordDecl());
});
for (const auto &Base : Bases) {
int64_t BaseOffset = Context.toBits(
Layout.getBaseClassOffset(Base.first->getAsCXXRecordDecl()));
int64_t BaseSize = Base.second;
if (BaseOffset != CurOffsetInBits)
return llvm::None;
CurOffsetInBits = BaseOffset + BaseSize;
}
}
for (const auto *Field : RD->fields()) {
if (!Field->getType()->isReferenceType() &&
!Context.hasUniqueObjectRepresentations(Field->getType()))
return llvm::None;
int64_t FieldSizeInBits =
Context.toBits(Context.getTypeSizeInChars(Field->getType()));
if (Field->isBitField()) {
int64_t BitfieldSize = Field->getBitWidthValue(Context);
if (BitfieldSize > FieldSizeInBits)
return llvm::None;
FieldSizeInBits = BitfieldSize;
}
int64_t FieldOffsetInBits = Context.getFieldOffset(Field);
if (FieldOffsetInBits != CurOffsetInBits)
return llvm::None;
CurOffsetInBits = FieldSizeInBits + FieldOffsetInBits;
}
return CurOffsetInBits;
}
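// For illustration, assuming a 32-bit int with 32-bit alignment:
// 'struct A { char c; int i; };' has padding after 'c', so the running
// offset (8 bits) does not match the offset of 'i' (32 bits) and llvm::None
// is returned; 'struct B { int i; int j; };' yields 64, which matches
// sizeof(B) in bits.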
bool ASTContext::hasUniqueObjectRepresentations(QualType Ty) const {
// C++17 [meta.unary.prop]:
// The predicate condition for a template specialization
// has_unique_object_representations<T> shall be
// satisfied if and only if:
// (9.1) - T is trivially copyable, and
// (9.2) - any two objects of type T with the same value have the same
// object representation, where two objects
// of array or non-union class type are considered to have the same value
// if their respective sequences of
// direct subobjects have the same values, and two objects of union type
// are considered to have the same
// value if they have the same active member and the corresponding members
// have the same value.
// The set of scalar types for which this condition holds is
// implementation-defined. [ Note: If a type has padding
// bits, the condition does not hold; otherwise, the condition holds true
// for unsigned integral types. -- end note ]
assert(!Ty.isNull() && "Null QualType sent to unique object rep check");
// Arrays are unique only if their element type is unique.
if (Ty->isArrayType())
return hasUniqueObjectRepresentations(getBaseElementType(Ty));
// (9.1) - T is trivially copyable...
if (!Ty.isTriviallyCopyableType(*this))
return false;
// All integrals and enums are unique.
if (Ty->isIntegralOrEnumerationType())
return true;
// All other pointers are unique.
if (Ty->isPointerType())
return true;
if (Ty->isMemberPointerType()) {
const auto *MPT = Ty->getAs<MemberPointerType>();
return !ABI->getMemberPointerInfo(MPT).HasPadding;
}
if (Ty->isRecordType()) {
const RecordDecl *Record = Ty->castAs<RecordType>()->getDecl();
if (Record->isInvalidDecl())
return false;
if (Record->isUnion())
return unionHasUniqueObjectRepresentations(*this, Record);
Optional<int64_t> StructSize =
structHasUniqueObjectRepresentations(*this, Record);
return StructSize &&
StructSize.getValue() == static_cast<int64_t>(getTypeSize(Ty));
}
// FIXME: More cases to handle here (list by rsmith):
// vectors (careful about, eg, vector of 3 foo)
// _Complex int and friends
// _Atomic T
// Obj-C block pointers
// Obj-C object pointers
// and perhaps OpenCL's various builtin types (pipe, sampler_t, event_t,
// clk_event_t, queue_t, reserve_id_t)
// There are also Obj-C class types and the Obj-C selector type, but I think it
// makes sense for those to return false here.
return false;
}
unsigned ASTContext::CountNonClassIvars(const ObjCInterfaceDecl *OI) const {
unsigned count = 0;
// Count ivars declared in class extension.
for (const auto *Ext : OI->known_extensions())
count += Ext->ivar_size();
// Count ivars defined in this class's implementation. This
// includes synthesized ivars.
if (ObjCImplementationDecl *ImplDecl = OI->getImplementation())
count += ImplDecl->ivar_size();
return count;
}
bool ASTContext::isSentinelNullExpr(const Expr *E) {
if (!E)
return false;
// nullptr_t is always treated as null.
if (E->getType()->isNullPtrType()) return true;
if (E->getType()->isAnyPointerType() &&
E->IgnoreParenCasts()->isNullPointerConstant(*this,
Expr::NPC_ValueDependentIsNull))
return true;
// Unfortunately, __null has type 'int'.
if (isa<GNUNullExpr>(E)) return true;
return false;
}
/// Get the implementation of ObjCInterfaceDecl, or nullptr if none
/// exists.
ObjCImplementationDecl *ASTContext::getObjCImplementation(ObjCInterfaceDecl *D) {
llvm::DenseMap<ObjCContainerDecl*, ObjCImplDecl*>::iterator
I = ObjCImpls.find(D);
if (I != ObjCImpls.end())
return cast<ObjCImplementationDecl>(I->second);
return nullptr;
}
/// Get the implementation of ObjCCategoryDecl, or nullptr if none
/// exists.
ObjCCategoryImplDecl *ASTContext::getObjCImplementation(ObjCCategoryDecl *D) {
llvm::DenseMap<ObjCContainerDecl*, ObjCImplDecl*>::iterator
I = ObjCImpls.find(D);
if (I != ObjCImpls.end())
return cast<ObjCCategoryImplDecl>(I->second);
return nullptr;
}
/// Set the implementation of ObjCInterfaceDecl.
void ASTContext::setObjCImplementation(ObjCInterfaceDecl *IFaceD,
ObjCImplementationDecl *ImplD) {
assert(IFaceD && ImplD && "Passed null params");
ObjCImpls[IFaceD] = ImplD;
}
/// Set the implementation of ObjCCategoryDecl.
void ASTContext::setObjCImplementation(ObjCCategoryDecl *CatD,
ObjCCategoryImplDecl *ImplD) {
assert(CatD && ImplD && "Passed null params");
ObjCImpls[CatD] = ImplD;
}
const ObjCMethodDecl *
ASTContext::getObjCMethodRedeclaration(const ObjCMethodDecl *MD) const {
return ObjCMethodRedecls.lookup(MD);
}
void ASTContext::setObjCMethodRedeclaration(const ObjCMethodDecl *MD,
const ObjCMethodDecl *Redecl) {
assert(!getObjCMethodRedeclaration(MD) && "MD already has a redeclaration");
ObjCMethodRedecls[MD] = Redecl;
}
const ObjCInterfaceDecl *ASTContext::getObjContainingInterface(
const NamedDecl *ND) const {
if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(ND->getDeclContext()))
return ID;
if (const auto *CD = dyn_cast<ObjCCategoryDecl>(ND->getDeclContext()))
return CD->getClassInterface();
if (const auto *IMD = dyn_cast<ObjCImplDecl>(ND->getDeclContext()))
return IMD->getClassInterface();
return nullptr;
}
/// Get the copy initialization expression of VarDecl, or nullptr if
/// none exists.
BlockVarCopyInit ASTContext::getBlockVarCopyInit(const VarDecl *VD) const {
assert(VD && "Passed null params");
assert(VD->hasAttr<BlocksAttr>() &&
"getBlockVarCopyInits - not __block var");
auto I = BlockVarCopyInits.find(VD);
if (I != BlockVarCopyInits.end())
return I->second;
return {nullptr, false};
}
/// Set the copy initialization expression of a block var decl.
void ASTContext::setBlockVarCopyInit(const VarDecl*VD, Expr *CopyExpr,
bool CanThrow) {
assert(VD && CopyExpr && "Passed null params");
assert(VD->hasAttr<BlocksAttr>() &&
"setBlockVarCopyInits - not __block var");
BlockVarCopyInits[VD].setExprAndFlag(CopyExpr, CanThrow);
}
TypeSourceInfo *ASTContext::CreateTypeSourceInfo(QualType T,
unsigned DataSize) const {
if (!DataSize)
DataSize = TypeLoc::getFullDataSizeForType(T);
else
assert(DataSize == TypeLoc::getFullDataSizeForType(T) &&
"incorrect data size provided to CreateTypeSourceInfo!");
auto *TInfo =
(TypeSourceInfo*)BumpAlloc.Allocate(sizeof(TypeSourceInfo) + DataSize, 8);
new (TInfo) TypeSourceInfo(T);
return TInfo;
}
TypeSourceInfo *ASTContext::getTrivialTypeSourceInfo(QualType T,
SourceLocation L) const {
TypeSourceInfo *DI = CreateTypeSourceInfo(T);
DI->getTypeLoc().initialize(const_cast<ASTContext &>(*this), L);
return DI;
}
const ASTRecordLayout &
ASTContext::getASTObjCInterfaceLayout(const ObjCInterfaceDecl *D) const {
return getObjCLayout(D, nullptr);
}
const ASTRecordLayout &
ASTContext::getASTObjCImplementationLayout(
const ObjCImplementationDecl *D) const {
return getObjCLayout(D->getClassInterface(), D);
}
//===----------------------------------------------------------------------===//
// Type creation/memoization methods
//===----------------------------------------------------------------------===//
QualType
ASTContext::getExtQualType(const Type *baseType, Qualifiers quals) const {
unsigned fastQuals = quals.getFastQualifiers();
quals.removeFastQualifiers();
// Check if we've already instantiated this type.
llvm::FoldingSetNodeID ID;
ExtQuals::Profile(ID, baseType, quals);
void *insertPos = nullptr;
if (ExtQuals *eq = ExtQualNodes.FindNodeOrInsertPos(ID, insertPos)) {
assert(eq->getQualifiers() == quals);
return QualType(eq, fastQuals);
}
// If the base type is not canonical, make the appropriate canonical type.
QualType canon;
if (!baseType->isCanonicalUnqualified()) {
SplitQualType canonSplit = baseType->getCanonicalTypeInternal().split();
canonSplit.Quals.addConsistentQualifiers(quals);
canon = getExtQualType(canonSplit.Ty, canonSplit.Quals);
// Re-find the insert position.
(void) ExtQualNodes.FindNodeOrInsertPos(ID, insertPos);
}
auto *eq = new (*this, TypeAlignment) ExtQuals(baseType, canon, quals);
ExtQualNodes.InsertNode(eq, insertPos);
return QualType(eq, fastQuals);
}
QualType ASTContext::getAddrSpaceQualType(QualType T,
LangAS AddressSpace) const {
QualType CanT = getCanonicalType(T);
if (CanT.getAddressSpace() == AddressSpace)
return T;
// If we are composing extended qualifiers together, merge together
// into one ExtQuals node.
QualifierCollector Quals;
const Type *TypeNode = Quals.strip(T);
// If this type already has an address space specified, it cannot get
// another one.
assert(!Quals.hasAddressSpace() &&
"Type cannot be in multiple addr spaces!");
Quals.addAddressSpace(AddressSpace);
return getExtQualType(TypeNode, Quals);
}
QualType ASTContext::removeAddrSpaceQualType(QualType T) const {
// If the type is not qualified with an address space, just return it
// immediately.
if (!T.hasAddressSpace())
return T;
// If we are composing extended qualifiers together, merge together
// into one ExtQuals node.
QualifierCollector Quals;
const Type *TypeNode;
while (T.hasAddressSpace()) {
TypeNode = Quals.strip(T);
// If the type no longer has an address space after stripping qualifiers,
// jump out.
if (!QualType(TypeNode, 0).hasAddressSpace())
break;
// There might be sugar in the way. Strip it and try again.
T = T.getSingleStepDesugaredType(*this);
}
Quals.removeAddressSpace();
// Removal of the address space can mean there are no longer any
// non-fast qualifiers, so creating an ExtQualType isn't possible (asserts)
// or required.
if (Quals.hasNonFastQualifiers())
return getExtQualType(TypeNode, Quals);
else
return QualType(TypeNode, Quals.getFastQualifiers());
}
QualType ASTContext::getObjCGCQualType(QualType T,
Qualifiers::GC GCAttr) const {
QualType CanT = getCanonicalType(T);
if (CanT.getObjCGCAttr() == GCAttr)
return T;
if (const auto *ptr = T->getAs<PointerType>()) {
QualType Pointee = ptr->getPointeeType();
if (Pointee->isAnyPointerType()) {
QualType ResultType = getObjCGCQualType(Pointee, GCAttr);
return getPointerType(ResultType);
}
}
// If we are composing extended qualifiers together, merge together
// into one ExtQuals node.
QualifierCollector Quals;
const Type *TypeNode = Quals.strip(T);
// If this type already has an ObjCGC specified, it cannot get
// another one.
assert(!Quals.hasObjCGCAttr() &&
"Type cannot have multiple ObjCGCs!");
Quals.addObjCGCAttr(GCAttr);
return getExtQualType(TypeNode, Quals);
}
QualType ASTContext::removePtrSizeAddrSpace(QualType T) const {
if (const PointerType *Ptr = T->getAs<PointerType>()) {
QualType Pointee = Ptr->getPointeeType();
if (isPtrSizeAddressSpace(Pointee.getAddressSpace())) {
return getPointerType(removeAddrSpaceQualType(Pointee));
}
}
return T;
}
const FunctionType *ASTContext::adjustFunctionType(const FunctionType *T,
FunctionType::ExtInfo Info) {
if (T->getExtInfo() == Info)
return T;
QualType Result;
if (const auto *FNPT = dyn_cast<FunctionNoProtoType>(T)) {
Result = getFunctionNoProtoType(FNPT->getReturnType(), Info);
} else {
const auto *FPT = cast<FunctionProtoType>(T);
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
EPI.ExtInfo = Info;
Result = getFunctionType(FPT->getReturnType(), FPT->getParamTypes(), EPI);
}
return cast<FunctionType>(Result.getTypePtr());
}
void ASTContext::adjustDeducedFunctionResultType(FunctionDecl *FD,
QualType ResultType) {
FD = FD->getMostRecentDecl();
while (true) {
const auto *FPT = FD->getType()->castAs<FunctionProtoType>();
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
FD->setType(getFunctionType(ResultType, FPT->getParamTypes(), EPI));
if (FunctionDecl *Next = FD->getPreviousDecl())
FD = Next;
else
break;
}
if (ASTMutationListener *L = getASTMutationListener())
L->DeducedReturnType(FD, ResultType);
}
/// Get a function type and produce the equivalent function type with the
/// specified exception specification. Type sugar that can be present on a
/// declaration of a function with an exception specification is permitted
/// and preserved. Other type sugar (for instance, typedefs) is not.
QualType ASTContext::getFunctionTypeWithExceptionSpec(
QualType Orig, const FunctionProtoType::ExceptionSpecInfo &ESI) {
// Might have some parens.
if (const auto *PT = dyn_cast<ParenType>(Orig))
return getParenType(
getFunctionTypeWithExceptionSpec(PT->getInnerType(), ESI));
// Might be wrapped in a macro qualified type.
if (const auto *MQT = dyn_cast<MacroQualifiedType>(Orig))
return getMacroQualifiedType(
getFunctionTypeWithExceptionSpec(MQT->getUnderlyingType(), ESI),
MQT->getMacroIdentifier());
// Might have a calling-convention attribute.
if (const auto *AT = dyn_cast<AttributedType>(Orig))
return getAttributedType(
AT->getAttrKind(),
getFunctionTypeWithExceptionSpec(AT->getModifiedType(), ESI),
getFunctionTypeWithExceptionSpec(AT->getEquivalentType(), ESI));
// Anything else must be a function type. Rebuild it with the new exception
// specification.
const auto *Proto = Orig->castAs<FunctionProtoType>();
return getFunctionType(
Proto->getReturnType(), Proto->getParamTypes(),
Proto->getExtProtoInfo().withExceptionSpec(ESI));
}
bool ASTContext::hasSameFunctionTypeIgnoringExceptionSpec(QualType T,
QualType U) {
return hasSameType(T, U) ||
(getLangOpts().CPlusPlus17 &&
hasSameType(getFunctionTypeWithExceptionSpec(T, EST_None),
getFunctionTypeWithExceptionSpec(U, EST_None)));
}
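// For illustration: in C++17, 'void () noexcept' and 'void ()' are distinct
// function types, but this predicate treats them as equivalent because both
// map to the same type once their exception specifications are replaced
// with EST_None.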
QualType ASTContext::getFunctionTypeWithoutPtrSizes(QualType T) {
if (const auto *Proto = T->getAs<FunctionProtoType>()) {
QualType RetTy = removePtrSizeAddrSpace(Proto->getReturnType());
SmallVector<QualType, 16> Args(Proto->param_types());
for (unsigned i = 0, n = Args.size(); i != n; ++i)
Args[i] = removePtrSizeAddrSpace(Args[i]);
return getFunctionType(RetTy, Args, Proto->getExtProtoInfo());
}
if (const FunctionNoProtoType *Proto = T->getAs<FunctionNoProtoType>()) {
QualType RetTy = removePtrSizeAddrSpace(Proto->getReturnType());
return getFunctionNoProtoType(RetTy, Proto->getExtInfo());
}
return T;
}
bool ASTContext::hasSameFunctionTypeIgnoringPtrSizes(QualType T, QualType U) {
return hasSameType(T, U) ||
hasSameType(getFunctionTypeWithoutPtrSizes(T),
getFunctionTypeWithoutPtrSizes(U));
}
void ASTContext::adjustExceptionSpec(
FunctionDecl *FD, const FunctionProtoType::ExceptionSpecInfo &ESI,
bool AsWritten) {
// Update the type.
QualType Updated =
getFunctionTypeWithExceptionSpec(FD->getType(), ESI);
FD->setType(Updated);
if (!AsWritten)
return;
// Update the type in the type source information too.
if (TypeSourceInfo *TSInfo = FD->getTypeSourceInfo()) {
// If the type and the type-as-written differ, we may need to update
// the type-as-written too.
if (TSInfo->getType() != FD->getType())
Updated = getFunctionTypeWithExceptionSpec(TSInfo->getType(), ESI);
// FIXME: When we get proper type location information for exceptions,
// we'll also have to rebuild the TypeSourceInfo. For now, we just patch
// up the TypeSourceInfo.
assert(TypeLoc::getFullDataSizeForType(Updated) ==
TypeLoc::getFullDataSizeForType(TSInfo->getType()) &&
"TypeLoc size mismatch from updating exception specification");
TSInfo->overrideType(Updated);
}
}
/// getComplexType - Return the uniqued reference to the type for a complex
/// number with the specified element type.
QualType ASTContext::getComplexType(QualType T) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
ComplexType::Profile(ID, T);
void *InsertPos = nullptr;
if (ComplexType *CT = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(CT, 0);
// If the element type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getComplexType(getCanonicalType(T));
// Get the new insert position for the node we care about.
ComplexType *NewIP = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) ComplexType(T, Canonical);
Types.push_back(New);
ComplexTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getPointerType - Return the uniqued reference to the type for a pointer to
/// the specified type.
QualType ASTContext::getPointerType(QualType T) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
PointerType::Profile(ID, T);
void *InsertPos = nullptr;
if (PointerType *PT = PointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the pointee type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getPointerType(getCanonicalType(T));
// Get the new insert position for the node we care about.
PointerType *NewIP = PointerTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) PointerType(T, Canonical);
Types.push_back(New);
PointerTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
QualType ASTContext::getAdjustedType(QualType Orig, QualType New) const {
llvm::FoldingSetNodeID ID;
AdjustedType::Profile(ID, Orig, New);
void *InsertPos = nullptr;
AdjustedType *AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
if (AT)
return QualType(AT, 0);
QualType Canonical = getCanonicalType(New);
// Get the new insert position for the node we care about.
AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!AT && "Shouldn't be in the map!");
AT = new (*this, TypeAlignment)
AdjustedType(Type::Adjusted, Orig, New, Canonical);
Types.push_back(AT);
AdjustedTypes.InsertNode(AT, InsertPos);
return QualType(AT, 0);
}
QualType ASTContext::getDecayedType(QualType T) const {
assert((T->isArrayType() || T->isFunctionType()) && "T does not decay");
QualType Decayed;
// C99 6.7.5.3p7:
// A declaration of a parameter as "array of type" shall be
// adjusted to "qualified pointer to type", where the type
// qualifiers (if any) are those specified within the [ and ] of
// the array type derivation.
if (T->isArrayType())
Decayed = getArrayDecayedType(T);
// C99 6.7.5.3p8:
// A declaration of a parameter as "function returning type"
// shall be adjusted to "pointer to function returning type", as
// in 6.3.2.1.
if (T->isFunctionType())
Decayed = getPointerType(T);
llvm::FoldingSetNodeID ID;
AdjustedType::Profile(ID, T, Decayed);
void *InsertPos = nullptr;
AdjustedType *AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
if (AT)
return QualType(AT, 0);
QualType Canonical = getCanonicalType(Decayed);
// Get the new insert position for the node we care about.
AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!AT && "Shouldn't be in the map!");
AT = new (*this, TypeAlignment) DecayedType(T, Decayed, Canonical);
Types.push_back(AT);
AdjustedTypes.InsertNode(AT, InsertPos);
return QualType(AT, 0);
}
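// For illustration: a parameter declared as 'const int a[10]' decays to
// 'const int *a', and a parameter of function type 'void (int)' decays to
// 'void (*)(int)'; the DecayedType node records both the original and the
// adjusted type.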
/// getBlockPointerType - Return the uniqued reference to the type for
/// a pointer to the specified block.
QualType ASTContext::getBlockPointerType(QualType T) const {
assert(T->isFunctionType() && "block of function types only");
// Unique pointers, to guarantee there is only one block of a particular
// structure.
llvm::FoldingSetNodeID ID;
BlockPointerType::Profile(ID, T);
void *InsertPos = nullptr;
if (BlockPointerType *PT =
BlockPointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the block pointee type isn't canonical, this won't be a canonical
// type either so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getBlockPointerType(getCanonicalType(T));
// Get the new insert position for the node we care about.
BlockPointerType *NewIP =
BlockPointerTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) BlockPointerType(T, Canonical);
Types.push_back(New);
BlockPointerTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getLValueReferenceType - Return the uniqued reference to the type for an
/// lvalue reference to the specified type.
QualType
ASTContext::getLValueReferenceType(QualType T, bool SpelledAsLValue) const {
assert(getCanonicalType(T) != OverloadTy &&
"Unresolved overloaded function type");
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
ReferenceType::Profile(ID, T, SpelledAsLValue);
void *InsertPos = nullptr;
if (LValueReferenceType *RT =
LValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(RT, 0);
const auto *InnerRef = T->getAs<ReferenceType>();
// If the referencee type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (!SpelledAsLValue || InnerRef || !T.isCanonical()) {
QualType PointeeType = (InnerRef ? InnerRef->getPointeeType() : T);
Canonical = getLValueReferenceType(getCanonicalType(PointeeType));
// Get the new insert position for the node we care about.
LValueReferenceType *NewIP =
LValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) LValueReferenceType(T, Canonical,
SpelledAsLValue);
Types.push_back(New);
LValueReferenceTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getRValueReferenceType - Return the uniqued reference to the type for an
/// rvalue reference to the specified type.
QualType ASTContext::getRValueReferenceType(QualType T) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
ReferenceType::Profile(ID, T, false);
void *InsertPos = nullptr;
if (RValueReferenceType *RT =
RValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(RT, 0);
const auto *InnerRef = T->getAs<ReferenceType>();
// If the referencee type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (InnerRef || !T.isCanonical()) {
QualType PointeeType = (InnerRef ? InnerRef->getPointeeType() : T);
Canonical = getRValueReferenceType(getCanonicalType(PointeeType));
// Get the new insert position for the node we care about.
RValueReferenceType *NewIP =
RValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) RValueReferenceType(T, Canonical);
Types.push_back(New);
RValueReferenceTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getMemberPointerType - Return the uniqued reference to the type for a
/// member pointer to the specified type, in the specified class.
QualType ASTContext::getMemberPointerType(QualType T, const Type *Cls) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
MemberPointerType::Profile(ID, T, Cls);
void *InsertPos = nullptr;
if (MemberPointerType *PT =
MemberPointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the pointee or class type isn't canonical, this won't be a canonical
// type either, so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical() || !Cls->isCanonicalUnqualified()) {
Canonical = getMemberPointerType(getCanonicalType(T),getCanonicalType(Cls));
// Get the new insert position for the node we care about.
MemberPointerType *NewIP =
MemberPointerTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) MemberPointerType(T, Cls, Canonical);
Types.push_back(New);
MemberPointerTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getConstantArrayType - Return the unique reference to the type for an
/// array of the specified element type.
QualType ASTContext::getConstantArrayType(QualType EltTy,
const llvm::APInt &ArySizeIn,
const Expr *SizeExpr,
ArrayType::ArraySizeModifier ASM,
unsigned IndexTypeQuals) const {
assert((EltTy->isDependentType() ||
EltTy->isIncompleteType() || EltTy->isConstantSizeType()) &&
"Constant array of VLAs is illegal!");
// We only need the size as part of the type if it's instantiation-dependent.
if (SizeExpr && !SizeExpr->isInstantiationDependent())
SizeExpr = nullptr;
// Convert the array size into a canonical width matching the pointer size for
// the target.
llvm::APInt ArySize(ArySizeIn);
ArySize = ArySize.zextOrTrunc(Target->getMaxPointerWidth());
llvm::FoldingSetNodeID ID;
ConstantArrayType::Profile(ID, *this, EltTy, ArySize, SizeExpr, ASM,
IndexTypeQuals);
void *InsertPos = nullptr;
if (ConstantArrayType *ATP =
ConstantArrayTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(ATP, 0);
// If the element type isn't canonical or has qualifiers, or the array bound
// is instantiation-dependent, this won't be a canonical type either, so fill
// in the canonical type field.
QualType Canon;
if (!EltTy.isCanonical() || EltTy.hasLocalQualifiers() || SizeExpr) {
SplitQualType canonSplit = getCanonicalType(EltTy).split();
Canon = getConstantArrayType(QualType(canonSplit.Ty, 0), ArySize, nullptr,
ASM, IndexTypeQuals);
Canon = getQualifiedType(Canon, canonSplit.Quals);
// Get the new insert position for the node we care about.
ConstantArrayType *NewIP =
ConstantArrayTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
void *Mem = Allocate(
ConstantArrayType::totalSizeToAlloc<const Expr *>(SizeExpr ? 1 : 0),
TypeAlignment);
auto *New = new (Mem)
ConstantArrayType(EltTy, Canon, ArySize, SizeExpr, ASM, IndexTypeQuals);
ConstantArrayTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
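// A minimal usage sketch (assuming an ASTContext &Ctx is in scope): requesting
// a constant array of 'const int' yields a sugared node whose canonical form
// hoists the qualifier onto the array itself, per the split above.
//
//   llvm::APInt Size(Ctx.getTargetInfo().getMaxPointerWidth(), 4);
//   QualType Arr = Ctx.getConstantArrayType(Ctx.IntTy.withConst(), Size,
//                                           /*SizeExpr=*/nullptr,
//                                           ArrayType::Normal,
//                                           /*IndexTypeQuals=*/0);
//   // Arr's canonical type is 'const' applied to 'int [4]'.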
/// getVariableArrayDecayedType - Turns the given type, which may be
/// variably-modified, into the corresponding type with all the known
/// sizes replaced with [*].
QualType ASTContext::getVariableArrayDecayedType(QualType type) const {
// By far the most common case.
if (!type->isVariablyModifiedType()) return type;
QualType result;
SplitQualType split = type.getSplitDesugaredType();
const Type *ty = split.Ty;
switch (ty->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("didn't desugar past all non-canonical types?");
// These types should never be variably-modified.
case Type::Builtin:
case Type::Complex:
case Type::Vector:
case Type::DependentVector:
case Type::ExtVector:
case Type::DependentSizedExtVector:
case Type::ConstantMatrix:
case Type::DependentSizedMatrix:
case Type::DependentAddressSpace:
case Type::ObjCObject:
case Type::ObjCInterface:
case Type::ObjCObjectPointer:
case Type::Record:
case Type::Enum:
case Type::UnresolvedUsing:
case Type::TypeOfExpr:
case Type::TypeOf:
case Type::Decltype:
case Type::UnaryTransform:
case Type::DependentName:
case Type::InjectedClassName:
case Type::TemplateSpecialization:
case Type::DependentTemplateSpecialization:
case Type::TemplateTypeParm:
case Type::SubstTemplateTypeParmPack:
case Type::Auto:
case Type::DeducedTemplateSpecialization:
case Type::PackExpansion:
case Type::ExtInt:
case Type::DependentExtInt:
llvm_unreachable("type should never be variably-modified");
// These types can be variably-modified but should never need to
// further decay.
case Type::FunctionNoProto:
case Type::FunctionProto:
case Type::BlockPointer:
case Type::MemberPointer:
case Type::Pipe:
return type;
// These types can be variably-modified. All these modifications
// preserve structure except as noted by comments.
// TODO: if we ever care about optimizing VLAs, there are no-op
// optimizations available here.
case Type::Pointer:
result = getPointerType(getVariableArrayDecayedType(
cast<PointerType>(ty)->getPointeeType()));
break;
case Type::LValueReference: {
const auto *lv = cast<LValueReferenceType>(ty);
result = getLValueReferenceType(
getVariableArrayDecayedType(lv->getPointeeType()),
lv->isSpelledAsLValue());
break;
}
case Type::RValueReference: {
const auto *lv = cast<RValueReferenceType>(ty);
result = getRValueReferenceType(
getVariableArrayDecayedType(lv->getPointeeType()));
break;
}
case Type::Atomic: {
const auto *at = cast<AtomicType>(ty);
result = getAtomicType(getVariableArrayDecayedType(at->getValueType()));
break;
}
case Type::ConstantArray: {
const auto *cat = cast<ConstantArrayType>(ty);
result = getConstantArrayType(
getVariableArrayDecayedType(cat->getElementType()),
cat->getSize(),
cat->getSizeExpr(),
cat->getSizeModifier(),
cat->getIndexTypeCVRQualifiers());
break;
}
case Type::DependentSizedArray: {
const auto *dat = cast<DependentSizedArrayType>(ty);
result = getDependentSizedArrayType(
getVariableArrayDecayedType(dat->getElementType()),
dat->getSizeExpr(),
dat->getSizeModifier(),
dat->getIndexTypeCVRQualifiers(),
dat->getBracketsRange());
break;
}
// Turn incomplete types into [*] types.
case Type::IncompleteArray: {
const auto *iat = cast<IncompleteArrayType>(ty);
result = getVariableArrayType(
getVariableArrayDecayedType(iat->getElementType()),
/*size*/ nullptr,
ArrayType::Normal,
iat->getIndexTypeCVRQualifiers(),
SourceRange());
break;
}
// Turn VLA types into [*] types.
case Type::VariableArray: {
const auto *vat = cast<VariableArrayType>(ty);
result = getVariableArrayType(
getVariableArrayDecayedType(vat->getElementType()),
/*size*/ nullptr,
ArrayType::Star,
vat->getIndexTypeCVRQualifiers(),
vat->getBracketsRange());
break;
}
}
// Apply the top-level qualifiers from the original.
return getQualifiedType(result, split.Quals);
}
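// Sketch of the decay above: given a variably modified type such as
// 'int (*)[n][4]' (where 'n' is a runtime bound), the result replaces the VLA
// bound with a star, yielding 'int (*)[*][4]'; the pointer and the inner
// constant-size array keep their structure, and only the known size is erased.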
/// getVariableArrayType - Returns a non-unique reference to the type for a
/// variable array of the specified element type.
QualType ASTContext::getVariableArrayType(QualType EltTy,
Expr *NumElts,
ArrayType::ArraySizeModifier ASM,
unsigned IndexTypeQuals,
SourceRange Brackets) const {
// Since we don't unique expressions, it isn't possible to unique VLA's
// that have an expression provided for their size.
QualType Canon;
// Be sure to pull qualifiers off the element type.
if (!EltTy.isCanonical() || EltTy.hasLocalQualifiers()) {
SplitQualType canonSplit = getCanonicalType(EltTy).split();
Canon = getVariableArrayType(QualType(canonSplit.Ty, 0), NumElts, ASM,
IndexTypeQuals, Brackets);
Canon = getQualifiedType(Canon, canonSplit.Quals);
}
auto *New = new (*this, TypeAlignment)
VariableArrayType(EltTy, Canon, NumElts, ASM, IndexTypeQuals, Brackets);
VariableArrayTypes.push_back(New);
Types.push_back(New);
return QualType(New, 0);
}
/// getDependentSizedArrayType - Returns a non-unique reference to
/// the type for a dependently-sized array of the specified element
/// type.
QualType ASTContext::getDependentSizedArrayType(QualType elementType,
Expr *numElements,
ArrayType::ArraySizeModifier ASM,
unsigned elementTypeQuals,
SourceRange brackets) const {
assert((!numElements || numElements->isTypeDependent() ||
numElements->isValueDependent()) &&
"Size must be type- or value-dependent!");
// Dependently-sized array types that do not have a specified number
// of elements will have their sizes deduced from a dependent
// initializer. We do no canonicalization here at all, which is okay
// because they can't be used in most locations.
if (!numElements) {
auto *newType
= new (*this, TypeAlignment)
DependentSizedArrayType(*this, elementType, QualType(),
numElements, ASM, elementTypeQuals,
brackets);
Types.push_back(newType);
return QualType(newType, 0);
}
// Otherwise, we actually build a new type every time, but we
// also build a canonical type.
SplitQualType canonElementType = getCanonicalType(elementType).split();
void *insertPos = nullptr;
llvm::FoldingSetNodeID ID;
DependentSizedArrayType::Profile(ID, *this,
QualType(canonElementType.Ty, 0),
ASM, elementTypeQuals, numElements);
// Look for an existing type with these properties.
DependentSizedArrayType *canonTy =
DependentSizedArrayTypes.FindNodeOrInsertPos(ID, insertPos);
// If we don't have one, build one.
if (!canonTy) {
canonTy = new (*this, TypeAlignment)
DependentSizedArrayType(*this, QualType(canonElementType.Ty, 0),
QualType(), numElements, ASM, elementTypeQuals,
brackets);
DependentSizedArrayTypes.InsertNode(canonTy, insertPos);
Types.push_back(canonTy);
}
// Apply qualifiers from the element type to the array.
QualType canon = getQualifiedType(QualType(canonTy,0),
canonElementType.Quals);
// If we didn't need extra canonicalization for the element type or the size
// expression, then just use that as our result.
if (QualType(canonElementType.Ty, 0) == elementType &&
canonTy->getSizeExpr() == numElements)
return canon;
// Otherwise, we need to build a type which follows the spelling
// of the element type.
auto *sugaredType
= new (*this, TypeAlignment)
DependentSizedArrayType(*this, elementType, canon, numElements,
ASM, elementTypeQuals, brackets);
Types.push_back(sugaredType);
return QualType(sugaredType, 0);
}
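// Sketch: the first request for 'T[N]' with a dependent 'N' builds and uniques
// the canonical node (canonical, unqualified element type); a later request
// spelled through sugar, e.g. a typedef for 'T', reuses that canonical node as
// its canonical type and gets its own non-uniqued node that preserves the
// written spelling.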
QualType ASTContext::getIncompleteArrayType(QualType elementType,
ArrayType::ArraySizeModifier ASM,
unsigned elementTypeQuals) const {
llvm::FoldingSetNodeID ID;
IncompleteArrayType::Profile(ID, elementType, ASM, elementTypeQuals);
void *insertPos = nullptr;
if (IncompleteArrayType *iat =
IncompleteArrayTypes.FindNodeOrInsertPos(ID, insertPos))
return QualType(iat, 0);
// If the element type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field. We also have to pull
// qualifiers off the element type.
QualType canon;
if (!elementType.isCanonical() || elementType.hasLocalQualifiers()) {
SplitQualType canonSplit = getCanonicalType(elementType).split();
canon = getIncompleteArrayType(QualType(canonSplit.Ty, 0),
ASM, elementTypeQuals);
canon = getQualifiedType(canon, canonSplit.Quals);
// Get the new insert position for the node we care about.
IncompleteArrayType *existing =
IncompleteArrayTypes.FindNodeOrInsertPos(ID, insertPos);
assert(!existing && "Shouldn't be in the map!"); (void) existing;
}
auto *newType = new (*this, TypeAlignment)
IncompleteArrayType(elementType, canon, ASM, elementTypeQuals);
IncompleteArrayTypes.InsertNode(newType, insertPos);
Types.push_back(newType);
return QualType(newType, 0);
}
ASTContext::BuiltinVectorTypeInfo
ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const {
#define SVE_INT_ELTTY(BITS, ELTS, SIGNED, NUMVECTORS) \
{getIntTypeForBitwidth(BITS, SIGNED), llvm::ElementCount::getScalable(ELTS), \
NUMVECTORS};
#define SVE_ELTTY(ELTTY, ELTS, NUMVECTORS) \
{ELTTY, llvm::ElementCount::getScalable(ELTS), NUMVECTORS};
switch (Ty->getKind()) {
default:
llvm_unreachable("Unsupported builtin vector type");
case BuiltinType::SveInt8:
return SVE_INT_ELTTY(8, 16, true, 1);
case BuiltinType::SveUint8:
return SVE_INT_ELTTY(8, 16, false, 1);
case BuiltinType::SveInt8x2:
return SVE_INT_ELTTY(8, 16, true, 2);
case BuiltinType::SveUint8x2:
return SVE_INT_ELTTY(8, 16, false, 2);
case BuiltinType::SveInt8x3:
return SVE_INT_ELTTY(8, 16, true, 3);
case BuiltinType::SveUint8x3:
return SVE_INT_ELTTY(8, 16, false, 3);
case BuiltinType::SveInt8x4:
return SVE_INT_ELTTY(8, 16, true, 4);
case BuiltinType::SveUint8x4:
return SVE_INT_ELTTY(8, 16, false, 4);
case BuiltinType::SveInt16:
return SVE_INT_ELTTY(16, 8, true, 1);
case BuiltinType::SveUint16:
return SVE_INT_ELTTY(16, 8, false, 1);
case BuiltinType::SveInt16x2:
return SVE_INT_ELTTY(16, 8, true, 2);
case BuiltinType::SveUint16x2:
return SVE_INT_ELTTY(16, 8, false, 2);
case BuiltinType::SveInt16x3:
return SVE_INT_ELTTY(16, 8, true, 3);
case BuiltinType::SveUint16x3:
return SVE_INT_ELTTY(16, 8, false, 3);
case BuiltinType::SveInt16x4:
return SVE_INT_ELTTY(16, 8, true, 4);
case BuiltinType::SveUint16x4:
return SVE_INT_ELTTY(16, 8, false, 4);
case BuiltinType::SveInt32:
return SVE_INT_ELTTY(32, 4, true, 1);
case BuiltinType::SveUint32:
return SVE_INT_ELTTY(32, 4, false, 1);
case BuiltinType::SveInt32x2:
return SVE_INT_ELTTY(32, 4, true, 2);
case BuiltinType::SveUint32x2:
return SVE_INT_ELTTY(32, 4, false, 2);
case BuiltinType::SveInt32x3:
return SVE_INT_ELTTY(32, 4, true, 3);
case BuiltinType::SveUint32x3:
return SVE_INT_ELTTY(32, 4, false, 3);
case BuiltinType::SveInt32x4:
return SVE_INT_ELTTY(32, 4, true, 4);
case BuiltinType::SveUint32x4:
return SVE_INT_ELTTY(32, 4, false, 4);
case BuiltinType::SveInt64:
return SVE_INT_ELTTY(64, 2, true, 1);
case BuiltinType::SveUint64:
return SVE_INT_ELTTY(64, 2, false, 1);
case BuiltinType::SveInt64x2:
return SVE_INT_ELTTY(64, 2, true, 2);
case BuiltinType::SveUint64x2:
return SVE_INT_ELTTY(64, 2, false, 2);
case BuiltinType::SveInt64x3:
return SVE_INT_ELTTY(64, 2, true, 3);
case BuiltinType::SveUint64x3:
return SVE_INT_ELTTY(64, 2, false, 3);
case BuiltinType::SveInt64x4:
return SVE_INT_ELTTY(64, 2, true, 4);
case BuiltinType::SveUint64x4:
return SVE_INT_ELTTY(64, 2, false, 4);
case BuiltinType::SveBool:
return SVE_ELTTY(BoolTy, 16, 1);
case BuiltinType::SveFloat16:
return SVE_ELTTY(HalfTy, 8, 1);
case BuiltinType::SveFloat16x2:
return SVE_ELTTY(HalfTy, 8, 2);
case BuiltinType::SveFloat16x3:
return SVE_ELTTY(HalfTy, 8, 3);
case BuiltinType::SveFloat16x4:
return SVE_ELTTY(HalfTy, 8, 4);
case BuiltinType::SveFloat32:
return SVE_ELTTY(FloatTy, 4, 1);
case BuiltinType::SveFloat32x2:
return SVE_ELTTY(FloatTy, 4, 2);
case BuiltinType::SveFloat32x3:
return SVE_ELTTY(FloatTy, 4, 3);
case BuiltinType::SveFloat32x4:
return SVE_ELTTY(FloatTy, 4, 4);
case BuiltinType::SveFloat64:
return SVE_ELTTY(DoubleTy, 2, 1);
case BuiltinType::SveFloat64x2:
return SVE_ELTTY(DoubleTy, 2, 2);
case BuiltinType::SveFloat64x3:
return SVE_ELTTY(DoubleTy, 2, 3);
case BuiltinType::SveFloat64x4:
return SVE_ELTTY(DoubleTy, 2, 4);
case BuiltinType::SveBFloat16:
return SVE_ELTTY(BFloat16Ty, 8, 1);
case BuiltinType::SveBFloat16x2:
return SVE_ELTTY(BFloat16Ty, 8, 2);
case BuiltinType::SveBFloat16x3:
return SVE_ELTTY(BFloat16Ty, 8, 3);
case BuiltinType::SveBFloat16x4:
return SVE_ELTTY(BFloat16Ty, 8, 4);
#define RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF, \
IsSigned) \
case BuiltinType::Id: \
return {getIntTypeForBitwidth(ElBits, IsSigned), \
llvm::ElementCount::getScalable(NumEls), NF};
#define RVV_VECTOR_TYPE_FLOAT(Name, Id, SingletonId, NumEls, ElBits, NF) \
case BuiltinType::Id: \
return {ElBits == 16 ? Float16Ty : (ElBits == 32 ? FloatTy : DoubleTy), \
llvm::ElementCount::getScalable(NumEls), NF};
#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls) \
case BuiltinType::Id: \
return {BoolTy, llvm::ElementCount::getScalable(NumEls), 1};
#include "clang/Basic/RISCVVTypes.def"
}
}
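// For example, under the table above SveInt32x2 reports an element type of a
// signed 32-bit integer, an element count of 'scalable x 4', and NumVectors of
// 2: a tuple of two scalable vectors with four 32-bit lanes per 128-bit
// granule.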
/// getScalableVectorType - Return the unique reference to a scalable vector
/// type of the specified element type and size. VectorType must be a built-in
/// type.
QualType ASTContext::getScalableVectorType(QualType EltTy,
unsigned NumElts) const {
if (Target->hasAArch64SVETypes()) {
uint64_t EltTySize = getTypeSize(EltTy);
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \
IsSigned, IsFP, IsBF) \
if (!EltTy->isBooleanType() && \
((EltTy->hasIntegerRepresentation() && \
EltTy->hasSignedIntegerRepresentation() == IsSigned) || \
(EltTy->hasFloatingRepresentation() && !EltTy->isBFloat16Type() && \
IsFP && !IsBF) || \
(EltTy->hasFloatingRepresentation() && EltTy->isBFloat16Type() && \
IsBF && !IsFP)) && \
EltTySize == ElBits && NumElts == NumEls) { \
return SingletonId; \
}
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls) \
if (EltTy->isBooleanType() && NumElts == NumEls) \
return SingletonId;
#include "clang/Basic/AArch64SVEACLETypes.def"
} else if (Target->hasRISCVVTypes()) {
uint64_t EltTySize = getTypeSize(EltTy);
#define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, \
IsFP) \
if (!EltTy->isBooleanType() && \
((EltTy->hasIntegerRepresentation() && \
EltTy->hasSignedIntegerRepresentation() == IsSigned) || \
(EltTy->hasFloatingRepresentation() && IsFP)) && \
EltTySize == ElBits && NumElts == NumEls) \
return SingletonId;
#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls) \
if (EltTy->isBooleanType() && NumElts == NumEls) \
return SingletonId;
#include "clang/Basic/RISCVVTypes.def"
}
return QualType();
}
/// getVectorType - Return the unique reference to a vector type of
/// the specified element type and size. VectorType must be a built-in type.
QualType ASTContext::getVectorType(QualType vecType, unsigned NumElts,
VectorType::VectorKind VecKind) const {
assert(vecType->isBuiltinType());
// Check if we've already instantiated a vector of this type.
llvm::FoldingSetNodeID ID;
VectorType::Profile(ID, vecType, NumElts, Type::Vector, VecKind);
void *InsertPos = nullptr;
if (VectorType *VTP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(VTP, 0);
// If the element type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!vecType.isCanonical()) {
Canonical = getVectorType(getCanonicalType(vecType), NumElts, VecKind);
// Get the new insert position for the node we care about.
VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment)
VectorType(vecType, NumElts, Canonical, VecKind);
VectorTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
QualType
ASTContext::getDependentVectorType(QualType VecType, Expr *SizeExpr,
SourceLocation AttrLoc,
VectorType::VectorKind VecKind) const {
llvm::FoldingSetNodeID ID;
DependentVectorType::Profile(ID, *this, getCanonicalType(VecType), SizeExpr,
VecKind);
void *InsertPos = nullptr;
DependentVectorType *Canon =
DependentVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
DependentVectorType *New;
if (Canon) {
New = new (*this, TypeAlignment) DependentVectorType(
*this, VecType, QualType(Canon, 0), SizeExpr, AttrLoc, VecKind);
} else {
QualType CanonVecTy = getCanonicalType(VecType);
if (CanonVecTy == VecType) {
New = new (*this, TypeAlignment) DependentVectorType(
*this, VecType, QualType(), SizeExpr, AttrLoc, VecKind);
DependentVectorType *CanonCheck =
DependentVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CanonCheck &&
"Dependent-sized vector_size canonical type broken");
(void)CanonCheck;
DependentVectorTypes.InsertNode(New, InsertPos);
} else {
QualType CanonTy = getDependentVectorType(CanonVecTy, SizeExpr,
SourceLocation(), VecKind);
New = new (*this, TypeAlignment) DependentVectorType(
*this, VecType, CanonTy, SizeExpr, AttrLoc, VecKind);
}
}
Types.push_back(New);
return QualType(New, 0);
}
/// getExtVectorType - Return the unique reference to an extended vector type of
/// the specified element type and size. VectorType must be a built-in type.
QualType
ASTContext::getExtVectorType(QualType vecType, unsigned NumElts) const {
assert(vecType->isBuiltinType() || vecType->isDependentType());
// Check if we've already instantiated a vector of this type.
llvm::FoldingSetNodeID ID;
VectorType::Profile(ID, vecType, NumElts, Type::ExtVector,
VectorType::GenericVector);
void *InsertPos = nullptr;
if (VectorType *VTP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(VTP, 0);
// If the element type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!vecType.isCanonical()) {
Canonical = getExtVectorType(getCanonicalType(vecType), NumElts);
// Get the new insert position for the node we care about.
VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment)
ExtVectorType(vecType, NumElts, Canonical);
VectorTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
QualType
ASTContext::getDependentSizedExtVectorType(QualType vecType,
Expr *SizeExpr,
SourceLocation AttrLoc) const {
llvm::FoldingSetNodeID ID;
DependentSizedExtVectorType::Profile(ID, *this, getCanonicalType(vecType),
SizeExpr);
void *InsertPos = nullptr;
DependentSizedExtVectorType *Canon
= DependentSizedExtVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
DependentSizedExtVectorType *New;
if (Canon) {
// We already have a canonical version of this array type; use it as
// the canonical type for a newly-built type.
New = new (*this, TypeAlignment)
DependentSizedExtVectorType(*this, vecType, QualType(Canon, 0),
SizeExpr, AttrLoc);
} else {
QualType CanonVecTy = getCanonicalType(vecType);
if (CanonVecTy == vecType) {
New = new (*this, TypeAlignment)
DependentSizedExtVectorType(*this, vecType, QualType(), SizeExpr,
AttrLoc);
DependentSizedExtVectorType *CanonCheck
= DependentSizedExtVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CanonCheck && "Dependent-sized ext_vector canonical type broken");
(void)CanonCheck;
DependentSizedExtVectorTypes.InsertNode(New, InsertPos);
} else {
QualType CanonExtTy = getDependentSizedExtVectorType(CanonVecTy, SizeExpr,
SourceLocation());
New = new (*this, TypeAlignment) DependentSizedExtVectorType(
*this, vecType, CanonExtTy, SizeExpr, AttrLoc);
}
}
Types.push_back(New);
return QualType(New, 0);
}
QualType ASTContext::getConstantMatrixType(QualType ElementTy, unsigned NumRows,
unsigned NumColumns) const {
llvm::FoldingSetNodeID ID;
ConstantMatrixType::Profile(ID, ElementTy, NumRows, NumColumns,
Type::ConstantMatrix);
assert(MatrixType::isValidElementType(ElementTy) &&
"need a valid element type");
assert(ConstantMatrixType::isDimensionValid(NumRows) &&
ConstantMatrixType::isDimensionValid(NumColumns) &&
"need valid matrix dimensions");
void *InsertPos = nullptr;
if (ConstantMatrixType *MTP = MatrixTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(MTP, 0);
QualType Canonical;
if (!ElementTy.isCanonical()) {
Canonical =
getConstantMatrixType(getCanonicalType(ElementTy), NumRows, NumColumns);
ConstantMatrixType *NewIP = MatrixTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Matrix type shouldn't already exist in the map");
(void)NewIP;
}
auto *New = new (*this, TypeAlignment)
ConstantMatrixType(ElementTy, NumRows, NumColumns, Canonical);
MatrixTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
QualType ASTContext::getDependentSizedMatrixType(QualType ElementTy,
Expr *RowExpr,
Expr *ColumnExpr,
SourceLocation AttrLoc) const {
QualType CanonElementTy = getCanonicalType(ElementTy);
llvm::FoldingSetNodeID ID;
DependentSizedMatrixType::Profile(ID, *this, CanonElementTy, RowExpr,
ColumnExpr);
void *InsertPos = nullptr;
DependentSizedMatrixType *Canon =
DependentSizedMatrixTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Canon) {
Canon = new (*this, TypeAlignment) DependentSizedMatrixType(
*this, CanonElementTy, QualType(), RowExpr, ColumnExpr, AttrLoc);
#ifndef NDEBUG
DependentSizedMatrixType *CanonCheck =
DependentSizedMatrixTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CanonCheck && "Dependent-sized matrix canonical type broken");
#endif
DependentSizedMatrixTypes.InsertNode(Canon, InsertPos);
Types.push_back(Canon);
}
// Already have a canonical version of the matrix type
//
// If it exactly matches the requested type, use it directly.
if (Canon->getElementType() == ElementTy && Canon->getRowExpr() == RowExpr &&
Canon->getColumnExpr() == ColumnExpr)
return QualType(Canon, 0);
// Use Canon as the canonical type for the newly-built type.
DependentSizedMatrixType *New = new (*this, TypeAlignment)
DependentSizedMatrixType(*this, ElementTy, QualType(Canon, 0), RowExpr,
ColumnExpr, AttrLoc);
Types.push_back(New);
return QualType(New, 0);
}
QualType ASTContext::getDependentAddressSpaceType(QualType PointeeType,
Expr *AddrSpaceExpr,
SourceLocation AttrLoc) const {
assert(AddrSpaceExpr->isInstantiationDependent());
QualType canonPointeeType = getCanonicalType(PointeeType);
void *insertPos = nullptr;
llvm::FoldingSetNodeID ID;
DependentAddressSpaceType::Profile(ID, *this, canonPointeeType,
AddrSpaceExpr);
DependentAddressSpaceType *canonTy =
DependentAddressSpaceTypes.FindNodeOrInsertPos(ID, insertPos);
if (!canonTy) {
canonTy = new (*this, TypeAlignment)
DependentAddressSpaceType(*this, canonPointeeType,
QualType(), AddrSpaceExpr, AttrLoc);
DependentAddressSpaceTypes.InsertNode(canonTy, insertPos);
Types.push_back(canonTy);
}
if (canonPointeeType == PointeeType &&
canonTy->getAddrSpaceExpr() == AddrSpaceExpr)
return QualType(canonTy, 0);
auto *sugaredType
= new (*this, TypeAlignment)
DependentAddressSpaceType(*this, PointeeType, QualType(canonTy, 0),
AddrSpaceExpr, AttrLoc);
Types.push_back(sugaredType);
return QualType(sugaredType, 0);
}
/// Determine whether \p T is canonical as the result type of a function.
static bool isCanonicalResultType(QualType T) {
return T.isCanonical() &&
(T.getObjCLifetime() == Qualifiers::OCL_None ||
T.getObjCLifetime() == Qualifiers::OCL_ExplicitNone);
}
/// getFunctionNoProtoType - Return a K&R style C function type like 'int()'.
QualType
ASTContext::getFunctionNoProtoType(QualType ResultTy,
const FunctionType::ExtInfo &Info) const {
// Unique functions, to guarantee there is only one function of a particular
// structure.
llvm::FoldingSetNodeID ID;
FunctionNoProtoType::Profile(ID, ResultTy, Info);
void *InsertPos = nullptr;
if (FunctionNoProtoType *FT =
FunctionNoProtoTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(FT, 0);
QualType Canonical;
if (!isCanonicalResultType(ResultTy)) {
Canonical =
getFunctionNoProtoType(getCanonicalFunctionResultType(ResultTy), Info);
// Get the new insert position for the node we care about.
FunctionNoProtoType *NewIP =
FunctionNoProtoTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment)
FunctionNoProtoType(ResultTy, Canonical, Info);
Types.push_back(New);
FunctionNoProtoTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
CanQualType
ASTContext::getCanonicalFunctionResultType(QualType ResultType) const {
CanQualType CanResultType = getCanonicalType(ResultType);
// Canonical result types do not have ARC lifetime qualifiers.
if (CanResultType.getQualifiers().hasObjCLifetime()) {
Qualifiers Qs = CanResultType.getQualifiers();
Qs.removeObjCLifetime();
return CanQualType::CreateUnsafe(
getQualifiedType(CanResultType.getUnqualifiedType(), Qs));
}
return CanResultType;
}
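// Sketch: under ARC, a function or block whose result is written '__strong id'
// canonicalizes its result to plain 'id' here, so spellings that differ only
// in the result's ObjC lifetime qualifier map to one canonical function type.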
static bool isCanonicalExceptionSpecification(
const FunctionProtoType::ExceptionSpecInfo &ESI, bool NoexceptInType) {
if (ESI.Type == EST_None)
return true;
if (!NoexceptInType)
return false;
// C++17 onwards: exception specification is part of the type, as a simple
// boolean "can this function type throw".
if (ESI.Type == EST_BasicNoexcept)
return true;
// A noexcept(expr) specification is (possibly) canonical if expr is
// value-dependent.
if (ESI.Type == EST_DependentNoexcept)
return true;
// A dynamic exception specification is canonical if it only contains pack
// expansions (so we can't tell whether it's non-throwing) and all its
// contained types are canonical.
if (ESI.Type == EST_Dynamic) {
bool AnyPackExpansions = false;
for (QualType ET : ESI.Exceptions) {
if (!ET.isCanonical())
return false;
if (ET->getAs<PackExpansionType>())
AnyPackExpansions = true;
}
return AnyPackExpansions;
}
return false;
}
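// Sketch of the C++17 behaviour encoded here and in getFunctionTypeInternal
// below: 'noexcept', 'noexcept(true)', 'throw()' and the NoThrow extension all
// canonicalize to the basic noexcept form; 'noexcept(false)' and 'throw(A)'
// canonicalize to a potentially-throwing type; a dependent 'noexcept(expr)' is
// already canonical; and 'throw(Ts...)' stays a dynamic specification only
// while the pack keeps the thrown set unknown.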
QualType ASTContext::getFunctionTypeInternal(
QualType ResultTy, ArrayRef<QualType> ArgArray,
const FunctionProtoType::ExtProtoInfo &EPI, bool OnlyWantCanonical) const {
size_t NumArgs = ArgArray.size();
// Unique functions, to guarantee there is only one function of a particular
// structure.
llvm::FoldingSetNodeID ID;
FunctionProtoType::Profile(ID, ResultTy, ArgArray.begin(), NumArgs, EPI,
*this, true);
QualType Canonical;
bool Unique = false;
void *InsertPos = nullptr;
if (FunctionProtoType *FPT =
FunctionProtoTypes.FindNodeOrInsertPos(ID, InsertPos)) {
QualType Existing = QualType(FPT, 0);
// If we find a pre-existing equivalent FunctionProtoType, we can just reuse
// it so long as our exception specification doesn't contain a dependent
// noexcept expression, or we're just looking for a canonical type.
// Otherwise, we're going to need to create a type
// sugar node to hold the concrete expression.
if (OnlyWantCanonical || !isComputedNoexcept(EPI.ExceptionSpec.Type) ||
EPI.ExceptionSpec.NoexceptExpr == FPT->getNoexceptExpr())
return Existing;
// We need a new type sugar node for this one, to hold the new noexcept
// expression. We do no canonicalization here, but that's OK since we don't
// expect to see the same noexcept expression much more than once.
Canonical = getCanonicalType(Existing);
Unique = true;
}
bool NoexceptInType = getLangOpts().CPlusPlus17;
bool IsCanonicalExceptionSpec =
isCanonicalExceptionSpecification(EPI.ExceptionSpec, NoexceptInType);
// Determine whether the type being created is already canonical or not.
bool isCanonical = !Unique && IsCanonicalExceptionSpec &&
isCanonicalResultType(ResultTy) && !EPI.HasTrailingReturn;
for (unsigned i = 0; i != NumArgs && isCanonical; ++i)
if (!ArgArray[i].isCanonicalAsParam())
isCanonical = false;
if (OnlyWantCanonical)
assert(isCanonical &&
"given non-canonical parameters constructing canonical type");
// If this type isn't canonical, get the canonical version of it if we don't
// already have it. The exception spec is only partially part of the
// canonical type, and only in C++17 onwards.
if (!isCanonical && Canonical.isNull()) {
SmallVector<QualType, 16> CanonicalArgs;
CanonicalArgs.reserve(NumArgs);
for (unsigned i = 0; i != NumArgs; ++i)
CanonicalArgs.push_back(getCanonicalParamType(ArgArray[i]));
llvm::SmallVector<QualType, 8> ExceptionTypeStorage;
FunctionProtoType::ExtProtoInfo CanonicalEPI = EPI;
CanonicalEPI.HasTrailingReturn = false;
if (IsCanonicalExceptionSpec) {
// Exception spec is already OK.
} else if (NoexceptInType) {
switch (EPI.ExceptionSpec.Type) {
case EST_Unparsed: case EST_Unevaluated: case EST_Uninstantiated:
// We don't know yet. It shouldn't matter what we pick here; no-one
// should ever look at this.
LLVM_FALLTHROUGH;
case EST_None: case EST_MSAny: case EST_NoexceptFalse:
CanonicalEPI.ExceptionSpec.Type = EST_None;
break;
// A dynamic exception specification is almost always "not noexcept",
// with the exception that a pack expansion might expand to no types.
case EST_Dynamic: {
bool AnyPacks = false;
for (QualType ET : EPI.ExceptionSpec.Exceptions) {
if (ET->getAs<PackExpansionType>())
AnyPacks = true;
ExceptionTypeStorage.push_back(getCanonicalType(ET));
}
if (!AnyPacks)
CanonicalEPI.ExceptionSpec.Type = EST_None;
else {
CanonicalEPI.ExceptionSpec.Type = EST_Dynamic;
CanonicalEPI.ExceptionSpec.Exceptions = ExceptionTypeStorage;
}
break;
}
case EST_DynamicNone:
case EST_BasicNoexcept:
case EST_NoexceptTrue:
case EST_NoThrow:
CanonicalEPI.ExceptionSpec.Type = EST_BasicNoexcept;
break;
case EST_DependentNoexcept:
llvm_unreachable("dependent noexcept is already canonical");
}
} else {
CanonicalEPI.ExceptionSpec = FunctionProtoType::ExceptionSpecInfo();
}
// Adjust the canonical function result type.
CanQualType CanResultTy = getCanonicalFunctionResultType(ResultTy);
Canonical =
getFunctionTypeInternal(CanResultTy, CanonicalArgs, CanonicalEPI, true);
// Get the new insert position for the node we care about.
FunctionProtoType *NewIP =
FunctionProtoTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
// Compute the needed size to hold this FunctionProtoType and the
// various trailing objects.
auto ESH = FunctionProtoType::getExceptionSpecSize(
EPI.ExceptionSpec.Type, EPI.ExceptionSpec.Exceptions.size());
size_t Size = FunctionProtoType::totalSizeToAlloc<
QualType, SourceLocation, FunctionType::FunctionTypeExtraBitfields,
FunctionType::ExceptionType, Expr *, FunctionDecl *,
FunctionProtoType::ExtParameterInfo, Qualifiers>(
NumArgs, EPI.Variadic,
FunctionProtoType::hasExtraBitfields(EPI.ExceptionSpec.Type),
ESH.NumExceptionType, ESH.NumExprPtr, ESH.NumFunctionDeclPtr,
EPI.ExtParameterInfos ? NumArgs : 0,
EPI.TypeQuals.hasNonFastQualifiers() ? 1 : 0);
auto *FTP = (FunctionProtoType *)Allocate(Size, TypeAlignment);
FunctionProtoType::ExtProtoInfo newEPI = EPI;
new (FTP) FunctionProtoType(ResultTy, ArgArray, Canonical, newEPI);
Types.push_back(FTP);
if (!Unique)
FunctionProtoTypes.InsertNode(FTP, InsertPos);
return QualType(FTP, 0);
}
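// Sketch: two prototypes that differ only in the concrete expression of a
// dependent 'noexcept(expr)' share one canonical FunctionProtoType, but each
// keeps its own sugar node, because the early lookup above only reuses an
// existing node when the noexcept expression matches (or when the caller asked
// for the canonical type only).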
QualType ASTContext::getPipeType(QualType T, bool ReadOnly) const {
llvm::FoldingSetNodeID ID;
PipeType::Profile(ID, T, ReadOnly);
void *InsertPos = nullptr;
if (PipeType *PT = PipeTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the pipe element type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getPipeType(getCanonicalType(T), ReadOnly);
// Get the new insert position for the node we care about.
PipeType *NewIP = PipeTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!");
(void)NewIP;
}
auto *New = new (*this, TypeAlignment) PipeType(T, Canonical, ReadOnly);
Types.push_back(New);
PipeTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
QualType ASTContext::adjustStringLiteralBaseType(QualType Ty) const {
// OpenCL v1.1 s6.5.3: a string literal is in the constant address space.
return LangOpts.OpenCL ? getAddrSpaceQualType(Ty, LangAS::opencl_constant)
: Ty;
}
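// Sketch: for OpenCL, the element type of a string literal is placed in the
// '__constant' address space, so "abc" ends up typed roughly as an array of
// '__constant char'; for other languages the type passes through unchanged.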
QualType ASTContext::getReadPipeType(QualType T) const {
return getPipeType(T, true);
}
QualType ASTContext::getWritePipeType(QualType T) const {
return getPipeType(T, false);
}
QualType ASTContext::getExtIntType(bool IsUnsigned, unsigned NumBits) const {
llvm::FoldingSetNodeID ID;
ExtIntType::Profile(ID, IsUnsigned, NumBits);
void *InsertPos = nullptr;
if (ExtIntType *EIT = ExtIntTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(EIT, 0);
auto *New = new (*this, TypeAlignment) ExtIntType(IsUnsigned, NumBits);
ExtIntTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
QualType ASTContext::getDependentExtIntType(bool IsUnsigned,
Expr *NumBitsExpr) const {
assert(NumBitsExpr->isInstantiationDependent() && "Only good for dependent");
llvm::FoldingSetNodeID ID;
DependentExtIntType::Profile(ID, *this, IsUnsigned, NumBitsExpr);
void *InsertPos = nullptr;
if (DependentExtIntType *Existing =
DependentExtIntTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(Existing, 0);
auto *New = new (*this, TypeAlignment)
DependentExtIntType(*this, IsUnsigned, NumBitsExpr);
DependentExtIntTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
#ifndef NDEBUG
static bool NeedsInjectedClassNameType(const RecordDecl *D) {
if (!isa<CXXRecordDecl>(D)) return false;
const auto *RD = cast<CXXRecordDecl>(D);
if (isa<ClassTemplatePartialSpecializationDecl>(RD))
return true;
if (RD->getDescribedClassTemplate() &&
!isa<ClassTemplateSpecializationDecl>(RD))
return true;
return false;
}
#endif
/// getInjectedClassNameType - Return the unique reference to the
/// injected class name type for the specified templated declaration.
QualType ASTContext::getInjectedClassNameType(CXXRecordDecl *Decl,
QualType TST) const {
assert(NeedsInjectedClassNameType(Decl));
if (Decl->TypeForDecl) {
assert(isa<InjectedClassNameType>(Decl->TypeForDecl));
} else if (CXXRecordDecl *PrevDecl = Decl->getPreviousDecl()) {
assert(PrevDecl->TypeForDecl && "previous declaration has no type");
Decl->TypeForDecl = PrevDecl->TypeForDecl;
assert(isa<InjectedClassNameType>(Decl->TypeForDecl));
} else {
Type *newType =
new (*this, TypeAlignment) InjectedClassNameType(Decl, TST);
Decl->TypeForDecl = newType;
Types.push_back(newType);
}
return QualType(Decl->TypeForDecl, 0);
}
/// getTypeDeclType - Return the unique reference to the type for the
/// specified type declaration.
QualType ASTContext::getTypeDeclTypeSlow(const TypeDecl *Decl) const {
assert(Decl && "Passed null for Decl param");
assert(!Decl->TypeForDecl && "TypeForDecl present in slow case");
if (const auto *Typedef = dyn_cast<TypedefNameDecl>(Decl))
return getTypedefType(Typedef);
assert(!isa<TemplateTypeParmDecl>(Decl) &&
"Template type parameter types are always available.");
if (const auto *Record = dyn_cast<RecordDecl>(Decl)) {
assert(Record->isFirstDecl() && "struct/union has previous declaration");
assert(!NeedsInjectedClassNameType(Record));
return getRecordType(Record);
} else if (const auto *Enum = dyn_cast<EnumDecl>(Decl)) {
assert(Enum->isFirstDecl() && "enum has previous declaration");
return getEnumType(Enum);
} else if (const auto *Using = dyn_cast<UnresolvedUsingTypenameDecl>(Decl)) {
Type *newType = new (*this, TypeAlignment) UnresolvedUsingType(Using);
Decl->TypeForDecl = newType;
Types.push_back(newType);
} else
llvm_unreachable("TypeDecl without a type?");
return QualType(Decl->TypeForDecl, 0);
}
/// getTypedefType - Return the unique reference to the type for the
/// specified typedef name decl.
QualType ASTContext::getTypedefType(const TypedefNameDecl *Decl,
QualType Underlying) const {
if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0);
if (Underlying.isNull())
Underlying = Decl->getUnderlyingType();
QualType Canonical = getCanonicalType(Underlying);
auto *newType = new (*this, TypeAlignment)
TypedefType(Type::Typedef, Decl, Underlying, Canonical);
Decl->TypeForDecl = newType;
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getRecordType(const RecordDecl *Decl) const {
if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0);
if (const RecordDecl *PrevDecl = Decl->getPreviousDecl())
if (PrevDecl->TypeForDecl)
return QualType(Decl->TypeForDecl = PrevDecl->TypeForDecl, 0);
auto *newType = new (*this, TypeAlignment) RecordType(Decl);
Decl->TypeForDecl = newType;
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getEnumType(const EnumDecl *Decl) const {
if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0);
if (const EnumDecl *PrevDecl = Decl->getPreviousDecl())
if (PrevDecl->TypeForDecl)
return QualType(Decl->TypeForDecl = PrevDecl->TypeForDecl, 0);
auto *newType = new (*this, TypeAlignment) EnumType(Decl);
Decl->TypeForDecl = newType;
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getAttributedType(attr::Kind attrKind,
QualType modifiedType,
QualType equivalentType) {
llvm::FoldingSetNodeID id;
AttributedType::Profile(id, attrKind, modifiedType, equivalentType);
void *insertPos = nullptr;
AttributedType *type = AttributedTypes.FindNodeOrInsertPos(id, insertPos);
if (type) return QualType(type, 0);
QualType canon = getCanonicalType(equivalentType);
type = new (*this, TypeAlignment)
AttributedType(canon, attrKind, modifiedType, equivalentType);
Types.push_back(type);
AttributedTypes.InsertNode(type, insertPos);
return QualType(type, 0);
}
/// Retrieve a substitution-result type.
QualType
ASTContext::getSubstTemplateTypeParmType(const TemplateTypeParmType *Parm,
QualType Replacement) const {
assert(Replacement.isCanonical()
&& "replacement types must always be canonical");
llvm::FoldingSetNodeID ID;
SubstTemplateTypeParmType::Profile(ID, Parm, Replacement);
void *InsertPos = nullptr;
SubstTemplateTypeParmType *SubstParm
= SubstTemplateTypeParmTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!SubstParm) {
SubstParm = new (*this, TypeAlignment)
SubstTemplateTypeParmType(Parm, Replacement);
Types.push_back(SubstParm);
SubstTemplateTypeParmTypes.InsertNode(SubstParm, InsertPos);
}
return QualType(SubstParm, 0);
}
/// Retrieve a substitution-result type for a template type parameter pack.
QualType ASTContext::getSubstTemplateTypeParmPackType(
const TemplateTypeParmType *Parm,
const TemplateArgument &ArgPack) {
#ifndef NDEBUG
for (const auto &P : ArgPack.pack_elements()) {
assert(P.getKind() == TemplateArgument::Type &&"Pack contains a non-type");
assert(P.getAsType().isCanonical() && "Pack contains non-canonical type");
}
#endif
llvm::FoldingSetNodeID ID;
SubstTemplateTypeParmPackType::Profile(ID, Parm, ArgPack);
void *InsertPos = nullptr;
if (SubstTemplateTypeParmPackType *SubstParm
= SubstTemplateTypeParmPackTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(SubstParm, 0);
QualType Canon;
if (!Parm->isCanonicalUnqualified()) {
Canon = getCanonicalType(QualType(Parm, 0));
Canon = getSubstTemplateTypeParmPackType(cast<TemplateTypeParmType>(Canon),
ArgPack);
SubstTemplateTypeParmPackTypes.FindNodeOrInsertPos(ID, InsertPos);
}
auto *SubstParm
= new (*this, TypeAlignment) SubstTemplateTypeParmPackType(Parm, Canon,
ArgPack);
Types.push_back(SubstParm);
SubstTemplateTypeParmPackTypes.InsertNode(SubstParm, InsertPos);
return QualType(SubstParm, 0);
}
/// Retrieve the template type parameter type for a template
/// parameter or parameter pack with the given depth, index, and (optionally)
/// name.
QualType ASTContext::getTemplateTypeParmType(unsigned Depth, unsigned Index,
bool ParameterPack,
TemplateTypeParmDecl *TTPDecl) const {
llvm::FoldingSetNodeID ID;
TemplateTypeParmType::Profile(ID, Depth, Index, ParameterPack, TTPDecl);
void *InsertPos = nullptr;
TemplateTypeParmType *TypeParm
= TemplateTypeParmTypes.FindNodeOrInsertPos(ID, InsertPos);
if (TypeParm)
return QualType(TypeParm, 0);
if (TTPDecl) {
QualType Canon = getTemplateTypeParmType(Depth, Index, ParameterPack);
TypeParm = new (*this, TypeAlignment) TemplateTypeParmType(TTPDecl, Canon);
TemplateTypeParmType *TypeCheck
= TemplateTypeParmTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!TypeCheck && "Template type parameter canonical type broken");
(void)TypeCheck;
} else
TypeParm = new (*this, TypeAlignment)
TemplateTypeParmType(Depth, Index, ParameterPack);
Types.push_back(TypeParm);
TemplateTypeParmTypes.InsertNode(TypeParm, InsertPos);
return QualType(TypeParm, 0);
}
TypeSourceInfo *
ASTContext::getTemplateSpecializationTypeInfo(TemplateName Name,
SourceLocation NameLoc,
const TemplateArgumentListInfo &Args,
QualType Underlying) const {
assert(!Name.getAsDependentTemplateName() &&
"No dependent template names here!");
QualType TST = getTemplateSpecializationType(Name, Args, Underlying);
TypeSourceInfo *DI = CreateTypeSourceInfo(TST);
TemplateSpecializationTypeLoc TL =
DI->getTypeLoc().castAs<TemplateSpecializationTypeLoc>();
TL.setTemplateKeywordLoc(SourceLocation());
TL.setTemplateNameLoc(NameLoc);
TL.setLAngleLoc(Args.getLAngleLoc());
TL.setRAngleLoc(Args.getRAngleLoc());
for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i)
TL.setArgLocInfo(i, Args[i].getLocInfo());
return DI;
}
QualType
ASTContext::getTemplateSpecializationType(TemplateName Template,
const TemplateArgumentListInfo &Args,
QualType Underlying) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
SmallVector<TemplateArgument, 4> ArgVec;
ArgVec.reserve(Args.size());
for (const TemplateArgumentLoc &Arg : Args.arguments())
ArgVec.push_back(Arg.getArgument());
return getTemplateSpecializationType(Template, ArgVec, Underlying);
}
#ifndef NDEBUG
static bool hasAnyPackExpansions(ArrayRef<TemplateArgument> Args) {
for (const TemplateArgument &Arg : Args)
if (Arg.isPackExpansion())
return true;
return false;
}
#endif
QualType
ASTContext::getTemplateSpecializationType(TemplateName Template,
ArrayRef<TemplateArgument> Args,
QualType Underlying) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
// Look through qualified template names.
if (QualifiedTemplateName *QTN = Template.getAsQualifiedTemplateName())
Template = TemplateName(QTN->getTemplateDecl());
bool IsTypeAlias =
Template.getAsTemplateDecl() &&
isa<TypeAliasTemplateDecl>(Template.getAsTemplateDecl());
QualType CanonType;
if (!Underlying.isNull())
CanonType = getCanonicalType(Underlying);
else {
// We can get here with an alias template when the specialization contains
// a pack expansion that does not match up with a parameter pack.
assert((!IsTypeAlias || hasAnyPackExpansions(Args)) &&
"Caller must compute aliased type");
IsTypeAlias = false;
CanonType = getCanonicalTemplateSpecializationType(Template, Args);
}
// Allocate the (non-canonical) template specialization type, but don't
// try to unique it: these types typically have location information that
// we don't unique and don't want to lose.
void *Mem = Allocate(sizeof(TemplateSpecializationType) +
sizeof(TemplateArgument) * Args.size() +
(IsTypeAlias? sizeof(QualType) : 0),
TypeAlignment);
auto *Spec
= new (Mem) TemplateSpecializationType(Template, Args, CanonType,
IsTypeAlias ? Underlying : QualType());
Types.push_back(Spec);
return QualType(Spec, 0);
}
QualType ASTContext::getCanonicalTemplateSpecializationType(
TemplateName Template, ArrayRef<TemplateArgument> Args) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
// Look through qualified template names.
if (QualifiedTemplateName *QTN = Template.getAsQualifiedTemplateName())
Template = TemplateName(QTN->getTemplateDecl());
// Build the canonical template specialization type.
TemplateName CanonTemplate = getCanonicalTemplateName(Template);
SmallVector<TemplateArgument, 4> CanonArgs;
unsigned NumArgs = Args.size();
CanonArgs.reserve(NumArgs);
for (const TemplateArgument &Arg : Args)
CanonArgs.push_back(getCanonicalTemplateArgument(Arg));
// Determine whether this canonical template specialization type already
// exists.
llvm::FoldingSetNodeID ID;
TemplateSpecializationType::Profile(ID, CanonTemplate,
CanonArgs, *this);
void *InsertPos = nullptr;
TemplateSpecializationType *Spec
= TemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Spec) {
// Allocate a new canonical template specialization type.
void *Mem = Allocate((sizeof(TemplateSpecializationType) +
sizeof(TemplateArgument) * NumArgs),
TypeAlignment);
Spec = new (Mem) TemplateSpecializationType(CanonTemplate,
CanonArgs,
QualType(), QualType());
Types.push_back(Spec);
TemplateSpecializationTypes.InsertNode(Spec, InsertPos);
}
assert(Spec->isDependentType() &&
"Non-dependent template-id type must have a canonical type");
return QualType(Spec, 0);
}
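// Sketch: within a template, two spellings of the same dependent
// specialization, e.g. 'X<typename T::type>' written directly and through a
// typedef for the argument, canonicalize here to one uniqued node keyed on the
// canonical template name and canonical arguments; non-dependent template-ids
// must already carry a canonical type, as the preceding assert notes.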
QualType ASTContext::getElaboratedType(ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
QualType NamedType,
TagDecl *OwnedTagDecl) const {
llvm::FoldingSetNodeID ID;
ElaboratedType::Profile(ID, Keyword, NNS, NamedType, OwnedTagDecl);
void *InsertPos = nullptr;
ElaboratedType *T = ElaboratedTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
QualType Canon = NamedType;
if (!Canon.isCanonical()) {
Canon = getCanonicalType(NamedType);
ElaboratedType *CheckT = ElaboratedTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckT && "Elaborated canonical type broken");
(void)CheckT;
}
void *Mem = Allocate(ElaboratedType::totalSizeToAlloc<TagDecl *>(!!OwnedTagDecl),
TypeAlignment);
T = new (Mem) ElaboratedType(Keyword, NNS, NamedType, Canon, OwnedTagDecl);
Types.push_back(T);
ElaboratedTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
QualType
ASTContext::getParenType(QualType InnerType) const {
llvm::FoldingSetNodeID ID;
ParenType::Profile(ID, InnerType);
void *InsertPos = nullptr;
ParenType *T = ParenTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
QualType Canon = InnerType;
if (!Canon.isCanonical()) {
Canon = getCanonicalType(InnerType);
ParenType *CheckT = ParenTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckT && "Paren canonical type broken");
(void)CheckT;
}
T = new (*this, TypeAlignment) ParenType(InnerType, Canon);
Types.push_back(T);
ParenTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
QualType
ASTContext::getMacroQualifiedType(QualType UnderlyingTy,
const IdentifierInfo *MacroII) const {
QualType Canon = UnderlyingTy;
if (!Canon.isCanonical())
Canon = getCanonicalType(UnderlyingTy);
auto *newType = new (*this, TypeAlignment)
MacroQualifiedType(UnderlyingTy, Canon, MacroII);
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getDependentNameType(ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
const IdentifierInfo *Name,
QualType Canon) const {
if (Canon.isNull()) {
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
if (CanonNNS != NNS)
Canon = getDependentNameType(Keyword, CanonNNS, Name);
}
llvm::FoldingSetNodeID ID;
DependentNameType::Profile(ID, Keyword, NNS, Name);
void *InsertPos = nullptr;
DependentNameType *T
= DependentNameTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
T = new (*this, TypeAlignment) DependentNameType(Keyword, NNS, Name, Canon);
Types.push_back(T);
DependentNameTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
QualType
ASTContext::getDependentTemplateSpecializationType(
ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
const IdentifierInfo *Name,
const TemplateArgumentListInfo &Args) const {
// TODO: avoid this copy
SmallVector<TemplateArgument, 16> ArgCopy;
for (unsigned I = 0, E = Args.size(); I != E; ++I)
ArgCopy.push_back(Args[I].getArgument());
return getDependentTemplateSpecializationType(Keyword, NNS, Name, ArgCopy);
}
QualType
ASTContext::getDependentTemplateSpecializationType(
ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
const IdentifierInfo *Name,
ArrayRef<TemplateArgument> Args) const {
assert((!NNS || NNS->isDependent()) &&
"nested-name-specifier must be dependent");
llvm::FoldingSetNodeID ID;
DependentTemplateSpecializationType::Profile(ID, *this, Keyword, NNS,
Name, Args);
void *InsertPos = nullptr;
DependentTemplateSpecializationType *T
= DependentTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
ElaboratedTypeKeyword CanonKeyword = Keyword;
if (Keyword == ETK_None) CanonKeyword = ETK_Typename;
bool AnyNonCanonArgs = false;
unsigned NumArgs = Args.size();
SmallVector<TemplateArgument, 16> CanonArgs(NumArgs);
for (unsigned I = 0; I != NumArgs; ++I) {
CanonArgs[I] = getCanonicalTemplateArgument(Args[I]);
if (!CanonArgs[I].structurallyEquals(Args[I]))
AnyNonCanonArgs = true;
}
QualType Canon;
if (AnyNonCanonArgs || CanonNNS != NNS || CanonKeyword != Keyword) {
Canon = getDependentTemplateSpecializationType(CanonKeyword, CanonNNS,
Name,
CanonArgs);
// Find the insert position again.
DependentTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos);
}
void *Mem = Allocate((sizeof(DependentTemplateSpecializationType) +
sizeof(TemplateArgument) * NumArgs),
TypeAlignment);
T = new (Mem) DependentTemplateSpecializationType(Keyword, NNS,
Name, Args, Canon);
Types.push_back(T);
DependentTemplateSpecializationTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) {
TemplateArgument Arg;
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(Param)) {
QualType ArgType = getTypeDeclType(TTP);
if (TTP->isParameterPack())
ArgType = getPackExpansionType(ArgType, None);
Arg = TemplateArgument(ArgType);
} else if (auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
QualType T =
NTTP->getType().getNonPackExpansionType().getNonLValueExprType(*this);
// For class NTTPs, ensure we include the 'const' so the type matches that
// of a real template argument.
// FIXME: It would be more faithful to model this as something like an
// lvalue-to-rvalue conversion applied to a const-qualified lvalue.
if (T->isRecordType())
T.addConst();
Expr *E = new (*this) DeclRefExpr(
*this, NTTP, /*enclosing*/ false, T,
Expr::getValueKindForType(NTTP->getType()), NTTP->getLocation());
if (NTTP->isParameterPack())
E = new (*this) PackExpansionExpr(DependentTy, E, NTTP->getLocation(),
None);
Arg = TemplateArgument(E);
} else {
auto *TTP = cast<TemplateTemplateParmDecl>(Param);
if (TTP->isParameterPack())
Arg = TemplateArgument(TemplateName(TTP), Optional<unsigned>());
else
Arg = TemplateArgument(TemplateName(TTP));
}
if (Param->isTemplateParameterPack())
Arg = TemplateArgument::CreatePackCopy(*this, Arg);
return Arg;
}
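// Sketch: for 'template <typename T, int N, template <class> class TT>', the
// injected arguments are the parameter type T itself, a DeclRefExpr naming N,
// and the template name TT; parameter packs are additionally wrapped so the
// injected argument expands over the whole pack.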
void
ASTContext::getInjectedTemplateArgs(const TemplateParameterList *Params,
SmallVectorImpl<TemplateArgument> &Args) {
Args.reserve(Args.size() + Params->size());
for (NamedDecl *Param : *Params)
Args.push_back(getInjectedTemplateArg(Param));
}
QualType ASTContext::getPackExpansionType(QualType Pattern,
Optional<unsigned> NumExpansions,
bool ExpectPackInType) {
assert((!ExpectPackInType || Pattern->containsUnexpandedParameterPack()) &&
"Pack expansions must expand one or more parameter packs");
llvm::FoldingSetNodeID ID;
PackExpansionType::Profile(ID, Pattern, NumExpansions);
void *InsertPos = nullptr;
PackExpansionType *T = PackExpansionTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
QualType Canon;
if (!Pattern.isCanonical()) {
Canon = getPackExpansionType(getCanonicalType(Pattern), NumExpansions,
/*ExpectPackInType=*/false);
// Find the insert position again, in case we inserted an element into
// PackExpansionTypes and invalidated our insert position.
PackExpansionTypes.FindNodeOrInsertPos(ID, InsertPos);
}
T = new (*this, TypeAlignment)
PackExpansionType(Pattern, Canon, NumExpansions);
Types.push_back(T);
PackExpansionTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
/// CmpProtocolNames - Comparison predicate for sorting protocols
/// alphabetically.
static int CmpProtocolNames(ObjCProtocolDecl *const *LHS,
ObjCProtocolDecl *const *RHS) {
return DeclarationName::compare((*LHS)->getDeclName(), (*RHS)->getDeclName());
}
static bool areSortedAndUniqued(ArrayRef<ObjCProtocolDecl *> Protocols) {
if (Protocols.empty()) return true;
if (Protocols[0]->getCanonicalDecl() != Protocols[0])
return false;
for (unsigned i = 1; i != Protocols.size(); ++i)
if (CmpProtocolNames(&Protocols[i - 1], &Protocols[i]) >= 0 ||
Protocols[i]->getCanonicalDecl() != Protocols[i])
return false;
return true;
}
static void
SortAndUniqueProtocols(SmallVectorImpl<ObjCProtocolDecl *> &Protocols) {
// Sort protocols, keyed by name.
llvm::array_pod_sort(Protocols.begin(), Protocols.end(), CmpProtocolNames);
// Canonicalize.
for (ObjCProtocolDecl *&P : Protocols)
P = P->getCanonicalDecl();
// Remove duplicates.
auto ProtocolsEnd = std::unique(Protocols.begin(), Protocols.end());
Protocols.erase(ProtocolsEnd, Protocols.end());
}
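// Sketch: a written qualifier list such as <NSCopying, NSCoding, NSCopying>
// canonicalizes through the helper above to the sorted, deduplicated list
// <NSCoding, NSCopying>, with each entry replaced by its canonical
// declaration.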
QualType ASTContext::getObjCObjectType(QualType BaseType,
ObjCProtocolDecl * const *Protocols,
unsigned NumProtocols) const {
return getObjCObjectType(BaseType, {},
llvm::makeArrayRef(Protocols, NumProtocols),
/*isKindOf=*/false);
}
QualType ASTContext::getObjCObjectType(
QualType baseType,
ArrayRef<QualType> typeArgs,
ArrayRef<ObjCProtocolDecl *> protocols,
bool isKindOf) const {
// If the base type is an interface and there aren't any protocols or
// type arguments to add, then the interface type will do just fine.
if (typeArgs.empty() && protocols.empty() && !isKindOf &&
isa<ObjCInterfaceType>(baseType))
return baseType;
// Look in the folding set for an existing type.
llvm::FoldingSetNodeID ID;
ObjCObjectTypeImpl::Profile(ID, baseType, typeArgs, protocols, isKindOf);
void *InsertPos = nullptr;
if (ObjCObjectType *QT = ObjCObjectTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(QT, 0);
// Determine the type arguments to be used for canonicalization,
// which may be explicitly specified here or written on the base
// type.
ArrayRef<QualType> effectiveTypeArgs = typeArgs;
if (effectiveTypeArgs.empty()) {
if (const auto *baseObject = baseType->getAs<ObjCObjectType>())
effectiveTypeArgs = baseObject->getTypeArgs();
}
// Build the canonical type, which has the canonical base type and a
// sorted-and-uniqued list of protocols and the type arguments
// canonicalized.
QualType canonical;
bool typeArgsAreCanonical = std::all_of(effectiveTypeArgs.begin(),
effectiveTypeArgs.end(),
[&](QualType type) {
return type.isCanonical();
});
bool protocolsSorted = areSortedAndUniqued(protocols);
if (!typeArgsAreCanonical || !protocolsSorted || !baseType.isCanonical()) {
// Determine the canonical type arguments.
ArrayRef<QualType> canonTypeArgs;
SmallVector<QualType, 4> canonTypeArgsVec;
if (!typeArgsAreCanonical) {
canonTypeArgsVec.reserve(effectiveTypeArgs.size());
for (auto typeArg : effectiveTypeArgs)
canonTypeArgsVec.push_back(getCanonicalType(typeArg));
canonTypeArgs = canonTypeArgsVec;
} else {
canonTypeArgs = effectiveTypeArgs;
}
ArrayRef<ObjCProtocolDecl *> canonProtocols;
SmallVector<ObjCProtocolDecl*, 8> canonProtocolsVec;
if (!protocolsSorted) {
canonProtocolsVec.append(protocols.begin(), protocols.end());
SortAndUniqueProtocols(canonProtocolsVec);
canonProtocols = canonProtocolsVec;
} else {
canonProtocols = protocols;
}
canonical = getObjCObjectType(getCanonicalType(baseType), canonTypeArgs,
canonProtocols, isKindOf);
// Regenerate InsertPos.
ObjCObjectTypes.FindNodeOrInsertPos(ID, InsertPos);
}
unsigned size = sizeof(ObjCObjectTypeImpl);
size += typeArgs.size() * sizeof(QualType);
size += protocols.size() * sizeof(ObjCProtocolDecl *);
void *mem = Allocate(size, TypeAlignment);
auto *T =
new (mem) ObjCObjectTypeImpl(canonical, baseType, typeArgs, protocols,
isKindOf);
Types.push_back(T);
ObjCObjectTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
/// Apply Objective-C protocol qualifiers to the given type.
/// If this is for the canonical type of a type parameter, we can apply
/// protocol qualifiers on the ObjCObjectPointerType.
QualType
ASTContext::applyObjCProtocolQualifiers(QualType type,
ArrayRef<ObjCProtocolDecl *> protocols, bool &hasError,
bool allowOnPointerType) const {
hasError = false;
if (const auto *objT = dyn_cast<ObjCTypeParamType>(type.getTypePtr())) {
return getObjCTypeParamType(objT->getDecl(), protocols);
}
// Apply protocol qualifiers to ObjCObjectPointerType.
if (allowOnPointerType) {
if (const auto *objPtr =
dyn_cast<ObjCObjectPointerType>(type.getTypePtr())) {
const ObjCObjectType *objT = objPtr->getObjectType();
// Merge protocol lists and construct ObjCObjectType.
SmallVector<ObjCProtocolDecl*, 8> protocolsVec;
protocolsVec.append(objT->qual_begin(),
objT->qual_end());
protocolsVec.append(protocols.begin(), protocols.end());
ArrayRef<ObjCProtocolDecl *> protocols = protocolsVec;
type = getObjCObjectType(
objT->getBaseType(),
objT->getTypeArgsAsWritten(),
protocols,
objT->isKindOfTypeAsWritten());
return getObjCObjectPointerType(type);
}
}
// Apply protocol qualifiers to ObjCObjectType.
if (const auto *objT = dyn_cast<ObjCObjectType>(type.getTypePtr())){
// FIXME: Check for protocols to which the class type is already
// known to conform.
return getObjCObjectType(objT->getBaseType(),
objT->getTypeArgsAsWritten(),
protocols,
objT->isKindOfTypeAsWritten());
}
// If the canonical type is ObjCObjectType, ...
if (type->isObjCObjectType()) {
// Silently overwrite any existing protocol qualifiers.
// TODO: determine whether that's the right thing to do.
// FIXME: Check for protocols to which the class type is already
// known to conform.
return getObjCObjectType(type, {}, protocols, false);
}
// id<protocol-list>
if (type->isObjCIdType()) {
const auto *objPtr = type->castAs<ObjCObjectPointerType>();
type = getObjCObjectType(ObjCBuiltinIdTy, {}, protocols,
objPtr->isKindOfType());
return getObjCObjectPointerType(type);
}
// Class<protocol-list>
if (type->isObjCClassType()) {
const auto *objPtr = type->castAs<ObjCObjectPointerType>();
type = getObjCObjectType(ObjCBuiltinClassTy, {}, protocols,
objPtr->isKindOfType());
return getObjCObjectPointerType(type);
}
hasError = true;
return type;
}
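// Example (illustrative): applying the single protocol NSCopying to the
// builtin type 'id' yields the pointer type 'id<NSCopying>', while applying
// it to the interface type 'NSObject' yields the object type
// 'NSObject<NSCopying>'.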
QualType
ASTContext::getObjCTypeParamType(const ObjCTypeParamDecl *Decl,
ArrayRef<ObjCProtocolDecl *> protocols) const {
// Look in the folding set for an existing type.
llvm::FoldingSetNodeID ID;
ObjCTypeParamType::Profile(ID, Decl, Decl->getUnderlyingType(), protocols);
void *InsertPos = nullptr;
if (ObjCTypeParamType *TypeParam =
ObjCTypeParamTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(TypeParam, 0);
// We canonicalize to the underlying type.
QualType Canonical = getCanonicalType(Decl->getUnderlyingType());
if (!protocols.empty()) {
// Apply the protocol qualifiers.
bool hasError;
Canonical = getCanonicalType(applyObjCProtocolQualifiers(
Canonical, protocols, hasError, true /*allowOnPointerType*/));
assert(!hasError && "Error when applying protocol qualifiers to bound type");
}
unsigned size = sizeof(ObjCTypeParamType);
size += protocols.size() * sizeof(ObjCProtocolDecl *);
void *mem = Allocate(size, TypeAlignment);
auto *newType = new (mem) ObjCTypeParamType(Decl, Canonical, protocols);
Types.push_back(newType);
ObjCTypeParamTypes.InsertNode(newType, InsertPos);
return QualType(newType, 0);
}
void ASTContext::adjustObjCTypeParamBoundType(const ObjCTypeParamDecl *Orig,
ObjCTypeParamDecl *New) const {
New->setTypeSourceInfo(getTrivialTypeSourceInfo(Orig->getUnderlyingType()));
// Update TypeForDecl after updating TypeSourceInfo.
auto NewTypeParamTy = cast<ObjCTypeParamType>(New->getTypeForDecl());
SmallVector<ObjCProtocolDecl *, 8> protocols;
protocols.append(NewTypeParamTy->qual_begin(), NewTypeParamTy->qual_end());
QualType UpdatedTy = getObjCTypeParamType(New, protocols);
New->setTypeForDecl(UpdatedTy.getTypePtr());
}
/// ObjCObjectAdoptsQTypeProtocols - Checks that protocols in IC's
/// protocol list adopt all protocols in QT's qualified-id protocol
/// list.
bool ASTContext::ObjCObjectAdoptsQTypeProtocols(QualType QT,
ObjCInterfaceDecl *IC) {
if (!QT->isObjCQualifiedIdType())
return false;
if (const auto *OPT = QT->getAs<ObjCObjectPointerType>()) {
// If both the right and left sides have qualifiers.
for (auto *Proto : OPT->quals()) {
if (!IC->ClassImplementsProtocol(Proto, false))
return false;
}
return true;
}
return false;
}
/// QIdProtocolsAdoptObjCObjectProtocols - Checks that protocols in
/// QT's qualified-id protocol list adopt all protocols in IDecl's list
/// of protocols.
bool ASTContext::QIdProtocolsAdoptObjCObjectProtocols(QualType QT,
ObjCInterfaceDecl *IDecl) {
if (!QT->isObjCQualifiedIdType())
return false;
const auto *OPT = QT->getAs<ObjCObjectPointerType>();
if (!OPT)
return false;
if (!IDecl->hasDefinition())
return false;
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> InheritedProtocols;
CollectInheritedProtocols(IDecl, InheritedProtocols);
if (InheritedProtocols.empty())
return false;
// If every protocol in QT's id<plist> protocol list conforms to some
// protocol inherited by IDecl, then bridge casting is OK.
bool Conforms = false;
for (auto *Proto : OPT->quals()) {
Conforms = false;
for (auto *PI : InheritedProtocols) {
if (ProtocolCompatibleWithProtocol(Proto, PI)) {
Conforms = true;
break;
}
}
if (!Conforms)
break;
}
if (Conforms)
return true;
for (auto *PI : InheritedProtocols) {
// If both the right and left sides have qualifiers.
bool Adopts = false;
for (auto *Proto : OPT->quals()) {
// return 'true' if 'PI' is in the inheritance hierarchy of Proto
if ((Adopts = ProtocolCompatibleWithProtocol(PI, Proto)))
break;
}
if (!Adopts)
return false;
}
return true;
}
/// getObjCObjectPointerType - Return a ObjCObjectPointerType type for
/// the given object type.
QualType ASTContext::getObjCObjectPointerType(QualType ObjectT) const {
llvm::FoldingSetNodeID ID;
ObjCObjectPointerType::Profile(ID, ObjectT);
void *InsertPos = nullptr;
if (ObjCObjectPointerType *QT =
ObjCObjectPointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(QT, 0);
// Find the canonical object type.
QualType Canonical;
if (!ObjectT.isCanonical()) {
Canonical = getObjCObjectPointerType(getCanonicalType(ObjectT));
// Regenerate InsertPos.
ObjCObjectPointerTypes.FindNodeOrInsertPos(ID, InsertPos);
}
// No match.
void *Mem = Allocate(sizeof(ObjCObjectPointerType), TypeAlignment);
auto *QType =
new (Mem) ObjCObjectPointerType(Canonical, ObjectT);
Types.push_back(QType);
ObjCObjectPointerTypes.InsertNode(QType, InsertPos);
return QualType(QType, 0);
}
/// getObjCInterfaceType - Return the unique reference to the type for the
/// specified ObjC interface decl. The list of protocols is optional.
QualType ASTContext::getObjCInterfaceType(const ObjCInterfaceDecl *Decl,
ObjCInterfaceDecl *PrevDecl) const {
if (Decl->TypeForDecl)
return QualType(Decl->TypeForDecl, 0);
if (PrevDecl) {
assert(PrevDecl->TypeForDecl && "previous decl has no TypeForDecl");
Decl->TypeForDecl = PrevDecl->TypeForDecl;
return QualType(PrevDecl->TypeForDecl, 0);
}
// Prefer the definition, if there is one.
if (const ObjCInterfaceDecl *Def = Decl->getDefinition())
Decl = Def;
void *Mem = Allocate(sizeof(ObjCInterfaceType), TypeAlignment);
auto *T = new (Mem) ObjCInterfaceType(Decl);
Decl->TypeForDecl = T;
Types.push_back(T);
return QualType(T, 0);
}
/// getTypeOfExprType - Unlike many "get<Type>" functions, we can't unique
/// TypeOfExprType ASTs (since expressions are never shared). For example,
/// multiple declarations that refer to "typeof(x)" all contain different
/// DeclRefExprs. This doesn't affect the type checker, since it operates
/// on canonical types (which are always unique).
QualType ASTContext::getTypeOfExprType(Expr *tofExpr) const {
TypeOfExprType *toe;
if (tofExpr->isTypeDependent()) {
llvm::FoldingSetNodeID ID;
DependentTypeOfExprType::Profile(ID, *this, tofExpr);
void *InsertPos = nullptr;
DependentTypeOfExprType *Canon
= DependentTypeOfExprTypes.FindNodeOrInsertPos(ID, InsertPos);
if (Canon) {
// We already have a "canonical" version of an identical, dependent
// typeof(expr) type. Use that as our canonical type.
toe = new (*this, TypeAlignment) TypeOfExprType(tofExpr,
QualType((TypeOfExprType*)Canon, 0));
} else {
// Build a new, canonical typeof(expr) type.
Canon
= new (*this, TypeAlignment) DependentTypeOfExprType(*this, tofExpr);
DependentTypeOfExprTypes.InsertNode(Canon, InsertPos);
toe = Canon;
}
} else {
QualType Canonical = getCanonicalType(tofExpr->getType());
toe = new (*this, TypeAlignment) TypeOfExprType(tofExpr, Canonical);
}
Types.push_back(toe);
return QualType(toe, 0);
}
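// Example (illustrative): in GNU C, the two declarations
//   typeof(x) a;  typeof(x) b;
// produce two distinct TypeOfExprType nodes (each wrapping its own
// DeclRefExpr for 'x'), but both share the canonical type of 'x', so the
// type checker still treats 'a' and 'b' as having the same type.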
/// getTypeOfType - Unlike many "get<Type>" functions, we don't unique
/// TypeOfType nodes. The only motivation to unique these nodes would be
/// memory savings. Since typeof(t) is fairly uncommon, space shouldn't be
/// an issue. This doesn't affect the type checker, since it operates
/// on canonical types (which are always unique).
QualType ASTContext::getTypeOfType(QualType tofType) const {
QualType Canonical = getCanonicalType(tofType);
auto *tot = new (*this, TypeAlignment) TypeOfType(tofType, Canonical);
Types.push_back(tot);
return QualType(tot, 0);
}
/// Unlike many "get<Type>" functions, we don't unique DecltypeType
/// nodes. This would never be helpful, since each such type has its own
/// expression, and would not give a significant memory saving, since there
/// is an Expr tree under each such type.
QualType ASTContext::getDecltypeType(Expr *e, QualType UnderlyingType) const {
DecltypeType *dt;
// C++11 [temp.type]p2:
// If an expression e involves a template parameter, decltype(e) denotes a
// unique dependent type. Two such decltype-specifiers refer to the same
// type only if their expressions are equivalent (14.5.6.1).
if (e->isInstantiationDependent()) {
llvm::FoldingSetNodeID ID;
DependentDecltypeType::Profile(ID, *this, e);
void *InsertPos = nullptr;
DependentDecltypeType *Canon
= DependentDecltypeTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Canon) {
// Build a new, canonical decltype(expr) type.
Canon = new (*this, TypeAlignment) DependentDecltypeType(*this, e);
DependentDecltypeTypes.InsertNode(Canon, InsertPos);
}
dt = new (*this, TypeAlignment)
DecltypeType(e, UnderlyingType, QualType((DecltypeType *)Canon, 0));
} else {
dt = new (*this, TypeAlignment)
DecltypeType(e, UnderlyingType, getCanonicalType(UnderlyingType));
}
Types.push_back(dt);
return QualType(dt, 0);
}
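// Example (illustrative): inside 'template <typename T> void f(T t)', the
// type 'decltype(t + t)' is instantiation-dependent, so it is canonicalized
// through a uniqued DependentDecltypeType; two occurrences of equivalent
// expressions denote the same canonical type, per C++11 [temp.type]p2.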
/// getUnaryTransformationType - We don't unique these, since the memory
/// savings are minimal and these are rare.
QualType ASTContext::getUnaryTransformType(QualType BaseType,
QualType UnderlyingType,
UnaryTransformType::UTTKind Kind)
const {
UnaryTransformType *ut = nullptr;
if (BaseType->isDependentType()) {
// Look in the folding set for an existing type.
llvm::FoldingSetNodeID ID;
DependentUnaryTransformType::Profile(ID, getCanonicalType(BaseType), Kind);
void *InsertPos = nullptr;
DependentUnaryTransformType *Canon
= DependentUnaryTransformTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Canon) {
// Build a new, canonical __underlying_type(type) type.
Canon = new (*this, TypeAlignment)
DependentUnaryTransformType(*this, getCanonicalType(BaseType),
Kind);
DependentUnaryTransformTypes.InsertNode(Canon, InsertPos);
}
ut = new (*this, TypeAlignment) UnaryTransformType (BaseType,
QualType(), Kind,
QualType(Canon, 0));
} else {
QualType CanonType = getCanonicalType(UnderlyingType);
ut = new (*this, TypeAlignment) UnaryTransformType (BaseType,
UnderlyingType, Kind,
CanonType);
}
Types.push_back(ut);
return QualType(ut, 0);
}
/// getAutoType - Return the uniqued reference to the 'auto' type which has been
/// deduced to the given type, or to the canonical undeduced 'auto' type, or the
/// canonical deduced-but-dependent 'auto' type.
QualType
ASTContext::getAutoType(QualType DeducedType, AutoTypeKeyword Keyword,
bool IsDependent, bool IsPack,
ConceptDecl *TypeConstraintConcept,
ArrayRef<TemplateArgument> TypeConstraintArgs) const {
assert((!IsPack || IsDependent) && "only use IsPack for a dependent pack");
if (DeducedType.isNull() && Keyword == AutoTypeKeyword::Auto &&
!TypeConstraintConcept && !IsDependent)
return getAutoDeductType();
// Look in the folding set for an existing type.
void *InsertPos = nullptr;
llvm::FoldingSetNodeID ID;
AutoType::Profile(ID, *this, DeducedType, Keyword, IsDependent,
TypeConstraintConcept, TypeConstraintArgs);
if (AutoType *AT = AutoTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(AT, 0);
void *Mem = Allocate(sizeof(AutoType) +
sizeof(TemplateArgument) * TypeConstraintArgs.size(),
TypeAlignment);
auto *AT = new (Mem) AutoType(
DeducedType, Keyword,
(IsDependent ? TypeDependence::DependentInstantiation
: TypeDependence::None) |
(IsPack ? TypeDependence::UnexpandedPack : TypeDependence::None),
TypeConstraintConcept, TypeConstraintArgs);
Types.push_back(AT);
if (InsertPos)
AutoTypes.InsertNode(AT, InsertPos);
return QualType(AT, 0);
}
/// Return the uniqued reference to the deduced template specialization type
/// which has been deduced to the given type, or to the canonical undeduced
/// such type, or the canonical deduced-but-dependent such type.
QualType ASTContext::getDeducedTemplateSpecializationType(
TemplateName Template, QualType DeducedType, bool IsDependent) const {
// Look in the folding set for an existing type.
void *InsertPos = nullptr;
llvm::FoldingSetNodeID ID;
DeducedTemplateSpecializationType::Profile(ID, Template, DeducedType,
IsDependent);
if (DeducedTemplateSpecializationType *DTST =
DeducedTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(DTST, 0);
auto *DTST = new (*this, TypeAlignment)
DeducedTemplateSpecializationType(Template, DeducedType, IsDependent);
Types.push_back(DTST);
if (InsertPos)
DeducedTemplateSpecializationTypes.InsertNode(DTST, InsertPos);
return QualType(DTST, 0);
}
/// getAtomicType - Return the uniqued reference to the atomic type for
/// the given value type.
QualType ASTContext::getAtomicType(QualType T) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
AtomicType::Profile(ID, T);
void *InsertPos = nullptr;
if (AtomicType *AT = AtomicTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(AT, 0);
// If the atomic value type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getAtomicType(getCanonicalType(T));
// Get the new insert position for the node we care about.
AtomicType *NewIP = AtomicTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) AtomicType(T, Canonical);
Types.push_back(New);
AtomicTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
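// Example (illustrative): given 'typedef int MyInt;', getAtomicType(MyInt)
// returns an AtomicType whose written value type is the typedef but whose
// canonical type is '_Atomic(int)', built by the recursive call above.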
/// getAutoDeductType - Get type pattern for deducing against 'auto'.
QualType ASTContext::getAutoDeductType() const {
if (AutoDeductTy.isNull())
AutoDeductTy = QualType(new (*this, TypeAlignment)
AutoType(QualType(), AutoTypeKeyword::Auto,
TypeDependence::None,
/*concept*/ nullptr, /*args*/ {}),
0);
return AutoDeductTy;
}
/// getAutoRRefDeductType - Get type pattern for deducing against 'auto &&'.
QualType ASTContext::getAutoRRefDeductType() const {
if (AutoRRefDeductTy.isNull())
AutoRRefDeductTy = getRValueReferenceType(getAutoDeductType());
assert(!AutoRRefDeductTy.isNull() && "can't build 'auto &&' pattern");
return AutoRRefDeductTy;
}
/// getTagDeclType - Return the unique reference to the type for the
/// specified TagDecl (struct/union/class/enum) decl.
QualType ASTContext::getTagDeclType(const TagDecl *Decl) const {
assert(Decl);
// FIXME: What is the design on getTagDeclType when it requires casting
// away const? mutable?
return getTypeDeclType(const_cast<TagDecl*>(Decl));
}
/// getSizeType - Return the unique type for "size_t" (C99 7.17), the result
/// of the sizeof operator (C99 6.5.3.4p4). The value is target dependent and
/// needs to agree with the definition in <stddef.h>.
CanQualType ASTContext::getSizeType() const {
return getFromTargetType(Target->getSizeType());
}
/// Return the unique signed counterpart of the integer type
/// corresponding to size_t.
CanQualType ASTContext::getSignedSizeType() const {
return getFromTargetType(Target->getSignedSizeType());
}
/// getIntMaxType - Return the unique type for "intmax_t" (C99 7.18.1.5).
CanQualType ASTContext::getIntMaxType() const {
return getFromTargetType(Target->getIntMaxType());
}
/// getUIntMaxType - Return the unique type for "uintmax_t" (C99 7.18.1.5).
CanQualType ASTContext::getUIntMaxType() const {
return getFromTargetType(Target->getUIntMaxType());
}
/// getSignedWCharType - Return the type of "signed wchar_t".
/// Used in C++ as a GCC extension.
QualType ASTContext::getSignedWCharType() const {
// FIXME: derive from "Target" ?
return WCharTy;
}
/// getUnsignedWCharType - Return the type of "unsigned wchar_t".
/// Used in C++ as a GCC extension.
QualType ASTContext::getUnsignedWCharType() const {
// FIXME: derive from "Target" ?
return UnsignedIntTy;
}
QualType ASTContext::getIntPtrType() const {
return getFromTargetType(Target->getIntPtrType());
}
QualType ASTContext::getUIntPtrType() const {
return getCorrespondingUnsignedType(getIntPtrType());
}
/// getPointerDiffType - Return the unique type for "ptrdiff_t" (C99 7.17)
/// defined in <stddef.h>. Pointer - pointer requires this (C99 6.5.6p9).
QualType ASTContext::getPointerDiffType() const {
return getFromTargetType(Target->getPtrDiffType(0));
}
/// Return the unique unsigned counterpart of "ptrdiff_t"
/// integer type. The standard (C11 7.21.6.1p7) refers to this type
/// in the definition of %tu format specifier.
QualType ASTContext::getUnsignedPointerDiffType() const {
return getFromTargetType(Target->getUnsignedPtrDiffType(0));
}
/// Return the unique type for "pid_t" defined in
/// <sys/types.h>. We need this to compute the correct type for vfork().
QualType ASTContext::getProcessIDType() const {
return getFromTargetType(Target->getProcessIDType());
}
//===----------------------------------------------------------------------===//
// Type Operators
//===----------------------------------------------------------------------===//
CanQualType ASTContext::getCanonicalParamType(QualType T) const {
// Push qualifiers into arrays, and then discard any remaining
// qualifiers.
T = getCanonicalType(T);
T = getVariableArrayDecayedType(T);
const Type *Ty = T.getTypePtr();
QualType Result;
if (isa<ArrayType>(Ty)) {
Result = getArrayDecayedType(QualType(Ty,0));
} else if (isa<FunctionType>(Ty)) {
Result = getPointerType(QualType(Ty, 0));
} else {
Result = QualType(Ty, 0);
}
return CanQualType::CreateUnsafe(Result);
}
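// Example (illustrative): for a parameter written as 'const char name[10]'
// the canonical parameter type is 'const char *', and for a parameter of
// function type 'void ()' it is 'void (*)()'; any remaining top-level
// qualifiers (e.g. a 'const int' parameter) are dropped.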
QualType ASTContext::getUnqualifiedArrayType(QualType type,
Qualifiers &quals) {
SplitQualType splitType = type.getSplitUnqualifiedType();
// FIXME: getSplitUnqualifiedType() actually walks all the way to
// the unqualified desugared type and then drops it on the floor.
// We then have to strip that sugar back off with
// getUnqualifiedDesugaredType(), which is silly.
const auto *AT =
dyn_cast<ArrayType>(splitType.Ty->getUnqualifiedDesugaredType());
// If we don't have an array, just use the results in splitType.
if (!AT) {
quals = splitType.Quals;
return QualType(splitType.Ty, 0);
}
// Otherwise, recurse on the array's element type.
QualType elementType = AT->getElementType();
QualType unqualElementType = getUnqualifiedArrayType(elementType, quals);
// If that didn't change the element type, AT has no qualifiers, so we
// can just use the results in splitType.
if (elementType == unqualElementType) {
assert(quals.empty()); // from the recursive call
quals = splitType.Quals;
return QualType(splitType.Ty, 0);
}
// Otherwise, add in the qualifiers from the outermost type, then
// build the type back up.
quals.addConsistentQualifiers(splitType.Quals);
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) {
return getConstantArrayType(unqualElementType, CAT->getSize(),
CAT->getSizeExpr(), CAT->getSizeModifier(), 0);
}
if (const auto *IAT = dyn_cast<IncompleteArrayType>(AT)) {
return getIncompleteArrayType(unqualElementType, IAT->getSizeModifier(), 0);
}
if (const auto *VAT = dyn_cast<VariableArrayType>(AT)) {
return getVariableArrayType(unqualElementType,
VAT->getSizeExpr(),
VAT->getSizeModifier(),
VAT->getIndexTypeCVRQualifiers(),
VAT->getBracketsRange());
}
const auto *DSAT = cast<DependentSizedArrayType>(AT);
return getDependentSizedArrayType(unqualElementType, DSAT->getSizeExpr(),
DSAT->getSizeModifier(), 0,
SourceRange());
}
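// Example (illustrative): for 'const volatile int[3][4]' this returns the
// array type 'int[3][4]' and sets 'quals' to {const, volatile}, collecting
// the element qualifiers gathered by the recursion above.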
/// Attempt to unwrap two types that may both be array types with the same bound
/// (or both be array types of unknown bound) for the purpose of comparing the
/// cv-decomposition of two types per C++ [conv.qual].
void ASTContext::UnwrapSimilarArrayTypes(QualType &T1, QualType &T2) {
while (true) {
auto *AT1 = getAsArrayType(T1);
if (!AT1)
return;
auto *AT2 = getAsArrayType(T2);
if (!AT2)
return;
// If we don't have two array types with the same constant bound nor two
// incomplete array types, we've unwrapped everything we can.
if (auto *CAT1 = dyn_cast<ConstantArrayType>(AT1)) {
auto *CAT2 = dyn_cast<ConstantArrayType>(AT2);
if (!CAT2 || CAT1->getSize() != CAT2->getSize())
return;
} else if (!isa<IncompleteArrayType>(AT1) ||
!isa<IncompleteArrayType>(AT2)) {
return;
}
T1 = AT1->getElementType();
T2 = AT2->getElementType();
}
}
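// Example (illustrative): for T1 = 'int[3][5]' and T2 = 'const int[3][5]',
// two iterations strip the matching bounds, leaving T1 = 'int' and
// T2 = 'const int'; for T1 = 'int[3]' and T2 = 'int[4]' the bounds differ,
// so neither type is changed.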
/// Attempt to unwrap two types that may be similar (C++ [conv.qual]).
///
/// If T1 and T2 are both pointer types of the same kind, or both array types
/// with the same bound, unwraps layers from T1 and T2 until a pointer type is
/// unwrapped. Top-level qualifiers on T1 and T2 are ignored.
///
/// This function will typically be called in a loop that successively
/// "unwraps" pointer and pointer-to-member types to compare them at each
/// level.
///
/// \return \c true if a pointer type was unwrapped, \c false if we reached a
/// pair of types that can't be unwrapped further.
bool ASTContext::UnwrapSimilarTypes(QualType &T1, QualType &T2) {
UnwrapSimilarArrayTypes(T1, T2);
const auto *T1PtrType = T1->getAs<PointerType>();
const auto *T2PtrType = T2->getAs<PointerType>();
if (T1PtrType && T2PtrType) {
T1 = T1PtrType->getPointeeType();
T2 = T2PtrType->getPointeeType();
return true;
}
const auto *T1MPType = T1->getAs<MemberPointerType>();
const auto *T2MPType = T2->getAs<MemberPointerType>();
if (T1MPType && T2MPType &&
hasSameUnqualifiedType(QualType(T1MPType->getClass(), 0),
QualType(T2MPType->getClass(), 0))) {
T1 = T1MPType->getPointeeType();
T2 = T2MPType->getPointeeType();
return true;
}
if (getLangOpts().ObjC) {
const auto *T1OPType = T1->getAs<ObjCObjectPointerType>();
const auto *T2OPType = T2->getAs<ObjCObjectPointerType>();
if (T1OPType && T2OPType) {
T1 = T1OPType->getPointeeType();
T2 = T2OPType->getPointeeType();
return true;
}
}
// FIXME: Block pointers, too?
return false;
}
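// Example (illustrative): starting from T1 = 'const int **' and
// T2 = 'int **', the first call peels one pointer level (leaving
// 'const int *' and 'int *'), the second peels another (leaving 'const int'
// and 'int'), and the third returns false, ending the caller's loop.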
bool ASTContext::hasSimilarType(QualType T1, QualType T2) {
while (true) {
Qualifiers Quals;
T1 = getUnqualifiedArrayType(T1, Quals);
T2 = getUnqualifiedArrayType(T2, Quals);
if (hasSameType(T1, T2))
return true;
if (!UnwrapSimilarTypes(T1, T2))
return false;
}
}
bool ASTContext::hasCvrSimilarType(QualType T1, QualType T2) {
while (true) {
Qualifiers Quals1, Quals2;
T1 = getUnqualifiedArrayType(T1, Quals1);
T2 = getUnqualifiedArrayType(T2, Quals2);
Quals1.removeCVRQualifiers();
Quals2.removeCVRQualifiers();
if (Quals1 != Quals2)
return false;
if (hasSameType(T1, T2))
return true;
if (!UnwrapSimilarTypes(T1, T2))
return false;
}
}
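// Example (illustrative): 'const int **' and 'int **' are similar (their
// qualifiers may differ at each unwrapped level), whereas 'int *' and
// 'float *' are not, since unwrapping stops at distinct types.
// hasCvrSimilarType differs only in that non-CVR qualifiers (address space,
// Objective-C lifetime, etc.) must match at each level.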
DeclarationNameInfo
ASTContext::getNameForTemplate(TemplateName Name,
SourceLocation NameLoc) const {
switch (Name.getKind()) {
case TemplateName::QualifiedTemplate:
case TemplateName::Template:
// DNInfo work in progress: CHECKME: what about DNLoc?
return DeclarationNameInfo(Name.getAsTemplateDecl()->getDeclName(),
NameLoc);
case TemplateName::OverloadedTemplate: {
OverloadedTemplateStorage *Storage = Name.getAsOverloadedTemplate();
// DNInfo work in progress: CHECKME: what about DNLoc?
return DeclarationNameInfo((*Storage->begin())->getDeclName(), NameLoc);
}
case TemplateName::AssumedTemplate: {
AssumedTemplateStorage *Storage = Name.getAsAssumedTemplateName();
return DeclarationNameInfo(Storage->getDeclName(), NameLoc);
}
case TemplateName::DependentTemplate: {
DependentTemplateName *DTN = Name.getAsDependentTemplateName();
DeclarationName DName;
if (DTN->isIdentifier()) {
DName = DeclarationNames.getIdentifier(DTN->getIdentifier());
return DeclarationNameInfo(DName, NameLoc);
} else {
DName = DeclarationNames.getCXXOperatorName(DTN->getOperator());
// DNInfo work in progress: FIXME: source locations?
DeclarationNameLoc DNLoc =
DeclarationNameLoc::makeCXXOperatorNameLoc(SourceRange());
return DeclarationNameInfo(DName, NameLoc, DNLoc);
}
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= Name.getAsSubstTemplateTemplateParm();
return DeclarationNameInfo(subst->getParameter()->getDeclName(),
NameLoc);
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *subst
= Name.getAsSubstTemplateTemplateParmPack();
return DeclarationNameInfo(subst->getParameterPack()->getDeclName(),
NameLoc);
}
}
llvm_unreachable("bad template name kind!");
}
TemplateName ASTContext::getCanonicalTemplateName(TemplateName Name) const {
switch (Name.getKind()) {
case TemplateName::QualifiedTemplate:
case TemplateName::Template: {
TemplateDecl *Template = Name.getAsTemplateDecl();
if (auto *TTP = dyn_cast<TemplateTemplateParmDecl>(Template))
Template = getCanonicalTemplateTemplateParmDecl(TTP);
// The canonical template name is the canonical template declaration.
return TemplateName(cast<TemplateDecl>(Template->getCanonicalDecl()));
}
case TemplateName::OverloadedTemplate:
case TemplateName::AssumedTemplate:
llvm_unreachable("cannot canonicalize unresolved template");
case TemplateName::DependentTemplate: {
DependentTemplateName *DTN = Name.getAsDependentTemplateName();
assert(DTN && "Non-dependent template names must refer to template decls.");
return DTN->CanonicalTemplateName;
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= Name.getAsSubstTemplateTemplateParm();
return getCanonicalTemplateName(subst->getReplacement());
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *subst
= Name.getAsSubstTemplateTemplateParmPack();
TemplateTemplateParmDecl *canonParameter
= getCanonicalTemplateTemplateParmDecl(subst->getParameterPack());
TemplateArgument canonArgPack
= getCanonicalTemplateArgument(subst->getArgumentPack());
return getSubstTemplateTemplateParmPack(canonParameter, canonArgPack);
}
}
llvm_unreachable("bad template name!");
}
bool ASTContext::hasSameTemplateName(TemplateName X, TemplateName Y) {
X = getCanonicalTemplateName(X);
Y = getCanonicalTemplateName(Y);
return X.getAsVoidPointer() == Y.getAsVoidPointer();
}
TemplateArgument
ASTContext::getCanonicalTemplateArgument(const TemplateArgument &Arg) const {
switch (Arg.getKind()) {
case TemplateArgument::Null:
return Arg;
case TemplateArgument::Expression:
return Arg;
case TemplateArgument::Declaration: {
auto *D = cast<ValueDecl>(Arg.getAsDecl()->getCanonicalDecl());
return TemplateArgument(D, Arg.getParamTypeForDecl());
}
case TemplateArgument::NullPtr:
return TemplateArgument(getCanonicalType(Arg.getNullPtrType()),
/*isNullPtr*/true);
case TemplateArgument::Template:
return TemplateArgument(getCanonicalTemplateName(Arg.getAsTemplate()));
case TemplateArgument::TemplateExpansion:
return TemplateArgument(getCanonicalTemplateName(
Arg.getAsTemplateOrTemplatePattern()),
Arg.getNumTemplateExpansions());
case TemplateArgument::Integral:
return TemplateArgument(Arg, getCanonicalType(Arg.getIntegralType()));
case TemplateArgument::Type:
return TemplateArgument(getCanonicalType(Arg.getAsType()));
case TemplateArgument::Pack: {
if (Arg.pack_size() == 0)
return Arg;
auto *CanonArgs = new (*this) TemplateArgument[Arg.pack_size()];
unsigned Idx = 0;
for (TemplateArgument::pack_iterator A = Arg.pack_begin(),
AEnd = Arg.pack_end();
A != AEnd; (void)++A, ++Idx)
CanonArgs[Idx] = getCanonicalTemplateArgument(*A);
return TemplateArgument(llvm::makeArrayRef(CanonArgs, Arg.pack_size()));
}
}
// Silence GCC warning
llvm_unreachable("Unhandled template argument kind");
}
NestedNameSpecifier *
ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const {
if (!NNS)
return nullptr;
switch (NNS->getKind()) {
case NestedNameSpecifier::Identifier:
// Canonicalize the prefix but keep the identifier the same.
return NestedNameSpecifier::Create(*this,
getCanonicalNestedNameSpecifier(NNS->getPrefix()),
NNS->getAsIdentifier());
case NestedNameSpecifier::Namespace:
// A namespace is canonical; build a nested-name-specifier with
// this namespace and no prefix.
return NestedNameSpecifier::Create(*this, nullptr,
NNS->getAsNamespace()->getOriginalNamespace());
case NestedNameSpecifier::NamespaceAlias:
// A namespace is canonical; build a nested-name-specifier with
// this namespace and no prefix.
return NestedNameSpecifier::Create(*this, nullptr,
NNS->getAsNamespaceAlias()->getNamespace()
->getOriginalNamespace());
// The difference between TypeSpec and TypeSpecWithTemplate is that the
// latter will have the 'template' keyword when printed.
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
const Type *T = getCanonicalType(NNS->getAsType());
// If we have some kind of dependent-named type (e.g., "typename T::type"),
// break it apart into its prefix and identifier, then reconstitute those
// as the canonical nested-name-specifier. This is required to canonicalize
// a dependent nested-name-specifier involving typedefs of dependent-name
// types, e.g.,
// typedef typename T::type T1;
// typedef typename T1::type T2;
if (const auto *DNT = T->getAs<DependentNameType>())
return NestedNameSpecifier::Create(
*this, DNT->getQualifier(),
const_cast<IdentifierInfo *>(DNT->getIdentifier()));
if (const auto *DTST = T->getAs<DependentTemplateSpecializationType>())
return NestedNameSpecifier::Create(*this, DTST->getQualifier(), true,
const_cast<Type *>(T));
// TODO: Set 'Template' parameter to true for other template types.
return NestedNameSpecifier::Create(*this, nullptr, false,
const_cast<Type *>(T));
}
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
// The global specifier and __super specifier are canonical and unique.
return NNS;
}
llvm_unreachable("Invalid NestedNameSpecifier::Kind!");
}
const ArrayType *ASTContext::getAsArrayType(QualType T) const {
// Handle the non-qualified case efficiently.
if (!T.hasLocalQualifiers()) {
// Handle the common positive case fast.
if (const auto *AT = dyn_cast<ArrayType>(T))
return AT;
}
// Handle the common negative case fast.
if (!isa<ArrayType>(T.getCanonicalType()))
return nullptr;
// Apply any qualifiers from the array type to the element type. This
// implements C99 6.7.3p8: "If the specification of an array type includes
// any type qualifiers, the element type is so qualified, not the array type."
// If we get here, we either have type qualifiers on the type, or we have
// sugar such as a typedef in the way. If we have type qualifiers on the type
// we must propagate them down into the element type.
SplitQualType split = T.getSplitDesugaredType();
Qualifiers qs = split.Quals;
// If we have a simple case, just return now.
const auto *ATy = dyn_cast<ArrayType>(split.Ty);
if (!ATy || qs.empty())
return ATy;
// Otherwise, we have an array and we have qualifiers on it. Push the
// qualifiers into the array element type and return a new array type.
QualType NewEltTy = getQualifiedType(ATy->getElementType(), qs);
if (const auto *CAT = dyn_cast<ConstantArrayType>(ATy))
return cast<ArrayType>(getConstantArrayType(NewEltTy, CAT->getSize(),
CAT->getSizeExpr(),
CAT->getSizeModifier(),
CAT->getIndexTypeCVRQualifiers()));
if (const auto *IAT = dyn_cast<IncompleteArrayType>(ATy))
return cast<ArrayType>(getIncompleteArrayType(NewEltTy,
IAT->getSizeModifier(),
IAT->getIndexTypeCVRQualifiers()));
if (const auto *DSAT = dyn_cast<DependentSizedArrayType>(ATy))
return cast<ArrayType>(
getDependentSizedArrayType(NewEltTy,
DSAT->getSizeExpr(),
DSAT->getSizeModifier(),
DSAT->getIndexTypeCVRQualifiers(),
DSAT->getBracketsRange()));
const auto *VAT = cast<VariableArrayType>(ATy);
return cast<ArrayType>(getVariableArrayType(NewEltTy,
VAT->getSizeExpr(),
VAT->getSizeModifier(),
VAT->getIndexTypeCVRQualifiers(),
VAT->getBracketsRange()));
}
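// Example (illustrative): given 'typedef int A[5];', the type 'const A'
// carries the 'const' on the typedef sugar; this routine desugars, pushes
// the qualifier into the element type, and returns an array of 'const int',
// implementing C99 6.7.3p8.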
QualType ASTContext::getAdjustedParameterType(QualType T) const {
if (T->isArrayType() || T->isFunctionType())
return getDecayedType(T);
return T;
}
QualType ASTContext::getSignatureParameterType(QualType T) const {
T = getVariableArrayDecayedType(T);
T = getAdjustedParameterType(T);
return T.getUnqualifiedType();
}
QualType ASTContext::getExceptionObjectType(QualType T) const {
// C++ [except.throw]p3:
// A throw-expression initializes a temporary object, called the exception
// object, the type of which is determined by removing any top-level
// cv-qualifiers from the static type of the operand of throw and adjusting
// the type from "array of T" or "function returning T" to "pointer to T"
// or "pointer to function returning T", [...]
T = getVariableArrayDecayedType(T);
if (T->isArrayType() || T->isFunctionType())
T = getDecayedType(T);
return T.getUnqualifiedType();
}
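// Example (illustrative): for 'throw "oops"' the operand has type
// 'const char[5]', so the exception object type computed here is
// 'const char *'; a thrown 'const int' simply becomes 'int'.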
/// getArrayDecayedType - Return the properly qualified result of decaying the
/// specified array type to a pointer. This operation is non-trivial when
/// handling typedefs etc. The canonical type of "T" must be an array type;
/// this returns a pointer to a properly qualified element of the array.
///
/// See C99 6.7.5.3p7 and C99 6.3.2.1p3.
QualType ASTContext::getArrayDecayedType(QualType Ty) const {
// Get the element type with 'getAsArrayType' so that we don't lose any
// typedefs in the element type of the array. This also handles propagation
// of type qualifiers from the array type into the element type if present
// (C99 6.7.3p8).
const ArrayType *PrettyArrayType = getAsArrayType(Ty);
assert(PrettyArrayType && "Not an array type!");
QualType PtrTy = getPointerType(PrettyArrayType->getElementType());
// int x[restrict 4] -> int *restrict
QualType Result = getQualifiedType(PtrTy,
PrettyArrayType->getIndexTypeQualifiers());
// int x[_Nullable] -> int * _Nullable
if (auto Nullability = Ty->getNullability(*this)) {
Result = const_cast<ASTContext *>(this)->getAttributedType(
AttributedType::getNullabilityAttrKind(*Nullability), Result, Result);
}
return Result;
}
QualType ASTContext::getBaseElementType(const ArrayType *array) const {
return getBaseElementType(array->getElementType());
}
QualType ASTContext::getBaseElementType(QualType type) const {
Qualifiers qs;
while (true) {
SplitQualType split = type.getSplitDesugaredType();
const ArrayType *array = split.Ty->getAsArrayTypeUnsafe();
if (!array) break;
type = array->getElementType();
qs.addConsistentQualifiers(split.Quals);
}
return getQualifiedType(type, qs);
}
/// getConstantArrayElementCount - Returns number of constant array elements.
uint64_t
ASTContext::getConstantArrayElementCount(const ConstantArrayType *CA) const {
uint64_t ElementCount = 1;
do {
ElementCount *= CA->getSize().getZExtValue();
CA = dyn_cast_or_null<ConstantArrayType>(
CA->getElementType()->getAsArrayTypeUnsafe());
} while (CA);
return ElementCount;
}
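// Example (illustrative): for the type 'int[3][4]' the loop multiplies the
// bounds of each nested constant array, so this returns 3 * 4 = 12.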
/// getFloatingRank - Return a relative rank for floating point types.
/// This routine will assert if passed a built-in type that isn't a float.
static FloatingRank getFloatingRank(QualType T) {
if (const auto *CT = T->getAs<ComplexType>())
return getFloatingRank(CT->getElementType());
switch (T->castAs<BuiltinType>()->getKind()) {
default: llvm_unreachable("getFloatingRank(): not a floating type");
case BuiltinType::Float16: return Float16Rank;
case BuiltinType::Half: return HalfRank;
case BuiltinType::Float: return FloatRank;
case BuiltinType::Double: return DoubleRank;
case BuiltinType::LongDouble: return LongDoubleRank;
case BuiltinType::Float128: return Float128Rank;
case BuiltinType::BFloat16: return BFloat16Rank;
}
}
/// getFloatingTypeOfSizeWithinDomain - Returns a real floating
/// point or a complex type (based on Domain/Size).
/// 'Domain' is a real floating point or complex type and determines whether
/// the result is real or complex; 'Size' determines the floating-point rank.
QualType ASTContext::getFloatingTypeOfSizeWithinDomain(QualType Size,
QualType Domain) const {
FloatingRank EltRank = getFloatingRank(Size);
if (Domain->isComplexType()) {
switch (EltRank) {
case BFloat16Rank: llvm_unreachable("Complex bfloat16 is not supported");
case Float16Rank:
case HalfRank: llvm_unreachable("Complex half is not supported");
case FloatRank: return FloatComplexTy;
case DoubleRank: return DoubleComplexTy;
case LongDoubleRank: return LongDoubleComplexTy;
case Float128Rank: return Float128ComplexTy;
}
}
assert(Domain->isRealFloatingType() && "Unknown domain!");
switch (EltRank) {
case Float16Rank: return HalfTy;
case BFloat16Rank: return BFloat16Ty;
case HalfRank: return HalfTy;
case FloatRank: return FloatTy;
case DoubleRank: return DoubleTy;
case LongDoubleRank: return LongDoubleTy;
case Float128Rank: return Float128Ty;
}
llvm_unreachable("getFloatingRank(): illegal value for rank");
}
/// getFloatingTypeOrder - Compare the rank of the two specified floating
/// point types, ignoring the domain of the type (i.e. 'double' ==
/// '_Complex double'). If LHS > RHS, return 1. If LHS == RHS, return 0. If
/// LHS < RHS, return -1.
int ASTContext::getFloatingTypeOrder(QualType LHS, QualType RHS) const {
FloatingRank LHSR = getFloatingRank(LHS);
FloatingRank RHSR = getFloatingRank(RHS);
if (LHSR == RHSR)
return 0;
if (LHSR > RHSR)
return 1;
return -1;
}
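// Example (illustrative): getFloatingTypeOrder(DoubleTy, FloatTy) returns 1,
// and getFloatingTypeOrder(FloatTy, FloatComplexTy) returns 0, since complex
// types are ranked by their element type.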
int ASTContext::getFloatingTypeSemanticOrder(QualType LHS, QualType RHS) const {
if (&getFloatTypeSemantics(LHS) == &getFloatTypeSemantics(RHS))
return 0;
return getFloatingTypeOrder(LHS, RHS);
}
/// getIntegerRank - Return an integer conversion rank (C99 6.3.1.1p1). This
/// routine will assert if passed a built-in type that isn't an integer or enum,
/// or if it is not canonicalized.
unsigned ASTContext::getIntegerRank(const Type *T) const {
assert(T->isCanonicalUnqualified() && "T should be canonicalized");
// Results in this 'losing' to any type of the same size, but winning if
// larger.
if (const auto *EIT = dyn_cast<ExtIntType>(T))
return 0 + (EIT->getNumBits() << 3);
switch (cast<BuiltinType>(T)->getKind()) {
default: llvm_unreachable("getIntegerRank(): not a built-in integer");
case BuiltinType::Bool:
return 1 + (getIntWidth(BoolTy) << 3);
case BuiltinType::Char_S:
case BuiltinType::Char_U:
case BuiltinType::SChar:
case BuiltinType::UChar:
return 2 + (getIntWidth(CharTy) << 3);
case BuiltinType::Short:
case BuiltinType::UShort:
return 3 + (getIntWidth(ShortTy) << 3);
case BuiltinType::Int:
case BuiltinType::UInt:
return 4 + (getIntWidth(IntTy) << 3);
case BuiltinType::Long:
case BuiltinType::ULong:
return 5 + (getIntWidth(LongTy) << 3);
case BuiltinType::LongLong:
case BuiltinType::ULongLong:
return 6 + (getIntWidth(LongLongTy) << 3);
case BuiltinType::Int128:
case BuiltinType::UInt128:
return 7 + (getIntWidth(Int128Ty) << 3);
}
}
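// Worked example (illustrative, assuming 32-bit 'int'): 'int' gets rank
// 4 + (32 << 3) = 260, while '_ExtInt(32)' gets rank 0 + (32 << 3) = 256,
// so an _ExtInt loses to a standard integer type of the same width but
// beats any narrower one.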
/// Whether this is a promotable bitfield reference according
/// to C99 6.3.1.1p2, bullet 2 (and GCC extensions).
///
/// \returns the type this bit-field will promote to, or NULL if no
/// promotion occurs.
QualType ASTContext::isPromotableBitField(Expr *E) const {
if (E->isTypeDependent() || E->isValueDependent())
return {};
// C++ [conv.prom]p5:
// If the bit-field has an enumerated type, it is treated as any other
// value of that type for promotion purposes.
if (getLangOpts().CPlusPlus && E->getType()->isEnumeralType())
return {};
// FIXME: We should not do this unless E->refersToBitField() is true. This
// matters in C where getSourceBitField() will find bit-fields for various
// cases where the source expression is not a bit-field designator.
FieldDecl *Field = E->getSourceBitField(); // FIXME: conditional bit-fields?
if (!Field)
return {};
QualType FT = Field->getType();
uint64_t BitWidth = Field->getBitWidthValue(*this);
uint64_t IntSize = getTypeSize(IntTy);
// C++ [conv.prom]p5:
// A prvalue for an integral bit-field can be converted to a prvalue of type
// int if int can represent all the values of the bit-field; otherwise, it
// can be converted to unsigned int if unsigned int can represent all the
// values of the bit-field. If the bit-field is larger yet, no integral
// promotion applies to it.
// C11 6.3.1.1/2:
// [For a bit-field of type _Bool, int, signed int, or unsigned int:]
// If an int can represent all values of the original type (as restricted by
// the width, for a bit-field), the value is converted to an int; otherwise,
// it is converted to an unsigned int.
//
// FIXME: C does not permit promotion of a 'long : 3' bitfield to int.
// We perform that promotion here to match GCC and C++.
// FIXME: C does not permit promotion of an enum bit-field whose rank is
// greater than that of 'int'. We perform that promotion to match GCC.
if (BitWidth < IntSize)
return IntTy;
if (BitWidth == IntSize)
return FT->isSignedIntegerType() ? IntTy : UnsignedIntTy;
// Bit-fields wider than int are not subject to promotions, and therefore act
// like the base type. GCC has some weird bugs in this area that we
// deliberately do not follow (GCC follows a pre-standard resolution to
// C's DR315 which treats bit-width as being part of the type, and this leaks
// into their semantics in some cases).
return {};
}
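// Example (illustrative, assuming 32-bit 'int'): given
//   struct S { unsigned long b : 3; unsigned int c : 32; };
// 's.b' promotes to 'int' (its width is smaller than int's), while 's.c'
// promotes to 'unsigned int' (same width as int, unsigned field type).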
/// getPromotedIntegerType - Returns the type that Promotable will
/// promote to: C99 6.3.1.1p2, assuming that Promotable is a promotable
/// integer type.
QualType ASTContext::getPromotedIntegerType(QualType Promotable) const {
assert(!Promotable.isNull());
assert(Promotable->isPromotableIntegerType());
if (const auto *ET = Promotable->getAs<EnumType>())
return ET->getDecl()->getPromotionType();
if (const auto *BT = Promotable->getAs<BuiltinType>()) {
// C++ [conv.prom]: A prvalue of type char16_t, char32_t, or wchar_t
// (3.9.1) can be converted to a prvalue of the first of the following
// types that can represent all the values of its underlying type:
// int, unsigned int, long int, unsigned long int, long long int, or
// unsigned long long int [...]
// FIXME: Is there some better way to compute this?
if (BT->getKind() == BuiltinType::WChar_S ||
BT->getKind() == BuiltinType::WChar_U ||
BT->getKind() == BuiltinType::Char8 ||
BT->getKind() == BuiltinType::Char16 ||
BT->getKind() == BuiltinType::Char32) {
bool FromIsSigned = BT->getKind() == BuiltinType::WChar_S;
uint64_t FromSize = getTypeSize(BT);
QualType PromoteTypes[] = { IntTy, UnsignedIntTy, LongTy, UnsignedLongTy,
LongLongTy, UnsignedLongLongTy };
for (size_t Idx = 0; Idx < llvm::array_lengthof(PromoteTypes); ++Idx) {
uint64_t ToSize = getTypeSize(PromoteTypes[Idx]);
if (FromSize < ToSize ||
(FromSize == ToSize &&
FromIsSigned == PromoteTypes[Idx]->isSignedIntegerType()))
return PromoteTypes[Idx];
}
llvm_unreachable("char type should fit into long long");
}
}
// At this point, we should have a signed or unsigned integer type.
if (Promotable->isSignedIntegerType())
return IntTy;
uint64_t PromotableSize = getIntWidth(Promotable);
uint64_t IntSize = getIntWidth(IntTy);
assert(Promotable->isUnsignedIntegerType() && PromotableSize <= IntSize);
return (PromotableSize != IntSize) ? IntTy : UnsignedIntTy;
}
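// Example (illustrative, assuming 32-bit 'int'): 'unsigned short' promotes
// to 'int' (a signed int can hold all its values), while 'char32_t' promotes
// to 'unsigned int', the first listed type of matching size and signedness.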
/// Recurses into pointer/array types until it finds an Objective-C
/// retainable type and returns its ownership.
Qualifiers::ObjCLifetime ASTContext::getInnerObjCOwnership(QualType T) const {
while (!T.isNull()) {
if (T.getObjCLifetime() != Qualifiers::OCL_None)
return T.getObjCLifetime();
if (T->isArrayType())
T = getBaseElementType(T);
else if (const auto *PT = T->getAs<PointerType>())
T = PT->getPointeeType();
else if (const auto *RT = T->getAs<ReferenceType>())
T = RT->getPointeeType();
else
break;
}
return Qualifiers::OCL_None;
}
static const Type *getIntegerTypeForEnum(const EnumType *ET) {
// Incomplete enum types are not treated as integer types.
// FIXME: In C++, enum types are never integer types.
if (ET->getDecl()->isComplete() && !ET->getDecl()->isScoped())
return ET->getDecl()->getIntegerType().getTypePtr();
return nullptr;
}
/// getIntegerTypeOrder - Returns the highest ranked integer type:
/// C99 6.3.1.8p1. If LHS > RHS, return 1. If LHS == RHS, return 0. If
/// LHS < RHS, return -1.
int ASTContext::getIntegerTypeOrder(QualType LHS, QualType RHS) const {
const Type *LHSC = getCanonicalType(LHS).getTypePtr();
const Type *RHSC = getCanonicalType(RHS).getTypePtr();
// Unwrap enums to their underlying type.
if (const auto *ET = dyn_cast<EnumType>(LHSC))
LHSC = getIntegerTypeForEnum(ET);
if (const auto *ET = dyn_cast<EnumType>(RHSC))
RHSC = getIntegerTypeForEnum(ET);
if (LHSC == RHSC) return 0;
bool LHSUnsigned = LHSC->isUnsignedIntegerType();
bool RHSUnsigned = RHSC->isUnsignedIntegerType();
unsigned LHSRank = getIntegerRank(LHSC);
unsigned RHSRank = getIntegerRank(RHSC);
if (LHSUnsigned == RHSUnsigned) { // Both signed or both unsigned.
if (LHSRank == RHSRank) return 0;
return LHSRank > RHSRank ? 1 : -1;
}
// Otherwise, the LHS is signed and the RHS is unsigned or vice versa.
if (LHSUnsigned) {
// If the unsigned [LHS] type is larger, return it.
if (LHSRank >= RHSRank)
return 1;
// If the signed type can represent all values of the unsigned type, it
// wins. Because we are dealing with 2's complement and types that are
// powers of two larger than each other, this is always safe.
return -1;
}
// If the unsigned [RHS] type is larger, return it.
if (RHSRank >= LHSRank)
return -1;
// If the signed type can represent all values of the unsigned type, it
// wins. Because we are dealing with 2's complement and types that are
// powers of two larger than each other, this is always safe.
return 1;
}
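// Example (illustrative, assuming 32-bit 'int' and 64-bit 'long'):
// getIntegerTypeOrder(IntTy, UnsignedIntTy) returns -1 (the unsigned type
// wins at equal rank), while getIntegerTypeOrder(LongTy, UnsignedIntTy)
// returns 1, since 'long' can represent every 'unsigned int' value.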
TypedefDecl *ASTContext::getCFConstantStringDecl() const {
if (CFConstantStringTypeDecl)
return CFConstantStringTypeDecl;
assert(!CFConstantStringTagDecl &&
"tag and typedef should be initialized together");
CFConstantStringTagDecl = buildImplicitRecord("__NSConstantString_tag");
CFConstantStringTagDecl->startDefinition();
struct {
QualType Type;
const char *Name;
} Fields[5];
unsigned Count = 0;
/// Objective-C ABI
///
/// typedef struct __NSConstantString_tag {
/// const int *isa;
/// int flags;
/// const char *str;
/// long length;
/// } __NSConstantString;
///
/// Swift ABI (4.1, 4.2)
///
/// typedef struct __NSConstantString_tag {
/// uintptr_t _cfisa;
/// uintptr_t _swift_rc;
/// _Atomic(uint64_t) _cfinfoa;
/// const char *_ptr;
/// uint32_t _length;
/// } __NSConstantString;
///
/// Swift ABI (5.0)
///
/// typedef struct __NSConstantString_tag {
/// uintptr_t _cfisa;
/// uintptr_t _swift_rc;
/// _Atomic(uint64_t) _cfinfoa;
/// const char *_ptr;
/// uintptr_t _length;
/// } __NSConstantString;
const auto CFRuntime = getLangOpts().CFRuntime;
if (static_cast<unsigned>(CFRuntime) <
static_cast<unsigned>(LangOptions::CoreFoundationABI::Swift)) {
Fields[Count++] = { getPointerType(IntTy.withConst()), "isa" };
Fields[Count++] = { IntTy, "flags" };
Fields[Count++] = { getPointerType(CharTy.withConst()), "str" };
Fields[Count++] = { LongTy, "length" };
} else {
Fields[Count++] = { getUIntPtrType(), "_cfisa" };
Fields[Count++] = { getUIntPtrType(), "_swift_rc" };
Fields[Count++] = { getFromTargetType(Target->getUInt64Type()), "_swift_rc" };
Fields[Count++] = { getPointerType(CharTy.withConst()), "_ptr" };
if (CFRuntime == LangOptions::CoreFoundationABI::Swift4_1 ||
CFRuntime == LangOptions::CoreFoundationABI::Swift4_2)
Fields[Count++] = { IntTy, "_ptr" };
else
Fields[Count++] = { getUIntPtrType(), "_ptr" };
}
// Create fields
for (unsigned i = 0; i < Count; ++i) {
FieldDecl *Field =
FieldDecl::Create(*this, CFConstantStringTagDecl, SourceLocation(),
SourceLocation(), &Idents.get(Fields[i].Name),
Fields[i].Type, /*TInfo=*/nullptr,
/*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
CFConstantStringTagDecl->addDecl(Field);
}
CFConstantStringTagDecl->completeDefinition();
// This type is designed to be compatible with NSConstantString, but cannot
// use the same name, since NSConstantString is an interface.
auto tagType = getTagDeclType(CFConstantStringTagDecl);
CFConstantStringTypeDecl =
buildImplicitTypedef(tagType, "__NSConstantString");
return CFConstantStringTypeDecl;
}
RecordDecl *ASTContext::getCFConstantStringTagDecl() const {
if (!CFConstantStringTagDecl)
getCFConstantStringDecl(); // Build the tag and the typedef.
return CFConstantStringTagDecl;
}
// getCFConstantStringType - Return the type used for constant CFStrings.
QualType ASTContext::getCFConstantStringType() const {
return getTypedefType(getCFConstantStringDecl());
}
QualType ASTContext::getObjCSuperType() const {
if (ObjCSuperType.isNull()) {
RecordDecl *ObjCSuperTypeDecl = buildImplicitRecord("objc_super");
getTranslationUnitDecl()->addDecl(ObjCSuperTypeDecl);
ObjCSuperType = getTagDeclType(ObjCSuperTypeDecl);
}
return ObjCSuperType;
}
void ASTContext::setCFConstantStringType(QualType T) {
const auto *TD = T->castAs<TypedefType>();
CFConstantStringTypeDecl = cast<TypedefDecl>(TD->getDecl());
const auto *TagType =
CFConstantStringTypeDecl->getUnderlyingType()->castAs<RecordType>();
CFConstantStringTagDecl = TagType->getDecl();
}
QualType ASTContext::getBlockDescriptorType() const {
if (BlockDescriptorType)
return getTagDeclType(BlockDescriptorType);
RecordDecl *RD;
// FIXME: Needs the FlagAppleBlock bit.
RD = buildImplicitRecord("__block_descriptor");
RD->startDefinition();
QualType FieldTypes[] = {
UnsignedLongTy,
UnsignedLongTy,
};
static const char *const FieldNames[] = {
"reserved",
"Size"
};
for (size_t i = 0; i < 2; ++i) {
FieldDecl *Field = FieldDecl::Create(
*this, RD, SourceLocation(), SourceLocation(),
&Idents.get(FieldNames[i]), FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
RD->addDecl(Field);
}
RD->completeDefinition();
BlockDescriptorType = RD;
return getTagDeclType(BlockDescriptorType);
}
QualType ASTContext::getBlockDescriptorExtendedType() const {
if (BlockDescriptorExtendedType)
return getTagDeclType(BlockDescriptorExtendedType);
RecordDecl *RD;
// FIXME: Needs the FlagAppleBlock bit.
RD = buildImplicitRecord("__block_descriptor_withcopydispose");
RD->startDefinition();
QualType FieldTypes[] = {
UnsignedLongTy,
UnsignedLongTy,
getPointerType(VoidPtrTy),
getPointerType(VoidPtrTy)
};
static const char *const FieldNames[] = {
"reserved",
"Size",
"CopyFuncPtr",
"DestroyFuncPtr"
};
for (size_t i = 0; i < 4; ++i) {
FieldDecl *Field = FieldDecl::Create(
*this, RD, SourceLocation(), SourceLocation(),
&Idents.get(FieldNames[i]), FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
RD->addDecl(Field);
}
RD->completeDefinition();
BlockDescriptorExtendedType = RD;
return getTagDeclType(BlockDescriptorExtendedType);
}
OpenCLTypeKind ASTContext::getOpenCLTypeKind(const Type *T) const {
const auto *BT = dyn_cast<BuiltinType>(T);
if (!BT) {
if (isa<PipeType>(T))
return OCLTK_Pipe;
return OCLTK_Default;
}
switch (BT->getKind()) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id: \
return OCLTK_Image;
#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLClkEvent:
return OCLTK_ClkEvent;
case BuiltinType::OCLEvent:
return OCLTK_Event;
case BuiltinType::OCLQueue:
return OCLTK_Queue;
case BuiltinType::OCLReserveID:
return OCLTK_ReserveID;
case BuiltinType::OCLSampler:
return OCLTK_Sampler;
default:
return OCLTK_Default;
}
}
LangAS ASTContext::getOpenCLTypeAddrSpace(const Type *T) const {
return Target->getOpenCLTypeAddrSpace(getOpenCLTypeKind(T));
}
/// BlockRequiresCopying - Returns true if byref variable "D" of type "Ty"
/// requires copy/dispose. Note that this must match the logic
/// in buildByrefHelpers.
bool ASTContext::BlockRequiresCopying(QualType Ty,
const VarDecl *D) {
if (const CXXRecordDecl *record = Ty->getAsCXXRecordDecl()) {
const Expr *copyExpr = getBlockVarCopyInit(D).getCopyExpr();
if (!copyExpr && record->hasTrivialDestructor()) return false;
return true;
}
// The block needs copy/destroy helpers if Ty is non-trivial to destructively
// move or destroy.
if (Ty.isNonTrivialToPrimitiveDestructiveMove() || Ty.isDestructedType())
return true;
if (!Ty->isObjCRetainableType()) return false;
Qualifiers qs = Ty.getQualifiers();
// If we have lifetime, that dominates.
if (Qualifiers::ObjCLifetime lifetime = qs.getObjCLifetime()) {
switch (lifetime) {
case Qualifiers::OCL_None: llvm_unreachable("impossible");
// These are just bits as far as the runtime is concerned.
case Qualifiers::OCL_ExplicitNone:
case Qualifiers::OCL_Autoreleasing:
return false;
// These cases should have been taken care of when checking the type's
// non-triviality.
case Qualifiers::OCL_Weak:
case Qualifiers::OCL_Strong:
llvm_unreachable("impossible");
}
llvm_unreachable("fell out of lifetime switch!");
}
return (Ty->isBlockPointerType() || isObjCNSObjectType(Ty) ||
Ty->isObjCObjectPointerType());
}
bool ASTContext::getByrefLifetime(QualType Ty,
Qualifiers::ObjCLifetime &LifeTime,
bool &HasByrefExtendedLayout) const {
if (!getLangOpts().ObjC ||
getLangOpts().getGC() != LangOptions::NonGC)
return false;
HasByrefExtendedLayout = false;
if (Ty->isRecordType()) {
HasByrefExtendedLayout = true;
LifeTime = Qualifiers::OCL_None;
} else if ((LifeTime = Ty.getObjCLifetime())) {
// Honor the ARC qualifiers.
} else if (Ty->isObjCObjectPointerType() || Ty->isBlockPointerType()) {
// The MRR rule.
LifeTime = Qualifiers::OCL_ExplicitNone;
} else {
LifeTime = Qualifiers::OCL_None;
}
return true;
}
CanQualType ASTContext::getNSUIntegerType() const {
assert(Target && "Expected target to be initialized");
const llvm::Triple &T = Target->getTriple();
// Windows is LLP64 rather than LP64
if (T.isOSWindows() && T.isArch64Bit())
return UnsignedLongLongTy;
return UnsignedLongTy;
}
CanQualType ASTContext::getNSIntegerType() const {
assert(Target && "Expected target to be initialized");
const llvm::Triple &T = Target->getTriple();
// Windows is LLP64 rather than LP64
if (T.isOSWindows() && T.isArch64Bit())
return LongLongTy;
return LongTy;
}
TypedefDecl *ASTContext::getObjCInstanceTypeDecl() {
if (!ObjCInstanceTypeDecl)
ObjCInstanceTypeDecl =
buildImplicitTypedef(getObjCIdType(), "instancetype");
return ObjCInstanceTypeDecl;
}
// This returns true if a type has been typedefed to BOOL:
// typedef <type> BOOL;
static bool isTypeTypedefedAsBOOL(QualType T) {
if (const auto *TT = dyn_cast<TypedefType>(T))
if (IdentifierInfo *II = TT->getDecl()->getIdentifier())
return II->isStr("BOOL");
return false;
}
/// getObjCEncodingTypeSize returns the size of a type for Objective-C
/// encoding purposes.
CharUnits ASTContext::getObjCEncodingTypeSize(QualType type) const {
if (!type->isIncompleteArrayType() && type->isIncompleteType())
return CharUnits::Zero();
CharUnits sz = getTypeSizeInChars(type);
// Make all integer and enum types at least as large as an int
if (sz.isPositive() && type->isIntegralOrEnumerationType())
sz = std::max(sz, getTypeSizeInChars(IntTy));
// Treat arrays as pointers, since that's how they're passed in.
else if (type->isArrayType())
sz = getTypeSizeInChars(VoidPtrTy);
return sz;
}
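// Example (illustrative, assuming 32-bit 'int' and 64-bit pointers): a
// 'char' parameter is counted with the size of 'int' (4 bytes), and an
// 'int[10]' parameter is counted as a pointer (8 bytes).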
bool ASTContext::isMSStaticDataMemberInlineDefinition(const VarDecl *VD) const {
return getTargetInfo().getCXXABI().isMicrosoft() &&
VD->isStaticDataMember() &&
VD->getType()->isIntegralOrEnumerationType() &&
!VD->getFirstDecl()->isOutOfLine() && VD->getFirstDecl()->hasInit();
}
ASTContext::InlineVariableDefinitionKind
ASTContext::getInlineVariableDefinitionKind(const VarDecl *VD) const {
if (!VD->isInline())
return InlineVariableDefinitionKind::None;
// In almost all cases, it's a weak definition.
auto *First = VD->getFirstDecl();
if (First->isInlineSpecified() || !First->isStaticDataMember())
return InlineVariableDefinitionKind::Weak;
// If there's a file-context declaration in this translation unit, it's a
// non-discardable definition.
for (auto *D : VD->redecls())
if (D->getLexicalDeclContext()->isFileContext() &&
!D->isInlineSpecified() && (D->isConstexpr() || First->isConstexpr()))
return InlineVariableDefinitionKind::Strong;
// If we've not seen one yet, we don't know.
return InlineVariableDefinitionKind::WeakUnknown;
}
static std::string charUnitsToString(const CharUnits &CU) {
return llvm::itostr(CU.getQuantity());
}
/// getObjCEncodingForBlock - Return the encoded type for this block
/// declaration.
std::string ASTContext::getObjCEncodingForBlock(const BlockExpr *Expr) const {
std::string S;
const BlockDecl *Decl = Expr->getBlockDecl();
QualType BlockTy =
Expr->getType()->castAs<BlockPointerType>()->getPointeeType();
QualType BlockReturnTy = BlockTy->castAs<FunctionType>()->getReturnType();
// Encode result type.
if (getLangOpts().EncodeExtendedBlockSig)
getObjCEncodingForMethodParameter(Decl::OBJC_TQ_None, BlockReturnTy, S,
true /*Extended*/);
else
getObjCEncodingForType(BlockReturnTy, S);
// Compute size of all parameters.
// Start with computing size of a pointer in number of bytes.
// FIXME: There might(should) be a better way of doing this computation!
CharUnits PtrSize = getTypeSizeInChars(VoidPtrTy);
CharUnits ParmOffset = PtrSize;
for (auto PI : Decl->parameters()) {
QualType PType = PI->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
continue;
assert(sz.isPositive() && "BlockExpr - Incomplete param type");
ParmOffset += sz;
}
// Size of the argument frame
S += charUnitsToString(ParmOffset);
// Block pointer and offset.
S += "@?0";
// Argument types.
ParmOffset = PtrSize;
for (auto PVDecl : Decl->parameters()) {
QualType PType = PVDecl->getOriginalType();
if (const auto *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
// Use the array's original type only if it has a known number of
// elements.
if (!isa<ConstantArrayType>(AT))
PType = PVDecl->getType();
} else if (PType->isFunctionType())
PType = PVDecl->getType();
if (getLangOpts().EncodeExtendedBlockSig)
getObjCEncodingForMethodParameter(Decl::OBJC_TQ_None, PType,
S, true /*Extended*/);
else
getObjCEncodingForType(PType, S);
S += charUnitsToString(ParmOffset);
ParmOffset += getObjCEncodingTypeSize(PType);
}
return S;
}
std::string
ASTContext::getObjCEncodingForFunctionDecl(const FunctionDecl *Decl) const {
std::string S;
// Encode result type.
getObjCEncodingForType(Decl->getReturnType(), S);
CharUnits ParmOffset;
// Compute size of all parameters.
for (auto PI : Decl->parameters()) {
QualType PType = PI->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
continue;
assert(sz.isPositive() &&
"getObjCEncodingForFunctionDecl - Incomplete param type");
ParmOffset += sz;
}
S += charUnitsToString(ParmOffset);
ParmOffset = CharUnits::Zero();
// Argument types.
for (auto PVDecl : Decl->parameters()) {
QualType PType = PVDecl->getOriginalType();
if (const auto *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
// Use the array's original type only if it has a known number of
// elements.
if (!isa<ConstantArrayType>(AT))
PType = PVDecl->getType();
} else if (PType->isFunctionType())
PType = PVDecl->getType();
getObjCEncodingForType(PType, S);
S += charUnitsToString(ParmOffset);
ParmOffset += getObjCEncodingTypeSize(PType);
}
return S;
}
/// getObjCEncodingForMethodParameter - Return the encoded type for a single
/// method parameter or return type. If Extended, include class names and
/// block object types.
void ASTContext::getObjCEncodingForMethodParameter(Decl::ObjCDeclQualifier QT,
QualType T, std::string& S,
bool Extended) const {
// Encode type qualifier, 'in', 'inout', etc. for the parameter.
getObjCEncodingForTypeQualifier(QT, S);
// Encode parameter type.
ObjCEncOptions Options = ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()
.setIsOutermostType();
if (Extended)
Options.setEncodeBlockParameters().setEncodeClassNames();
getObjCEncodingForTypeImpl(T, S, Options, /*Field=*/nullptr);
}
/// getObjCEncodingForMethodDecl - Return the encoded type for this method
/// declaration.
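/// For illustration only (offsets vary with the target's pointer width): on a
/// typical 64-bit target, a method declared as
/// @code
///   - (void)setIntValue:(int)value;
/// @endcode
/// is normally encoded as "v20@0:8i16": the return type, the total argument
/// frame size, the implicit self ('@') and _cmd (':') arguments with their
/// offsets, then each declared parameter with its byte offset.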
std::string ASTContext::getObjCEncodingForMethodDecl(const ObjCMethodDecl *Decl,
bool Extended) const {
// FIXME: This is not very efficient.
// Encode return type.
std::string S;
getObjCEncodingForMethodParameter(Decl->getObjCDeclQualifier(),
Decl->getReturnType(), S, Extended);
// Compute size of all parameters.
// Start by computing the size of a pointer in bytes.
// FIXME: There might (and should) be a better way of doing this computation!
CharUnits PtrSize = getTypeSizeInChars(VoidPtrTy);
// The first two arguments (self and _cmd) are pointers; account for
// their size.
CharUnits ParmOffset = 2 * PtrSize;
for (ObjCMethodDecl::param_const_iterator PI = Decl->param_begin(),
E = Decl->sel_param_end(); PI != E; ++PI) {
QualType PType = (*PI)->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
continue;
assert(sz.isPositive() &&
"getObjCEncodingForMethodDecl - Incomplete param type");
ParmOffset += sz;
}
S += charUnitsToString(ParmOffset);
S += "@0:";
S += charUnitsToString(PtrSize);
// Argument types.
ParmOffset = 2 * PtrSize;
for (ObjCMethodDecl::param_const_iterator PI = Decl->param_begin(),
E = Decl->sel_param_end(); PI != E; ++PI) {
const ParmVarDecl *PVDecl = *PI;
QualType PType = PVDecl->getOriginalType();
if (const auto *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
// Use the array's original type only if it has a known number of
// elements.
if (!isa<ConstantArrayType>(AT))
PType = PVDecl->getType();
} else if (PType->isFunctionType())
PType = PVDecl->getType();
getObjCEncodingForMethodParameter(PVDecl->getObjCDeclQualifier(),
PType, S, Extended);
S += charUnitsToString(ParmOffset);
ParmOffset += getObjCEncodingTypeSize(PType);
}
return S;
}
ObjCPropertyImplDecl *
ASTContext::getObjCPropertyImplDeclForPropertyDecl(
const ObjCPropertyDecl *PD,
const Decl *Container) const {
if (!Container)
return nullptr;
if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(Container)) {
for (auto *PID : CID->property_impls())
if (PID->getPropertyDecl() == PD)
return PID;
} else {
const auto *OID = cast<ObjCImplementationDecl>(Container);
for (auto *PID : OID->property_impls())
if (PID->getPropertyDecl() == PD)
return PID;
}
return nullptr;
}
/// getObjCEncodingForPropertyDecl - Return the encoded type for this
/// property declaration. If non-NULL, Container must be either an
/// ObjCCategoryImplDecl or ObjCImplementationDecl; it should only be
/// NULL when getting encodings for protocol properties.
/// Property attributes are stored as a comma-delimited C string. The simple
/// attributes readonly and bycopy are encoded as single characters. The
/// parametrized attributes, getter=name, setter=name, and ivar=name, are
/// encoded as single characters, followed by an identifier. Property types
/// are also encoded as a parametrized attribute. The characters used to encode
/// these attributes are defined by the following enumeration:
/// @code
/// enum PropertyAttributes {
/// kPropertyReadOnly = 'R', // property is read-only.
/// kPropertyBycopy = 'C', // property is a copy of the value last assigned
/// kPropertyByref = '&', // property is a reference to the value last assigned
/// kPropertyDynamic = 'D', // property is dynamic
/// kPropertyGetter = 'G', // followed by getter selector name
/// kPropertySetter = 'S', // followed by setter selector name
/// kPropertyInstanceVariable = 'V', // followed by instance variable name
/// kPropertyType = 'T', // followed by old-style type encoding.
/// kPropertyWeak = 'W', // 'weak' property
/// kPropertyStrong = 'P', // property GC'able
/// kPropertyNonAtomic = 'N' // property non-atomic
/// };
/// @endcode
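/// For illustration only: a property declared as
/// @code
///   @property (nonatomic, copy) NSString *name; // synthesized ivar "_name"
/// @endcode
/// is typically encoded as T@"NSString",C,N,V_name.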
std::string
ASTContext::getObjCEncodingForPropertyDecl(const ObjCPropertyDecl *PD,
const Decl *Container) const {
// Collect information from the property implementation decl(s).
bool Dynamic = false;
ObjCPropertyImplDecl *SynthesizePID = nullptr;
if (ObjCPropertyImplDecl *PropertyImpDecl =
getObjCPropertyImplDeclForPropertyDecl(PD, Container)) {
if (PropertyImpDecl->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic)
Dynamic = true;
else
SynthesizePID = PropertyImpDecl;
}
// FIXME: This is not very efficient.
std::string S = "T";
// Encode result type.
// GCC has some special rules regarding encoding of properties which
// closely resemble the encoding of ivars.
getObjCEncodingForPropertyType(PD->getType(), S);
if (PD->isReadOnly()) {
S += ",R";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_copy)
S += ",C";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_retain)
S += ",&";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_weak)
S += ",W";
} else {
switch (PD->getSetterKind()) {
case ObjCPropertyDecl::Assign: break;
case ObjCPropertyDecl::Copy: S += ",C"; break;
case ObjCPropertyDecl::Retain: S += ",&"; break;
case ObjCPropertyDecl::Weak: S += ",W"; break;
}
}
// It really isn't clear at all what this means, since properties
// are "dynamic by default".
if (Dynamic)
S += ",D";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_nonatomic)
S += ",N";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_getter) {
S += ",G";
S += PD->getGetterName().getAsString();
}
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_setter) {
S += ",S";
S += PD->getSetterName().getAsString();
}
if (SynthesizePID) {
const ObjCIvarDecl *OID = SynthesizePID->getPropertyIvarDecl();
S += ",V";
S += OID->getNameAsString();
}
// FIXME: OBJCGC: weak & strong
return S;
}
/// getLegacyIntegralTypeEncoding -
/// Another legacy compatibility encoding: 32-bit longs are encoded as
/// 'l' or 'L', but not always. For typedefs, we need to use
/// 'i' or 'I' instead when encoding a struct field or a pointer.
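/// For illustration only: on a target where 'long' is 32 bits wide, a field
/// such as
/// @code
///   typedef long my_long_t; // hypothetical typedef, for illustration
///   struct S { my_long_t field; };
/// @endcode
/// has 'field' encoded as 'i' rather than 'l' (and an 'unsigned long' typedef
/// would be encoded as 'I' rather than 'L').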
void ASTContext::getLegacyIntegralTypeEncoding (QualType &PointeeTy) const {
if (isa<TypedefType>(PointeeTy.getTypePtr())) {
if (const auto *BT = PointeeTy->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::ULong && getIntWidth(PointeeTy) == 32)
PointeeTy = UnsignedIntTy;
else
if (BT->getKind() == BuiltinType::Long && getIntWidth(PointeeTy) == 32)
PointeeTy = IntTy;
}
}
}
void ASTContext::getObjCEncodingForType(QualType T, std::string& S,
const FieldDecl *Field,
QualType *NotEncodedT) const {
// We follow the behavior of gcc, expanding structures which are
// directly pointed to, and expanding embedded structures. Note that
// these rules are sufficient to prevent recursive encoding of the
// same type.
getObjCEncodingForTypeImpl(T, S,
ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()
.setIsOutermostType(),
Field, NotEncodedT);
}
void ASTContext::getObjCEncodingForPropertyType(QualType T,
std::string& S) const {
// Encode result type.
// GCC has some special rules regarding encoding of properties which
// closely resemble the encoding of ivars.
getObjCEncodingForTypeImpl(T, S,
ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()
.setIsOutermostType()
.setEncodingProperty(),
/*Field=*/nullptr);
}
static char getObjCEncodingForPrimitiveType(const ASTContext *C,
const BuiltinType *BT) {
BuiltinType::Kind kind = BT->getKind();
switch (kind) {
case BuiltinType::Void: return 'v';
case BuiltinType::Bool: return 'B';
case BuiltinType::Char8:
case BuiltinType::Char_U:
case BuiltinType::UChar: return 'C';
case BuiltinType::Char16:
case BuiltinType::UShort: return 'S';
case BuiltinType::Char32:
case BuiltinType::UInt: return 'I';
case BuiltinType::ULong:
return C->getTargetInfo().getLongWidth() == 32 ? 'L' : 'Q';
case BuiltinType::UInt128: return 'T';
case BuiltinType::ULongLong: return 'Q';
case BuiltinType::Char_S:
case BuiltinType::SChar: return 'c';
case BuiltinType::Short: return 's';
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
case BuiltinType::Int: return 'i';
case BuiltinType::Long:
return C->getTargetInfo().getLongWidth() == 32 ? 'l' : 'q';
case BuiltinType::LongLong: return 'q';
case BuiltinType::Int128: return 't';
case BuiltinType::Float: return 'f';
case BuiltinType::Double: return 'd';
case BuiltinType::LongDouble: return 'D';
case BuiltinType::NullPtr: return '*'; // like char*
case BuiltinType::BFloat16:
case BuiltinType::Float16:
case BuiltinType::Float128:
case BuiltinType::Half:
case BuiltinType::ShortAccum:
case BuiltinType::Accum:
case BuiltinType::LongAccum:
case BuiltinType::UShortAccum:
case BuiltinType::UAccum:
case BuiltinType::ULongAccum:
case BuiltinType::ShortFract:
case BuiltinType::Fract:
case BuiltinType::LongFract:
case BuiltinType::UShortFract:
case BuiltinType::UFract:
case BuiltinType::ULongFract:
case BuiltinType::SatShortAccum:
case BuiltinType::SatAccum:
case BuiltinType::SatLongAccum:
case BuiltinType::SatUShortAccum:
case BuiltinType::SatUAccum:
case BuiltinType::SatULongAccum:
case BuiltinType::SatShortFract:
case BuiltinType::SatFract:
case BuiltinType::SatLongFract:
case BuiltinType::SatUShortFract:
case BuiltinType::SatUFract:
case BuiltinType::SatULongFract:
// FIXME: potentially need @encodes for these!
return ' ';
#define SVE_TYPE(Name, Id, SingletonId) \
case BuiltinType::Id:
#include "clang/Basic/AArch64SVEACLETypes.def"
#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/RISCVVTypes.def"
{
DiagnosticsEngine &Diags = C->getDiagnostics();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot yet @encode type %0");
Diags.Report(DiagID) << BT->getName(C->getPrintingPolicy());
return ' ';
}
case BuiltinType::ObjCId:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCSel:
llvm_unreachable("@encoding ObjC primitive type");
// OpenCL and placeholder types don't need @encodings.
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLReserveID:
case BuiltinType::OCLSampler:
case BuiltinType::Dependent:
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id:
#include "clang/Basic/PPCTypes.def"
#define BUILTIN_TYPE(KIND, ID)
#define PLACEHOLDER_TYPE(KIND, ID) \
case BuiltinType::KIND:
#include "clang/AST/BuiltinTypes.def"
llvm_unreachable("invalid builtin type for @encode");
}
llvm_unreachable("invalid BuiltinType::Kind value");
}
static char ObjCEncodingForEnumType(const ASTContext *C, const EnumType *ET) {
EnumDecl *Enum = ET->getDecl();
// The encoding of a non-fixed enum type is always 'i', regardless of size.
if (!Enum->isFixed())
return 'i';
// The encoding of a fixed enum type matches its fixed underlying type.
const auto *BT = Enum->getIntegerType()->castAs<BuiltinType>();
return getObjCEncodingForPrimitiveType(C, BT);
}
static void EncodeBitField(const ASTContext *Ctx, std::string& S,
QualType T, const FieldDecl *FD) {
assert(FD->isBitField() && "not a bitfield - getObjCEncodingForTypeImpl");
S += 'b';
// The NeXT runtime encodes bit fields as b followed by the number of bits.
// The GNU runtime requires more information; bitfields are encoded as b,
// then the offset (in bits) of the first element, then the type of the
// bitfield, then the size in bits. For example, in this structure:
//
// struct
// {
// int integer;
// int flags:2;
// };
// On a 32-bit system, the encoding for flags would be b2 for the NeXT
// runtime, but b32i2 for the GNU runtime. The reason for this extra
// information is not especially sensible, but we're stuck with it for
// compatibility with GCC, although providing it breaks anything that
// actually uses runtime introspection and wants to work on both runtimes...
if (Ctx->getLangOpts().ObjCRuntime.isGNUFamily()) {
uint64_t Offset;
if (const auto *IVD = dyn_cast<ObjCIvarDecl>(FD)) {
Offset = Ctx->lookupFieldBitOffset(IVD->getContainingInterface(), nullptr,
IVD);
} else {
const RecordDecl *RD = FD->getParent();
const ASTRecordLayout &RL = Ctx->getASTRecordLayout(RD);
Offset = RL.getFieldOffset(FD->getFieldIndex());
}
S += llvm::utostr(Offset);
if (const auto *ET = T->getAs<EnumType>())
S += ObjCEncodingForEnumType(Ctx, ET);
else {
const auto *BT = T->castAs<BuiltinType>();
S += getObjCEncodingForPrimitiveType(Ctx, BT);
}
}
S += llvm::utostr(FD->getBitWidthValue(*Ctx));
}
// Helper function for determining whether the encoded type string would include
// a template specialization type.
static bool hasTemplateSpecializationInEncodedString(const Type *T,
bool VisitBasesAndFields) {
T = T->getBaseElementTypeUnsafe();
if (auto *PT = T->getAs<PointerType>())
return hasTemplateSpecializationInEncodedString(
PT->getPointeeType().getTypePtr(), false);
auto *CXXRD = T->getAsCXXRecordDecl();
if (!CXXRD)
return false;
if (isa<ClassTemplateSpecializationDecl>(CXXRD))
return true;
if (!CXXRD->hasDefinition() || !VisitBasesAndFields)
return false;
for (auto B : CXXRD->bases())
if (hasTemplateSpecializationInEncodedString(B.getType().getTypePtr(),
true))
return true;
for (auto *FD : CXXRD->fields())
if (hasTemplateSpecializationInEncodedString(FD->getType().getTypePtr(),
true))
return true;
return false;
}
// FIXME: Use SmallString for accumulating string.
void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
const ObjCEncOptions Options,
const FieldDecl *FD,
QualType *NotEncodedT) const {
CanQualType CT = getCanonicalType(T);
switch (CT->getTypeClass()) {
case Type::Builtin:
case Type::Enum:
if (FD && FD->isBitField())
return EncodeBitField(this, S, T, FD);
if (const auto *BT = dyn_cast<BuiltinType>(CT))
S += getObjCEncodingForPrimitiveType(this, BT);
else
S += ObjCEncodingForEnumType(this, cast<EnumType>(CT));
return;
case Type::Complex:
S += 'j';
getObjCEncodingForTypeImpl(T->castAs<ComplexType>()->getElementType(), S,
ObjCEncOptions(),
/*Field=*/nullptr);
return;
case Type::Atomic:
S += 'A';
getObjCEncodingForTypeImpl(T->castAs<AtomicType>()->getValueType(), S,
ObjCEncOptions(),
/*Field=*/nullptr);
return;
// encoding for pointer or reference types.
case Type::Pointer:
case Type::LValueReference:
case Type::RValueReference: {
QualType PointeeTy;
if (isa<PointerType>(CT)) {
const auto *PT = T->castAs<PointerType>();
if (PT->isObjCSelType()) {
S += ':';
return;
}
PointeeTy = PT->getPointeeType();
} else {
PointeeTy = T->castAs<ReferenceType>()->getPointeeType();
}
bool isReadOnly = false;
// For historical/compatibility reasons, the read-only qualifier of the
// pointee gets emitted _before_ the '^'. The read-only qualifier of
// the pointer itself gets ignored, _unless_ we are looking at a typedef!
// Also, do not emit the 'r' for anything but the outermost type!
if (isa<TypedefType>(T.getTypePtr())) {
if (Options.IsOutermostType() && T.isConstQualified()) {
isReadOnly = true;
S += 'r';
}
} else if (Options.IsOutermostType()) {
QualType P = PointeeTy;
while (auto PT = P->getAs<PointerType>())
P = PT->getPointeeType();
if (P.isConstQualified()) {
isReadOnly = true;
S += 'r';
}
}
if (isReadOnly) {
// Another legacy compatibility encoding. Some ObjC qualifier and type
// combinations need to be rearranged.
// Rewrite "in const" from "nr" to "rn"
if (StringRef(S).endswith("nr"))
S.replace(S.end()-2, S.end(), "rn");
}
if (PointeeTy->isCharType()) {
// char pointer types should be encoded as '*' unless the pointee is a
// type that has been typedef'd to 'BOOL'.
if (!isTypeTypedefedAsBOOL(PointeeTy)) {
S += '*';
return;
}
} else if (const auto *RTy = PointeeTy->getAs<RecordType>()) {
// GCC binary compat: Need to convert "struct objc_class *" to "#".
if (RTy->getDecl()->getIdentifier() == &Idents.get("objc_class")) {
S += '#';
return;
}
// GCC binary compat: Need to convert "struct objc_object *" to "@".
if (RTy->getDecl()->getIdentifier() == &Idents.get("objc_object")) {
S += '@';
return;
}
// If the encoded string for the class includes template names, just emit
// "^v" for pointers to the class.
if (getLangOpts().CPlusPlus &&
(!getLangOpts().EncodeCXXClassTemplateSpec &&
hasTemplateSpecializationInEncodedString(
RTy, Options.ExpandPointedToStructures()))) {
S += "^v";
return;
}
// fall through...
}
S += '^';
getLegacyIntegralTypeEncoding(PointeeTy);
ObjCEncOptions NewOptions;
if (Options.ExpandPointedToStructures())
NewOptions.setExpandStructures();
getObjCEncodingForTypeImpl(PointeeTy, S, NewOptions,
/*Field=*/nullptr, NotEncodedT);
return;
}
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray: {
const auto *AT = cast<ArrayType>(CT);
if (isa<IncompleteArrayType>(AT) && !Options.IsStructField()) {
// Incomplete arrays are encoded as a pointer to the array element.
S += '^';
getObjCEncodingForTypeImpl(
AT->getElementType(), S,
Options.keepingOnly(ObjCEncOptions().setExpandStructures()), FD);
} else {
S += '[';
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
S += llvm::utostr(CAT->getSize().getZExtValue());
else {
// Variable length arrays are encoded as a regular array with 0 elements.
assert((isa<VariableArrayType>(AT) || isa<IncompleteArrayType>(AT)) &&
"Unknown array type!");
S += '0';
}
getObjCEncodingForTypeImpl(
AT->getElementType(), S,
Options.keepingOnly(ObjCEncOptions().setExpandStructures()), FD,
NotEncodedT);
S += ']';
}
return;
}
case Type::FunctionNoProto:
case Type::FunctionProto:
S += '?';
return;
case Type::Record: {
RecordDecl *RDecl = cast<RecordType>(CT)->getDecl();
S += RDecl->isUnion() ? '(' : '{';
// Anonymous structures print as '?'
if (const IdentifierInfo *II = RDecl->getIdentifier()) {
S += II->getName();
if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(RDecl)) {
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
llvm::raw_string_ostream OS(S);
printTemplateArgumentList(OS, TemplateArgs.asArray(),
getPrintingPolicy());
}
} else {
S += '?';
}
if (Options.ExpandStructures()) {
S += '=';
if (!RDecl->isUnion()) {
getObjCEncodingForStructureImpl(RDecl, S, FD, true, NotEncodedT);
} else {
for (const auto *Field : RDecl->fields()) {
if (FD) {
S += '"';
S += Field->getNameAsString();
S += '"';
}
// Special case bit-fields.
if (Field->isBitField()) {
getObjCEncodingForTypeImpl(Field->getType(), S,
ObjCEncOptions().setExpandStructures(),
Field);
} else {
QualType qt = Field->getType();
getLegacyIntegralTypeEncoding(qt);
getObjCEncodingForTypeImpl(
qt, S,
ObjCEncOptions().setExpandStructures().setIsStructField(), FD,
NotEncodedT);
}
}
}
}
S += RDecl->isUnion() ? ')' : '}';
return;
}
case Type::BlockPointer: {
const auto *BT = T->castAs<BlockPointerType>();
S += "@?"; // Unlike a pointer-to-function, which is "^?".
if (Options.EncodeBlockParameters()) {
const auto *FT = BT->getPointeeType()->castAs<FunctionType>();
S += '<';
// Block return type
getObjCEncodingForTypeImpl(FT->getReturnType(), S,
Options.forComponentType(), FD, NotEncodedT);
// Block self
S += "@?";
// Block parameters
if (const auto *FPT = dyn_cast<FunctionProtoType>(FT)) {
for (const auto &I : FPT->param_types())
getObjCEncodingForTypeImpl(I, S, Options.forComponentType(), FD,
NotEncodedT);
}
S += '>';
}
return;
}
case Type::ObjCObject: {
// hack to match legacy encoding of *id and *Class
QualType Ty = getObjCObjectPointerType(CT);
if (Ty->isObjCIdType()) {
S += "{objc_object=}";
return;
}
else if (Ty->isObjCClassType()) {
S += "{objc_class=}";
return;
}
// TODO: Double check to make sure this intentionally falls through.
LLVM_FALLTHROUGH;
}
case Type::ObjCInterface: {
// Ignore protocol qualifiers when mangling at this level.
// @encode(class_name)
ObjCInterfaceDecl *OI = T->castAs<ObjCObjectType>()->getInterface();
S += '{';
S += OI->getObjCRuntimeNameAsString();
if (Options.ExpandStructures()) {
S += '=';
SmallVector<const ObjCIvarDecl*, 32> Ivars;
DeepCollectObjCIvars(OI, true, Ivars);
for (unsigned i = 0, e = Ivars.size(); i != e; ++i) {
const FieldDecl *Field = Ivars[i];
if (Field->isBitField())
getObjCEncodingForTypeImpl(Field->getType(), S,
ObjCEncOptions().setExpandStructures(),
Field);
else
getObjCEncodingForTypeImpl(Field->getType(), S,
ObjCEncOptions().setExpandStructures(), FD,
NotEncodedT);
}
}
S += '}';
return;
}
case Type::ObjCObjectPointer: {
const auto *OPT = T->castAs<ObjCObjectPointerType>();
if (OPT->isObjCIdType()) {
S += '@';
return;
}
if (OPT->isObjCClassType() || OPT->isObjCQualifiedClassType()) {
// FIXME: Consider if we need to output qualifiers for 'Class<p>'.
// Since this is a binary compatibility issue, need to consult with
// runtime folks. Fortunately, this is a *very* obscure construct.
S += '#';
return;
}
if (OPT->isObjCQualifiedIdType()) {
getObjCEncodingForTypeImpl(
getObjCIdType(), S,
Options.keepingOnly(ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()),
FD);
if (FD || Options.EncodingProperty() || Options.EncodeClassNames()) {
// Note that we do extended encoding of the protocol qualifier list
// only when doing ivar or property encoding.
S += '"';
for (const auto *I : OPT->quals()) {
S += '<';
S += I->getObjCRuntimeNameAsString();
S += '>';
}
S += '"';
}
return;
}
S += '@';
if (OPT->getInterfaceDecl() &&
(FD || Options.EncodingProperty() || Options.EncodeClassNames())) {
S += '"';
S += OPT->getInterfaceDecl()->getObjCRuntimeNameAsString();
for (const auto *I : OPT->quals()) {
S += '<';
S += I->getObjCRuntimeNameAsString();
S += '>';
}
S += '"';
}
return;
}
// gcc just blithely ignores member pointers.
// FIXME: we should do better than that. 'M' is available.
case Type::MemberPointer:
// This matches gcc's encoding, even though technically it is insufficient.
// FIXME: We should do a better job than gcc.
case Type::Vector:
case Type::ExtVector:
// Until we have a coherent encoding of these three types, issue warning.
if (NotEncodedT)
*NotEncodedT = T;
return;
case Type::ConstantMatrix:
if (NotEncodedT)
*NotEncodedT = T;
return;
// We could see an undeduced auto type here during error recovery.
// Just ignore it.
case Type::Auto:
case Type::DeducedTemplateSpecialization:
return;
case Type::Pipe:
case Type::ExtInt:
#define ABSTRACT_TYPE(KIND, BASE)
#define TYPE(KIND, BASE)
#define DEPENDENT_TYPE(KIND, BASE) \
case Type::KIND:
#define NON_CANONICAL_TYPE(KIND, BASE) \
case Type::KIND:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(KIND, BASE) \
case Type::KIND:
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("@encode for dependent type!");
}
llvm_unreachable("bad type kind!");
}
void ASTContext::getObjCEncodingForStructureImpl(RecordDecl *RDecl,
std::string &S,
const FieldDecl *FD,
bool includeVBases,
QualType *NotEncodedT) const {
assert(RDecl && "Expected non-null RecordDecl");
assert(!RDecl->isUnion() && "Should not be called for unions");
if (!RDecl->getDefinition() || RDecl->getDefinition()->isInvalidDecl())
return;
const auto *CXXRec = dyn_cast<CXXRecordDecl>(RDecl);
std::multimap<uint64_t, NamedDecl *> FieldOrBaseOffsets;
const ASTRecordLayout &layout = getASTRecordLayout(RDecl);
if (CXXRec) {
for (const auto &BI : CXXRec->bases()) {
if (!BI.isVirtual()) {
CXXRecordDecl *base = BI.getType()->getAsCXXRecordDecl();
if (base->isEmpty())
continue;
uint64_t offs = toBits(layout.getBaseClassOffset(base));
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.upper_bound(offs),
std::make_pair(offs, base));
}
}
}
unsigned i = 0;
for (FieldDecl *Field : RDecl->fields()) {
if (!Field->isZeroLengthBitField(*this) && Field->isZeroSize(*this))
continue;
uint64_t offs = layout.getFieldOffset(i);
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.upper_bound(offs),
std::make_pair(offs, Field));
++i;
}
if (CXXRec && includeVBases) {
for (const auto &BI : CXXRec->vbases()) {
CXXRecordDecl *base = BI.getType()->getAsCXXRecordDecl();
if (base->isEmpty())
continue;
uint64_t offs = toBits(layout.getVBaseClassOffset(base));
if (offs >= uint64_t(toBits(layout.getNonVirtualSize())) &&
FieldOrBaseOffsets.find(offs) == FieldOrBaseOffsets.end())
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.end(),
std::make_pair(offs, base));
}
}
CharUnits size;
if (CXXRec) {
size = includeVBases ? layout.getSize() : layout.getNonVirtualSize();
} else {
size = layout.getSize();
}
#ifndef NDEBUG
uint64_t CurOffs = 0;
#endif
std::multimap<uint64_t, NamedDecl *>::iterator
CurLayObj = FieldOrBaseOffsets.begin();
if (CXXRec && CXXRec->isDynamicClass() &&
(CurLayObj == FieldOrBaseOffsets.end() || CurLayObj->first != 0)) {
if (FD) {
S += "\"_vptr$";
std::string recname = CXXRec->getNameAsString();
if (recname.empty()) recname = "?";
S += recname;
S += '"';
}
S += "^^?";
#ifndef NDEBUG
CurOffs += getTypeSize(VoidPtrTy);
#endif
}
if (!RDecl->hasFlexibleArrayMember()) {
// Mark the end of the structure.
uint64_t offs = toBits(size);
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.upper_bound(offs),
std::make_pair(offs, nullptr));
}
for (; CurLayObj != FieldOrBaseOffsets.end(); ++CurLayObj) {
#ifndef NDEBUG
assert(CurOffs <= CurLayObj->first);
if (CurOffs < CurLayObj->first) {
uint64_t padding = CurLayObj->first - CurOffs;
// FIXME: There doesn't seem to be a way to indicate in the encoding that
// packing/alignment of members is different than normal, in which case
// the encoding will be out-of-sync with the real layout.
// If the runtime switches to just consider the size of types without
// taking into account alignment, we could make padding explicit in the
// encoding (e.g. using arrays of chars). The encoding strings would be
// longer in that case, though.
CurOffs += padding;
}
#endif
NamedDecl *dcl = CurLayObj->second;
if (!dcl)
break; // reached end of structure.
if (auto *base = dyn_cast<CXXRecordDecl>(dcl)) {
// We expand the bases without their virtual bases since those are going
// in the initial structure. Note that this differs from gcc which
// expands virtual bases each time one is encountered in the hierarchy,
// making the encoding type bigger than it really is.
getObjCEncodingForStructureImpl(base, S, FD, /*includeVBases*/false,
NotEncodedT);
assert(!base->isEmpty());
#ifndef NDEBUG
CurOffs += toBits(getASTRecordLayout(base).getNonVirtualSize());
#endif
} else {
const auto *field = cast<FieldDecl>(dcl);
if (FD) {
S += '"';
S += field->getNameAsString();
S += '"';
}
if (field->isBitField()) {
EncodeBitField(this, S, field->getType(), field);
#ifndef NDEBUG
CurOffs += field->getBitWidthValue(*this);
#endif
} else {
QualType qt = field->getType();
getLegacyIntegralTypeEncoding(qt);
getObjCEncodingForTypeImpl(
qt, S, ObjCEncOptions().setExpandStructures().setIsStructField(),
FD, NotEncodedT);
#ifndef NDEBUG
CurOffs += getTypeSize(field->getType());
#endif
}
}
}
}
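/// Append the single-character encodings for the given ObjC declaration
/// qualifiers to \p S: 'in' -> 'n', 'inout' -> 'N', 'out' -> 'o',
/// 'bycopy' -> 'O', 'byref' -> 'R', 'oneway' -> 'V'.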
void ASTContext::getObjCEncodingForTypeQualifier(Decl::ObjCDeclQualifier QT,
std::string& S) const {
if (QT & Decl::OBJC_TQ_In)
S += 'n';
if (QT & Decl::OBJC_TQ_Inout)
S += 'N';
if (QT & Decl::OBJC_TQ_Out)
S += 'o';
if (QT & Decl::OBJC_TQ_Bycopy)
S += 'O';
if (QT & Decl::OBJC_TQ_Byref)
S += 'R';
if (QT & Decl::OBJC_TQ_Oneway)
S += 'V';
}
TypedefDecl *ASTContext::getObjCIdDecl() const {
if (!ObjCIdDecl) {
QualType T = getObjCObjectType(ObjCBuiltinIdTy, {}, {});
T = getObjCObjectPointerType(T);
ObjCIdDecl = buildImplicitTypedef(T, "id");
}
return ObjCIdDecl;
}
TypedefDecl *ASTContext::getObjCSelDecl() const {
if (!ObjCSelDecl) {
QualType T = getPointerType(ObjCBuiltinSelTy);
ObjCSelDecl = buildImplicitTypedef(T, "SEL");
}
return ObjCSelDecl;
}
TypedefDecl *ASTContext::getObjCClassDecl() const {
if (!ObjCClassDecl) {
QualType T = getObjCObjectType(ObjCBuiltinClassTy, {}, {});
T = getObjCObjectPointerType(T);
ObjCClassDecl = buildImplicitTypedef(T, "Class");
}
return ObjCClassDecl;
}
ObjCInterfaceDecl *ASTContext::getObjCProtocolDecl() const {
if (!ObjCProtocolClassDecl) {
ObjCProtocolClassDecl
= ObjCInterfaceDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(),
&Idents.get("Protocol"),
/*typeParamList=*/nullptr,
/*PrevDecl=*/nullptr,
SourceLocation(), true);
}
return ObjCProtocolClassDecl;
}
//===----------------------------------------------------------------------===//
// __builtin_va_list Construction Functions
//===----------------------------------------------------------------------===//
static TypedefDecl *CreateCharPtrNamedVaListDecl(const ASTContext *Context,
StringRef Name) {
// typedef char* __builtin[_ms]_va_list;
QualType T = Context->getPointerType(Context->CharTy);
return Context->buildImplicitTypedef(T, Name);
}
static TypedefDecl *CreateMSVaListDecl(const ASTContext *Context) {
return CreateCharPtrNamedVaListDecl(Context, "__builtin_ms_va_list");
}
static TypedefDecl *CreateCharPtrBuiltinVaListDecl(const ASTContext *Context) {
return CreateCharPtrNamedVaListDecl(Context, "__builtin_va_list");
}
static TypedefDecl *CreateVoidPtrBuiltinVaListDecl(const ASTContext *Context) {
// typedef void* __builtin_va_list;
QualType T = Context->getPointerType(Context->VoidTy);
return Context->buildImplicitTypedef(T, "__builtin_va_list");
}
static TypedefDecl *
CreateAArch64ABIBuiltinVaListDecl(const ASTContext *Context) {
RecordDecl *VaListTagDecl = Context->buildImplicitRecord("__va_list");
// namespace std { struct __va_list {
// Note that we create the namespace even in C. This is intentional so that
// the type is consistent between C and C++, which is important in cases where
// the types need to match between translation units (e.g. with
// -fsanitize=cfi-icall). Ideally we wouldn't have created this namespace at
// all, but it's now part of the ABI (e.g. in mangled names), so we can't
// change it.
auto *NS = NamespaceDecl::Create(
const_cast<ASTContext &>(*Context), Context->getTranslationUnitDecl(),
/*Inline*/ false, SourceLocation(), SourceLocation(),
&Context->Idents.get("std"),
/*PrevDecl*/ nullptr);
NS->setImplicit();
VaListTagDecl->setDeclContext(NS);
VaListTagDecl->startDefinition();
const size_t NumFields = 5;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// void *__stack;
FieldTypes[0] = Context->getPointerType(Context->VoidTy);
FieldNames[0] = "__stack";
// void *__gr_top;
FieldTypes[1] = Context->getPointerType(Context->VoidTy);
FieldNames[1] = "__gr_top";
// void *__vr_top;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "__vr_top";
// int __gr_offs;
FieldTypes[3] = Context->IntTy;
FieldNames[3] = "__gr_offs";
// int __vr_offs;
FieldTypes[4] = Context->IntTy;
FieldNames[4] = "__vr_offs";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// } __builtin_va_list;
return Context->buildImplicitTypedef(VaListTagType, "__builtin_va_list");
}
static TypedefDecl *CreatePowerABIBuiltinVaListDecl(const ASTContext *Context) {
// typedef struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 5;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// unsigned char gpr;
FieldTypes[0] = Context->UnsignedCharTy;
FieldNames[0] = "gpr";
// unsigned char fpr;
FieldTypes[1] = Context->UnsignedCharTy;
FieldNames[1] = "fpr";
// unsigned short reserved;
FieldTypes[2] = Context->UnsignedShortTy;
FieldNames[2] = "reserved";
// void* overflow_arg_area;
FieldTypes[3] = Context->getPointerType(Context->VoidTy);
FieldNames[3] = "overflow_arg_area";
// void* reg_save_area;
FieldTypes[4] = Context->getPointerType(Context->VoidTy);
FieldNames[4] = "reg_save_area";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(*Context, VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// } __va_list_tag;
TypedefDecl *VaListTagTypedefDecl =
Context->buildImplicitTypedef(VaListTagType, "__va_list_tag");
QualType VaListTagTypedefType =
Context->getTypedefType(VaListTagTypedefDecl);
// typedef __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType
= Context->getConstantArrayType(VaListTagTypedefType,
Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *
CreateX86_64ABIBuiltinVaListDecl(const ASTContext *Context) {
// struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 4;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// unsigned gp_offset;
FieldTypes[0] = Context->UnsignedIntTy;
FieldNames[0] = "gp_offset";
// unsigned fp_offset;
FieldTypes[1] = Context->UnsignedIntTy;
FieldNames[1] = "fp_offset";
// void* overflow_arg_area;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "overflow_arg_area";
// void* reg_save_area;
FieldTypes[3] = Context->getPointerType(Context->VoidTy);
FieldNames[3] = "reg_save_area";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// };
// typedef struct __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType = Context->getConstantArrayType(
VaListTagType, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *CreatePNaClABIBuiltinVaListDecl(const ASTContext *Context) {
// typedef int __builtin_va_list[4];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 4);
QualType IntArrayType = Context->getConstantArrayType(
Context->IntTy, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(IntArrayType, "__builtin_va_list");
}
static TypedefDecl *
CreateAAPCSABIBuiltinVaListDecl(const ASTContext *Context) {
// struct __va_list
RecordDecl *VaListDecl = Context->buildImplicitRecord("__va_list");
if (Context->getLangOpts().CPlusPlus) {
// namespace std { struct __va_list {
NamespaceDecl *NS;
NS = NamespaceDecl::Create(const_cast<ASTContext &>(*Context),
Context->getTranslationUnitDecl(),
/*Inline*/false, SourceLocation(),
SourceLocation(), &Context->Idents.get("std"),
/*PrevDecl*/ nullptr);
NS->setImplicit();
VaListDecl->setDeclContext(NS);
}
VaListDecl->startDefinition();
// void * __ap;
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get("__ap"),
Context->getPointerType(Context->VoidTy),
/*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListDecl->addDecl(Field);
// };
VaListDecl->completeDefinition();
Context->VaListTagDecl = VaListDecl;
// typedef struct __va_list __builtin_va_list;
QualType T = Context->getRecordType(VaListDecl);
return Context->buildImplicitTypedef(T, "__builtin_va_list");
}
static TypedefDecl *
CreateSystemZBuiltinVaListDecl(const ASTContext *Context) {
// struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 4;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// long __gpr;
FieldTypes[0] = Context->LongTy;
FieldNames[0] = "__gpr";
// long __fpr;
FieldTypes[1] = Context->LongTy;
FieldNames[1] = "__fpr";
// void *__overflow_arg_area;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "__overflow_arg_area";
// void *__reg_save_area;
FieldTypes[3] = Context->getPointerType(Context->VoidTy);
FieldNames[3] = "__reg_save_area";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// };
// typedef __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType = Context->getConstantArrayType(
VaListTagType, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *CreateHexagonBuiltinVaListDecl(const ASTContext *Context) {
// typedef struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 3;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// void *CurrentSavedRegisterArea;
FieldTypes[0] = Context->getPointerType(Context->VoidTy);
FieldNames[0] = "__current_saved_reg_area_pointer";
// void *SavedRegAreaEnd;
FieldTypes[1] = Context->getPointerType(Context->VoidTy);
FieldNames[1] = "__saved_reg_area_end_pointer";
// void *OverflowArea;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "__overflow_area_pointer";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(
const_cast<ASTContext &>(*Context), VaListTagDecl, SourceLocation(),
SourceLocation(), &Context->Idents.get(FieldNames[i]), FieldTypes[i],
/*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// } __va_list_tag;
TypedefDecl *VaListTagTypedefDecl =
Context->buildImplicitTypedef(VaListTagType, "__va_list_tag");
QualType VaListTagTypedefType = Context->getTypedefType(VaListTagTypedefDecl);
// typedef __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType = Context->getConstantArrayType(
VaListTagTypedefType, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *CreateVaListDecl(const ASTContext *Context,
TargetInfo::BuiltinVaListKind Kind) {
switch (Kind) {
case TargetInfo::CharPtrBuiltinVaList:
return CreateCharPtrBuiltinVaListDecl(Context);
case TargetInfo::VoidPtrBuiltinVaList:
return CreateVoidPtrBuiltinVaListDecl(Context);
case TargetInfo::AArch64ABIBuiltinVaList:
return CreateAArch64ABIBuiltinVaListDecl(Context);
case TargetInfo::PowerABIBuiltinVaList:
return CreatePowerABIBuiltinVaListDecl(Context);
case TargetInfo::X86_64ABIBuiltinVaList:
return CreateX86_64ABIBuiltinVaListDecl(Context);
case TargetInfo::PNaClABIBuiltinVaList:
return CreatePNaClABIBuiltinVaListDecl(Context);
case TargetInfo::AAPCSABIBuiltinVaList:
return CreateAAPCSABIBuiltinVaListDecl(Context);
case TargetInfo::SystemZBuiltinVaList:
return CreateSystemZBuiltinVaListDecl(Context);
case TargetInfo::HexagonBuiltinVaList:
return CreateHexagonBuiltinVaListDecl(Context);
}
llvm_unreachable("Unhandled __builtin_va_list type kind");
}
TypedefDecl *ASTContext::getBuiltinVaListDecl() const {
if (!BuiltinVaListDecl) {
BuiltinVaListDecl = CreateVaListDecl(this, Target->getBuiltinVaListKind());
assert(BuiltinVaListDecl->isImplicit());
}
return BuiltinVaListDecl;
}
Decl *ASTContext::getVaListTagDecl() const {
// Force the creation of VaListTagDecl by building the __builtin_va_list
// declaration.
if (!VaListTagDecl)
(void)getBuiltinVaListDecl();
return VaListTagDecl;
}
TypedefDecl *ASTContext::getBuiltinMSVaListDecl() const {
if (!BuiltinMSVaListDecl)
BuiltinMSVaListDecl = CreateMSVaListDecl(this);
return BuiltinMSVaListDecl;
}
bool ASTContext::canBuiltinBeRedeclared(const FunctionDecl *FD) const {
return BuiltinInfo.canBeRedeclared(FD->getBuiltinID());
}
void ASTContext::setObjCConstantStringInterface(ObjCInterfaceDecl *Decl) {
assert(ObjCConstantStringType.isNull() &&
"'NSConstantString' type already set!");
ObjCConstantStringType = getObjCInterfaceType(Decl);
}
/// Retrieve the template name that corresponds to a non-empty
/// lookup.
TemplateName
ASTContext::getOverloadedTemplateName(UnresolvedSetIterator Begin,
UnresolvedSetIterator End) const {
unsigned size = End - Begin;
assert(size > 1 && "set is not overloaded!");
void *memory = Allocate(sizeof(OverloadedTemplateStorage) +
size * sizeof(FunctionTemplateDecl*));
auto *OT = new (memory) OverloadedTemplateStorage(size);
NamedDecl **Storage = OT->getStorage();
for (UnresolvedSetIterator I = Begin; I != End; ++I) {
NamedDecl *D = *I;
assert(isa<FunctionTemplateDecl>(D) ||
isa<UnresolvedUsingValueDecl>(D) ||
(isa<UsingShadowDecl>(D) &&
isa<FunctionTemplateDecl>(D->getUnderlyingDecl())));
*Storage++ = D;
}
return TemplateName(OT);
}
/// Retrieve a template name representing an unqualified-id that has been
/// assumed to name a template for ADL purposes.
TemplateName ASTContext::getAssumedTemplateName(DeclarationName Name) const {
auto *OT = new (*this) AssumedTemplateStorage(Name);
return TemplateName(OT);
}
/// Retrieve the template name that represents a qualified
/// template name such as \c std::vector.
TemplateName
ASTContext::getQualifiedTemplateName(NestedNameSpecifier *NNS,
bool TemplateKeyword,
TemplateDecl *Template) const {
assert(NNS && "Missing nested-name-specifier in qualified template name");
// FIXME: Canonicalization?
llvm::FoldingSetNodeID ID;
QualifiedTemplateName::Profile(ID, NNS, TemplateKeyword, Template);
void *InsertPos = nullptr;
QualifiedTemplateName *QTN =
QualifiedTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
if (!QTN) {
QTN = new (*this, alignof(QualifiedTemplateName))
QualifiedTemplateName(NNS, TemplateKeyword, Template);
QualifiedTemplateNames.InsertNode(QTN, InsertPos);
}
return TemplateName(QTN);
}
/// Retrieve the template name that represents a dependent
/// template name such as \c MetaFun::template apply.
TemplateName
ASTContext::getDependentTemplateName(NestedNameSpecifier *NNS,
const IdentifierInfo *Name) const {
assert((!NNS || NNS->isDependent()) &&
"Nested name specifier must be dependent");
llvm::FoldingSetNodeID ID;
DependentTemplateName::Profile(ID, NNS, Name);
void *InsertPos = nullptr;
DependentTemplateName *QTN =
DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
if (QTN)
return TemplateName(QTN);
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
if (CanonNNS == NNS) {
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Name);
} else {
TemplateName Canon = getDependentTemplateName(CanonNNS, Name);
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Name, Canon);
DependentTemplateName *CheckQTN =
DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckQTN && "Dependent type name canonicalization broken");
(void)CheckQTN;
}
DependentTemplateNames.InsertNode(QTN, InsertPos);
return TemplateName(QTN);
}
/// Retrieve the template name that represents a dependent
/// template name such as \c MetaFun::template operator+.
TemplateName
ASTContext::getDependentTemplateName(NestedNameSpecifier *NNS,
OverloadedOperatorKind Operator) const {
assert((!NNS || NNS->isDependent()) &&
"Nested name specifier must be dependent");
llvm::FoldingSetNodeID ID;
DependentTemplateName::Profile(ID, NNS, Operator);
void *InsertPos = nullptr;
DependentTemplateName *QTN
= DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
if (QTN)
return TemplateName(QTN);
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
if (CanonNNS == NNS) {
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Operator);
} else {
TemplateName Canon = getDependentTemplateName(CanonNNS, Operator);
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Operator, Canon);
DependentTemplateName *CheckQTN
= DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckQTN && "Dependent template name canonicalization broken");
(void)CheckQTN;
}
DependentTemplateNames.InsertNode(QTN, InsertPos);
return TemplateName(QTN);
}
TemplateName
ASTContext::getSubstTemplateTemplateParm(TemplateTemplateParmDecl *param,
TemplateName replacement) const {
llvm::FoldingSetNodeID ID;
SubstTemplateTemplateParmStorage::Profile(ID, param, replacement);
void *insertPos = nullptr;
SubstTemplateTemplateParmStorage *subst
= SubstTemplateTemplateParms.FindNodeOrInsertPos(ID, insertPos);
if (!subst) {
subst = new (*this) SubstTemplateTemplateParmStorage(param, replacement);
SubstTemplateTemplateParms.InsertNode(subst, insertPos);
}
return TemplateName(subst);
}
TemplateName
ASTContext::getSubstTemplateTemplateParmPack(TemplateTemplateParmDecl *Param,
const TemplateArgument &ArgPack) const {
auto &Self = const_cast<ASTContext &>(*this);
llvm::FoldingSetNodeID ID;
SubstTemplateTemplateParmPackStorage::Profile(ID, Self, Param, ArgPack);
void *InsertPos = nullptr;
SubstTemplateTemplateParmPackStorage *Subst
= SubstTemplateTemplateParmPacks.FindNodeOrInsertPos(ID, InsertPos);
if (!Subst) {
Subst = new (*this) SubstTemplateTemplateParmPackStorage(Param,
ArgPack.pack_size(),
ArgPack.pack_begin());
SubstTemplateTemplateParmPacks.InsertNode(Subst, InsertPos);
}
return TemplateName(Subst);
}
/// getFromTargetType - Given one of the integer types provided by
/// TargetInfo, produce the corresponding type. The unsigned @p Type
/// is actually a value of type @c TargetInfo::IntType.
CanQualType ASTContext::getFromTargetType(unsigned Type) const {
switch (Type) {
case TargetInfo::NoInt: return {};
case TargetInfo::SignedChar: return SignedCharTy;
case TargetInfo::UnsignedChar: return UnsignedCharTy;
case TargetInfo::SignedShort: return ShortTy;
case TargetInfo::UnsignedShort: return UnsignedShortTy;
case TargetInfo::SignedInt: return IntTy;
case TargetInfo::UnsignedInt: return UnsignedIntTy;
case TargetInfo::SignedLong: return LongTy;
case TargetInfo::UnsignedLong: return UnsignedLongTy;
case TargetInfo::SignedLongLong: return LongLongTy;
case TargetInfo::UnsignedLongLong: return UnsignedLongLongTy;
}
llvm_unreachable("Unhandled TargetInfo::IntType value");
}
//===----------------------------------------------------------------------===//
// Type Predicates.
//===----------------------------------------------------------------------===//
/// getObjCGCAttrKind - Returns one of GCNone, Weak, or Strong: the Objective-C
/// garbage collection attribute for the given type.
///
Qualifiers::GC ASTContext::getObjCGCAttrKind(QualType Ty) const {
if (getLangOpts().getGC() == LangOptions::NonGC)
return Qualifiers::GCNone;
assert(getLangOpts().ObjC);
Qualifiers::GC GCAttrs = Ty.getObjCGCAttr();
// Default behaviour under Objective-C's GC is for ObjC pointers
// (or pointers to them) to be treated as though they were declared
// as __strong.
if (GCAttrs == Qualifiers::GCNone) {
if (Ty->isObjCObjectPointerType() || Ty->isBlockPointerType())
return Qualifiers::Strong;
else if (Ty->isPointerType())
return getObjCGCAttrKind(Ty->castAs<PointerType>()->getPointeeType());
} else {
// It's not valid to set GC attributes on anything that isn't a
// pointer.
#ifndef NDEBUG
QualType CT = Ty->getCanonicalTypeInternal();
while (const auto *AT = dyn_cast<ArrayType>(CT))
CT = AT->getElementType();
assert(CT->isAnyPointerType() || CT->isBlockPointerType());
#endif
}
return GCAttrs;
}
//===----------------------------------------------------------------------===//
// Type Compatibility Testing
//===----------------------------------------------------------------------===//
/// areCompatVectorTypes - Return true if the two specified vector types are
/// compatible.
static bool areCompatVectorTypes(const VectorType *LHS,
const VectorType *RHS) {
assert(LHS->isCanonicalUnqualified() && RHS->isCanonicalUnqualified());
return LHS->getElementType() == RHS->getElementType() &&
LHS->getNumElements() == RHS->getNumElements();
}
/// areCompatMatrixTypes - Return true if the two specified matrix types are
/// compatible.
static bool areCompatMatrixTypes(const ConstantMatrixType *LHS,
const ConstantMatrixType *RHS) {
assert(LHS->isCanonicalUnqualified() && RHS->isCanonicalUnqualified());
return LHS->getElementType() == RHS->getElementType() &&
LHS->getNumRows() == RHS->getNumRows() &&
LHS->getNumColumns() == RHS->getNumColumns();
}
bool ASTContext::areCompatibleVectorTypes(QualType FirstVec,
QualType SecondVec) {
assert(FirstVec->isVectorType() && "FirstVec should be a vector type");
assert(SecondVec->isVectorType() && "SecondVec should be a vector type");
if (hasSameUnqualifiedType(FirstVec, SecondVec))
return true;
// Treat Neon vector types and most AltiVec vector types as if they are the
// equivalent GCC vector types.
const auto *First = FirstVec->castAs<VectorType>();
const auto *Second = SecondVec->castAs<VectorType>();
if (First->getNumElements() == Second->getNumElements() &&
hasSameType(First->getElementType(), Second->getElementType()) &&
First->getVectorKind() != VectorType::AltiVecPixel &&
First->getVectorKind() != VectorType::AltiVecBool &&
Second->getVectorKind() != VectorType::AltiVecPixel &&
Second->getVectorKind() != VectorType::AltiVecBool &&
First->getVectorKind() != VectorType::SveFixedLengthDataVector &&
First->getVectorKind() != VectorType::SveFixedLengthPredicateVector &&
Second->getVectorKind() != VectorType::SveFixedLengthDataVector &&
Second->getVectorKind() != VectorType::SveFixedLengthPredicateVector)
return true;
return false;
}
/// getSVETypeSize - Return SVE vector or predicate register size.
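/// For illustration only: with -msve-vector-bits=512 (ArmSveVectorBits == 512),
/// fixed-length SVE data vectors such as
/// @code
///   typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));
///   typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(512)));
/// @endcode
/// occupy 512 bits, while the svbool_t-based predicate occupies 512 / 8 = 64
/// bits, since predicates carry one bit per byte of the data vector.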
static uint64_t getSVETypeSize(ASTContext &Context, const BuiltinType *Ty) {
assert(Ty->isVLSTBuiltinType() && "Invalid SVE Type");
return Ty->getKind() == BuiltinType::SveBool
? Context.getLangOpts().ArmSveVectorBits / Context.getCharWidth()
: Context.getLangOpts().ArmSveVectorBits;
}
bool ASTContext::areCompatibleSveTypes(QualType FirstType,
QualType SecondType) {
assert(((FirstType->isSizelessBuiltinType() && SecondType->isVectorType()) ||
(FirstType->isVectorType() && SecondType->isSizelessBuiltinType())) &&
"Expected SVE builtin type and vector type!");
auto IsValidCast = [this](QualType FirstType, QualType SecondType) {
if (const auto *BT = FirstType->getAs<BuiltinType>()) {
if (const auto *VT = SecondType->getAs<VectorType>()) {
// Predicates have the same representation as uint8, so we also have to
// check the vector kind to keep these types incompatible.
if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
return BT->getKind() == BuiltinType::SveBool;
else if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector)
return VT->getElementType().getCanonicalType() ==
FirstType->getSveEltType(*this);
else if (VT->getVectorKind() == VectorType::GenericVector)
return getTypeSize(SecondType) == getSVETypeSize(*this, BT) &&
hasSameType(VT->getElementType(),
getBuiltinVectorTypeInfo(BT).ElementType);
}
}
return false;
};
return IsValidCast(FirstType, SecondType) ||
IsValidCast(SecondType, FirstType);
}
bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType,
QualType SecondType) {
assert(((FirstType->isSizelessBuiltinType() && SecondType->isVectorType()) ||
(FirstType->isVectorType() && SecondType->isSizelessBuiltinType())) &&
"Expected SVE builtin type and vector type!");
auto IsLaxCompatible = [this](QualType FirstType, QualType SecondType) {
const auto *BT = FirstType->getAs<BuiltinType>();
if (!BT)
return false;
const auto *VecTy = SecondType->getAs<VectorType>();
if (VecTy &&
(VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector ||
VecTy->getVectorKind() == VectorType::GenericVector)) {
const LangOptions::LaxVectorConversionKind LVCKind =
getLangOpts().getLaxVectorConversions();
// Cannot convert between SVE predicates and SVE vectors because of
// their different sizes.
if (BT->getKind() == BuiltinType::SveBool &&
VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector)
return false;
// If __ARM_FEATURE_SVE_BITS != N do not allow GNU vector lax conversion.
// "Whenever __ARM_FEATURE_SVE_BITS==N, GNUT implicitly
// converts to VLAT and VLAT implicitly converts to GNUT."
// ACLE Spec Version 00bet6, 3.7.3.2. Behavior common to vectors and
// predicates.
if (VecTy->getVectorKind() == VectorType::GenericVector &&
getTypeSize(SecondType) != getSVETypeSize(*this, BT))
return false;
// If -flax-vector-conversions=all is specified, the types are
// certainly compatible.
if (LVCKind == LangOptions::LaxVectorConversionKind::All)
return true;
// If -flax-vector-conversions=integer is specified, the types are
// compatible if the elements are integer types.
if (LVCKind == LangOptions::LaxVectorConversionKind::Integer)
return VecTy->getElementType().getCanonicalType()->isIntegerType() &&
FirstType->getSveEltType(*this)->isIntegerType();
}
return false;
};
return IsLaxCompatible(FirstType, SecondType) ||
IsLaxCompatible(SecondType, FirstType);
}
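/// Return true if the given type has an explicitly written ObjC ownership
/// qualifier (e.g. __strong) attached directly to it, looking through
/// parentheses and other type attributes but not through typedefs or other
/// sugar.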
bool ASTContext::hasDirectOwnershipQualifier(QualType Ty) const {
while (true) {
// __strong id
if (const AttributedType *Attr = dyn_cast<AttributedType>(Ty)) {
if (Attr->getAttrKind() == attr::ObjCOwnership)
return true;
Ty = Attr->getModifiedType();
// X *__strong (...)
} else if (const ParenType *Paren = dyn_cast<ParenType>(Ty)) {
Ty = Paren->getInnerType();
// We do not want to look through typedefs, typeof(expr),
// typeof(type), or any other way that the type is somehow
// abstracted.
} else {
return false;
}
}
}
//===----------------------------------------------------------------------===//
// ObjCQualifiedIdTypesAreCompatible - Compatibility testing for qualified id's.
//===----------------------------------------------------------------------===//
/// ProtocolCompatibleWithProtocol - return 'true' if 'lProto' is in the
/// inheritance hierarchy of 'rProto'.
bool
ASTContext::ProtocolCompatibleWithProtocol(ObjCProtocolDecl *lProto,
ObjCProtocolDecl *rProto) const {
if (declaresSameEntity(lProto, rProto))
return true;
for (auto *PI : rProto->protocols())
if (ProtocolCompatibleWithProtocol(lProto, PI))
return true;
return false;
}
/// ObjCQualifiedClassTypesAreCompatible - compare Class<pr,...> and
/// Class<pr1, ...>.
bool ASTContext::ObjCQualifiedClassTypesAreCompatible(
const ObjCObjectPointerType *lhs, const ObjCObjectPointerType *rhs) {
for (auto *lhsProto : lhs->quals()) {
bool match = false;
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto)) {
match = true;
break;
}
}
if (!match)
return false;
}
return true;
}
/// ObjCQualifiedIdTypesAreCompatible - We know that one of lhs/rhs is an
/// ObjCQualifiedIDType.
bool ASTContext::ObjCQualifiedIdTypesAreCompatible(
const ObjCObjectPointerType *lhs, const ObjCObjectPointerType *rhs,
bool compare) {
// Allow id<P..> and an 'id' in all cases.
if (lhs->isObjCIdType() || rhs->isObjCIdType())
return true;
// Don't allow id<P..> to convert to Class or Class<P..> in either direction.
if (lhs->isObjCClassType() || lhs->isObjCQualifiedClassType() ||
rhs->isObjCClassType() || rhs->isObjCQualifiedClassType())
return false;
if (lhs->isObjCQualifiedIdType()) {
if (rhs->qual_empty()) {
// If the RHS is an unqualified interface pointer "NSString*",
// make sure we check the class hierarchy.
if (ObjCInterfaceDecl *rhsID = rhs->getInterfaceDecl()) {
for (auto *I : lhs->quals()) {
// when comparing an id<P> on lhs with a static type on rhs,
// see if static class implements all of id's protocols, directly or
// through its super class and categories.
if (!rhsID->ClassImplementsProtocol(I, true))
return false;
}
}
// If there are no qualifiers and no interface, we have an 'id'.
return true;
}
// Both the right and left sides have qualifiers.
for (auto *lhsProto : lhs->quals()) {
bool match = false;
// when comparing an id<P> on lhs with a static type on rhs,
// see if static class implements all of id's protocols, directly or
// through its super class and categories.
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) ||
(compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) {
match = true;
break;
}
}
// If the RHS is a qualified interface pointer "NSString<P>*",
// make sure we check the class hierarchy.
if (ObjCInterfaceDecl *rhsID = rhs->getInterfaceDecl()) {
for (auto *I : lhs->quals()) {
// when comparing an id<P> on lhs with a static type on rhs,
// see if static class implements all of id's protocols, directly or
// through its super class and categories.
if (rhsID->ClassImplementsProtocol(I, true)) {
match = true;
break;
}
}
}
if (!match)
return false;
}
return true;
}
assert(rhs->isObjCQualifiedIdType() && "One of the LHS/RHS should be id<x>");
if (lhs->getInterfaceType()) {
// Both the right and left sides have qualifiers.
for (auto *lhsProto : lhs->quals()) {
bool match = false;
// when comparing an id<P> on rhs with a static type on lhs,
// see if static class implements all of id's protocols, directly or
// through its super class and categories.
// First, lhs protocols in the qualifier list must be found, direct
// or indirect in rhs's qualifier list or it is a mismatch.
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) ||
(compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) {
match = true;
break;
}
}
if (!match)
return false;
}
// Static class's protocols, or its super class or category protocols
// must be found, direct or indirect in rhs's qualifier list or it is a mismatch.
if (ObjCInterfaceDecl *lhsID = lhs->getInterfaceDecl()) {
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> LHSInheritedProtocols;
CollectInheritedProtocols(lhsID, LHSInheritedProtocols);
// This is rather dubious but matches gcc's behavior. If lhs has
// no type qualifier and its class has no static protocol(s),
// assume that it is a mismatch.
if (LHSInheritedProtocols.empty() && lhs->qual_empty())
return false;
for (auto *lhsProto : LHSInheritedProtocols) {
bool match = false;
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) ||
(compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) {
match = true;
break;
}
}
if (!match)
return false;
}
}
return true;
}
return false;
}
/// canAssignObjCInterfaces - Return true if the two interface types are
/// compatible for assignment from RHS to LHS. This handles validation of any
/// protocol qualifiers on the LHS or RHS.
bool ASTContext::canAssignObjCInterfaces(const ObjCObjectPointerType *LHSOPT,
const ObjCObjectPointerType *RHSOPT) {
const ObjCObjectType* LHS = LHSOPT->getObjectType();
const ObjCObjectType* RHS = RHSOPT->getObjectType();
// If either type represents the built-in 'id' type, return true.
if (LHS->isObjCUnqualifiedId() || RHS->isObjCUnqualifiedId())
return true;
// Function object that propagates a successful result or handles
// __kindof types.
auto finish = [&](bool succeeded) -> bool {
if (succeeded)
return true;
if (!RHS->isKindOfType())
return false;
// Strip off __kindof and protocol qualifiers, then check whether
// we can assign the other way.
return canAssignObjCInterfaces(RHSOPT->stripObjCKindOfTypeAndQuals(*this),
LHSOPT->stripObjCKindOfTypeAndQuals(*this));
};
// Casts from or to id<P> are allowed when the other side has compatible
// protocols.
if (LHS->isObjCQualifiedId() || RHS->isObjCQualifiedId()) {
return finish(ObjCQualifiedIdTypesAreCompatible(LHSOPT, RHSOPT, false));
}
// Verify protocol compatibility for casts from Class<P1> to Class<P2>.
if (LHS->isObjCQualifiedClass() && RHS->isObjCQualifiedClass()) {
return finish(ObjCQualifiedClassTypesAreCompatible(LHSOPT, RHSOPT));
}
// Casts from Class to Class<Foo>, or vice-versa, are allowed.
if (LHS->isObjCClass() && RHS->isObjCClass()) {
return true;
}
// If we have 2 user-defined types, fall into that path.
if (LHS->getInterface() && RHS->getInterface()) {
return finish(canAssignObjCInterfaces(LHS, RHS));
}
return false;
}
/// canAssignObjCInterfacesInBlockPointer - This routine is specifically written
/// for providing type-safety for objective-c pointers used to pass/return
/// arguments in block literals. When passed as arguments, passing 'A*' where
/// 'id' is expected is not OK. Passing 'Sub *' where 'Super *' is expected is
/// not OK. For the return type, the opposite is not OK.
bool ASTContext::canAssignObjCInterfacesInBlockPointer(
const ObjCObjectPointerType *LHSOPT,
const ObjCObjectPointerType *RHSOPT,
bool BlockReturnType) {
// Function object that propagates a successful result or handles
// __kindof types.
auto finish = [&](bool succeeded) -> bool {
if (succeeded)
return true;
const ObjCObjectPointerType *Expected = BlockReturnType ? RHSOPT : LHSOPT;
if (!Expected->isKindOfType())
return false;
// Strip off __kindof and protocol qualifiers, then check whether
// we can assign the other way.
return canAssignObjCInterfacesInBlockPointer(
RHSOPT->stripObjCKindOfTypeAndQuals(*this),
LHSOPT->stripObjCKindOfTypeAndQuals(*this),
BlockReturnType);
};
if (RHSOPT->isObjCBuiltinType() || LHSOPT->isObjCIdType())
return true;
if (LHSOPT->isObjCBuiltinType()) {
return finish(RHSOPT->isObjCBuiltinType() ||
RHSOPT->isObjCQualifiedIdType());
}
if (LHSOPT->isObjCQualifiedIdType() || RHSOPT->isObjCQualifiedIdType()) {
if (getLangOpts().CompatibilityQualifiedIdBlockParamTypeChecking)
// For block parameters, use the previous type checking for compatibility.
return finish(ObjCQualifiedIdTypesAreCompatible(LHSOPT, RHSOPT, false) ||
// Or corrected type checking as in non-compat mode.
(!BlockReturnType &&
ObjCQualifiedIdTypesAreCompatible(RHSOPT, LHSOPT, false)));
else
return finish(ObjCQualifiedIdTypesAreCompatible(
(BlockReturnType ? LHSOPT : RHSOPT),
(BlockReturnType ? RHSOPT : LHSOPT), false));
}
const ObjCInterfaceType* LHS = LHSOPT->getInterfaceType();
const ObjCInterfaceType* RHS = RHSOPT->getInterfaceType();
if (LHS && RHS) { // We have 2 user-defined types.
if (LHS != RHS) {
if (LHS->getDecl()->isSuperClassOf(RHS->getDecl()))
return finish(BlockReturnType);
if (RHS->getDecl()->isSuperClassOf(LHS->getDecl()))
return finish(!BlockReturnType);
}
else
return true;
}
return false;
}
/// Comparison routine for Objective-C protocols to be used with
/// llvm::array_pod_sort.
static int compareObjCProtocolsByName(ObjCProtocolDecl * const *lhs,
ObjCProtocolDecl * const *rhs) {
return (*lhs)->getName().compare((*rhs)->getName());
}
/// getIntersectionOfProtocols - This routine finds the intersection of the
/// sets of protocols inherited from two distinct Objective-C pointer objects
/// with the given common base.
/// It is used to build the composite qualifier list of the composite type of
/// the conditional expression involving two Objective-C pointer objects.
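/// Illustrative example (hypothetical classes and protocols): for
/// 'cond ? (Sub1<P, Q> *)x : (Sub2<P, R> *)y' with common base 'Base', the
/// composite qualifier list is the intersection {P}, minus any protocols
/// that 'Base' itself already adopts.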
static
void getIntersectionOfProtocols(ASTContext &Context,
const ObjCInterfaceDecl *CommonBase,
const ObjCObjectPointerType *LHSOPT,
const ObjCObjectPointerType *RHSOPT,
SmallVectorImpl<ObjCProtocolDecl *> &IntersectionSet) {
const ObjCObjectType* LHS = LHSOPT->getObjectType();
const ObjCObjectType* RHS = RHSOPT->getObjectType();
assert(LHS->getInterface() && "LHS must have an interface base");
assert(RHS->getInterface() && "RHS must have an interface base");
// Add all of the protocols for the LHS.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> LHSProtocolSet;
// Start with the protocol qualifiers.
for (auto proto : LHS->quals()) {
Context.CollectInheritedProtocols(proto, LHSProtocolSet);
}
// Also add the protocols associated with the LHS interface.
Context.CollectInheritedProtocols(LHS->getInterface(), LHSProtocolSet);
// Add all of the protocols for the RHS.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> RHSProtocolSet;
// Start with the protocol qualifiers.
for (auto proto : RHS->quals()) {
Context.CollectInheritedProtocols(proto, RHSProtocolSet);
}
// Also add the protocols associated with the RHS interface.
Context.CollectInheritedProtocols(RHS->getInterface(), RHSProtocolSet);
// Compute the intersection of the collected protocol sets.
for (auto proto : LHSProtocolSet) {
if (RHSProtocolSet.count(proto))
IntersectionSet.push_back(proto);
}
// Compute the set of protocols that is implied by either the common type or
// the protocols within the intersection.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> ImpliedProtocols;
Context.CollectInheritedProtocols(CommonBase, ImpliedProtocols);
// Remove any implied protocols from the list of inherited protocols.
if (!ImpliedProtocols.empty()) {
IntersectionSet.erase(
std::remove_if(IntersectionSet.begin(),
IntersectionSet.end(),
[&](ObjCProtocolDecl *proto) -> bool {
return ImpliedProtocols.count(proto) > 0;
}),
IntersectionSet.end());
}
// Sort the remaining protocols by name.
llvm::array_pod_sort(IntersectionSet.begin(), IntersectionSet.end(),
compareObjCProtocolsByName);
}
/// Determine whether the first type is a subtype of the second.
static bool canAssignObjCObjectTypes(ASTContext &ctx, QualType lhs,
QualType rhs) {
// Common case: two object pointers.
const auto *lhsOPT = lhs->getAs<ObjCObjectPointerType>();
const auto *rhsOPT = rhs->getAs<ObjCObjectPointerType>();
if (lhsOPT && rhsOPT)
return ctx.canAssignObjCInterfaces(lhsOPT, rhsOPT);
// Two block pointers.
const auto *lhsBlock = lhs->getAs<BlockPointerType>();
const auto *rhsBlock = rhs->getAs<BlockPointerType>();
if (lhsBlock && rhsBlock)
return ctx.typesAreBlockPointerCompatible(lhs, rhs);
// If either is an unqualified 'id' and the other is a block, it's
// acceptable.
if ((lhsOPT && lhsOPT->isObjCIdType() && rhsBlock) ||
(rhsOPT && rhsOPT->isObjCIdType() && lhsBlock))
return true;
return false;
}
// Check that the given Objective-C type argument lists are equivalent.
static bool sameObjCTypeArgs(ASTContext &ctx,
const ObjCInterfaceDecl *iface,
ArrayRef<QualType> lhsArgs,
ArrayRef<QualType> rhsArgs,
bool stripKindOf) {
if (lhsArgs.size() != rhsArgs.size())
return false;
ObjCTypeParamList *typeParams = iface->getTypeParamList();
for (unsigned i = 0, n = lhsArgs.size(); i != n; ++i) {
if (ctx.hasSameType(lhsArgs[i], rhsArgs[i]))
continue;
switch (typeParams->begin()[i]->getVariance()) {
case ObjCTypeParamVariance::Invariant:
if (!stripKindOf ||
!ctx.hasSameType(lhsArgs[i].stripObjCKindOfType(ctx),
rhsArgs[i].stripObjCKindOfType(ctx))) {
return false;
}
break;
case ObjCTypeParamVariance::Covariant:
if (!canAssignObjCObjectTypes(ctx, lhsArgs[i], rhsArgs[i]))
return false;
break;
case ObjCTypeParamVariance::Contravariant:
if (!canAssignObjCObjectTypes(ctx, rhsArgs[i], lhsArgs[i]))
return false;
break;
}
}
return true;
}
QualType ASTContext::areCommonBaseCompatible(
const ObjCObjectPointerType *Lptr,
const ObjCObjectPointerType *Rptr) {
const ObjCObjectType *LHS = Lptr->getObjectType();
const ObjCObjectType *RHS = Rptr->getObjectType();
const ObjCInterfaceDecl* LDecl = LHS->getInterface();
const ObjCInterfaceDecl* RDecl = RHS->getInterface();
if (!LDecl || !RDecl)
return {};
// When either LHS or RHS is a kindof type, we should return a kindof type.
// For example, for common base of kindof(ASub1) and kindof(ASub2), we return
// kindof(A).
bool anyKindOf = LHS->isKindOfType() || RHS->isKindOfType();
// Follow the left-hand side up the class hierarchy until we either hit a
// root or find the RHS. Record the ancestors in case we don't find it.
llvm::SmallDenseMap<const ObjCInterfaceDecl *, const ObjCObjectType *, 4>
LHSAncestors;
while (true) {
// Record this ancestor. We'll need this if the common type isn't in the
// path from the LHS to the root.
LHSAncestors[LHS->getInterface()->getCanonicalDecl()] = LHS;
if (declaresSameEntity(LHS->getInterface(), RDecl)) {
// Get the type arguments.
ArrayRef<QualType> LHSTypeArgs = LHS->getTypeArgsAsWritten();
bool anyChanges = false;
if (LHS->isSpecialized() && RHS->isSpecialized()) {
// Both have type arguments, compare them.
if (!sameObjCTypeArgs(*this, LHS->getInterface(),
LHS->getTypeArgs(), RHS->getTypeArgs(),
/*stripKindOf=*/true))
return {};
} else if (LHS->isSpecialized() != RHS->isSpecialized()) {
// If only one has type arguments, the result will not have type
// arguments.
LHSTypeArgs = {};
anyChanges = true;
}
// Compute the intersection of protocols.
SmallVector<ObjCProtocolDecl *, 8> Protocols;
getIntersectionOfProtocols(*this, LHS->getInterface(), Lptr, Rptr,
Protocols);
if (!Protocols.empty())
anyChanges = true;
// If anything in the LHS will have changed, build a new result type.
// If we need to return a kindof type but LHS is not a kindof type, we
// build a new result type.
if (anyChanges || LHS->isKindOfType() != anyKindOf) {
QualType Result = getObjCInterfaceType(LHS->getInterface());
Result = getObjCObjectType(Result, LHSTypeArgs, Protocols,
anyKindOf || LHS->isKindOfType());
return getObjCObjectPointerType(Result);
}
return getObjCObjectPointerType(QualType(LHS, 0));
}
// Find the superclass.
QualType LHSSuperType = LHS->getSuperClassType();
if (LHSSuperType.isNull())
break;
LHS = LHSSuperType->castAs<ObjCObjectType>();
}
// We didn't find anything by following the LHS to its root; now check
// the RHS against the cached set of ancestors.
while (true) {
auto KnownLHS = LHSAncestors.find(RHS->getInterface()->getCanonicalDecl());
if (KnownLHS != LHSAncestors.end()) {
LHS = KnownLHS->second;
// Get the type arguments.
ArrayRef<QualType> RHSTypeArgs = RHS->getTypeArgsAsWritten();
bool anyChanges = false;
if (LHS->isSpecialized() && RHS->isSpecialized()) {
// Both have type arguments, compare them.
if (!sameObjCTypeArgs(*this, LHS->getInterface(),
LHS->getTypeArgs(), RHS->getTypeArgs(),
/*stripKindOf=*/true))
return {};
} else if (LHS->isSpecialized() != RHS->isSpecialized()) {
// If only one has type arguments, the result will not have type
// arguments.
RHSTypeArgs = {};
anyChanges = true;
}
// Compute the intersection of protocols.
SmallVector<ObjCProtocolDecl *, 8> Protocols;
getIntersectionOfProtocols(*this, RHS->getInterface(), Lptr, Rptr,
Protocols);
if (!Protocols.empty())
anyChanges = true;
// If we need to return a kindof type but RHS is not a kindof type, we
// build a new result type.
if (anyChanges || RHS->isKindOfType() != anyKindOf) {
QualType Result = getObjCInterfaceType(RHS->getInterface());
Result = getObjCObjectType(Result, RHSTypeArgs, Protocols,
anyKindOf || RHS->isKindOfType());
return getObjCObjectPointerType(Result);
}
return getObjCObjectPointerType(QualType(RHS, 0));
}
// Find the superclass of the RHS.
QualType RHSSuperType = RHS->getSuperClassType();
if (RHSSuperType.isNull())
break;
RHS = RHSSuperType->castAs<ObjCObjectType>();
}
return {};
}
bool ASTContext::canAssignObjCInterfaces(const ObjCObjectType *LHS,
const ObjCObjectType *RHS) {
assert(LHS->getInterface() && "LHS is not an interface type");
assert(RHS->getInterface() && "RHS is not an interface type");
// Verify that the base decls are compatible: the RHS must be a subclass of
// the LHS.
ObjCInterfaceDecl *LHSInterface = LHS->getInterface();
bool IsSuperClass = LHSInterface->isSuperClassOf(RHS->getInterface());
if (!IsSuperClass)
return false;
// If the LHS has protocol qualifiers, determine whether all of them are
// satisfied by the RHS (i.e., the RHS has a superset of the protocols in the
// LHS).
if (LHS->getNumProtocols() > 0) {
// It is OK if conversion of LHS to SuperClass results in narrowing of types;
// i.e., SuperClass may implement at least one of the protocols
// in LHS's protocol list. For example, SuperObj<P1> = lhs<P1,P2> is OK,
// but SuperObj<P1,P2,P3> = lhs<P1,P2> is not.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> SuperClassInheritedProtocols;
CollectInheritedProtocols(RHS->getInterface(), SuperClassInheritedProtocols);
// Also, if RHS has explicit qualifiers, include them for comparison with
// LHS's qualifiers.
for (auto *RHSPI : RHS->quals())
CollectInheritedProtocols(RHSPI, SuperClassInheritedProtocols);
// If there are no protocols associated with RHS, it is not a match.
if (SuperClassInheritedProtocols.empty())
return false;
for (const auto *LHSProto : LHS->quals()) {
bool SuperImplementsProtocol = false;
for (auto *SuperClassProto : SuperClassInheritedProtocols)
if (SuperClassProto->lookupProtocolNamed(LHSProto->getIdentifier())) {
SuperImplementsProtocol = true;
break;
}
if (!SuperImplementsProtocol)
return false;
}
}
// If the LHS is specialized, we may need to check type arguments.
if (LHS->isSpecialized()) {
// Follow the superclass chain until we've matched the LHS class in the
// hierarchy. This substitutes type arguments through.
const ObjCObjectType *RHSSuper = RHS;
while (!declaresSameEntity(RHSSuper->getInterface(), LHSInterface))
RHSSuper = RHSSuper->getSuperClassType()->castAs<ObjCObjectType>();
// If the RHS is specialized, compare type arguments.
if (RHSSuper->isSpecialized() &&
!sameObjCTypeArgs(*this, LHS->getInterface(),
LHS->getTypeArgs(), RHSSuper->getTypeArgs(),
/*stripKindOf=*/true)) {
return false;
}
}
return true;
}
bool ASTContext::areComparableObjCPointerTypes(QualType LHS, QualType RHS) {
// get the "pointed to" types
const auto *LHSOPT = LHS->getAs<ObjCObjectPointerType>();
const auto *RHSOPT = RHS->getAs<ObjCObjectPointerType>();
if (!LHSOPT || !RHSOPT)
return false;
return canAssignObjCInterfaces(LHSOPT, RHSOPT) ||
canAssignObjCInterfaces(RHSOPT, LHSOPT);
}
bool ASTContext::canBindObjCObjectType(QualType To, QualType From) {
return canAssignObjCInterfaces(
getObjCObjectPointerType(To)->castAs<ObjCObjectPointerType>(),
getObjCObjectPointerType(From)->castAs<ObjCObjectPointerType>());
}
/// typesAreCompatible - C99 6.7.3p9: For two qualified types to be compatible,
/// both shall have the identically qualified version of a compatible type.
/// C99 6.2.7p1: Two types have compatible types if their types are the
/// same. See 6.7.[2,3,5] for additional rules.
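/// For illustration: in C, 'int[]' and 'int[10]' are compatible, as are
/// 'int (*)(int)' and 'int (*)()', while 'int' and 'long' are not, even on
/// targets where they have the same width.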
bool ASTContext::typesAreCompatible(QualType LHS, QualType RHS,
bool CompareUnqualified) {
if (getLangOpts().CPlusPlus)
return hasSameType(LHS, RHS);
return !mergeTypes(LHS, RHS, false, CompareUnqualified).isNull();
}
bool ASTContext::propertyTypesAreCompatible(QualType LHS, QualType RHS) {
return typesAreCompatible(LHS, RHS);
}
bool ASTContext::typesAreBlockPointerCompatible(QualType LHS, QualType RHS) {
return !mergeTypes(LHS, RHS, true).isNull();
}
/// mergeTransparentUnionType - if T is a transparent union type and a member
/// of T is compatible with SubType, return the merged type, else return
/// QualType()
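/// Illustrative example of the GNU extension (hypothetical declaration):
///   typedef union { int *ip; void *vp; }
///       __attribute__((transparent_union)) arg_t;
/// Passing an 'int *' where an 'arg_t' parameter is expected merges against
/// the compatible union member.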
QualType ASTContext::mergeTransparentUnionType(QualType T, QualType SubType,
bool OfBlockPointer,
bool Unqualified) {
if (const RecordType *UT = T->getAsUnionType()) {
RecordDecl *UD = UT->getDecl();
if (UD->hasAttr<TransparentUnionAttr>()) {
for (const auto *I : UD->fields()) {
QualType ET = I->getType().getUnqualifiedType();
QualType MT = mergeTypes(ET, SubType, OfBlockPointer, Unqualified);
if (!MT.isNull())
return MT;
}
}
}
return {};
}
/// mergeFunctionParameterTypes - merge two types which appear as function
/// parameter types
QualType ASTContext::mergeFunctionParameterTypes(QualType lhs, QualType rhs,
bool OfBlockPointer,
bool Unqualified) {
// GNU extension: two types are compatible if they appear as a function
// argument, one of the types is a transparent union type, and the other
// type is compatible with a union member.
QualType lmerge = mergeTransparentUnionType(lhs, rhs, OfBlockPointer,
Unqualified);
if (!lmerge.isNull())
return lmerge;
QualType rmerge = mergeTransparentUnionType(rhs, lhs, OfBlockPointer,
Unqualified);
if (!rmerge.isNull())
return rmerge;
return mergeTypes(lhs, rhs, OfBlockPointer, Unqualified);
}
QualType ASTContext::mergeFunctionTypes(QualType lhs, QualType rhs,
bool OfBlockPointer, bool Unqualified,
bool AllowCXX) {
const auto *lbase = lhs->castAs<FunctionType>();
const auto *rbase = rhs->castAs<FunctionType>();
const auto *lproto = dyn_cast<FunctionProtoType>(lbase);
const auto *rproto = dyn_cast<FunctionProtoType>(rbase);
bool allLTypes = true;
bool allRTypes = true;
// Check return type
QualType retType;
if (OfBlockPointer) {
QualType RHS = rbase->getReturnType();
QualType LHS = lbase->getReturnType();
bool UnqualifiedResult = Unqualified;
if (!UnqualifiedResult)
UnqualifiedResult = (!RHS.hasQualifiers() && LHS.hasQualifiers());
retType = mergeTypes(LHS, RHS, true, UnqualifiedResult, true);
}
else
retType = mergeTypes(lbase->getReturnType(), rbase->getReturnType(), false,
Unqualified);
if (retType.isNull())
return {};
if (Unqualified)
retType = retType.getUnqualifiedType();
CanQualType LRetType = getCanonicalType(lbase->getReturnType());
CanQualType RRetType = getCanonicalType(rbase->getReturnType());
if (Unqualified) {
LRetType = LRetType.getUnqualifiedType();
RRetType = RRetType.getUnqualifiedType();
}
if (getCanonicalType(retType) != LRetType)
allLTypes = false;
if (getCanonicalType(retType) != RRetType)
allRTypes = false;
// FIXME: double check this
// FIXME: should we error if lbase->getRegParmAttr() != 0 &&
// rbase->getRegParmAttr() != 0 &&
// lbase->getRegParmAttr() != rbase->getRegParmAttr()?
FunctionType::ExtInfo lbaseInfo = lbase->getExtInfo();
FunctionType::ExtInfo rbaseInfo = rbase->getExtInfo();
// Compatible functions must have compatible calling conventions
if (lbaseInfo.getCC() != rbaseInfo.getCC())
return {};
// Regparm is part of the calling convention.
if (lbaseInfo.getHasRegParm() != rbaseInfo.getHasRegParm())
return {};
if (lbaseInfo.getRegParm() != rbaseInfo.getRegParm())
return {};
if (lbaseInfo.getProducesResult() != rbaseInfo.getProducesResult())
return {};
if (lbaseInfo.getNoCallerSavedRegs() != rbaseInfo.getNoCallerSavedRegs())
return {};
if (lbaseInfo.getNoCfCheck() != rbaseInfo.getNoCfCheck())
return {};
// FIXME: some uses, e.g. conditional exprs, really want this to be 'both'.
bool NoReturn = lbaseInfo.getNoReturn() || rbaseInfo.getNoReturn();
if (lbaseInfo.getNoReturn() != NoReturn)
allLTypes = false;
if (rbaseInfo.getNoReturn() != NoReturn)
allRTypes = false;
FunctionType::ExtInfo einfo = lbaseInfo.withNoReturn(NoReturn);
if (lproto && rproto) { // two C99 style function prototypes
assert((AllowCXX ||
(!lproto->hasExceptionSpec() && !rproto->hasExceptionSpec())) &&
"C++ shouldn't be here");
// Compatible functions must have the same number of parameters
if (lproto->getNumParams() != rproto->getNumParams())
return {};
// Variadic and non-variadic functions aren't compatible
if (lproto->isVariadic() != rproto->isVariadic())
return {};
if (lproto->getMethodQuals() != rproto->getMethodQuals())
return {};
SmallVector<FunctionProtoType::ExtParameterInfo, 4> newParamInfos;
bool canUseLeft, canUseRight;
if (!mergeExtParameterInfo(lproto, rproto, canUseLeft, canUseRight,
newParamInfos))
return {};
if (!canUseLeft)
allLTypes = false;
if (!canUseRight)
allRTypes = false;
// Check parameter type compatibility
SmallVector<QualType, 10> types;
for (unsigned i = 0, n = lproto->getNumParams(); i < n; i++) {
QualType lParamType = lproto->getParamType(i).getUnqualifiedType();
QualType rParamType = rproto->getParamType(i).getUnqualifiedType();
QualType paramType = mergeFunctionParameterTypes(
lParamType, rParamType, OfBlockPointer, Unqualified);
if (paramType.isNull())
return {};
if (Unqualified)
paramType = paramType.getUnqualifiedType();
types.push_back(paramType);
if (Unqualified) {
lParamType = lParamType.getUnqualifiedType();
rParamType = rParamType.getUnqualifiedType();
}
if (getCanonicalType(paramType) != getCanonicalType(lParamType))
allLTypes = false;
if (getCanonicalType(paramType) != getCanonicalType(rParamType))
allRTypes = false;
}
if (allLTypes) return lhs;
if (allRTypes) return rhs;
FunctionProtoType::ExtProtoInfo EPI = lproto->getExtProtoInfo();
EPI.ExtInfo = einfo;
EPI.ExtParameterInfos =
newParamInfos.empty() ? nullptr : newParamInfos.data();
return getFunctionType(retType, types, EPI);
}
if (lproto) allRTypes = false;
if (rproto) allLTypes = false;
const FunctionProtoType *proto = lproto ? lproto : rproto;
if (proto) {
assert((AllowCXX || !proto->hasExceptionSpec()) && "C++ shouldn't be here");
if (proto->isVariadic())
return {};
// Check that the types are compatible with the types that
// would result from default argument promotions (C99 6.7.5.3p15).
// The only types actually affected are promotable integer
// types and floats, which would be passed as a different
// type depending on whether the prototype is visible.
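// For illustration: 'void f(); void f(short);' cannot be merged, because a
// 'short' argument would be promoted to 'int' when only the unprototyped
// declaration is visible.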
for (unsigned i = 0, n = proto->getNumParams(); i < n; ++i) {
QualType paramTy = proto->getParamType(i);
// Look at the converted type of enum types, since that is the type used
// to pass enum values.
if (const auto *Enum = paramTy->getAs<EnumType>()) {
paramTy = Enum->getDecl()->getIntegerType();
if (paramTy.isNull())
return {};
}
if (paramTy->isPromotableIntegerType() ||
getCanonicalType(paramTy).getUnqualifiedType() == FloatTy)
return {};
}
if (allLTypes) return lhs;
if (allRTypes) return rhs;
FunctionProtoType::ExtProtoInfo EPI = proto->getExtProtoInfo();
EPI.ExtInfo = einfo;
return getFunctionType(retType, proto->getParamTypes(), EPI);
}
if (allLTypes) return lhs;
if (allRTypes) return rhs;
return getFunctionNoProtoType(retType, einfo);
}
/// Given that we have an enum type and a non-enum type, try to merge them.
static QualType mergeEnumWithInteger(ASTContext &Context, const EnumType *ET,
QualType other, bool isBlockReturnType) {
// C99 6.7.2.2p4: Each enumerated type shall be compatible with char,
// a signed integer type, or an unsigned integer type.
// Compatibility is based on the underlying type, not the promotion
// type.
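// For illustration: an 'enum E' whose underlying type is 'unsigned int' is
// compatible with 'unsigned int' but not with 'long', even when both have
// the same width (the block-return case below is the one exception).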
QualType underlyingType = ET->getDecl()->getIntegerType();
if (underlyingType.isNull())
return {};
if (Context.hasSameType(underlyingType, other))
return other;
// In block return types, we're more permissive and accept any
// integral type of the same size.
if (isBlockReturnType && other->isIntegerType() &&
Context.getTypeSize(underlyingType) == Context.getTypeSize(other))
return other;
return {};
}
QualType ASTContext::mergeTypes(QualType LHS, QualType RHS,
bool OfBlockPointer,
bool Unqualified, bool BlockReturnType) {
+ // For C++ we will not reach this code with reference types (see below),
+ // for OpenMP variant call overloading we might.
+ //
// C++ [expr]: If an expression initially has the type "reference to T", the
// type is adjusted to "T" prior to any further analysis, the expression
// designates the object or function denoted by the reference, and the
// expression is an lvalue unless the reference is an rvalue reference and
// the expression is a function call (possibly inside parentheses).
+ if (LangOpts.OpenMP && LHS->getAs<ReferenceType>() &&
+ RHS->getAs<ReferenceType>() && LHS->getTypeClass() == RHS->getTypeClass())
+ return mergeTypes(LHS->getAs<ReferenceType>()->getPointeeType(),
+ RHS->getAs<ReferenceType>()->getPointeeType(),
+ OfBlockPointer, Unqualified, BlockReturnType);
if (LHS->getAs<ReferenceType>() || RHS->getAs<ReferenceType>())
return {};
if (Unqualified) {
LHS = LHS.getUnqualifiedType();
RHS = RHS.getUnqualifiedType();
}
QualType LHSCan = getCanonicalType(LHS),
RHSCan = getCanonicalType(RHS);
// If two types are identical, they are compatible.
if (LHSCan == RHSCan)
return LHS;
// If the qualifiers are different, the types aren't compatible... mostly.
Qualifiers LQuals = LHSCan.getLocalQualifiers();
Qualifiers RQuals = RHSCan.getLocalQualifiers();
if (LQuals != RQuals) {
// If any of these qualifiers are different, we have a type
// mismatch.
if (LQuals.getCVRQualifiers() != RQuals.getCVRQualifiers() ||
LQuals.getAddressSpace() != RQuals.getAddressSpace() ||
LQuals.getObjCLifetime() != RQuals.getObjCLifetime() ||
LQuals.hasUnaligned() != RQuals.hasUnaligned())
return {};
// Exactly one GC qualifier difference is allowed: __strong is
// okay if the other type has no GC qualifier but is an Objective
// C object pointer (i.e. implicitly strong by default). We fix
// this by pretending that the unqualified type was actually
// qualified __strong.
Qualifiers::GC GC_L = LQuals.getObjCGCAttr();
Qualifiers::GC GC_R = RQuals.getObjCGCAttr();
assert((GC_L != GC_R) && "unequal qualifier sets had only equal elements");
if (GC_L == Qualifiers::Weak || GC_R == Qualifiers::Weak)
return {};
if (GC_L == Qualifiers::Strong && RHSCan->isObjCObjectPointerType()) {
return mergeTypes(LHS, getObjCGCQualType(RHS, Qualifiers::Strong));
}
if (GC_R == Qualifiers::Strong && LHSCan->isObjCObjectPointerType()) {
return mergeTypes(getObjCGCQualType(LHS, Qualifiers::Strong), RHS);
}
return {};
}
// Okay, qualifiers are equal.
Type::TypeClass LHSClass = LHSCan->getTypeClass();
Type::TypeClass RHSClass = RHSCan->getTypeClass();
// We want to consider the two function types to be the same for these
// comparisons, just force one to the other.
if (LHSClass == Type::FunctionProto) LHSClass = Type::FunctionNoProto;
if (RHSClass == Type::FunctionProto) RHSClass = Type::FunctionNoProto;
// Same as above for arrays
if (LHSClass == Type::VariableArray || LHSClass == Type::IncompleteArray)
LHSClass = Type::ConstantArray;
if (RHSClass == Type::VariableArray || RHSClass == Type::IncompleteArray)
RHSClass = Type::ConstantArray;
// ObjCInterfaces are just specialized ObjCObjects.
if (LHSClass == Type::ObjCInterface) LHSClass = Type::ObjCObject;
if (RHSClass == Type::ObjCInterface) RHSClass = Type::ObjCObject;
// Canonicalize ExtVector -> Vector.
if (LHSClass == Type::ExtVector) LHSClass = Type::Vector;
if (RHSClass == Type::ExtVector) RHSClass = Type::Vector;
// If the canonical type classes don't match.
if (LHSClass != RHSClass) {
// Note that we only have special rules for turning block enum
// returns into block int returns, not vice-versa.
if (const auto *ETy = LHS->getAs<EnumType>()) {
return mergeEnumWithInteger(*this, ETy, RHS, false);
}
if (const EnumType* ETy = RHS->getAs<EnumType>()) {
return mergeEnumWithInteger(*this, ETy, LHS, BlockReturnType);
}
// Allow a block pointer type to match an 'id' type.
if (OfBlockPointer && !BlockReturnType) {
if (LHS->isObjCIdType() && RHS->isBlockPointerType())
return LHS;
if (RHS->isObjCIdType() && LHS->isBlockPointerType())
return RHS;
}
return {};
}
// The canonical type classes match.
switch (LHSClass) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("Non-canonical and dependent types shouldn't get here");
case Type::Auto:
case Type::DeducedTemplateSpecialization:
case Type::LValueReference:
case Type::RValueReference:
case Type::MemberPointer:
llvm_unreachable("C++ should never be in mergeTypes");
case Type::ObjCInterface:
case Type::IncompleteArray:
case Type::VariableArray:
case Type::FunctionProto:
case Type::ExtVector:
llvm_unreachable("Types are eliminated above");
case Type::Pointer:
{
// Merge two pointer types, while trying to preserve typedef info
QualType LHSPointee = LHS->castAs<PointerType>()->getPointeeType();
QualType RHSPointee = RHS->castAs<PointerType>()->getPointeeType();
if (Unqualified) {
LHSPointee = LHSPointee.getUnqualifiedType();
RHSPointee = RHSPointee.getUnqualifiedType();
}
QualType ResultType = mergeTypes(LHSPointee, RHSPointee, false,
Unqualified);
if (ResultType.isNull())
return {};
if (getCanonicalType(LHSPointee) == getCanonicalType(ResultType))
return LHS;
if (getCanonicalType(RHSPointee) == getCanonicalType(ResultType))
return RHS;
return getPointerType(ResultType);
}
case Type::BlockPointer:
{
// Merge two block pointer types, while trying to preserve typedef info
QualType LHSPointee = LHS->castAs<BlockPointerType>()->getPointeeType();
QualType RHSPointee = RHS->castAs<BlockPointerType>()->getPointeeType();
if (Unqualified) {
LHSPointee = LHSPointee.getUnqualifiedType();
RHSPointee = RHSPointee.getUnqualifiedType();
}
if (getLangOpts().OpenCL) {
Qualifiers LHSPteeQual = LHSPointee.getQualifiers();
Qualifiers RHSPteeQual = RHSPointee.getQualifiers();
// Blocks can't be an expression in a ternary operator (OpenCL v2.0
// 6.12.5), thus the following check is asymmetric.
if (!LHSPteeQual.isAddressSpaceSupersetOf(RHSPteeQual))
return {};
LHSPteeQual.removeAddressSpace();
RHSPteeQual.removeAddressSpace();
LHSPointee =
QualType(LHSPointee.getTypePtr(), LHSPteeQual.getAsOpaqueValue());
RHSPointee =
QualType(RHSPointee.getTypePtr(), RHSPteeQual.getAsOpaqueValue());
}
QualType ResultType = mergeTypes(LHSPointee, RHSPointee, OfBlockPointer,
Unqualified);
if (ResultType.isNull())
return {};
if (getCanonicalType(LHSPointee) == getCanonicalType(ResultType))
return LHS;
if (getCanonicalType(RHSPointee) == getCanonicalType(ResultType))
return RHS;
return getBlockPointerType(ResultType);
}
case Type::Atomic:
{
// Merge two pointer types, while trying to preserve typedef info
QualType LHSValue = LHS->castAs<AtomicType>()->getValueType();
QualType RHSValue = RHS->castAs<AtomicType>()->getValueType();
if (Unqualified) {
LHSValue = LHSValue.getUnqualifiedType();
RHSValue = RHSValue.getUnqualifiedType();
}
QualType ResultType = mergeTypes(LHSValue, RHSValue, false,
Unqualified);
if (ResultType.isNull())
return {};
if (getCanonicalType(LHSValue) == getCanonicalType(ResultType))
return LHS;
if (getCanonicalType(RHSValue) == getCanonicalType(ResultType))
return RHS;
return getAtomicType(ResultType);
}
case Type::ConstantArray:
{
const ConstantArrayType* LCAT = getAsConstantArrayType(LHS);
const ConstantArrayType* RCAT = getAsConstantArrayType(RHS);
if (LCAT && RCAT && RCAT->getSize() != LCAT->getSize())
return {};
QualType LHSElem = getAsArrayType(LHS)->getElementType();
QualType RHSElem = getAsArrayType(RHS)->getElementType();
if (Unqualified) {
LHSElem = LHSElem.getUnqualifiedType();
RHSElem = RHSElem.getUnqualifiedType();
}
QualType ResultType = mergeTypes(LHSElem, RHSElem, false, Unqualified);
if (ResultType.isNull())
return {};
const VariableArrayType* LVAT = getAsVariableArrayType(LHS);
const VariableArrayType* RVAT = getAsVariableArrayType(RHS);
// If either side is a variable array, and both are complete, check whether
// the current dimension is definite.
if (LVAT || RVAT) {
auto SizeFetch = [this](const VariableArrayType* VAT,
const ConstantArrayType* CAT)
-> std::pair<bool,llvm::APInt> {
if (VAT) {
Optional<llvm::APSInt> TheInt;
Expr *E = VAT->getSizeExpr();
if (E && (TheInt = E->getIntegerConstantExpr(*this)))
return std::make_pair(true, *TheInt);
return std::make_pair(false, llvm::APSInt());
}
if (CAT)
return std::make_pair(true, CAT->getSize());
return std::make_pair(false, llvm::APInt());
};
bool HaveLSize, HaveRSize;
llvm::APInt LSize, RSize;
std::tie(HaveLSize, LSize) = SizeFetch(LVAT, LCAT);
std::tie(HaveRSize, RSize) = SizeFetch(RVAT, RCAT);
if (HaveLSize && HaveRSize && !llvm::APInt::isSameValue(LSize, RSize))
return {}; // Definite, but unequal, array dimension
}
if (LCAT && getCanonicalType(LHSElem) == getCanonicalType(ResultType))
return LHS;
if (RCAT && getCanonicalType(RHSElem) == getCanonicalType(ResultType))
return RHS;
if (LCAT)
return getConstantArrayType(ResultType, LCAT->getSize(),
LCAT->getSizeExpr(),
ArrayType::ArraySizeModifier(), 0);
if (RCAT)
return getConstantArrayType(ResultType, RCAT->getSize(),
RCAT->getSizeExpr(),
ArrayType::ArraySizeModifier(), 0);
if (LVAT && getCanonicalType(LHSElem) == getCanonicalType(ResultType))
return LHS;
if (RVAT && getCanonicalType(RHSElem) == getCanonicalType(ResultType))
return RHS;
if (LVAT) {
// FIXME: This isn't correct! But tricky to implement because
// the array's size has to be the size of LHS, but the type
// has to be different.
return LHS;
}
if (RVAT) {
// FIXME: This isn't correct! But tricky to implement because
// the array's size has to be the size of RHS, but the type
// has to be different.
return RHS;
}
if (getCanonicalType(LHSElem) == getCanonicalType(ResultType)) return LHS;
if (getCanonicalType(RHSElem) == getCanonicalType(ResultType)) return RHS;
return getIncompleteArrayType(ResultType,
ArrayType::ArraySizeModifier(), 0);
}
case Type::FunctionNoProto:
return mergeFunctionTypes(LHS, RHS, OfBlockPointer, Unqualified);
case Type::Record:
case Type::Enum:
return {};
case Type::Builtin:
// Only exactly equal builtin types are compatible, which is tested above.
return {};
case Type::Complex:
// Distinct complex types are incompatible.
return {};
case Type::Vector:
// FIXME: The merged type should be an ExtVector!
if (areCompatVectorTypes(LHSCan->castAs<VectorType>(),
RHSCan->castAs<VectorType>()))
return LHS;
return {};
case Type::ConstantMatrix:
if (areCompatMatrixTypes(LHSCan->castAs<ConstantMatrixType>(),
RHSCan->castAs<ConstantMatrixType>()))
return LHS;
return {};
case Type::ObjCObject: {
// Check if the types are assignment compatible.
// FIXME: This should be type compatibility, e.g. whether
// "LHS x; RHS x;" at global scope is legal.
if (canAssignObjCInterfaces(LHS->castAs<ObjCObjectType>(),
RHS->castAs<ObjCObjectType>()))
return LHS;
return {};
}
case Type::ObjCObjectPointer:
if (OfBlockPointer) {
if (canAssignObjCInterfacesInBlockPointer(
LHS->castAs<ObjCObjectPointerType>(),
RHS->castAs<ObjCObjectPointerType>(), BlockReturnType))
return LHS;
return {};
}
if (canAssignObjCInterfaces(LHS->castAs<ObjCObjectPointerType>(),
RHS->castAs<ObjCObjectPointerType>()))
return LHS;
return {};
case Type::Pipe:
assert(LHS != RHS &&
"Equivalent pipe types should have already been handled!");
return {};
case Type::ExtInt: {
// Merge two ext-int types, while trying to preserve typedef info.
bool LHSUnsigned = LHS->castAs<ExtIntType>()->isUnsigned();
bool RHSUnsigned = RHS->castAs<ExtIntType>()->isUnsigned();
unsigned LHSBits = LHS->castAs<ExtIntType>()->getNumBits();
unsigned RHSBits = RHS->castAs<ExtIntType>()->getNumBits();
// As with unsigned vs. int, there is no merged type if these don't match.
if (LHSUnsigned != RHSUnsigned)
return {};
if (LHSBits != RHSBits)
return {};
return LHS;
}
}
llvm_unreachable("Invalid Type::Class!");
}
bool ASTContext::mergeExtParameterInfo(
const FunctionProtoType *FirstFnType, const FunctionProtoType *SecondFnType,
bool &CanUseFirst, bool &CanUseSecond,
SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &NewParamInfos) {
assert(NewParamInfos.empty() && "param info list not empty");
CanUseFirst = CanUseSecond = true;
bool FirstHasInfo = FirstFnType->hasExtParameterInfos();
bool SecondHasInfo = SecondFnType->hasExtParameterInfos();
// Fast path: if the first type doesn't have ext parameter infos,
// we match if and only if the second type also doesn't have them.
if (!FirstHasInfo && !SecondHasInfo)
return true;
bool NeedParamInfo = false;
size_t E = FirstHasInfo ? FirstFnType->getExtParameterInfos().size()
: SecondFnType->getExtParameterInfos().size();
for (size_t I = 0; I < E; ++I) {
FunctionProtoType::ExtParameterInfo FirstParam, SecondParam;
if (FirstHasInfo)
FirstParam = FirstFnType->getExtParameterInfo(I);
if (SecondHasInfo)
SecondParam = SecondFnType->getExtParameterInfo(I);
// Cannot merge unless everything except the noescape flag matches.
if (FirstParam.withIsNoEscape(false) != SecondParam.withIsNoEscape(false))
return false;
bool FirstNoEscape = FirstParam.isNoEscape();
bool SecondNoEscape = SecondParam.isNoEscape();
bool IsNoEscape = FirstNoEscape && SecondNoEscape;
NewParamInfos.push_back(FirstParam.withIsNoEscape(IsNoEscape));
if (NewParamInfos.back().getOpaqueValue())
NeedParamInfo = true;
if (FirstNoEscape != IsNoEscape)
CanUseFirst = false;
if (SecondNoEscape != IsNoEscape)
CanUseSecond = false;
}
if (!NeedParamInfo)
NewParamInfos.clear();
return true;
}
void ASTContext::ResetObjCLayout(const ObjCContainerDecl *CD) {
ObjCLayouts[CD] = nullptr;
}
/// mergeObjCGCQualifiers - This routine merges the ObjC GC attributes of 'LHS'
/// and 'RHS' and returns the merged version, including for function
/// return types.
QualType ASTContext::mergeObjCGCQualifiers(QualType LHS, QualType RHS) {
QualType LHSCan = getCanonicalType(LHS),
RHSCan = getCanonicalType(RHS);
// If two types are identical, they are compatible.
if (LHSCan == RHSCan)
return LHS;
if (RHSCan->isFunctionType()) {
if (!LHSCan->isFunctionType())
return {};
QualType OldReturnType =
cast<FunctionType>(RHSCan.getTypePtr())->getReturnType();
QualType NewReturnType =
cast<FunctionType>(LHSCan.getTypePtr())->getReturnType();
QualType ResReturnType =
mergeObjCGCQualifiers(NewReturnType, OldReturnType);
if (ResReturnType.isNull())
return {};
if (ResReturnType == NewReturnType || ResReturnType == OldReturnType) {
// id foo(); ... __strong id foo(); or: __strong id foo(); ... id foo();
// In either case, use OldReturnType to build the new function type.
const auto *F = LHS->castAs<FunctionType>();
if (const auto *FPT = cast<FunctionProtoType>(F)) {
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
EPI.ExtInfo = getFunctionExtInfo(LHS);
QualType ResultType =
getFunctionType(OldReturnType, FPT->getParamTypes(), EPI);
return ResultType;
}
}
return {};
}
// If the qualifiers are different, the types can still be merged.
Qualifiers LQuals = LHSCan.getLocalQualifiers();
Qualifiers RQuals = RHSCan.getLocalQualifiers();
if (LQuals != RQuals) {
// If any of these qualifiers are different, we have a type mismatch.
if (LQuals.getCVRQualifiers() != RQuals.getCVRQualifiers() ||
LQuals.getAddressSpace() != RQuals.getAddressSpace())
return {};
// Exactly one GC qualifier difference is allowed: __strong is
// okay if the other type has no GC qualifier but is an Objective
// C object pointer (i.e. implicitly strong by default). We fix
// this by pretending that the unqualified type was actually
// qualified __strong.
Qualifiers::GC GC_L = LQuals.getObjCGCAttr();
Qualifiers::GC GC_R = RQuals.getObjCGCAttr();
assert((GC_L != GC_R) && "unequal qualifier sets had only equal elements");
if (GC_L == Qualifiers::Weak || GC_R == Qualifiers::Weak)
return {};
if (GC_L == Qualifiers::Strong)
return LHS;
if (GC_R == Qualifiers::Strong)
return RHS;
return {};
}
if (LHSCan->isObjCObjectPointerType() && RHSCan->isObjCObjectPointerType()) {
QualType LHSBaseQT = LHS->castAs<ObjCObjectPointerType>()->getPointeeType();
QualType RHSBaseQT = RHS->castAs<ObjCObjectPointerType>()->getPointeeType();
QualType ResQT = mergeObjCGCQualifiers(LHSBaseQT, RHSBaseQT);
if (ResQT == LHSBaseQT)
return LHS;
if (ResQT == RHSBaseQT)
return RHS;
}
return {};
}
//===----------------------------------------------------------------------===//
// Integer Predicates
//===----------------------------------------------------------------------===//
unsigned ASTContext::getIntWidth(QualType T) const {
if (const auto *ET = T->getAs<EnumType>())
T = ET->getDecl()->getIntegerType();
if (T->isBooleanType())
return 1;
if (const auto *EIT = T->getAs<ExtIntType>())
return EIT->getNumBits();
// For builtin types, just use the standard type sizing method
return (unsigned)getTypeSize(T);
}
QualType ASTContext::getCorrespondingUnsignedType(QualType T) const {
assert((T->hasSignedIntegerRepresentation() || T->isSignedFixedPointType()) &&
"Unexpected type");
// Turn <4 x signed int> -> <4 x unsigned int>
if (const auto *VTy = T->getAs<VectorType>())
return getVectorType(getCorrespondingUnsignedType(VTy->getElementType()),
VTy->getNumElements(), VTy->getVectorKind());
// For _ExtInt, return an unsigned _ExtInt with same width.
if (const auto *EITy = T->getAs<ExtIntType>())
return getExtIntType(/*IsUnsigned=*/true, EITy->getNumBits());
// For enums, get the underlying integer type of the enum, and let the general
// integer type sign-changing code handle it.
if (const auto *ETy = T->getAs<EnumType>())
T = ETy->getDecl()->getIntegerType();
switch (T->castAs<BuiltinType>()->getKind()) {
case BuiltinType::Char_S:
case BuiltinType::SChar:
return UnsignedCharTy;
case BuiltinType::Short:
return UnsignedShortTy;
case BuiltinType::Int:
return UnsignedIntTy;
case BuiltinType::Long:
return UnsignedLongTy;
case BuiltinType::LongLong:
return UnsignedLongLongTy;
case BuiltinType::Int128:
return UnsignedInt128Ty;
// wchar_t is special. It is either signed or not, but when it's signed,
// there's no matching "unsigned wchar_t". Therefore we return the unsigned
// version of its underlying type instead.
case BuiltinType::WChar_S:
return getUnsignedWCharType();
case BuiltinType::ShortAccum:
return UnsignedShortAccumTy;
case BuiltinType::Accum:
return UnsignedAccumTy;
case BuiltinType::LongAccum:
return UnsignedLongAccumTy;
case BuiltinType::SatShortAccum:
return SatUnsignedShortAccumTy;
case BuiltinType::SatAccum:
return SatUnsignedAccumTy;
case BuiltinType::SatLongAccum:
return SatUnsignedLongAccumTy;
case BuiltinType::ShortFract:
return UnsignedShortFractTy;
case BuiltinType::Fract:
return UnsignedFractTy;
case BuiltinType::LongFract:
return UnsignedLongFractTy;
case BuiltinType::SatShortFract:
return SatUnsignedShortFractTy;
case BuiltinType::SatFract:
return SatUnsignedFractTy;
case BuiltinType::SatLongFract:
return SatUnsignedLongFractTy;
default:
llvm_unreachable("Unexpected signed integer or fixed point type");
}
}
QualType ASTContext::getCorrespondingSignedType(QualType T) const {
assert((T->hasUnsignedIntegerRepresentation() ||
T->isUnsignedFixedPointType()) &&
"Unexpected type");
// Turn <4 x unsigned int> -> <4 x signed int>
if (const auto *VTy = T->getAs<VectorType>())
return getVectorType(getCorrespondingSignedType(VTy->getElementType()),
VTy->getNumElements(), VTy->getVectorKind());
// For _ExtInt, return a signed _ExtInt with same width.
if (const auto *EITy = T->getAs<ExtIntType>())
return getExtIntType(/*IsUnsigned=*/false, EITy->getNumBits());
// For enums, get the underlying integer type of the enum, and let the general
// integer type sign-changing code handle it.
if (const auto *ETy = T->getAs<EnumType>())
T = ETy->getDecl()->getIntegerType();
switch (T->castAs<BuiltinType>()->getKind()) {
case BuiltinType::Char_U:
case BuiltinType::UChar:
return SignedCharTy;
case BuiltinType::UShort:
return ShortTy;
case BuiltinType::UInt:
return IntTy;
case BuiltinType::ULong:
return LongTy;
case BuiltinType::ULongLong:
return LongLongTy;
case BuiltinType::UInt128:
return Int128Ty;
// wchar_t is special. It is either unsigned or not, but when it's unsigned,
// there's no matching "signed wchar_t". Therefore we return the signed
// version of its underlying type instead.
case BuiltinType::WChar_U:
return getSignedWCharType();
case BuiltinType::UShortAccum:
return ShortAccumTy;
case BuiltinType::UAccum:
return AccumTy;
case BuiltinType::ULongAccum:
return LongAccumTy;
case BuiltinType::SatUShortAccum:
return SatShortAccumTy;
case BuiltinType::SatUAccum:
return SatAccumTy;
case BuiltinType::SatULongAccum:
return SatLongAccumTy;
case BuiltinType::UShortFract:
return ShortFractTy;
case BuiltinType::UFract:
return FractTy;
case BuiltinType::ULongFract:
return LongFractTy;
case BuiltinType::SatUShortFract:
return SatShortFractTy;
case BuiltinType::SatUFract:
return SatFractTy;
case BuiltinType::SatULongFract:
return SatLongFractTy;
default:
llvm_unreachable("Unexpected unsigned integer or fixed point type");
}
}
ASTMutationListener::~ASTMutationListener() = default;
void ASTMutationListener::DeducedReturnType(const FunctionDecl *FD,
QualType ReturnType) {}
//===----------------------------------------------------------------------===//
// Builtin Type Computation
//===----------------------------------------------------------------------===//
/// DecodeTypeFromStr - This decodes one type descriptor from Str, advancing the
/// pointer over the consumed characters. This returns the resultant type. If
/// AllowTypeModifiers is false, then modifiers like '*' are not parsed, just basic
/// types. This allows "v2i*" to be parsed as a pointer to a v2i instead of
/// a vector of "i*".
///
/// RequiresICE is filled in on return to indicate whether the value is required
/// to be an Integer Constant Expression.
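/// For illustration (see Builtins.def for the full set of descriptors):
/// "v" decodes to 'void', "LLi" to 'long long int', "Ii" to an 'int' that
/// must be an integer constant expression, and "v*" to 'void *'.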
static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
ASTContext::GetBuiltinTypeError &Error,
bool &RequiresICE,
bool AllowTypeModifiers) {
// Modifiers.
int HowLong = 0;
bool Signed = false, Unsigned = false;
RequiresICE = false;
// Read the prefixed modifiers first.
bool Done = false;
#ifndef NDEBUG
bool IsSpecial = false;
#endif
while (!Done) {
switch (*Str++) {
default: Done = true; --Str; break;
case 'I':
RequiresICE = true;
break;
case 'S':
assert(!Unsigned && "Can't use both 'S' and 'U' modifiers!");
assert(!Signed && "Can't use 'S' modifier multiple times!");
Signed = true;
break;
case 'U':
assert(!Signed && "Can't use both 'S' and 'U' modifiers!");
assert(!Unsigned && "Can't use 'U' modifier multiple times!");
Unsigned = true;
break;
case 'L':
assert(!IsSpecial && "Can't use 'L' with 'W', 'N', 'Z' or 'O' modifiers");
assert(HowLong <= 2 && "Can't have LLLL modifier");
++HowLong;
break;
case 'N':
// 'N' behaves like 'L' for all non-LP64 targets and like 'int' otherwise.
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'N' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
if (Context.getTargetInfo().getLongWidth() == 32)
++HowLong;
break;
case 'W':
// This modifier represents int64 type.
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'W' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
switch (Context.getTargetInfo().getInt64Type()) {
default:
llvm_unreachable("Unexpected integer type");
case TargetInfo::SignedLong:
HowLong = 1;
break;
case TargetInfo::SignedLongLong:
HowLong = 2;
break;
}
break;
case 'Z':
// This modifier represents int32 type.
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'Z' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
switch (Context.getTargetInfo().getIntTypeByWidth(32, true)) {
default:
llvm_unreachable("Unexpected integer type");
case TargetInfo::SignedInt:
HowLong = 0;
break;
case TargetInfo::SignedLong:
HowLong = 1;
break;
case TargetInfo::SignedLongLong:
HowLong = 2;
break;
}
break;
case 'O':
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'O' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
if (Context.getLangOpts().OpenCL)
HowLong = 1;
else
HowLong = 2;
break;
}
}
QualType Type;
// Read the base type.
switch (*Str++) {
default: llvm_unreachable("Unknown builtin type letter!");
case 'x':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'x'!");
Type = Context.Float16Ty;
break;
case 'y':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'y'!");
Type = Context.BFloat16Ty;
break;
case 'v':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'v'!");
Type = Context.VoidTy;
break;
case 'h':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'h'!");
Type = Context.HalfTy;
break;
case 'f':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'f'!");
Type = Context.FloatTy;
break;
case 'd':
assert(HowLong < 3 && !Signed && !Unsigned &&
"Bad modifiers used with 'd'!");
if (HowLong == 1)
Type = Context.LongDoubleTy;
else if (HowLong == 2)
Type = Context.Float128Ty;
else
Type = Context.DoubleTy;
break;
case 's':
assert(HowLong == 0 && "Bad modifiers used with 's'!");
if (Unsigned)
Type = Context.UnsignedShortTy;
else
Type = Context.ShortTy;
break;
case 'i':
if (HowLong == 3)
Type = Unsigned ? Context.UnsignedInt128Ty : Context.Int128Ty;
else if (HowLong == 2)
Type = Unsigned ? Context.UnsignedLongLongTy : Context.LongLongTy;
else if (HowLong == 1)
Type = Unsigned ? Context.UnsignedLongTy : Context.LongTy;
else
Type = Unsigned ? Context.UnsignedIntTy : Context.IntTy;
break;
case 'c':
assert(HowLong == 0 && "Bad modifiers used with 'c'!");
if (Signed)
Type = Context.SignedCharTy;
else if (Unsigned)
Type = Context.UnsignedCharTy;
else
Type = Context.CharTy;
break;
case 'b': // boolean
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'b'!");
Type = Context.BoolTy;
break;
case 'z': // size_t.
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'z'!");
Type = Context.getSizeType();
break;
case 'w': // wchar_t.
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'w'!");
Type = Context.getWideCharType();
break;
case 'F':
Type = Context.getCFConstantStringType();
break;
case 'G':
Type = Context.getObjCIdType();
break;
case 'H':
Type = Context.getObjCSelType();
break;
case 'M':
Type = Context.getObjCSuperType();
break;
case 'a':
Type = Context.getBuiltinVaListType();
assert(!Type.isNull() && "builtin va list type not initialized!");
break;
case 'A':
// This is a "reference" to a va_list; however, what exactly
// this means depends on how va_list is defined. There are two
// different kinds of va_list: ones passed by value, and ones
// passed by reference. An example of a by-value va_list is
// x86, where va_list is a char*. An example of by-ref va_list
// is x86-64, where va_list is a __va_list_tag[1]. For x86,
// we want this argument to be a char*&; for x86-64, we want
// it to be a __va_list_tag*.
Type = Context.getBuiltinVaListType();
assert(!Type.isNull() && "builtin va list type not initialized!");
if (Type->isArrayType())
Type = Context.getArrayDecayedType(Type);
else
Type = Context.getLValueReferenceType(Type);
break;
case 'q': {
char *End;
unsigned NumElements = strtoul(Str, &End, 10);
assert(End != Str && "Missing vector size");
Str = End;
QualType ElementType = DecodeTypeFromStr(Str, Context, Error,
RequiresICE, false);
assert(!RequiresICE && "Can't require vector ICE");
Type = Context.getScalableVectorType(ElementType, NumElements);
break;
}
case 'V': {
char *End;
unsigned NumElements = strtoul(Str, &End, 10);
assert(End != Str && "Missing vector size");
Str = End;
QualType ElementType = DecodeTypeFromStr(Str, Context, Error,
RequiresICE, false);
assert(!RequiresICE && "Can't require vector ICE");
// TODO: No way to make AltiVec vectors in builtins yet.
Type = Context.getVectorType(ElementType, NumElements,
VectorType::GenericVector);
break;
}
case 'E': {
char *End;
unsigned NumElements = strtoul(Str, &End, 10);
assert(End != Str && "Missing vector size");
Str = End;
QualType ElementType = DecodeTypeFromStr(Str, Context, Error, RequiresICE,
false);
Type = Context.getExtVectorType(ElementType, NumElements);
break;
}
case 'X': {
QualType ElementType = DecodeTypeFromStr(Str, Context, Error, RequiresICE,
false);
assert(!RequiresICE && "Can't require complex ICE");
Type = Context.getComplexType(ElementType);
break;
}
case 'Y':
Type = Context.getPointerDiffType();
break;
case 'P':
Type = Context.getFILEType();
if (Type.isNull()) {
Error = ASTContext::GE_Missing_stdio;
return {};
}
break;
case 'J':
if (Signed)
Type = Context.getsigjmp_bufType();
else
Type = Context.getjmp_bufType();
if (Type.isNull()) {
Error = ASTContext::GE_Missing_setjmp;
return {};
}
break;
case 'K':
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'K'!");
Type = Context.getucontext_tType();
if (Type.isNull()) {
Error = ASTContext::GE_Missing_ucontext;
return {};
}
break;
case 'p':
Type = Context.getProcessIDType();
break;
}
// If there are modifiers and if we're allowed to parse them, go for it.
Done = !AllowTypeModifiers;
while (!Done) {
switch (char c = *Str++) {
default: Done = true; --Str; break;
case '*':
case '&': {
// Both pointers and references can have their pointee types
// qualified with an address space.
char *End;
unsigned AddrSpace = strtoul(Str, &End, 10);
if (End != Str) {
// Note AddrSpace == 0 is not the same as an unspecified address space.
Type = Context.getAddrSpaceQualType(
Type,
Context.getLangASForBuiltinAddressSpace(AddrSpace));
Str = End;
}
if (c == '*')
Type = Context.getPointerType(Type);
else
Type = Context.getLValueReferenceType(Type);
break;
}
// FIXME: There's no way to have a built-in with an rvalue ref arg.
case 'C':
Type = Type.withConst();
break;
case 'D':
Type = Context.getVolatileType(Type);
break;
case 'R':
Type = Type.withRestrict();
break;
}
}
assert((!RequiresICE || Type->isIntegralOrEnumerationType()) &&
"Integer constant 'I' type must be an integer");
return Type;
}
// On some targets such as PowerPC, some of the builtins are defined with custom
// type descriptors for target-dependent types. These descriptors are decoded in
// other functions, but it may be useful to be able to fall back to default
// descriptor decoding to define builtins mixing target-dependent and target-
// independent types. This function allows decoding one type descriptor with
// default decoding.
QualType ASTContext::DecodeTypeStr(const char *&Str, const ASTContext &Context,
GetBuiltinTypeError &Error, bool &RequireICE,
bool AllowTypeModifiers) const {
return DecodeTypeFromStr(Str, Context, Error, RequireICE, AllowTypeModifiers);
}
/// GetBuiltinType - Return the type for the specified builtin.
QualType ASTContext::GetBuiltinType(unsigned Id,
GetBuiltinTypeError &Error,
unsigned *IntegerConstantArgs) const {
const char *TypeStr = BuiltinInfo.getTypeString(Id);
if (TypeStr[0] == '\0') {
Error = GE_Missing_type;
return {};
}
SmallVector<QualType, 8> ArgTypes;
bool RequiresICE = false;
Error = GE_None;
QualType ResType = DecodeTypeFromStr(TypeStr, *this, Error,
RequiresICE, true);
if (Error != GE_None)
return {};
assert(!RequiresICE && "Result of intrinsic cannot be required to be an ICE");
while (TypeStr[0] && TypeStr[0] != '.') {
QualType Ty = DecodeTypeFromStr(TypeStr, *this, Error, RequiresICE, true);
if (Error != GE_None)
return {};
// If this argument is required to be an IntegerConstantExpression and the
// caller cares, fill in the bitmask we return.
if (RequiresICE && IntegerConstantArgs)
*IntegerConstantArgs |= 1 << ArgTypes.size();
// Do array -> pointer decay. The builtin should use the decayed type.
if (Ty->isArrayType())
Ty = getArrayDecayedType(Ty);
ArgTypes.push_back(Ty);
}
if (Id == Builtin::BI__GetExceptionInfo)
return {};
assert((TypeStr[0] != '.' || TypeStr[1] == 0) &&
"'.' should only occur at end of builtin type list!");
bool Variadic = (TypeStr[0] == '.');
FunctionType::ExtInfo EI(getDefaultCallingConvention(
Variadic, /*IsCXXMethod=*/false, /*IsBuiltin=*/true));
if (BuiltinInfo.isNoReturn(Id)) EI = EI.withNoReturn(true);
// We really shouldn't be making a no-proto type here.
if (ArgTypes.empty() && Variadic && !getLangOpts().CPlusPlus)
return getFunctionNoProtoType(ResType, EI);
FunctionProtoType::ExtProtoInfo EPI;
EPI.ExtInfo = EI;
EPI.Variadic = Variadic;
if (getLangOpts().CPlusPlus && BuiltinInfo.isNoThrow(Id))
EPI.ExceptionSpec.Type =
getLangOpts().CPlusPlus11 ? EST_BasicNoexcept : EST_DynamicNone;
return getFunctionType(ResType, ArgTypes, EPI);
}
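// For illustration (a worked example inferred from the decoder above, not an
// exhaustive specification): a descriptor such as "icC*." -- the shape used by
// printf-style builtins -- decodes as follows: 'i' gives the 'int' result type,
// 'c' gives 'char', the trailing modifiers 'C' and '*' turn that argument into
// 'const char *', and the final '.' marks the prototype as variadic, so the
// resulting type is 'int (const char *, ...)'.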
static GVALinkage basicGVALinkageForFunction(const ASTContext &Context,
const FunctionDecl *FD) {
if (!FD->isExternallyVisible())
return GVA_Internal;
// Non-user-provided functions get emitted as weak definitions with every
// use, no matter whether they've been explicitly instantiated etc.
if (const auto *MD = dyn_cast<CXXMethodDecl>(FD))
if (!MD->isUserProvided())
return GVA_DiscardableODR;
GVALinkage External;
switch (FD->getTemplateSpecializationKind()) {
case TSK_Undeclared:
case TSK_ExplicitSpecialization:
External = GVA_StrongExternal;
break;
case TSK_ExplicitInstantiationDefinition:
return GVA_StrongODR;
// C++11 [temp.explicit]p10:
// [ Note: The intent is that an inline function that is the subject of
// an explicit instantiation declaration will still be implicitly
// instantiated when used so that the body can be considered for
// inlining, but that no out-of-line copy of the inline function would be
// generated in the translation unit. -- end note ]
case TSK_ExplicitInstantiationDeclaration:
return GVA_AvailableExternally;
case TSK_ImplicitInstantiation:
External = GVA_DiscardableODR;
break;
}
if (!FD->isInlined())
return External;
if ((!Context.getLangOpts().CPlusPlus &&
!Context.getTargetInfo().getCXXABI().isMicrosoft() &&
!FD->hasAttr<DLLExportAttr>()) ||
FD->hasAttr<GNUInlineAttr>()) {
// FIXME: This doesn't match gcc's behavior for dllexport inline functions.
// GNU or C99 inline semantics. Determine whether this symbol should be
// externally visible.
if (FD->isInlineDefinitionExternallyVisible())
return External;
// C99 inline semantics, where the symbol is not externally visible.
return GVA_AvailableExternally;
}
// Functions specified with extern and inline in -fms-compatibility mode
// forcibly get emitted. While the body of the function cannot be later
// replaced, the function definition cannot be discarded.
if (FD->isMSExternInline())
return GVA_StrongODR;
return GVA_DiscardableODR;
}
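// For illustration (assuming a plain C++ translation unit): a non-template
// 'inline' function that is odr-used takes the TSK_Undeclared path, is inlined,
// and ends up as GVA_DiscardableODR (a weak definition emitted in every TU that
// uses it), while a function covered by an 'extern template' declaration hits
// TSK_ExplicitInstantiationDeclaration and becomes GVA_AvailableExternally,
// i.e. no out-of-line copy is emitted here.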
static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context,
const Decl *D, GVALinkage L) {
// See http://msdn.microsoft.com/en-us/library/xa0d9ste.aspx
// dllexport/dllimport on inline functions.
if (D->hasAttr<DLLImportAttr>()) {
if (L == GVA_DiscardableODR || L == GVA_StrongODR)
return GVA_AvailableExternally;
} else if (D->hasAttr<DLLExportAttr>()) {
if (L == GVA_DiscardableODR)
return GVA_StrongODR;
} else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) {
// Device-side functions with __global__ attribute must always be
// visible externally so they can be launched from host.
if (D->hasAttr<CUDAGlobalAttr>() &&
(L == GVA_DiscardableODR || L == GVA_Internal))
return GVA_StrongODR;
// Single source offloading languages like CUDA/HIP need to be able to
// access static device variables from host code of the same compilation
// unit. This is done by externalizing the static variable with a name
// shared between the host and device compilations; the name is identical
// within one compilation unit but differs across compilation units.
if (Context.shouldExternalizeStaticVar(D))
return GVA_StrongExternal;
}
return L;
}
/// Adjust the GVALinkage for a declaration based on what an external AST source
/// knows about whether there can be other definitions of this declaration.
static GVALinkage
adjustGVALinkageForExternalDefinitionKind(const ASTContext &Ctx, const Decl *D,
GVALinkage L) {
ExternalASTSource *Source = Ctx.getExternalSource();
if (!Source)
return L;
switch (Source->hasExternalDefinitions(D)) {
case ExternalASTSource::EK_Never:
// Other translation units rely on us to provide the definition.
if (L == GVA_DiscardableODR)
return GVA_StrongODR;
break;
case ExternalASTSource::EK_Always:
return GVA_AvailableExternally;
case ExternalASTSource::EK_ReplyHazy:
break;
}
return L;
}
GVALinkage ASTContext::GetGVALinkageForFunction(const FunctionDecl *FD) const {
return adjustGVALinkageForExternalDefinitionKind(*this, FD,
adjustGVALinkageForAttributes(*this, FD,
basicGVALinkageForFunction(*this, FD)));
}
static GVALinkage basicGVALinkageForVariable(const ASTContext &Context,
const VarDecl *VD) {
if (!VD->isExternallyVisible())
return GVA_Internal;
if (VD->isStaticLocal()) {
const DeclContext *LexicalContext = VD->getParentFunctionOrMethod();
while (LexicalContext && !isa<FunctionDecl>(LexicalContext))
LexicalContext = LexicalContext->getLexicalParent();
// ObjC Blocks can create local variables that don't have a FunctionDecl
// LexicalContext.
if (!LexicalContext)
return GVA_DiscardableODR;
// Otherwise, let the static local variable inherit its linkage from the
// nearest enclosing function.
auto StaticLocalLinkage =
Context.GetGVALinkageForFunction(cast<FunctionDecl>(LexicalContext));
// Itanium ABI 5.2.2: "Each COMDAT group [for a static local variable] must
// be emitted in any object with references to the symbol for the object it
// contains, whether inline or out-of-line."
// Similar behavior is observed with MSVC. An alternative ABI could use
// StrongODR/AvailableExternally to match the function, but none are
// known/supported currently.
if (StaticLocalLinkage == GVA_StrongODR ||
StaticLocalLinkage == GVA_AvailableExternally)
return GVA_DiscardableODR;
return StaticLocalLinkage;
}
// MSVC treats in-class initialized static data members as definitions.
// By giving them non-strong linkage, out-of-line definitions won't
// cause link errors.
if (Context.isMSStaticDataMemberInlineDefinition(VD))
return GVA_DiscardableODR;
// Most non-template variables have strong linkage; inline variables are
// linkonce_odr or (occasionally, for compatibility) weak_odr.
GVALinkage StrongLinkage;
switch (Context.getInlineVariableDefinitionKind(VD)) {
case ASTContext::InlineVariableDefinitionKind::None:
StrongLinkage = GVA_StrongExternal;
break;
case ASTContext::InlineVariableDefinitionKind::Weak:
case ASTContext::InlineVariableDefinitionKind::WeakUnknown:
StrongLinkage = GVA_DiscardableODR;
break;
case ASTContext::InlineVariableDefinitionKind::Strong:
StrongLinkage = GVA_StrongODR;
break;
}
switch (VD->getTemplateSpecializationKind()) {
case TSK_Undeclared:
return StrongLinkage;
case TSK_ExplicitSpecialization:
return Context.getTargetInfo().getCXXABI().isMicrosoft() &&
VD->isStaticDataMember()
? GVA_StrongODR
: StrongLinkage;
case TSK_ExplicitInstantiationDefinition:
return GVA_StrongODR;
case TSK_ExplicitInstantiationDeclaration:
return GVA_AvailableExternally;
case TSK_ImplicitInstantiation:
return GVA_DiscardableODR;
}
llvm_unreachable("Invalid Linkage!");
}
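// For illustration: a C++17 'inline' variable at namespace scope reaches the
// InlineVariableDefinitionKind::Weak case and is classified as
// GVA_DiscardableODR, whereas a variable instantiated via an explicit
// instantiation definition returns GVA_StrongODR from the
// template-specialization switch above.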
GVALinkage ASTContext::GetGVALinkageForVariable(const VarDecl *VD) {
return adjustGVALinkageForExternalDefinitionKind(*this, VD,
adjustGVALinkageForAttributes(*this, VD,
basicGVALinkageForVariable(*this, VD)));
}
bool ASTContext::DeclMustBeEmitted(const Decl *D) {
if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (!VD->isFileVarDecl())
return false;
// Global named register variables (GNU extension) are never emitted.
if (VD->getStorageClass() == SC_Register)
return false;
if (VD->getDescribedVarTemplate() ||
isa<VarTemplatePartialSpecializationDecl>(VD))
return false;
} else if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// We never need to emit an uninstantiated function template.
if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate)
return false;
} else if (isa<PragmaCommentDecl>(D))
return true;
else if (isa<PragmaDetectMismatchDecl>(D))
return true;
else if (isa<OMPRequiresDecl>(D))
return true;
else if (isa<OMPThreadPrivateDecl>(D))
return !D->getDeclContext()->isDependentContext();
else if (isa<OMPAllocateDecl>(D))
return !D->getDeclContext()->isDependentContext();
else if (isa<OMPDeclareReductionDecl>(D) || isa<OMPDeclareMapperDecl>(D))
return !D->getDeclContext()->isDependentContext();
else if (isa<ImportDecl>(D))
return true;
else
return false;
// If this is a member of a class template, we do not need to emit it.
if (D->getDeclContext()->isDependentContext())
return false;
// Weak references don't produce any output by themselves.
if (D->hasAttr<WeakRefAttr>())
return false;
// Aliases and used decls are required.
if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>())
return true;
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// Forward declarations aren't required.
if (!FD->doesThisDeclarationHaveABody())
return FD->doesDeclarationForceExternallyVisibleDefinition();
// Constructors and destructors are required.
if (FD->hasAttr<ConstructorAttr>() || FD->hasAttr<DestructorAttr>())
return true;
// The key function for a class is required. This rule only comes
// into play when inline functions can be key functions, though.
if (getTargetInfo().getCXXABI().canKeyFunctionBeInline()) {
if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
const CXXRecordDecl *RD = MD->getParent();
if (MD->isOutOfLine() && RD->isDynamicClass()) {
const CXXMethodDecl *KeyFunc = getCurrentKeyFunction(RD);
if (KeyFunc && KeyFunc->getCanonicalDecl() == MD->getCanonicalDecl())
return true;
}
}
}
GVALinkage Linkage = GetGVALinkageForFunction(FD);
// static, static inline, always_inline, and extern inline functions can
// always be deferred. Normal inline functions can be deferred in C99/C++.
// Implicit template instantiations can also be deferred in C++.
return !isDiscardableGVALinkage(Linkage);
}
const auto *VD = cast<VarDecl>(D);
assert(VD->isFileVarDecl() && "Expected file scoped var");
// If the decl is marked as `declare target to`, it should be emitted for the
// host and for the device.
if (LangOpts.OpenMP &&
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
return true;
if (VD->isThisDeclarationADefinition() == VarDecl::DeclarationOnly &&
!isMSStaticDataMemberInlineDefinition(VD))
return false;
// Variables that can be needed in other TUs are required.
auto Linkage = GetGVALinkageForVariable(VD);
if (!isDiscardableGVALinkage(Linkage))
return true;
// We never need to emit a variable that is available in another TU.
if (Linkage == GVA_AvailableExternally)
return false;
// Variables that have destruction with side-effects are required.
if (VD->needsDestruction(*this))
return true;
// Variables that have initialization with side-effects are required.
if (VD->getInit() && VD->getInit()->HasSideEffects(*this) &&
// We can get a value-dependent initializer during error recovery.
(VD->getInit()->isValueDependent() || !VD->evaluateValue()))
return true;
// Likewise, variables with tuple-like bindings are required if their
// bindings have side-effects.
if (const auto *DD = dyn_cast<DecompositionDecl>(VD))
for (const auto *BD : DD->bindings())
if (const auto *BindingVD = BD->getHoldingVar())
if (DeclMustBeEmitted(BindingVD))
return true;
return false;
}
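// For illustration: a file-scope 'static const int' with a constant initializer
// is not required to be emitted (internal, discardable linkage and no side
// effects), whereas a static object whose initializer or destructor has side
// effects is kept alive by the checks above even though its linkage is
// discardable.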
void ASTContext::forEachMultiversionedFunctionVersion(
const FunctionDecl *FD,
llvm::function_ref<void(FunctionDecl *)> Pred) const {
assert(FD->isMultiVersion() && "Only valid for multiversioned functions");
llvm::SmallDenseSet<const FunctionDecl*, 4> SeenDecls;
FD = FD->getMostRecentDecl();
// FIXME: The order of traversal here matters and depends on the order of
// lookup results, which happens to be (mostly) oldest-to-newest, but we
// shouldn't rely on that.
for (auto *CurDecl :
FD->getDeclContext()->getRedeclContext()->lookup(FD->getDeclName())) {
FunctionDecl *CurFD = CurDecl->getAsFunction()->getMostRecentDecl();
if (CurFD && hasSameType(CurFD->getType(), FD->getType()) &&
std::end(SeenDecls) == llvm::find(SeenDecls, CurFD)) {
SeenDecls.insert(CurFD);
Pred(CurFD);
}
}
}
CallingConv ASTContext::getDefaultCallingConvention(bool IsVariadic,
bool IsCXXMethod,
bool IsBuiltin) const {
// Pass through to the C++ ABI object
if (IsCXXMethod)
return ABI->getDefaultMethodCallConv(IsVariadic);
// Builtins ignore user-specified default calling convention and remain the
// Target's default calling convention.
if (!IsBuiltin) {
switch (LangOpts.getDefaultCallingConv()) {
case LangOptions::DCC_None:
break;
case LangOptions::DCC_CDecl:
return CC_C;
case LangOptions::DCC_FastCall:
if (getTargetInfo().hasFeature("sse2") && !IsVariadic)
return CC_X86FastCall;
break;
case LangOptions::DCC_StdCall:
if (!IsVariadic)
return CC_X86StdCall;
break;
case LangOptions::DCC_VectorCall:
// __vectorcall cannot be applied to variadic functions.
if (!IsVariadic)
return CC_X86VectorCall;
break;
case LangOptions::DCC_RegCall:
// __regcall cannot be applied to variadic functions.
if (!IsVariadic)
return CC_X86RegCall;
break;
}
}
return Target->getDefaultCallingConv();
}
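// For illustration (assuming a 32-bit x86 target): under
// -fdefault-calling-conv=stdcall a non-variadic free function is given
// CC_X86StdCall by the switch above, a variadic one falls through to the
// target's default convention, and builtins always use the target default
// because IsBuiltin skips the switch entirely.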
bool ASTContext::isNearlyEmpty(const CXXRecordDecl *RD) const {
// Pass through to the C++ ABI object
return ABI->isNearlyEmpty(RD);
}
VTableContextBase *ASTContext::getVTableContext() {
if (!VTContext.get()) {
auto ABI = Target->getCXXABI();
if (ABI.isMicrosoft())
VTContext.reset(new MicrosoftVTableContext(*this));
else {
auto ComponentLayout = getLangOpts().RelativeCXXABIVTables
? ItaniumVTableContext::Relative
: ItaniumVTableContext::Pointer;
VTContext.reset(new ItaniumVTableContext(*this, ComponentLayout));
}
}
return VTContext.get();
}
MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
if (!T)
T = Target;
switch (T->getCXXABI().getKind()) {
case TargetCXXABI::AppleARM64:
case TargetCXXABI::Fuchsia:
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::GenericARM:
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::iOS:
case TargetCXXABI::WebAssembly:
case TargetCXXABI::WatchOS:
case TargetCXXABI::XL:
return ItaniumMangleContext::create(*this, getDiagnostics());
case TargetCXXABI::Microsoft:
return MicrosoftMangleContext::create(*this, getDiagnostics());
}
llvm_unreachable("Unsupported ABI");
}
MangleContext *ASTContext::createDeviceMangleContext(const TargetInfo &T) {
assert(T.getCXXABI().getKind() != TargetCXXABI::Microsoft &&
"Device mangle context does not support Microsoft mangling.");
switch (T.getCXXABI().getKind()) {
case TargetCXXABI::AppleARM64:
case TargetCXXABI::Fuchsia:
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::GenericARM:
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::iOS:
case TargetCXXABI::WebAssembly:
case TargetCXXABI::WatchOS:
case TargetCXXABI::XL:
return ItaniumMangleContext::create(
*this, getDiagnostics(),
[](ASTContext &, const NamedDecl *ND) -> llvm::Optional<unsigned> {
if (const auto *RD = dyn_cast<CXXRecordDecl>(ND))
return RD->getDeviceLambdaManglingNumber();
return llvm::None;
});
case TargetCXXABI::Microsoft:
return MicrosoftMangleContext::create(*this, getDiagnostics());
}
llvm_unreachable("Unsupported ABI");
}
CXXABI::~CXXABI() = default;
size_t ASTContext::getSideTableAllocatedMemory() const {
return ASTRecordLayouts.getMemorySize() +
llvm::capacity_in_bytes(ObjCLayouts) +
llvm::capacity_in_bytes(KeyFunctions) +
llvm::capacity_in_bytes(ObjCImpls) +
llvm::capacity_in_bytes(BlockVarCopyInits) +
llvm::capacity_in_bytes(DeclAttrs) +
llvm::capacity_in_bytes(TemplateOrInstantiation) +
llvm::capacity_in_bytes(InstantiatedFromUsingDecl) +
llvm::capacity_in_bytes(InstantiatedFromUsingShadowDecl) +
llvm::capacity_in_bytes(InstantiatedFromUnnamedFieldDecl) +
llvm::capacity_in_bytes(OverriddenMethods) +
llvm::capacity_in_bytes(Types) +
llvm::capacity_in_bytes(VariableArrayTypes);
}
/// getIntTypeForBitwidth -
/// Returns the integer QualType corresponding to the specified bitwidth and
/// signedness.
/// Returns an empty type if there is no appropriate target type.
QualType ASTContext::getIntTypeForBitwidth(unsigned DestWidth,
unsigned Signed) const {
TargetInfo::IntType Ty = getTargetInfo().getIntTypeByWidth(DestWidth, Signed);
CanQualType QualTy = getFromTargetType(Ty);
if (!QualTy && DestWidth == 128)
return Signed ? Int128Ty : UnsignedInt128Ty;
return QualTy;
}
/// getRealTypeForBitwidth -
/// Returns the floating-point QualType corresponding to the specified bitwidth.
/// Returns an empty type if there is no appropriate target type.
QualType ASTContext::getRealTypeForBitwidth(unsigned DestWidth,
bool ExplicitIEEE) const {
TargetInfo::RealType Ty =
getTargetInfo().getRealTypeByWidth(DestWidth, ExplicitIEEE);
switch (Ty) {
case TargetInfo::Float:
return FloatTy;
case TargetInfo::Double:
return DoubleTy;
case TargetInfo::LongDouble:
return LongDoubleTy;
case TargetInfo::Float128:
return Float128Ty;
case TargetInfo::NoFloat:
return {};
}
llvm_unreachable("Unhandled TargetInfo::RealType value");
}
void ASTContext::setManglingNumber(const NamedDecl *ND, unsigned Number) {
if (Number > 1)
MangleNumbers[ND] = Number;
}
unsigned ASTContext::getManglingNumber(const NamedDecl *ND) const {
auto I = MangleNumbers.find(ND);
return I != MangleNumbers.end() ? I->second : 1;
}
void ASTContext::setStaticLocalNumber(const VarDecl *VD, unsigned Number) {
if (Number > 1)
StaticLocalNumbers[VD] = Number;
}
unsigned ASTContext::getStaticLocalNumber(const VarDecl *VD) const {
auto I = StaticLocalNumbers.find(VD);
return I != StaticLocalNumbers.end() ? I->second : 1;
}
MangleNumberingContext &
ASTContext::getManglingNumberContext(const DeclContext *DC) {
assert(LangOpts.CPlusPlus); // We don't need mangling numbers for plain C.
std::unique_ptr<MangleNumberingContext> &MCtx = MangleNumberingContexts[DC];
if (!MCtx)
MCtx = createMangleNumberingContext();
return *MCtx;
}
MangleNumberingContext &
ASTContext::getManglingNumberContext(NeedExtraManglingDecl_t, const Decl *D) {
assert(LangOpts.CPlusPlus); // We don't need mangling numbers for plain C.
std::unique_ptr<MangleNumberingContext> &MCtx =
ExtraMangleNumberingContexts[D];
if (!MCtx)
MCtx = createMangleNumberingContext();
return *MCtx;
}
std::unique_ptr<MangleNumberingContext>
ASTContext::createMangleNumberingContext() const {
return ABI->createMangleNumberingContext();
}
const CXXConstructorDecl *
ASTContext::getCopyConstructorForExceptionObject(CXXRecordDecl *RD) {
return ABI->getCopyConstructorForExceptionObject(
cast<CXXRecordDecl>(RD->getFirstDecl()));
}
void ASTContext::addCopyConstructorForExceptionObject(CXXRecordDecl *RD,
CXXConstructorDecl *CD) {
return ABI->addCopyConstructorForExceptionObject(
cast<CXXRecordDecl>(RD->getFirstDecl()),
cast<CXXConstructorDecl>(CD->getFirstDecl()));
}
void ASTContext::addTypedefNameForUnnamedTagDecl(TagDecl *TD,
TypedefNameDecl *DD) {
return ABI->addTypedefNameForUnnamedTagDecl(TD, DD);
}
TypedefNameDecl *
ASTContext::getTypedefNameForUnnamedTagDecl(const TagDecl *TD) {
return ABI->getTypedefNameForUnnamedTagDecl(TD);
}
void ASTContext::addDeclaratorForUnnamedTagDecl(TagDecl *TD,
DeclaratorDecl *DD) {
return ABI->addDeclaratorForUnnamedTagDecl(TD, DD);
}
DeclaratorDecl *ASTContext::getDeclaratorForUnnamedTagDecl(const TagDecl *TD) {
return ABI->getDeclaratorForUnnamedTagDecl(TD);
}
void ASTContext::setParameterIndex(const ParmVarDecl *D, unsigned int index) {
ParamIndices[D] = index;
}
unsigned ASTContext::getParameterIndex(const ParmVarDecl *D) const {
ParameterIndexTable::const_iterator I = ParamIndices.find(D);
assert(I != ParamIndices.end() &&
"ParmIndices lacks entry set by ParmVarDecl");
return I->second;
}
QualType ASTContext::getStringLiteralArrayType(QualType EltTy,
unsigned Length) const {
// A C++ string literal has a const-qualified element type (C++ 2.13.4p1).
if (getLangOpts().CPlusPlus || getLangOpts().ConstStrings)
EltTy = EltTy.withConst();
EltTy = adjustStringLiteralBaseType(EltTy);
// Get an array type for the string, according to C99 6.4.5. This includes
// the null terminator character.
return getConstantArrayType(EltTy, llvm::APInt(32, Length + 1), nullptr,
ArrayType::Normal, /*IndexTypeQuals*/ 0);
}
StringLiteral *
ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const {
StringLiteral *&Result = StringLiteralCache[Key];
if (!Result)
Result = StringLiteral::Create(
*this, Key, StringLiteral::Ascii,
/*Pascal*/ false, getStringLiteralArrayType(CharTy, Key.size()),
SourceLocation());
return Result;
}
MSGuidDecl *
ASTContext::getMSGuidDecl(MSGuidDecl::Parts Parts) const {
assert(MSGuidTagDecl && "building MS GUID without MS extensions?");
llvm::FoldingSetNodeID ID;
MSGuidDecl::Profile(ID, Parts);
void *InsertPos;
if (MSGuidDecl *Existing = MSGuidDecls.FindNodeOrInsertPos(ID, InsertPos))
return Existing;
QualType GUIDType = getMSGuidType().withConst();
MSGuidDecl *New = MSGuidDecl::Create(*this, GUIDType, Parts);
MSGuidDecls.InsertNode(New, InsertPos);
return New;
}
TemplateParamObjectDecl *
ASTContext::getTemplateParamObjectDecl(QualType T, const APValue &V) const {
assert(T->isRecordType() && "template param object of unexpected type");
// C++ [temp.param]p8:
// [...] a static storage duration object of type 'const T' [...]
T.addConst();
llvm::FoldingSetNodeID ID;
TemplateParamObjectDecl::Profile(ID, T, V);
void *InsertPos;
if (TemplateParamObjectDecl *Existing =
TemplateParamObjectDecls.FindNodeOrInsertPos(ID, InsertPos))
return Existing;
TemplateParamObjectDecl *New = TemplateParamObjectDecl::Create(*this, T, V);
TemplateParamObjectDecls.InsertNode(New, InsertPos);
return New;
}
bool ASTContext::AtomicUsesUnsupportedLibcall(const AtomicExpr *E) const {
const llvm::Triple &T = getTargetInfo().getTriple();
if (!T.isOSDarwin())
return false;
if (!(T.isiOS() && T.isOSVersionLT(7)) &&
!(T.isMacOSX() && T.isOSVersionLT(10, 9)))
return false;
QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
CharUnits sizeChars = getTypeSizeInChars(AtomicTy);
uint64_t Size = sizeChars.getQuantity();
CharUnits alignChars = getTypeAlignInChars(AtomicTy);
unsigned Align = alignChars.getQuantity();
unsigned MaxInlineWidthInBits = getTargetInfo().getMaxAtomicInlineWidth();
return (Size != Align || toBits(sizeChars) > MaxInlineWidthInBits);
}
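// For illustration (assuming a macOS deployment target older than 10.9): an
// _Atomic object whose size is 8 bytes but whose alignment is only 4 bytes
// fails the Size == Align requirement above, so the caller is told that the
// operation would need the external atomic libcalls, which are not available
// on those OS versions.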
bool
ASTContext::ObjCMethodsAreEqual(const ObjCMethodDecl *MethodDecl,
const ObjCMethodDecl *MethodImpl) {
// No point trying to match an unavailable/deprecated method.
if (MethodDecl->hasAttr<UnavailableAttr>()
|| MethodDecl->hasAttr<DeprecatedAttr>())
return false;
if (MethodDecl->getObjCDeclQualifier() !=
MethodImpl->getObjCDeclQualifier())
return false;
if (!hasSameType(MethodDecl->getReturnType(), MethodImpl->getReturnType()))
return false;
if (MethodDecl->param_size() != MethodImpl->param_size())
return false;
for (ObjCMethodDecl::param_const_iterator IM = MethodImpl->param_begin(),
IF = MethodDecl->param_begin(), EM = MethodImpl->param_end(),
EF = MethodDecl->param_end();
IM != EM && IF != EF; ++IM, ++IF) {
const ParmVarDecl *DeclVar = (*IF);
const ParmVarDecl *ImplVar = (*IM);
if (ImplVar->getObjCDeclQualifier() != DeclVar->getObjCDeclQualifier())
return false;
if (!hasSameType(DeclVar->getType(), ImplVar->getType()))
return false;
}
return (MethodDecl->isVariadic() == MethodImpl->isVariadic());
}
uint64_t ASTContext::getTargetNullPointerValue(QualType QT) const {
LangAS AS;
if (QT->getUnqualifiedDesugaredType()->isNullPtrType())
AS = LangAS::Default;
else
AS = QT->getPointeeType().getAddressSpace();
return getTargetInfo().getNullPointerValue(AS);
}
unsigned ASTContext::getTargetAddressSpace(LangAS AS) const {
if (isTargetAddressSpace(AS))
return toTargetAddressSpace(AS);
else
return (*AddrSpaceMap)[(unsigned)AS];
}
QualType ASTContext::getCorrespondingSaturatedType(QualType Ty) const {
assert(Ty->isFixedPointType());
if (Ty->isSaturatedFixedPointType()) return Ty;
switch (Ty->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a fixed point type!");
case BuiltinType::ShortAccum:
return SatShortAccumTy;
case BuiltinType::Accum:
return SatAccumTy;
case BuiltinType::LongAccum:
return SatLongAccumTy;
case BuiltinType::UShortAccum:
return SatUnsignedShortAccumTy;
case BuiltinType::UAccum:
return SatUnsignedAccumTy;
case BuiltinType::ULongAccum:
return SatUnsignedLongAccumTy;
case BuiltinType::ShortFract:
return SatShortFractTy;
case BuiltinType::Fract:
return SatFractTy;
case BuiltinType::LongFract:
return SatLongFractTy;
case BuiltinType::UShortFract:
return SatUnsignedShortFractTy;
case BuiltinType::UFract:
return SatUnsignedFractTy;
case BuiltinType::ULongFract:
return SatUnsignedLongFractTy;
}
}
LangAS ASTContext::getLangASForBuiltinAddressSpace(unsigned AS) const {
if (LangOpts.OpenCL)
return getTargetInfo().getOpenCLBuiltinAddressSpace(AS);
if (LangOpts.CUDA)
return getTargetInfo().getCUDABuiltinAddressSpace(AS);
return getLangASFromTargetAS(AS);
}
// Explicitly instantiate this in case a Redeclarable<T> is used from a TU that
// doesn't include ASTContext.h
template
clang::LazyGenerationalUpdatePtr<
const Decl *, Decl *, &ExternalASTSource::CompleteRedeclChain>::ValueType
clang::LazyGenerationalUpdatePtr<
const Decl *, Decl *, &ExternalASTSource::CompleteRedeclChain>::makeValue(
const clang::ASTContext &Ctx, Decl *Value);
unsigned char ASTContext::getFixedPointScale(QualType Ty) const {
assert(Ty->isFixedPointType());
const TargetInfo &Target = getTargetInfo();
switch (Ty->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a fixed point type!");
case BuiltinType::ShortAccum:
case BuiltinType::SatShortAccum:
return Target.getShortAccumScale();
case BuiltinType::Accum:
case BuiltinType::SatAccum:
return Target.getAccumScale();
case BuiltinType::LongAccum:
case BuiltinType::SatLongAccum:
return Target.getLongAccumScale();
case BuiltinType::UShortAccum:
case BuiltinType::SatUShortAccum:
return Target.getUnsignedShortAccumScale();
case BuiltinType::UAccum:
case BuiltinType::SatUAccum:
return Target.getUnsignedAccumScale();
case BuiltinType::ULongAccum:
case BuiltinType::SatULongAccum:
return Target.getUnsignedLongAccumScale();
case BuiltinType::ShortFract:
case BuiltinType::SatShortFract:
return Target.getShortFractScale();
case BuiltinType::Fract:
case BuiltinType::SatFract:
return Target.getFractScale();
case BuiltinType::LongFract:
case BuiltinType::SatLongFract:
return Target.getLongFractScale();
case BuiltinType::UShortFract:
case BuiltinType::SatUShortFract:
return Target.getUnsignedShortFractScale();
case BuiltinType::UFract:
case BuiltinType::SatUFract:
return Target.getUnsignedFractScale();
case BuiltinType::ULongFract:
case BuiltinType::SatULongFract:
return Target.getUnsignedLongFractScale();
}
}
unsigned char ASTContext::getFixedPointIBits(QualType Ty) const {
assert(Ty->isFixedPointType());
const TargetInfo &Target = getTargetInfo();
switch (Ty->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a fixed point type!");
case BuiltinType::ShortAccum:
case BuiltinType::SatShortAccum:
return Target.getShortAccumIBits();
case BuiltinType::Accum:
case BuiltinType::SatAccum:
return Target.getAccumIBits();
case BuiltinType::LongAccum:
case BuiltinType::SatLongAccum:
return Target.getLongAccumIBits();
case BuiltinType::UShortAccum:
case BuiltinType::SatUShortAccum:
return Target.getUnsignedShortAccumIBits();
case BuiltinType::UAccum:
case BuiltinType::SatUAccum:
return Target.getUnsignedAccumIBits();
case BuiltinType::ULongAccum:
case BuiltinType::SatULongAccum:
return Target.getUnsignedLongAccumIBits();
case BuiltinType::ShortFract:
case BuiltinType::SatShortFract:
case BuiltinType::Fract:
case BuiltinType::SatFract:
case BuiltinType::LongFract:
case BuiltinType::SatLongFract:
case BuiltinType::UShortFract:
case BuiltinType::SatUShortFract:
case BuiltinType::UFract:
case BuiltinType::SatUFract:
case BuiltinType::ULongFract:
case BuiltinType::SatULongFract:
return 0;
}
}
llvm::FixedPointSemantics
ASTContext::getFixedPointSemantics(QualType Ty) const {
assert((Ty->isFixedPointType() || Ty->isIntegerType()) &&
"Can only get the fixed point semantics for a "
"fixed point or integer type.");
if (Ty->isIntegerType())
return llvm::FixedPointSemantics::GetIntegerSemantics(
getIntWidth(Ty), Ty->isSignedIntegerType());
bool isSigned = Ty->isSignedFixedPointType();
return llvm::FixedPointSemantics(
static_cast<unsigned>(getTypeSize(Ty)), getFixedPointScale(Ty), isSigned,
Ty->isSaturatedFixedPointType(),
!isSigned && getTargetInfo().doUnsignedFixedPointTypesHavePadding());
}
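// For illustration (using the default scales in TargetInfo, which individual
// targets may override): 'short _Accum' is typically 16 bits wide with a scale
// of 7, so getFixedPointSemantics() describes it as a signed 16-bit value with
// 7 fractional bits, not saturated, and with no padding bit.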
llvm::APFixedPoint ASTContext::getFixedPointMax(QualType Ty) const {
assert(Ty->isFixedPointType());
return llvm::APFixedPoint::getMax(getFixedPointSemantics(Ty));
}
llvm::APFixedPoint ASTContext::getFixedPointMin(QualType Ty) const {
assert(Ty->isFixedPointType());
return llvm::APFixedPoint::getMin(getFixedPointSemantics(Ty));
}
QualType ASTContext::getCorrespondingSignedFixedPointType(QualType Ty) const {
assert(Ty->isUnsignedFixedPointType() &&
"Expected unsigned fixed point type");
switch (Ty->castAs<BuiltinType>()->getKind()) {
case BuiltinType::UShortAccum:
return ShortAccumTy;
case BuiltinType::UAccum:
return AccumTy;
case BuiltinType::ULongAccum:
return LongAccumTy;
case BuiltinType::SatUShortAccum:
return SatShortAccumTy;
case BuiltinType::SatUAccum:
return SatAccumTy;
case BuiltinType::SatULongAccum:
return SatLongAccumTy;
case BuiltinType::UShortFract:
return ShortFractTy;
case BuiltinType::UFract:
return FractTy;
case BuiltinType::ULongFract:
return LongFractTy;
case BuiltinType::SatUShortFract:
return SatShortFractTy;
case BuiltinType::SatUFract:
return SatFractTy;
case BuiltinType::SatULongFract:
return SatLongFractTy;
default:
llvm_unreachable("Unexpected unsigned fixed point type");
}
}
ParsedTargetAttr
ASTContext::filterFunctionTargetAttrs(const TargetAttr *TD) const {
assert(TD != nullptr);
ParsedTargetAttr ParsedAttr = TD->parse();
ParsedAttr.Features.erase(
llvm::remove_if(ParsedAttr.Features,
[&](const std::string &Feat) {
return !Target->isValidFeatureName(
StringRef{Feat}.substr(1));
}),
ParsedAttr.Features.end());
return ParsedAttr;
}
void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
const FunctionDecl *FD) const {
if (FD)
getFunctionFeatureMap(FeatureMap, GlobalDecl().getWithDecl(FD));
else
Target->initFeatureMap(FeatureMap, getDiagnostics(),
Target->getTargetOpts().CPU,
Target->getTargetOpts().Features);
}
// Fills in the supplied string map with the set of target features for the
// passed in function.
void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
GlobalDecl GD) const {
StringRef TargetCPU = Target->getTargetOpts().CPU;
const FunctionDecl *FD = GD.getDecl()->getAsFunction();
if (const auto *TD = FD->getAttr<TargetAttr>()) {
ParsedTargetAttr ParsedAttr = filterFunctionTargetAttrs(TD);
// Make a copy of the features as passed on the command line into the
// beginning of the additional features from the function to override.
ParsedAttr.Features.insert(
ParsedAttr.Features.begin(),
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
if (ParsedAttr.Architecture != "" &&
Target->isValidCPUName(ParsedAttr.Architecture))
TargetCPU = ParsedAttr.Architecture;
// Now populate the feature map, first with the TargetCPU which is either
// the default or a new one from the target attribute string. Then we'll use
// the passed in features (FeaturesAsWritten) along with the new ones from
// the attribute.
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU,
ParsedAttr.Features);
} else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) {
llvm::SmallVector<StringRef, 32> FeaturesTmp;
Target->getCPUSpecificCPUDispatchFeatures(
SD->getCPUName(GD.getMultiVersionIndex())->getName(), FeaturesTmp);
std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else {
FeatureMap = Target->getTargetOpts().FeatureMap;
}
}
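// For illustration (hypothetical attribute string): for a function marked
// __attribute__((target("arch=skylake,avx2"))) on x86, the parsed attribute
// supplies "skylake" as the new TargetCPU and "+avx2" as an extra feature;
// because the command-line features are inserted at the front of the list,
// the attribute's features take precedence when they conflict.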
OMPTraitInfo &ASTContext::getNewOMPTraitInfo() {
OMPTraitInfoVector.emplace_back(new OMPTraitInfo());
return *OMPTraitInfoVector.back();
}
const StreamingDiagnostic &clang::
operator<<(const StreamingDiagnostic &DB,
const ASTContext::SectionInfo &Section) {
if (Section.Decl)
return DB << Section.Decl;
return DB << "a prior #pragma section";
}
bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
bool IsStaticVar =
isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static;
bool IsExplicitDeviceVar = (D->hasAttr<CUDADeviceAttr>() &&
!D->getAttr<CUDADeviceAttr>()->isImplicit()) ||
(D->hasAttr<CUDAConstantAttr>() &&
!D->getAttr<CUDAConstantAttr>()->isImplicit());
// CUDA/HIP: static managed variables need to be externalized since they are
// represented as declarations in IR and therefore cannot have internal linkage.
return IsStaticVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar);
}
bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
return mayExternalizeStaticVar(D) &&
(D->hasAttr<HIPManagedAttr>() ||
CUDADeviceVarODRUsedByHost.count(cast<VarDecl>(D)));
}
StringRef ASTContext::getCUIDHash() const {
if (!CUIDHash.empty())
return CUIDHash;
if (LangOpts.CUID.empty())
return StringRef();
CUIDHash = llvm::utohexstr(llvm::MD5Hash(LangOpts.CUID), /*LowerCase=*/true);
return CUIDHash;
}
// Get the closest named parent, so we can order the SYCL naming decls in a
// context where mangling is meaningful.
static const DeclContext *GetNamedParent(const CXXRecordDecl *RD) {
const DeclContext *DC = RD->getDeclContext();
while (!isa<NamedDecl, TranslationUnitDecl>(DC))
DC = DC->getParent();
return DC;
}
void ASTContext::AddSYCLKernelNamingDecl(const CXXRecordDecl *RD) {
assert(getLangOpts().isSYCL() && "Only valid for SYCL programs");
RD = RD->getCanonicalDecl();
const DeclContext *DC = GetNamedParent(RD);
assert(RD->getLocation().isValid() &&
"Invalid location on kernel naming decl");
(void)SYCLKernelNamingTypes[DC].insert(RD);
}
bool ASTContext::IsSYCLKernelNamingDecl(const NamedDecl *ND) const {
assert(getLangOpts().isSYCL() && "Only valid for SYCL programs");
const auto *RD = dyn_cast<CXXRecordDecl>(ND);
if (!RD)
return false;
RD = RD->getCanonicalDecl();
const DeclContext *DC = GetNamedParent(RD);
auto Itr = SYCLKernelNamingTypes.find(DC);
if (Itr == SYCLKernelNamingTypes.end())
return false;
return Itr->getSecond().count(RD);
}
// Filters the Decls list to those that share the lambda mangling with the
// passed RD.
void ASTContext::FilterSYCLKernelNamingDecls(
const CXXRecordDecl *RD,
llvm::SmallVectorImpl<const CXXRecordDecl *> &Decls) {
if (!SYCLKernelFilterContext)
SYCLKernelFilterContext.reset(
ItaniumMangleContext::create(*this, getDiagnostics()));
llvm::SmallString<128> LambdaSig;
llvm::raw_svector_ostream Out(LambdaSig);
SYCLKernelFilterContext->mangleLambdaSig(RD, Out);
llvm::erase_if(Decls, [this, &LambdaSig](const CXXRecordDecl *LocalRD) {
llvm::SmallString<128> LocalLambdaSig;
llvm::raw_svector_ostream LocalOut(LocalLambdaSig);
SYCLKernelFilterContext->mangleLambdaSig(LocalRD, LocalOut);
return LambdaSig != LocalLambdaSig;
});
}
unsigned ASTContext::GetSYCLKernelNamingIndex(const NamedDecl *ND) {
assert(getLangOpts().isSYCL() && "Only valid for SYCL programs");
assert(IsSYCLKernelNamingDecl(ND) &&
"Lambda not involved in mangling asked for a naming index?");
const CXXRecordDecl *RD = cast<CXXRecordDecl>(ND)->getCanonicalDecl();
const DeclContext *DC = GetNamedParent(RD);
auto Itr = SYCLKernelNamingTypes.find(DC);
assert(Itr != SYCLKernelNamingTypes.end() && "Not a valid DeclContext?");
const llvm::SmallPtrSet<const CXXRecordDecl *, 4> &Set = Itr->getSecond();
llvm::SmallVector<const CXXRecordDecl *> Decls{Set.begin(), Set.end()};
FilterSYCLKernelNamingDecls(RD, Decls);
llvm::sort(Decls, [](const CXXRecordDecl *LHS, const CXXRecordDecl *RHS) {
return LHS->getLambdaManglingNumber() < RHS->getLambdaManglingNumber();
});
return llvm::find(Decls, RD) - Decls.begin();
}
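// For illustration: if three kernel-naming lambdas share the same named parent
// and the same lambda signature, they are ordered by lambda mangling number,
// and the function above returns the position of the requested lambda in that
// order (0 for the smallest mangling number).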
diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/M68k.cpp b/contrib/llvm-project/clang/lib/Basic/Targets/M68k.cpp
index 31cb36d37636..c0cd8fa90ed6 100644
--- a/contrib/llvm-project/clang/lib/Basic/Targets/M68k.cpp
+++ b/contrib/llvm-project/clang/lib/Basic/Targets/M68k.cpp
@@ -1,236 +1,236 @@
//===--- M68k.cpp - Implement M68k target feature support ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements M68k TargetInfo objects.
//
//===----------------------------------------------------------------------===//
#include "M68k.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <cstring>
#include <limits>
namespace clang {
namespace targets {
M68kTargetInfo::M68kTargetInfo(const llvm::Triple &Triple,
const TargetOptions &)
: TargetInfo(Triple) {
std::string Layout = "";
// M68k is Big Endian
Layout += "E";
// FIXME how to wire it with the used object format?
Layout += "-m:e";
- // M68k pointers are always 32 bit wide even for 16 bit cpus
- Layout += "-p:32:32";
+ // M68k pointers are always 32 bit wide even for 16-bit CPUs
+ Layout += "-p:32:16:32";
// M68k integer data types
Layout += "-i8:8:8-i16:16:16-i32:16:32";
// FIXME no floats at the moment
// The registers can hold 8, 16, 32 bits
Layout += "-n8:16:32";
// 16-bit alignment for both stack and aggregates
// in order to conform to the ABI used by GCC
Layout += "-a:0:16-S16";
resetDataLayout(Layout);
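// For reference, concatenating the fragments above yields the data layout
// string "E-m:e-p:32:16:32-i8:8:8-i16:16:16-i32:16:32-n8:16:32-a:0:16-S16":
// big-endian, 32-bit pointers with 16-bit ABI alignment, and 16-bit
// stack/aggregate alignment to match GCC.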
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
IntPtrType = SignedInt;
}
bool M68kTargetInfo::setCPU(const std::string &Name) {
StringRef N = Name;
CPU = llvm::StringSwitch<CPUKind>(N)
.Case("generic", CK_68000)
.Case("M68000", CK_68000)
.Case("M68010", CK_68010)
.Case("M68020", CK_68020)
.Case("M68030", CK_68030)
.Case("M68040", CK_68040)
.Case("M68060", CK_68060)
.Default(CK_Unknown);
return CPU != CK_Unknown;
}
void M68kTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
using llvm::Twine;
Builder.defineMacro("__m68k__");
Builder.defineMacro("mc68000");
Builder.defineMacro("__mc68000");
Builder.defineMacro("__mc68000__");
// For sub-architecture
switch (CPU) {
case CK_68010:
Builder.defineMacro("mc68010");
Builder.defineMacro("__mc68010");
Builder.defineMacro("__mc68010__");
break;
case CK_68020:
Builder.defineMacro("mc68020");
Builder.defineMacro("__mc68020");
Builder.defineMacro("__mc68020__");
break;
case CK_68030:
Builder.defineMacro("mc68030");
Builder.defineMacro("__mc68030");
Builder.defineMacro("__mc68030__");
break;
case CK_68040:
Builder.defineMacro("mc68040");
Builder.defineMacro("__mc68040");
Builder.defineMacro("__mc68040__");
break;
case CK_68060:
Builder.defineMacro("mc68060");
Builder.defineMacro("__mc68060");
Builder.defineMacro("__mc68060__");
break;
default:
break;
}
}
ArrayRef<Builtin::Info> M68kTargetInfo::getTargetBuiltins() const {
// FIXME: Implement.
return None;
}
bool M68kTargetInfo::hasFeature(StringRef Feature) const {
// FIXME elaborate moar
return Feature == "M68000";
}
const char *const M68kTargetInfo::GCCRegNames[] = {
"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
"pc"};
ArrayRef<const char *> M68kTargetInfo::getGCCRegNames() const {
return llvm::makeArrayRef(GCCRegNames);
}
ArrayRef<TargetInfo::GCCRegAlias> M68kTargetInfo::getGCCRegAliases() const {
// No aliases.
return None;
}
bool M68kTargetInfo::validateAsmConstraint(
const char *&Name, TargetInfo::ConstraintInfo &info) const {
switch (*Name) {
case 'a': // address register
case 'd': // data register
info.setAllowsRegister();
return true;
case 'I': // constant integer in the range [1,8]
info.setRequiresImmediate(1, 8);
return true;
case 'J': // constant signed 16-bit integer
info.setRequiresImmediate(std::numeric_limits<int16_t>::min(),
std::numeric_limits<int16_t>::max());
return true;
case 'K': // constant that is NOT in the range of [-0x80, 0x80)
info.setRequiresImmediate();
return true;
case 'L': // constant integer in the range [-8,-1]
info.setRequiresImmediate(-8, -1);
return true;
case 'M': // constant that is NOT in the range of [-0x100, 0x100]
info.setRequiresImmediate();
return true;
case 'N': // constant integer in the range [24,31]
info.setRequiresImmediate(24, 31);
return true;
case 'O': // constant integer 16
info.setRequiresImmediate(16);
return true;
case 'P': // constant integer in the range [8,15]
info.setRequiresImmediate(8, 15);
return true;
case 'C':
++Name;
switch (*Name) {
case '0': // constant integer 0
info.setRequiresImmediate(0);
return true;
case 'i': // constant integer
case 'j': // integer constant that doesn't fit in 16 bits
info.setRequiresImmediate();
return true;
default:
break;
}
break;
default:
break;
}
return false;
}
llvm::Optional<std::string>
M68kTargetInfo::handleAsmEscapedChar(char EscChar) const {
char C;
switch (EscChar) {
case '.':
case '#':
C = EscChar;
break;
case '/':
C = '%';
break;
case '$':
C = 's';
break;
case '&':
C = 'd';
break;
default:
return llvm::None;
}
return std::string(1, C);
}
std::string M68kTargetInfo::convertConstraint(const char *&Constraint) const {
if (*Constraint == 'C')
// Two-character constraint; add "^" hint for later parsing
return std::string("^") + std::string(Constraint++, 2);
return std::string(1, *Constraint);
}
const char *M68kTargetInfo::getClobbers() const {
// FIXME: Is this really right?
return "";
}
TargetInfo::BuiltinVaListKind M68kTargetInfo::getBuiltinVaListKind() const {
return TargetInfo::VoidPtrBuiltinVaList;
}
} // namespace targets
} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h b/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h
index e24fb5cf082d..3fe39ed64d9c 100644
--- a/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h
+++ b/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h
@@ -1,966 +1,971 @@
//===--- OSTargets.h - Declare OS target feature support --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares OS specific TargetInfo types.
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H
#define LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H
#include "Targets.h"
namespace clang {
namespace targets {
template <typename TgtInfo>
class LLVM_LIBRARY_VISIBILITY OSTargetInfo : public TgtInfo {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const = 0;
public:
OSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: TgtInfo(Triple, Opts) {}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
TgtInfo::getTargetDefines(Opts, Builder);
getOSDefines(Opts, TgtInfo::getTriple(), Builder);
}
};
// CloudABI Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY CloudABITargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
Builder.defineMacro("__CloudABI__");
Builder.defineMacro("__ELF__");
// CloudABI uses ISO/IEC 10646:2012 for wchar_t, char16_t and char32_t.
Builder.defineMacro("__STDC_ISO_10646__", "201206L");
Builder.defineMacro("__STDC_UTF_16__");
Builder.defineMacro("__STDC_UTF_32__");
}
public:
CloudABITargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {}
};
// Ananas target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY AnanasTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// Ananas defines
Builder.defineMacro("__Ananas__");
Builder.defineMacro("__ELF__");
}
public:
AnanasTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {}
};
void getDarwinDefines(MacroBuilder &Builder, const LangOptions &Opts,
const llvm::Triple &Triple, StringRef &PlatformName,
VersionTuple &PlatformMinVersion);
template <typename Target>
class LLVM_LIBRARY_VISIBILITY DarwinTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
getDarwinDefines(Builder, Opts, Triple, this->PlatformName,
this->PlatformMinVersion);
}
public:
DarwinTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
// By default, no TLS, and we list permitted architecture/OS
// combinations.
this->TLSSupported = false;
if (Triple.isMacOSX())
this->TLSSupported = !Triple.isMacOSXVersionLT(10, 7);
else if (Triple.isiOS()) {
// 64-bit iOS supported it from 8 onwards, 32-bit device from 9 onwards,
// 32-bit simulator from 10 onwards.
if (Triple.isArch64Bit())
this->TLSSupported = !Triple.isOSVersionLT(8);
else if (Triple.isArch32Bit()) {
if (!Triple.isSimulatorEnvironment())
this->TLSSupported = !Triple.isOSVersionLT(9);
else
this->TLSSupported = !Triple.isOSVersionLT(10);
}
} else if (Triple.isWatchOS()) {
if (!Triple.isSimulatorEnvironment())
this->TLSSupported = !Triple.isOSVersionLT(2);
else
this->TLSSupported = !Triple.isOSVersionLT(3);
}
this->MCountName = "\01mcount";
}
const char *getStaticInitSectionSpecifier() const override {
// FIXME: We should return 0 when building kexts.
return "__TEXT,__StaticInit,regular,pure_instructions";
}
/// Darwin does not support protected visibility. Darwin's "default"
/// is very similar to ELF's "protected"; Darwin requires a "weak"
/// attribute on declarations that can be dynamically replaced.
bool hasProtectedVisibility() const override { return false; }
unsigned getExnObjectAlignment() const override {
// Older versions of libc++abi guarantee an alignment of only 8 bytes for
// exception objects because of a bug in __cxa_exception that was
// eventually fixed in r319123.
llvm::VersionTuple MinVersion;
const llvm::Triple &T = this->getTriple();
// Compute the earliest OS versions that have the fix to libc++abi.
switch (T.getOS()) {
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX: // Earliest supporting version is 10.14.
MinVersion = llvm::VersionTuple(10U, 14U);
break;
case llvm::Triple::IOS:
case llvm::Triple::TvOS: // Earliest supporting version is 12.0.0.
MinVersion = llvm::VersionTuple(12U);
break;
case llvm::Triple::WatchOS: // Earliest supporting version is 5.0.0.
MinVersion = llvm::VersionTuple(5U);
break;
default:
// Conservatively return 8 bytes if OS is unknown.
return 64;
}
unsigned Major, Minor, Micro;
T.getOSVersion(Major, Minor, Micro);
if (llvm::VersionTuple(Major, Minor, Micro) < MinVersion)
return 64;
return OSTargetInfo<Target>::getExnObjectAlignment();
}
TargetInfo::IntType getLeastIntTypeByWidth(unsigned BitWidth,
bool IsSigned) const final {
// Darwin uses `long long` for `int_least64_t` and `int_fast64_t`.
return BitWidth == 64
? (IsSigned ? TargetInfo::SignedLongLong
: TargetInfo::UnsignedLongLong)
: TargetInfo::getLeastIntTypeByWidth(BitWidth, IsSigned);
}
};
// DragonFlyBSD Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY DragonFlyBSDTargetInfo
: public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// DragonFly defines; list based off of gcc output
Builder.defineMacro("__DragonFly__");
Builder.defineMacro("__DragonFly_cc_version", "100001");
Builder.defineMacro("__ELF__");
Builder.defineMacro("__KPRINTF_ATTRIBUTE__");
Builder.defineMacro("__tune_i386__");
DefineStd(Builder, "unix", Opts);
}
public:
DragonFlyBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->MCountName = ".mcount";
break;
}
}
};
#ifndef FREEBSD_CC_VERSION
#define FREEBSD_CC_VERSION 0U
#endif
// FreeBSD Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY FreeBSDTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// FreeBSD defines; list based off of gcc output
unsigned Release = Triple.getOSMajorVersion();
if (Release == 0U)
Release = 8U;
unsigned CCVersion = FREEBSD_CC_VERSION;
if (CCVersion == 0U)
CCVersion = Release * 100000U + 1U;
Builder.defineMacro("__FreeBSD__", Twine(Release));
Builder.defineMacro("__FreeBSD_cc_version", Twine(CCVersion));
Builder.defineMacro("__KPRINTF_ATTRIBUTE__");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
// On FreeBSD, wchar_t contains the number of the code point as
// used by the character set of the locale. These character sets are
// not necessarily a superset of ASCII.
//
// FIXME: This is wrong; the macro refers to the numerical values
// of wchar_t *literals*, which are not locale-dependent. However,
// FreeBSD systems apparently depend on us getting this wrong, and
// setting this to 1 is conforming even if all the basic source
// character literals have the same encoding as char and wchar_t.
Builder.defineMacro("__STDC_MB_MIGHT_NEQ_WC__", "1");
}
public:
FreeBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->MCountName = ".mcount";
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
this->MCountName = "_mcount";
break;
case llvm::Triple::arm:
this->MCountName = "__mcount";
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
break;
}
}
};
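// For illustration: building for a triple such as x86_64-unknown-freebsd13.0
// gives Release == 13, so the defines above expand to __FreeBSD__=13 and,
// when FREEBSD_CC_VERSION is not overridden, __FreeBSD_cc_version=1300001.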
// GNU/kFreeBSD Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY KFreeBSDTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// GNU/kFreeBSD defines; list based off of gcc output
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__FreeBSD_kernel__");
Builder.defineMacro("__GLIBC__");
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
public:
KFreeBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {}
};
// Haiku Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY HaikuTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// Haiku defines; list based off of gcc output
Builder.defineMacro("__HAIKU__");
Builder.defineMacro("__ELF__");
DefineStd(Builder, "unix", Opts);
if (this->HasFloat128)
Builder.defineMacro("__FLOAT128__");
}
public:
HaikuTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->SizeType = TargetInfo::UnsignedLong;
this->IntPtrType = TargetInfo::SignedLong;
this->PtrDiffType = TargetInfo::SignedLong;
this->ProcessIDType = TargetInfo::SignedLong;
this->TLSSupported = false;
switch (Triple.getArch()) {
default:
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->HasFloat128 = true;
break;
}
}
};
// Hurd target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY HurdTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// Hurd defines; list based off of gcc output.
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__GNU__");
Builder.defineMacro("__gnu_hurd__");
Builder.defineMacro("__MACH__");
Builder.defineMacro("__GLIBC__");
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
public:
HurdTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {}
};
// Minix Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY MinixTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// Minix defines
Builder.defineMacro("__minix", "3");
Builder.defineMacro("_EM_WSIZE", "4");
Builder.defineMacro("_EM_PSIZE", "4");
Builder.defineMacro("_EM_SSIZE", "2");
Builder.defineMacro("_EM_LSIZE", "4");
Builder.defineMacro("_EM_FSIZE", "4");
Builder.defineMacro("_EM_DSIZE", "8");
Builder.defineMacro("__ELF__");
DefineStd(Builder, "unix", Opts);
}
public:
MinixTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {}
};
// Linux target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY LinuxTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// Linux defines; list based off of gcc output
DefineStd(Builder, "unix", Opts);
DefineStd(Builder, "linux", Opts);
Builder.defineMacro("__ELF__");
if (Triple.isAndroid()) {
Builder.defineMacro("__ANDROID__", "1");
unsigned Maj, Min, Rev;
Triple.getEnvironmentVersion(Maj, Min, Rev);
this->PlatformName = "android";
this->PlatformMinVersion = VersionTuple(Maj, Min, Rev);
if (Maj) {
Builder.defineMacro("__ANDROID_MIN_SDK_VERSION__", Twine(Maj));
// This is a historical but ambiguous name for the minSdkVersion macro. Keep
// it defined for compatibility.
Builder.defineMacro("__ANDROID_API__", "__ANDROID_MIN_SDK_VERSION__");
}
} else {
Builder.defineMacro("__gnu_linux__");
}
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
if (this->HasFloat128)
Builder.defineMacro("__FLOAT128__");
}
public:
LinuxTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->WIntType = TargetInfo::UnsignedInt;
switch (Triple.getArch()) {
default:
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
this->MCountName = "_mcount";
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->HasFloat128 = true;
break;
}
}
const char *getStaticInitSectionSpecifier() const override {
return ".text.startup";
}
};
// NetBSD Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY NetBSDTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// NetBSD defines; list based off of gcc output
Builder.defineMacro("__NetBSD__");
Builder.defineMacro("__unix__");
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
}
public:
NetBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->MCountName = "__mcount";
}
};
// OpenBSD Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY OpenBSDTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// OpenBSD defines; list based off of gcc output
Builder.defineMacro("__OpenBSD__");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (this->HasFloat128)
Builder.defineMacro("__FLOAT128__");
+
+ if (Opts.C11) {
+ Builder.defineMacro("__STDC_NO_ATOMICS__");
+ Builder.defineMacro("__STDC_NO_THREADS__");
+ }
}
public:
OpenBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->WCharType = this->WIntType = this->SignedInt;
this->IntMaxType = TargetInfo::SignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
switch (Triple.getArch()) {
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->HasFloat128 = true;
LLVM_FALLTHROUGH;
default:
this->MCountName = "__mcount";
break;
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::sparcv9:
this->MCountName = "_mcount";
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
break;
}
}
};
// PSP Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY PSPTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// PSP defines; list based on the output of the pspdev gcc toolchain.
Builder.defineMacro("PSP");
Builder.defineMacro("_PSP");
Builder.defineMacro("__psp__");
Builder.defineMacro("__ELF__");
}
public:
PSPTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {}
};
// PS3 PPU Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY PS3PPUTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// PS3 PPU defines.
Builder.defineMacro("__PPC__");
Builder.defineMacro("__PPU__");
Builder.defineMacro("__CELLOS_LV2__");
Builder.defineMacro("__ELF__");
Builder.defineMacro("__LP32__");
Builder.defineMacro("_ARCH_PPC64");
Builder.defineMacro("__powerpc64__");
}
public:
PS3PPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->LongWidth = this->LongAlign = 32;
this->PointerWidth = this->PointerAlign = 32;
this->IntMaxType = TargetInfo::SignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
this->SizeType = TargetInfo::UnsignedInt;
this->resetDataLayout("E-m:e-p:32:32-i64:64-n32:64");
}
};
template <typename Target>
class LLVM_LIBRARY_VISIBILITY PS4OSTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
Builder.defineMacro("__FreeBSD__", "9");
Builder.defineMacro("__FreeBSD_cc_version", "900001");
Builder.defineMacro("__KPRINTF_ATTRIBUTE__");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
Builder.defineMacro("__SCE__");
Builder.defineMacro("__ORBIS__");
}
public:
PS4OSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->WCharType = TargetInfo::UnsignedShort;
// On PS4, TLS variables cannot be aligned to more than 32 bytes (256 bits).
this->MaxTLSAlign = 256;
// On PS4, do not honor explicit bit field alignment,
// as in "__attribute__((aligned(2))) int b : 1;".
this->UseExplicitBitFieldAlignment = false;
switch (Triple.getArch()) {
default:
case llvm::Triple::x86_64:
this->MCountName = ".mcount";
this->NewAlign = 256;
break;
}
}
TargetInfo::CallingConvCheckResult
checkCallingConvention(CallingConv CC) const override {
return (CC == CC_C) ? TargetInfo::CCCR_OK : TargetInfo::CCCR_Error;
}
};
// RTEMS Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY RTEMSTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// RTEMS defines; list based off of gcc output
Builder.defineMacro("__rtems__");
Builder.defineMacro("__ELF__");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
public:
RTEMSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
// this->MCountName = ".mcount";
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
// this->MCountName = "_mcount";
break;
case llvm::Triple::arm:
// this->MCountName = "__mcount";
break;
}
}
};
// Solaris target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY SolarisTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
DefineStd(Builder, "sun", Opts);
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
Builder.defineMacro("__svr4__");
Builder.defineMacro("__SVR4");
// Solaris headers require _XOPEN_SOURCE to be set to 600 for C99 and
// newer, but to 500 for everything else. feature_test.h has a check to
// ensure that you are not using C99 with an old version of X/Open or C89
// with a new version.
if (Opts.C99)
Builder.defineMacro("_XOPEN_SOURCE", "600");
else
Builder.defineMacro("_XOPEN_SOURCE", "500");
if (Opts.CPlusPlus) {
Builder.defineMacro("__C99FEATURES__");
Builder.defineMacro("_FILE_OFFSET_BITS", "64");
}
// GCC restricts the next two to C++.
Builder.defineMacro("_LARGEFILE_SOURCE");
Builder.defineMacro("_LARGEFILE64_SOURCE");
Builder.defineMacro("__EXTENSIONS__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (this->HasFloat128)
Builder.defineMacro("__FLOAT128__");
}
public:
SolarisTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
if (this->PointerWidth == 64) {
this->WCharType = this->WIntType = this->SignedInt;
} else {
this->WCharType = this->WIntType = this->SignedLong;
}
switch (Triple.getArch()) {
default:
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->HasFloat128 = true;
break;
}
}
};
// AIX Target
template <typename Target>
class AIXTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("_IBMR2");
Builder.defineMacro("_POWER");
Builder.defineMacro("_AIX");
Builder.defineMacro("__TOS_AIX__");
if (Opts.C11) {
Builder.defineMacro("__STDC_NO_ATOMICS__");
Builder.defineMacro("__STDC_NO_THREADS__");
}
if (Opts.EnableAIXExtendedAltivecABI)
Builder.defineMacro("__EXTABI__");
unsigned Major, Minor, Micro;
Triple.getOSVersion(Major, Minor, Micro);
// Define AIX OS-Version Macros.
// Includes logic for legacy versions of AIX; no specific intent to support.
std::pair<int, int> OsVersion = {Major, Minor};
if (OsVersion >= std::make_pair(3, 2)) Builder.defineMacro("_AIX32");
if (OsVersion >= std::make_pair(4, 1)) Builder.defineMacro("_AIX41");
if (OsVersion >= std::make_pair(4, 3)) Builder.defineMacro("_AIX43");
if (OsVersion >= std::make_pair(5, 0)) Builder.defineMacro("_AIX50");
if (OsVersion >= std::make_pair(5, 1)) Builder.defineMacro("_AIX51");
if (OsVersion >= std::make_pair(5, 2)) Builder.defineMacro("_AIX52");
if (OsVersion >= std::make_pair(5, 3)) Builder.defineMacro("_AIX53");
if (OsVersion >= std::make_pair(6, 1)) Builder.defineMacro("_AIX61");
if (OsVersion >= std::make_pair(7, 1)) Builder.defineMacro("_AIX71");
if (OsVersion >= std::make_pair(7, 2)) Builder.defineMacro("_AIX72");
if (OsVersion >= std::make_pair(7, 3)) Builder.defineMacro("_AIX73");
// FIXME: Do not define _LONG_LONG when -fno-long-long is specified.
Builder.defineMacro("_LONG_LONG");
if (Opts.POSIXThreads) {
Builder.defineMacro("_THREAD_SAFE");
}
if (this->PointerWidth == 64) {
Builder.defineMacro("__64BIT__");
}
// Define _WCHAR_T when it is a fundamental type
// (i.e., for C++ without -fno-wchar).
if (Opts.CPlusPlus && Opts.WChar) {
Builder.defineMacro("_WCHAR_T");
}
}
public:
AIXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->TheCXXABI.set(TargetCXXABI::XL);
if (this->PointerWidth == 64) {
this->WCharType = this->UnsignedInt;
} else {
this->WCharType = this->UnsignedShort;
}
this->UseZeroLengthBitfieldAlignment = true;
}
// AIX sets FLT_EVAL_METHOD to be 1.
unsigned getFloatEvalMethod() const override { return 1; }
bool hasInt128Type() const override { return false; }
bool defaultsToAIXPowerAlignment() const override { return true; }
};
// z/OS target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY ZOSTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// FIXME: _LONG_LONG should not be defined under -std=c89.
Builder.defineMacro("_LONG_LONG");
Builder.defineMacro("_OPEN_DEFAULT");
// _UNIX03_WITHDRAWN is required to build libcxx.
Builder.defineMacro("_UNIX03_WITHDRAWN");
Builder.defineMacro("__370__");
Builder.defineMacro("__BFP__");
// FIXME: __BOOL__ should not be defined under -std=c89.
Builder.defineMacro("__BOOL__");
Builder.defineMacro("__LONGNAME__");
Builder.defineMacro("__MVS__");
Builder.defineMacro("__THW_370__");
Builder.defineMacro("__THW_BIG_ENDIAN__");
Builder.defineMacro("__TOS_390__");
Builder.defineMacro("__TOS_MVS__");
Builder.defineMacro("__XPLINK__");
if (this->PointerWidth == 64)
Builder.defineMacro("__64BIT__");
if (Opts.CPlusPlus) {
Builder.defineMacro("__DLL__");
// _XOPEN_SOURCE=600 is required to build libcxx.
Builder.defineMacro("_XOPEN_SOURCE", "600");
}
if (Opts.GNUMode) {
Builder.defineMacro("_MI_BUILTIN");
Builder.defineMacro("_EXT");
}
if (Opts.CPlusPlus && Opts.WChar) {
// Macro __wchar_t is defined so that the wchar_t data
// type is not declared as a typedef in system headers.
Builder.defineMacro("__wchar_t");
}
this->PlatformName = llvm::Triple::getOSTypeName(Triple.getOS());
}
public:
ZOSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->WCharType = TargetInfo::UnsignedInt;
this->MaxAlignedAttribute = 128;
this->UseBitFieldTypeAlignment = false;
this->UseZeroLengthBitfieldAlignment = true;
this->UseLeadingZeroLengthBitfield = false;
this->ZeroLengthBitfieldBoundary = 32;
this->DefaultAlignForAttributeAligned = 128;
}
};
void addWindowsDefines(const llvm::Triple &Triple, const LangOptions &Opts,
MacroBuilder &Builder);
// Windows target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY WindowsTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
addWindowsDefines(Triple, Opts, Builder);
}
public:
WindowsTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->WCharType = TargetInfo::UnsignedShort;
this->WIntType = TargetInfo::UnsignedShort;
}
};
template <typename Target>
class LLVM_LIBRARY_VISIBILITY NaClTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
Builder.defineMacro("__native_client__");
}
public:
NaClTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->LongAlign = 32;
this->LongWidth = 32;
this->PointerAlign = 32;
this->PointerWidth = 32;
this->IntMaxType = TargetInfo::SignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
this->DoubleAlign = 64;
this->LongDoubleWidth = 64;
this->LongDoubleAlign = 64;
this->LongLongWidth = 64;
this->LongLongAlign = 64;
this->SizeType = TargetInfo::UnsignedInt;
this->PtrDiffType = TargetInfo::SignedInt;
this->IntPtrType = TargetInfo::SignedInt;
// RegParmMax is inherited from the underlying architecture.
this->LongDoubleFormat = &llvm::APFloat::IEEEdouble();
if (Triple.getArch() == llvm::Triple::arm) {
// Handled in ARM's setABI().
} else if (Triple.getArch() == llvm::Triple::x86) {
this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-"
"i64:64-n8:16:32-S128");
} else if (Triple.getArch() == llvm::Triple::x86_64) {
this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-"
"i64:64-n8:16:32:64-S128");
} else if (Triple.getArch() == llvm::Triple::mipsel) {
// Handled in mips' setDataLayout.
} else {
assert(Triple.getArch() == llvm::Triple::le32);
this->resetDataLayout("e-p:32:32-i64:64");
}
}
};
// Fuchsia Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY FuchsiaTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
Builder.defineMacro("__Fuchsia__");
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
// Required by the libc++ locale support.
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
public:
FuchsiaTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->MCountName = "__mcount";
this->TheCXXABI.set(TargetCXXABI::Fuchsia);
}
};
// WebAssembly target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY WebAssemblyOSTargetInfo
: public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// A common platform macro.
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
// Follow g++ convention and predefine _GNU_SOURCE for C++.
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
// Indicate that we have __float128.
Builder.defineMacro("__FLOAT128__");
}
public:
explicit WebAssemblyOSTargetInfo(const llvm::Triple &Triple,
const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->MCountName = "__mcount";
this->TheCXXABI.set(TargetCXXABI::WebAssembly);
this->HasFloat128 = true;
}
};
// WASI target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY WASITargetInfo
: public WebAssemblyOSTargetInfo<Target> {
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const final {
WebAssemblyOSTargetInfo<Target>::getOSDefines(Opts, Triple, Builder);
Builder.defineMacro("__wasi__");
}
public:
explicit WASITargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: WebAssemblyOSTargetInfo<Target>(Triple, Opts) {}
};
// Emscripten target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY EmscriptenTargetInfo
: public WebAssemblyOSTargetInfo<Target> {
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const final {
WebAssemblyOSTargetInfo<Target>::getOSDefines(Opts, Triple, Builder);
Builder.defineMacro("__EMSCRIPTEN__");
if (Opts.POSIXThreads)
Builder.defineMacro("__EMSCRIPTEN_PTHREADS__");
}
public:
explicit EmscriptenTargetInfo(const llvm::Triple &Triple,
const TargetOptions &Opts)
: WebAssemblyOSTargetInfo<Target>(Triple, Opts) {
// Keeping the alignment of long double to 8 bytes even though its size is
// 16 bytes allows emscripten to have an 8-byte-aligned max_align_t which
// in turn gives us an 8-byte-aligned malloc.
// Emscripten's ABI is unstable and we may change this back to 128 to match
// the WebAssembly default in the future.
this->LongDoubleAlign = 64;
}
};
} // namespace targets
} // namespace clang
#endif // LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H
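// A minimal, self-contained sketch of the layering used by the OS target
// classes above: an architecture-level target defines its own macros, and an
// OS wrapper template appends the OS-specific ones on top. MiniBuilder,
// MiniX86Target, MiniOSTarget and MiniOpenBSD are illustrative stand-ins,
// not the real clang::MacroBuilder / clang::targets::OSTargetInfo classes.
#include <iostream>
#include <string>
#include <vector>

struct MiniBuilder {
  std::vector<std::string> Macros;
  void defineMacro(const std::string &Name, const std::string &Value = "1") {
    Macros.push_back("#define " + Name + " " + Value);
  }
};

// Architecture layer: defines only CPU-level macros.
struct MiniX86Target {
  virtual ~MiniX86Target() = default;
  virtual void getTargetDefines(MiniBuilder &B) const {
    B.defineMacro("__x86_64__");
  }
};

// OS layer: runs the architecture defines first, then the OS defines,
// mirroring the getOSDefines() override pattern used above.
template <typename Target> struct MiniOSTarget : Target {
  void getTargetDefines(MiniBuilder &B) const override {
    Target::getTargetDefines(B); // architecture macros
    getOSDefines(B);             // OS-specific macros
  }
  virtual void getOSDefines(MiniBuilder &B) const = 0;
};

struct MiniOpenBSD : MiniOSTarget<MiniX86Target> {
  void getOSDefines(MiniBuilder &B) const override {
    B.defineMacro("__OpenBSD__");
    B.defineMacro("__ELF__");
  }
};

int main() {
  MiniOpenBSD T;
  MiniBuilder B;
  T.getTargetDefines(B);
  for (const std::string &M : B.Macros)
    std::cout << M << '\n'; // prints the x86 macro followed by the OS macros
  return 0;
}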
diff --git a/contrib/llvm-project/clang/lib/Driver/Driver.cpp b/contrib/llvm-project/clang/lib/Driver/Driver.cpp
index 5c323cb6ea23..94a7553e273b 100644
--- a/contrib/llvm-project/clang/lib/Driver/Driver.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/Driver.cpp
@@ -1,5578 +1,5577 @@
//===--- Driver.cpp - Clang GCC Compatible Driver -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Driver/Driver.h"
#include "ToolChains/AIX.h"
#include "ToolChains/AMDGPU.h"
#include "ToolChains/AMDGPUOpenMP.h"
#include "ToolChains/AVR.h"
#include "ToolChains/Ananas.h"
#include "ToolChains/BareMetal.h"
#include "ToolChains/Clang.h"
#include "ToolChains/CloudABI.h"
#include "ToolChains/Contiki.h"
#include "ToolChains/CrossWindows.h"
#include "ToolChains/Cuda.h"
#include "ToolChains/Darwin.h"
#include "ToolChains/DragonFly.h"
#include "ToolChains/FreeBSD.h"
#include "ToolChains/Fuchsia.h"
#include "ToolChains/Gnu.h"
#include "ToolChains/HIP.h"
#include "ToolChains/Haiku.h"
#include "ToolChains/Hexagon.h"
#include "ToolChains/Hurd.h"
#include "ToolChains/Lanai.h"
#include "ToolChains/Linux.h"
#include "ToolChains/MSP430.h"
#include "ToolChains/MSVC.h"
#include "ToolChains/MinGW.h"
#include "ToolChains/Minix.h"
#include "ToolChains/MipsLinux.h"
#include "ToolChains/Myriad.h"
#include "ToolChains/NaCl.h"
#include "ToolChains/NetBSD.h"
#include "ToolChains/OpenBSD.h"
#include "ToolChains/PPCLinux.h"
#include "ToolChains/PS4CPU.h"
#include "ToolChains/RISCVToolchain.h"
#include "ToolChains/Solaris.h"
#include "ToolChains/TCE.h"
#include "ToolChains/VEToolchain.h"
#include "ToolChains/WebAssembly.h"
#include "ToolChains/XCore.h"
#include "ToolChains/ZOS.h"
#include "clang/Basic/TargetID.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptSpecifier.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <memory>
#include <utility>
#if LLVM_ON_UNIX
#include <unistd.h> // getpid
#endif
using namespace clang::driver;
using namespace clang;
using namespace llvm::opt;
static llvm::Triple getHIPOffloadTargetTriple() {
static const llvm::Triple T("amdgcn-amd-amdhsa");
return T;
}
// static
std::string Driver::GetResourcesPath(StringRef BinaryPath,
StringRef CustomResourceDir) {
// Since the resource directory is embedded in the module hash, it's important
// that all places that need it call this function, so that they get the
// exact same string ("a/../b/" and "b/" get different hashes, for example).
// Dir is bin/ or lib/, depending on where BinaryPath is.
std::string Dir = std::string(llvm::sys::path::parent_path(BinaryPath));
SmallString<128> P(Dir);
if (CustomResourceDir != "") {
llvm::sys::path::append(P, CustomResourceDir);
} else {
// On Windows, libclang.dll is in bin/.
// On non-Windows, libclang.so/.dylib is in lib/.
// With a static-library build of libclang, LibClangPath will contain the
// path of the embedding binary, which for LLVM binaries will be in bin/.
// ../lib gets us to lib/ in both cases.
P = llvm::sys::path::parent_path(Dir);
llvm::sys::path::append(P, Twine("lib") + CLANG_LIBDIR_SUFFIX, "clang",
CLANG_VERSION_STRING);
}
return std::string(P.str());
}
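// Standalone illustration (using std::filesystem rather than llvm::sys::path)
// of the "../lib" resource-directory layout described in the comments above.
// The install prefix and version number below are made up for the example.
#include <filesystem>
#include <iostream>

int main() {
  namespace fs = std::filesystem;
  fs::path BinaryPath = "/usr/local/llvm/bin/clang"; // hypothetical BinaryPath
  fs::path Dir = BinaryPath.parent_path();           // .../bin
  // ../lib gets us from bin/ (or lib/) to the sibling lib/ directory.
  fs::path ResourceDir = Dir.parent_path() / "lib" / "clang" / "13.0.0";
  std::cout << ResourceDir.string() << '\n'; // /usr/local/llvm/lib/clang/13.0.0
  return 0;
}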
Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple,
DiagnosticsEngine &Diags, std::string Title,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS)
: Diags(Diags), VFS(std::move(VFS)), Mode(GCCMode),
SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone), LTOMode(LTOK_None),
ClangExecutable(ClangExecutable), SysRoot(DEFAULT_SYSROOT),
DriverTitle(Title), CCPrintStatReportFilename(), CCPrintOptionsFilename(),
CCPrintHeadersFilename(), CCLogDiagnosticsFilename(),
CCCPrintBindings(false), CCPrintOptions(false), CCPrintHeaders(false),
CCLogDiagnostics(false), CCGenDiagnostics(false),
CCPrintProcessStats(false), TargetTriple(TargetTriple),
CCCGenericGCCName(""), Saver(Alloc), CheckInputsExist(true),
GenReproducer(false), SuppressMissingInputWarning(false) {
// Provide a sane fallback if no VFS is specified.
if (!this->VFS)
this->VFS = llvm::vfs::getRealFileSystem();
Name = std::string(llvm::sys::path::filename(ClangExecutable));
Dir = std::string(llvm::sys::path::parent_path(ClangExecutable));
InstalledDir = Dir; // Provide a sensible default installed dir.
if ((!SysRoot.empty()) && llvm::sys::path::is_relative(SysRoot)) {
// Prepend InstalledDir if SysRoot is relative
SmallString<128> P(InstalledDir);
llvm::sys::path::append(P, SysRoot);
SysRoot = std::string(P);
}
#if defined(CLANG_CONFIG_FILE_SYSTEM_DIR)
SystemConfigDir = CLANG_CONFIG_FILE_SYSTEM_DIR;
#endif
#if defined(CLANG_CONFIG_FILE_USER_DIR)
UserConfigDir = CLANG_CONFIG_FILE_USER_DIR;
#endif
// Compute the path to the resource directory.
ResourceDir = GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR);
}
void Driver::setDriverMode(StringRef Value) {
static const std::string OptName =
getOpts().getOption(options::OPT_driver_mode).getPrefixedName();
if (auto M = llvm::StringSwitch<llvm::Optional<DriverMode>>(Value)
.Case("gcc", GCCMode)
.Case("g++", GXXMode)
.Case("cpp", CPPMode)
.Case("cl", CLMode)
.Case("flang", FlangMode)
.Default(None))
Mode = *M;
else
Diag(diag::err_drv_unsupported_option_argument) << OptName << Value;
}
InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings,
bool IsClCompatMode,
bool &ContainsError) {
llvm::PrettyStackTraceString CrashInfo("Command line argument parsing");
ContainsError = false;
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks(IsClCompatMode);
// Make sure that Flang-only options don't pollute the Clang output
// TODO: Make sure that Clang-only options don't pollute Flang output
if (!IsFlangMode())
ExcludedFlagsBitmask |= options::FlangOnlyOption;
unsigned MissingArgIndex, MissingArgCount;
InputArgList Args =
getOpts().ParseArgs(ArgStrings, MissingArgIndex, MissingArgCount,
IncludedFlagsBitmask, ExcludedFlagsBitmask);
// Check for missing argument error.
if (MissingArgCount) {
Diag(diag::err_drv_missing_argument)
<< Args.getArgString(MissingArgIndex) << MissingArgCount;
ContainsError |=
Diags.getDiagnosticLevel(diag::err_drv_missing_argument,
SourceLocation()) > DiagnosticsEngine::Warning;
}
// Check for unsupported options.
for (const Arg *A : Args) {
if (A->getOption().hasFlag(options::Unsupported)) {
unsigned DiagID;
auto ArgString = A->getAsString(Args);
std::string Nearest;
if (getOpts().findNearest(
ArgString, Nearest, IncludedFlagsBitmask,
ExcludedFlagsBitmask | options::Unsupported) > 1) {
DiagID = diag::err_drv_unsupported_opt;
Diag(DiagID) << ArgString;
} else {
DiagID = diag::err_drv_unsupported_opt_with_suggestion;
Diag(DiagID) << ArgString << Nearest;
}
ContainsError |= Diags.getDiagnosticLevel(DiagID, SourceLocation()) >
DiagnosticsEngine::Warning;
continue;
}
// Warn about -mcpu= without an argument.
if (A->getOption().matches(options::OPT_mcpu_EQ) && A->containsValue("")) {
Diag(diag::warn_drv_empty_joined_argument) << A->getAsString(Args);
ContainsError |= Diags.getDiagnosticLevel(
diag::warn_drv_empty_joined_argument,
SourceLocation()) > DiagnosticsEngine::Warning;
}
}
for (const Arg *A : Args.filtered(options::OPT_UNKNOWN)) {
unsigned DiagID;
auto ArgString = A->getAsString(Args);
std::string Nearest;
if (getOpts().findNearest(
ArgString, Nearest, IncludedFlagsBitmask, ExcludedFlagsBitmask) > 1) {
DiagID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl
: diag::err_drv_unknown_argument;
Diags.Report(DiagID) << ArgString;
} else {
DiagID = IsCLMode()
? diag::warn_drv_unknown_argument_clang_cl_with_suggestion
: diag::err_drv_unknown_argument_with_suggestion;
Diags.Report(DiagID) << ArgString << Nearest;
}
ContainsError |= Diags.getDiagnosticLevel(DiagID, SourceLocation()) >
DiagnosticsEngine::Warning;
}
return Args;
}
// Determine which compilation mode we are in. We look for options which
// affect the phase, starting with the earliest phases, and record which
// option we used to determine the final phase.
phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
Arg **FinalPhaseArg) const {
Arg *PhaseArg = nullptr;
phases::ID FinalPhase;
// -{E,EP,P,M,MM} only run the preprocessor.
if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) ||
(PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_P))) {
FinalPhase = phases::Preprocess;
// --precompile only runs up to precompilation.
} else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) {
FinalPhase = phases::Precompile;
// -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) ||
(PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) ||
(PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) ||
(PhaseArg = DAL.getLastArg(options::OPT_verify_pch)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) ||
(PhaseArg = DAL.getLastArg(options::OPT__migrate)) ||
(PhaseArg = DAL.getLastArg(options::OPT__analyze)) ||
(PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) {
FinalPhase = phases::Compile;
// -S only runs up to the backend.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
FinalPhase = phases::Backend;
// -c compilation only runs up to the assembler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
FinalPhase = phases::Assemble;
// Otherwise do everything.
} else
FinalPhase = phases::Link;
if (FinalPhaseArg)
*FinalPhaseArg = PhaseArg;
return FinalPhase;
}
static Arg *MakeInputArg(DerivedArgList &Args, const OptTable &Opts,
StringRef Value, bool Claim = true) {
Arg *A = new Arg(Opts.getOption(options::OPT_INPUT), Value,
Args.getBaseArgs().MakeIndex(Value), Value.data());
Args.AddSynthesizedArg(A);
if (Claim)
A->claim();
return A;
}
DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
const llvm::opt::OptTable &Opts = getOpts();
DerivedArgList *DAL = new DerivedArgList(Args);
bool HasNostdlib = Args.hasArg(options::OPT_nostdlib);
bool HasNostdlibxx = Args.hasArg(options::OPT_nostdlibxx);
bool HasNodefaultlib = Args.hasArg(options::OPT_nodefaultlibs);
for (Arg *A : Args) {
// Unfortunately, we have to parse some forwarding options (-Xassembler,
// -Xlinker, -Xpreprocessor) because we either integrate their functionality
// (assembler and preprocessor), or bypass a previous driver ('collect2').
// Rewrite linker options, to replace --no-demangle with a custom internal
// option.
if ((A->getOption().matches(options::OPT_Wl_COMMA) ||
A->getOption().matches(options::OPT_Xlinker)) &&
A->containsValue("--no-demangle")) {
// Add the rewritten no-demangle argument.
DAL->AddFlagArg(A, Opts.getOption(options::OPT_Z_Xlinker__no_demangle));
// Add the remaining values as Xlinker arguments.
for (StringRef Val : A->getValues())
if (Val != "--no-demangle")
DAL->AddSeparateArg(A, Opts.getOption(options::OPT_Xlinker), Val);
continue;
}
// Rewrite preprocessor options, to replace -Wp,-MD,FOO which is used by
// some build systems. We don't try to be complete here because we don't
// care to encourage this usage model.
if (A->getOption().matches(options::OPT_Wp_COMMA) &&
(A->getValue(0) == StringRef("-MD") ||
A->getValue(0) == StringRef("-MMD"))) {
// Rewrite to -MD/-MMD along with -MF.
if (A->getValue(0) == StringRef("-MD"))
DAL->AddFlagArg(A, Opts.getOption(options::OPT_MD));
else
DAL->AddFlagArg(A, Opts.getOption(options::OPT_MMD));
if (A->getNumValues() == 2)
DAL->AddSeparateArg(A, Opts.getOption(options::OPT_MF), A->getValue(1));
continue;
}
// Rewrite reserved library names.
if (A->getOption().matches(options::OPT_l)) {
StringRef Value = A->getValue();
// Rewrite unless -nostdlib is present.
if (!HasNostdlib && !HasNodefaultlib && !HasNostdlibxx &&
Value == "stdc++") {
DAL->AddFlagArg(A, Opts.getOption(options::OPT_Z_reserved_lib_stdcxx));
continue;
}
// Rewrite unconditionally.
if (Value == "cc_kext") {
DAL->AddFlagArg(A, Opts.getOption(options::OPT_Z_reserved_lib_cckext));
continue;
}
}
// Pick up inputs via the -- option.
if (A->getOption().matches(options::OPT__DASH_DASH)) {
A->claim();
for (StringRef Val : A->getValues())
DAL->append(MakeInputArg(*DAL, Opts, Val, false));
continue;
}
DAL->append(A);
}
// Enforce -static if -miamcu is present.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false))
DAL->AddFlagArg(0, Opts.getOption(options::OPT_static));
// Add a default value of -mlinker-version= if the host linker version was
// detected (HOST_LINK_VERSION) and the user didn't specify one.
#if defined(HOST_LINK_VERSION)
if (!Args.hasArg(options::OPT_mlinker_version_EQ) &&
strlen(HOST_LINK_VERSION) > 0) {
DAL->AddJoinedArg(0, Opts.getOption(options::OPT_mlinker_version_EQ),
HOST_LINK_VERSION);
DAL->getLastArg(options::OPT_mlinker_version_EQ)->claim();
}
#endif
return DAL;
}
/// Compute target triple from args.
///
/// This routine provides the logic to compute a target triple from various
/// args passed to the driver and the default triple string.
static llvm::Triple computeTargetTriple(const Driver &D,
StringRef TargetTriple,
const ArgList &Args,
StringRef DarwinArchName = "") {
// FIXME: Already done in Compilation *Driver::BuildCompilation
if (const Arg *A = Args.getLastArg(options::OPT_target))
TargetTriple = A->getValue();
llvm::Triple Target(llvm::Triple::normalize(TargetTriple));
// GNU/Hurd's triples should have been -hurd-gnu*, but were historically made
// -gnu* only, and we cannot change this, so we have to detect that case as
// being the Hurd OS.
if (TargetTriple.find("-unknown-gnu") != StringRef::npos ||
TargetTriple.find("-pc-gnu") != StringRef::npos)
Target.setOSName("hurd");
// Handle Apple-specific options available here.
if (Target.isOSBinFormatMachO()) {
// If an explicit Darwin arch name is given, that trumps all.
if (!DarwinArchName.empty()) {
tools::darwin::setTripleTypeForMachOArchName(Target, DarwinArchName);
return Target;
}
// Handle the Darwin '-arch' flag.
if (Arg *A = Args.getLastArg(options::OPT_arch)) {
StringRef ArchName = A->getValue();
tools::darwin::setTripleTypeForMachOArchName(Target, ArchName);
}
}
// Handle pseudo-target flags '-mlittle-endian'/'-EL' and
// '-mbig-endian'/'-EB'.
if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
options::OPT_mbig_endian)) {
if (A->getOption().matches(options::OPT_mlittle_endian)) {
llvm::Triple LE = Target.getLittleEndianArchVariant();
if (LE.getArch() != llvm::Triple::UnknownArch)
Target = std::move(LE);
} else {
llvm::Triple BE = Target.getBigEndianArchVariant();
if (BE.getArch() != llvm::Triple::UnknownArch)
Target = std::move(BE);
}
}
// Skip further flag support on OSes which don't support '-m32' or '-m64'.
if (Target.getArch() == llvm::Triple::tce ||
Target.getOS() == llvm::Triple::Minix)
return Target;
// On AIX, the env OBJECT_MODE may affect the resulting arch variant.
if (Target.isOSAIX()) {
if (Optional<std::string> ObjectModeValue =
llvm::sys::Process::GetEnv("OBJECT_MODE")) {
StringRef ObjectMode = *ObjectModeValue;
llvm::Triple::ArchType AT = llvm::Triple::UnknownArch;
if (ObjectMode.equals("64")) {
AT = Target.get64BitArchVariant().getArch();
} else if (ObjectMode.equals("32")) {
AT = Target.get32BitArchVariant().getArch();
} else {
D.Diag(diag::err_drv_invalid_object_mode) << ObjectMode;
}
if (AT != llvm::Triple::UnknownArch && AT != Target.getArch())
Target.setArch(AT);
}
}
// Handle pseudo-target flags '-m64', '-mx32', '-m32' and '-m16'.
Arg *A = Args.getLastArg(options::OPT_m64, options::OPT_mx32,
options::OPT_m32, options::OPT_m16);
if (A) {
llvm::Triple::ArchType AT = llvm::Triple::UnknownArch;
if (A->getOption().matches(options::OPT_m64)) {
AT = Target.get64BitArchVariant().getArch();
if (Target.getEnvironment() == llvm::Triple::GNUX32)
Target.setEnvironment(llvm::Triple::GNU);
else if (Target.getEnvironment() == llvm::Triple::MuslX32)
Target.setEnvironment(llvm::Triple::Musl);
} else if (A->getOption().matches(options::OPT_mx32) &&
Target.get64BitArchVariant().getArch() == llvm::Triple::x86_64) {
AT = llvm::Triple::x86_64;
if (Target.getEnvironment() == llvm::Triple::Musl)
Target.setEnvironment(llvm::Triple::MuslX32);
else
Target.setEnvironment(llvm::Triple::GNUX32);
} else if (A->getOption().matches(options::OPT_m32)) {
AT = Target.get32BitArchVariant().getArch();
if (Target.getEnvironment() == llvm::Triple::GNUX32)
Target.setEnvironment(llvm::Triple::GNU);
else if (Target.getEnvironment() == llvm::Triple::MuslX32)
Target.setEnvironment(llvm::Triple::Musl);
} else if (A->getOption().matches(options::OPT_m16) &&
Target.get32BitArchVariant().getArch() == llvm::Triple::x86) {
AT = llvm::Triple::x86;
Target.setEnvironment(llvm::Triple::CODE16);
}
if (AT != llvm::Triple::UnknownArch && AT != Target.getArch())
Target.setArch(AT);
}
// Handle -miamcu flag.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86)
D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu"
<< Target.str();
if (A && !A->getOption().matches(options::OPT_m32))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "-miamcu" << A->getBaseArg().getAsString(Args);
Target.setArch(llvm::Triple::x86);
Target.setArchName("i586");
Target.setEnvironment(llvm::Triple::UnknownEnvironment);
Target.setEnvironmentName("");
Target.setOS(llvm::Triple::ELFIAMCU);
Target.setVendor(llvm::Triple::UnknownVendor);
Target.setVendorName("intel");
}
// If target is MIPS adjust the target triple
// according to the provided ABI name.
A = Args.getLastArg(options::OPT_mabi_EQ);
if (A && Target.isMIPS()) {
StringRef ABIName = A->getValue();
if (ABIName == "32") {
Target = Target.get32BitArchVariant();
if (Target.getEnvironment() == llvm::Triple::GNUABI64 ||
Target.getEnvironment() == llvm::Triple::GNUABIN32)
Target.setEnvironment(llvm::Triple::GNU);
} else if (ABIName == "n32") {
Target = Target.get64BitArchVariant();
if (Target.getEnvironment() == llvm::Triple::GNU ||
Target.getEnvironment() == llvm::Triple::GNUABI64)
Target.setEnvironment(llvm::Triple::GNUABIN32);
} else if (ABIName == "64") {
Target = Target.get64BitArchVariant();
if (Target.getEnvironment() == llvm::Triple::GNU ||
Target.getEnvironment() == llvm::Triple::GNUABIN32)
Target.setEnvironment(llvm::Triple::GNUABI64);
}
}
// If target is RISC-V adjust the target triple according to
// provided architecture name
A = Args.getLastArg(options::OPT_march_EQ);
if (A && Target.isRISCV()) {
StringRef ArchName = A->getValue();
if (ArchName.startswith_insensitive("rv32"))
Target.setArch(llvm::Triple::riscv32);
else if (ArchName.startswith_insensitive("rv64"))
Target.setArch(llvm::Triple::riscv64);
}
return Target;
}
// Parse the LTO options and record the type of LTO compilation
// based on which -f(no-)?lto(=.*)? or -f(no-)?offload-lto(=.*)?
// option occurs last.
static llvm::Optional<driver::LTOKind>
parseLTOMode(Driver &D, const llvm::opt::ArgList &Args, OptSpecifier OptPos,
OptSpecifier OptNeg, OptSpecifier OptEq, bool IsOffload) {
driver::LTOKind LTOMode = LTOK_None;
// Non-offload LTO allows -flto=auto and -flto=jobserver. Offload LTO does
// not support those options.
if (!Args.hasFlag(OptPos, OptEq, OptNeg, false) &&
(IsOffload ||
(!Args.hasFlag(options::OPT_flto_EQ_auto, options::OPT_fno_lto, false) &&
!Args.hasFlag(options::OPT_flto_EQ_jobserver, options::OPT_fno_lto,
false))))
return None;
StringRef LTOName("full");
const Arg *A = Args.getLastArg(OptEq);
if (A)
LTOName = A->getValue();
LTOMode = llvm::StringSwitch<LTOKind>(LTOName)
.Case("full", LTOK_Full)
.Case("thin", LTOK_Thin)
.Default(LTOK_Unknown);
if (LTOMode == LTOK_Unknown) {
assert(A);
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << A->getValue();
return None;
}
return LTOMode;
}
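// Minimal sketch of the "last LTO option wins" behavior implemented above;
// parseLTOFlag is a made-up helper that maps the final -flto/-flto=/-fno-lto
// spelling to a mode, standing in for the StringSwitch-based parseLTOMode()
// (the -flto=auto/jobserver special cases are omitted here).
#include <iostream>
#include <string>
#include <vector>

enum class LTOKind { None, Full, Thin, Unknown };

LTOKind parseLTOFlag(const std::vector<std::string> &Args) {
  LTOKind Mode = LTOKind::None;
  for (const std::string &A : Args) {        // later options override earlier
    if (A == "-fno-lto")
      Mode = LTOKind::None;
    else if (A == "-flto" || A == "-flto=full")
      Mode = LTOKind::Full;
    else if (A == "-flto=thin")
      Mode = LTOKind::Thin;
    else if (A.rfind("-flto=", 0) == 0)
      Mode = LTOKind::Unknown;               // unrecognized -flto=<value>
  }
  return Mode;
}

int main() {
  // -flto=thin overrides the earlier -flto, so the result is Thin.
  LTOKind K = parseLTOFlag({"-flto", "-flto=thin"});
  std::cout << (K == LTOKind::Thin ? "thin" : "other") << '\n';
  return 0;
}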
// Parse the LTO options.
void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
LTOMode = LTOK_None;
if (auto M = parseLTOMode(*this, Args, options::OPT_flto,
options::OPT_fno_lto, options::OPT_flto_EQ,
/*IsOffload=*/false))
LTOMode = M.getValue();
OffloadLTOMode = LTOK_None;
if (auto M = parseLTOMode(*this, Args, options::OPT_foffload_lto,
options::OPT_fno_offload_lto,
options::OPT_foffload_lto_EQ,
/*IsOffload=*/true))
OffloadLTOMode = M.getValue();
}
/// Compute the desired OpenMP runtime from the flags provided.
Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const {
StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME);
const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ);
if (A)
RuntimeName = A->getValue();
auto RT = llvm::StringSwitch<OpenMPRuntimeKind>(RuntimeName)
.Case("libomp", OMPRT_OMP)
.Case("libgomp", OMPRT_GOMP)
.Case("libiomp5", OMPRT_IOMP5)
.Default(OMPRT_Unknown);
if (RT == OMPRT_Unknown) {
if (A)
Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << A->getValue();
else
// FIXME: We could use a nicer diagnostic here.
Diag(diag::err_drv_unsupported_opt) << "-fopenmp";
}
return RT;
}
void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
InputList &Inputs) {
//
// CUDA/HIP
//
// We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA
// or HIP type. However, mixed CUDA/HIP compilation is not supported.
bool IsCuda =
llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isCuda(I.first);
});
bool IsHIP =
llvm::any_of(Inputs,
[](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isHIP(I.first);
}) ||
C.getInputArgs().hasArg(options::OPT_hip_link);
if (IsCuda && IsHIP) {
Diag(clang::diag::err_drv_mix_cuda_hip);
return;
}
if (IsCuda) {
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
const llvm::Triple &HostTriple = HostTC->getTriple();
StringRef DeviceTripleStr;
auto OFK = Action::OFK_Cuda;
DeviceTripleStr =
HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda";
llvm::Triple CudaTriple(DeviceTripleStr);
// Use the CUDA and host triples as the key into the ToolChains map,
// because the device toolchain we create depends on both.
auto &CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()];
if (!CudaTC) {
CudaTC = std::make_unique<toolchains::CudaToolChain>(
*this, CudaTriple, *HostTC, C.getInputArgs(), OFK);
}
C.addOffloadDeviceToolChain(CudaTC.get(), OFK);
} else if (IsHIP) {
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
const llvm::Triple &HostTriple = HostTC->getTriple();
auto OFK = Action::OFK_HIP;
llvm::Triple HIPTriple = getHIPOffloadTargetTriple();
// Use the HIP and host triples as the key into the ToolChains map,
// because the device toolchain we create depends on both.
auto &HIPTC = ToolChains[HIPTriple.str() + "/" + HostTriple.str()];
if (!HIPTC) {
HIPTC = std::make_unique<toolchains::HIPToolChain>(
*this, HIPTriple, *HostTC, C.getInputArgs());
}
C.addOffloadDeviceToolChain(HIPTC.get(), OFK);
}
//
// OpenMP
//
// We need to generate an OpenMP toolchain if the user specified targets with
// the -fopenmp-targets option.
if (Arg *OpenMPTargets =
C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
if (OpenMPTargets->getNumValues()) {
// We expect that -fopenmp-targets is always used in conjunction with the
// option -fopenmp specifying a valid runtime with offloading support,
// i.e. libomp or libiomp.
bool HasValidOpenMPRuntime = C.getInputArgs().hasFlag(
options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false);
if (HasValidOpenMPRuntime) {
OpenMPRuntimeKind OpenMPKind = getOpenMPRuntime(C.getInputArgs());
HasValidOpenMPRuntime =
OpenMPKind == OMPRT_OMP || OpenMPKind == OMPRT_IOMP5;
}
if (HasValidOpenMPRuntime) {
llvm::StringMap<const char *> FoundNormalizedTriples;
for (const char *Val : OpenMPTargets->getValues()) {
llvm::Triple TT(Val);
std::string NormalizedName = TT.normalize();
// Make sure we don't have a duplicate triple.
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
if (Duplicate != FoundNormalizedTriples.end()) {
Diag(clang::diag::warn_drv_omp_offload_target_duplicate)
<< Val << Duplicate->second;
continue;
}
// Store the current triple so that we can check for duplicates in the
// following iterations.
FoundNormalizedTriples[NormalizedName] = Val;
// If the specified target is invalid, emit a diagnostic.
if (TT.getArch() == llvm::Triple::UnknownArch)
Diag(clang::diag::err_drv_invalid_omp_target) << Val;
else {
const ToolChain *TC;
// Device toolchains have to be selected differently. They pair host
// and device in their implementation.
if (TT.isNVPTX() || TT.isAMDGCN()) {
const ToolChain *HostTC =
C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "Host toolchain should be always defined.");
auto &DeviceTC =
ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()];
if (!DeviceTC) {
if (TT.isNVPTX())
DeviceTC = std::make_unique<toolchains::CudaToolChain>(
*this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP);
else if (TT.isAMDGCN())
DeviceTC =
std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
*this, TT, *HostTC, C.getInputArgs());
else
assert(DeviceTC && "Device toolchain not defined.");
}
TC = DeviceTC.get();
} else
TC = &getToolChain(C.getInputArgs(), TT);
C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP);
}
}
} else
Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets);
} else
Diag(clang::diag::warn_drv_empty_joined_argument)
<< OpenMPTargets->getAsString(C.getInputArgs());
}
//
// TODO: Add support for other offloading programming models here.
//
}
/// Searches the given directories for the specified file.
///
/// \param[out] FilePath File path, if the file was found.
/// \param[in] Dirs Directories used for the search.
/// \param[in] FileName Name of the file to search for.
/// \return True if file was found.
///
/// Looks for the file specified by FileName sequentially in the directories
/// specified by Dirs.
///
static bool searchForFile(SmallVectorImpl<char> &FilePath,
ArrayRef<StringRef> Dirs, StringRef FileName) {
SmallString<128> WPath;
for (const StringRef &Dir : Dirs) {
if (Dir.empty())
continue;
WPath.clear();
llvm::sys::path::append(WPath, Dir, FileName);
llvm::sys::path::native(WPath);
if (llvm::sys::fs::is_regular_file(WPath)) {
FilePath = std::move(WPath);
return true;
}
}
return false;
}
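// A standalone equivalent of searchForFile() above, written against
// std::filesystem instead of llvm::sys for illustration; findFirstRegularFile
// is a hypothetical name, not a driver API.
#include <filesystem>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

std::optional<std::filesystem::path>
findFirstRegularFile(const std::vector<std::filesystem::path> &Dirs,
                     const std::string &FileName) {
  for (const auto &Dir : Dirs) {
    if (Dir.empty())
      continue;                                 // skip unset search dirs
    std::filesystem::path Candidate = Dir / FileName;
    std::error_code EC;
    if (std::filesystem::is_regular_file(Candidate, EC))
      return Candidate;                         // first match wins
  }
  return std::nullopt;                          // nothing found
}

int main() {
  std::vector<std::filesystem::path> Dirs = {"", "/etc", "/usr/local/etc"};
  if (auto P = findFirstRegularFile(Dirs, "hosts"))
    std::cout << P->string() << '\n';           // typically /etc/hosts
  return 0;
}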
bool Driver::readConfigFile(StringRef FileName) {
// Try reading the given file.
SmallVector<const char *, 32> NewCfgArgs;
if (!llvm::cl::readConfigFile(FileName, Saver, NewCfgArgs)) {
Diag(diag::err_drv_cannot_read_config_file) << FileName;
return true;
}
// Read options from config file.
llvm::SmallString<128> CfgFileName(FileName);
llvm::sys::path::native(CfgFileName);
ConfigFile = std::string(CfgFileName);
bool ContainErrors;
CfgOptions = std::make_unique<InputArgList>(
ParseArgStrings(NewCfgArgs, IsCLMode(), ContainErrors));
if (ContainErrors) {
CfgOptions.reset();
return true;
}
if (CfgOptions->hasArg(options::OPT_config)) {
CfgOptions.reset();
Diag(diag::err_drv_nested_config_file);
return true;
}
// Claim all arguments that come from a configuration file so that the driver
// does not warn on any that are unused.
for (Arg *A : *CfgOptions)
A->claim();
return false;
}
bool Driver::loadConfigFile() {
std::string CfgFileName;
bool FileSpecifiedExplicitly = false;
// Process options that change search path for config files.
if (CLOptions) {
if (CLOptions->hasArg(options::OPT_config_system_dir_EQ)) {
SmallString<128> CfgDir;
CfgDir.append(
CLOptions->getLastArgValue(options::OPT_config_system_dir_EQ));
if (!CfgDir.empty()) {
if (llvm::sys::fs::make_absolute(CfgDir).value() != 0)
SystemConfigDir.clear();
else
SystemConfigDir = std::string(CfgDir.begin(), CfgDir.end());
}
}
if (CLOptions->hasArg(options::OPT_config_user_dir_EQ)) {
SmallString<128> CfgDir;
CfgDir.append(
CLOptions->getLastArgValue(options::OPT_config_user_dir_EQ));
if (!CfgDir.empty()) {
if (llvm::sys::fs::make_absolute(CfgDir).value() != 0)
UserConfigDir.clear();
else
UserConfigDir = std::string(CfgDir.begin(), CfgDir.end());
}
}
}
// First try to find config file specified in command line.
if (CLOptions) {
std::vector<std::string> ConfigFiles =
CLOptions->getAllArgValues(options::OPT_config);
if (ConfigFiles.size() > 1) {
if (!std::all_of(ConfigFiles.begin(), ConfigFiles.end(),
[ConfigFiles](const std::string &s) {
return s == ConfigFiles[0];
})) {
Diag(diag::err_drv_duplicate_config);
return true;
}
}
if (!ConfigFiles.empty()) {
CfgFileName = ConfigFiles.front();
assert(!CfgFileName.empty());
// If the argument contains a directory separator, treat it as a path to a
// configuration file.
if (llvm::sys::path::has_parent_path(CfgFileName)) {
SmallString<128> CfgFilePath;
if (llvm::sys::path::is_relative(CfgFileName))
llvm::sys::fs::current_path(CfgFilePath);
llvm::sys::path::append(CfgFilePath, CfgFileName);
if (!llvm::sys::fs::is_regular_file(CfgFilePath)) {
Diag(diag::err_drv_config_file_not_exist) << CfgFilePath;
return true;
}
return readConfigFile(CfgFilePath);
}
FileSpecifiedExplicitly = true;
}
}
// If config file is not specified explicitly, try to deduce configuration
// from executable name. For instance, an executable 'armv7l-clang' will
// search for config file 'armv7l-clang.cfg'.
if (CfgFileName.empty() && !ClangNameParts.TargetPrefix.empty())
CfgFileName = ClangNameParts.TargetPrefix + '-' + ClangNameParts.ModeSuffix;
if (CfgFileName.empty())
return false;
// Determine architecture part of the file name, if it is present.
StringRef CfgFileArch = CfgFileName;
size_t ArchPrefixLen = CfgFileArch.find('-');
if (ArchPrefixLen == StringRef::npos)
ArchPrefixLen = CfgFileArch.size();
llvm::Triple CfgTriple;
CfgFileArch = CfgFileArch.take_front(ArchPrefixLen);
CfgTriple = llvm::Triple(llvm::Triple::normalize(CfgFileArch));
if (CfgTriple.getArch() == llvm::Triple::ArchType::UnknownArch)
ArchPrefixLen = 0;
if (!StringRef(CfgFileName).endswith(".cfg"))
CfgFileName += ".cfg";
// If the config file name starts with an architecture name and command-line
// options redefine the architecture (with options like -m32, -LE, etc.), try
// finding a new config file with that architecture.
SmallString<128> FixedConfigFile;
size_t FixedArchPrefixLen = 0;
if (ArchPrefixLen) {
// Get architecture name from config file name like 'i386.cfg' or
// 'armv7l-clang.cfg'.
// Check if command line options changes effective triple.
llvm::Triple EffectiveTriple = computeTargetTriple(*this,
CfgTriple.getTriple(), *CLOptions);
if (CfgTriple.getArch() != EffectiveTriple.getArch()) {
FixedConfigFile = EffectiveTriple.getArchName();
FixedArchPrefixLen = FixedConfigFile.size();
// Append the rest of original file name so that file name transforms
// like: i386-clang.cfg -> x86_64-clang.cfg.
if (ArchPrefixLen < CfgFileName.size())
FixedConfigFile += CfgFileName.substr(ArchPrefixLen);
}
}
// Prepare list of directories where config file is searched for.
StringRef CfgFileSearchDirs[] = {UserConfigDir, SystemConfigDir, Dir};
// Try to find config file. First try file with corrected architecture.
llvm::SmallString<128> CfgFilePath;
if (!FixedConfigFile.empty()) {
if (searchForFile(CfgFilePath, CfgFileSearchDirs, FixedConfigFile))
return readConfigFile(CfgFilePath);
// If 'x86_64-clang.cfg' was not found, try 'x86_64.cfg'.
FixedConfigFile.resize(FixedArchPrefixLen);
FixedConfigFile.append(".cfg");
if (searchForFile(CfgFilePath, CfgFileSearchDirs, FixedConfigFile))
return readConfigFile(CfgFilePath);
}
// Then try original file name.
if (searchForFile(CfgFilePath, CfgFileSearchDirs, CfgFileName))
return readConfigFile(CfgFilePath);
// Finally try removing driver mode part: 'x86_64-clang.cfg' -> 'x86_64.cfg'.
if (!ClangNameParts.ModeSuffix.empty() &&
!ClangNameParts.TargetPrefix.empty()) {
CfgFileName.assign(ClangNameParts.TargetPrefix);
CfgFileName.append(".cfg");
if (searchForFile(CfgFilePath, CfgFileSearchDirs, CfgFileName))
return readConfigFile(CfgFilePath);
}
// Report error but only if config file was specified explicitly, by option
// --config. If it was deduced from executable name, it is not an error.
if (FileSpecifiedExplicitly) {
Diag(diag::err_drv_config_file_not_found) << CfgFileName;
for (const StringRef &SearchDir : CfgFileSearchDirs)
if (!SearchDir.empty())
Diag(diag::note_drv_config_file_searched_in) << SearchDir;
return true;
}
return false;
}
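// Illustrative sketch of the config-file name fallback order implemented in
// loadConfigFile() above. buildConfigCandidates is a made-up helper, not part
// of the driver; it only shows which names are tried, most specific first
// (each name is then looked up in the user, system, and driver directories).
#include <iostream>
#include <string>
#include <vector>

std::vector<std::string> buildConfigCandidates(const std::string &ExeArch,
                                               const std::string &EffectiveArch,
                                               const std::string &ModeSuffix) {
  std::vector<std::string> Names;
  if (!EffectiveArch.empty() && EffectiveArch != ExeArch) {
    // Options like -m32/-m64 changed the effective architecture, so the
    // arch-corrected names are preferred.
    Names.push_back(EffectiveArch + "-" + ModeSuffix + ".cfg");
    Names.push_back(EffectiveArch + ".cfg");
  }
  Names.push_back(ExeArch + "-" + ModeSuffix + ".cfg"); // name from executable
  Names.push_back(ExeArch + ".cfg");                    // mode suffix removed
  return Names;
}

int main() {
  // An executable named "i386-clang" invoked with -m64:
  for (const std::string &N : buildConfigCandidates("i386", "x86_64", "clang"))
    std::cout << N << '\n'; // x86_64-clang.cfg, x86_64.cfg, i386-clang.cfg, i386.cfg
  return 0;
}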
Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
llvm::PrettyStackTraceString CrashInfo("Compilation construction");
// FIXME: Handle environment options which affect driver behavior, somewhere
// (client?). GCC_EXEC_PREFIX, LPATH, CC_PRINT_OPTIONS.
// We look for the driver mode option early, because the mode can affect
// how other options are parsed.
auto DriverMode = getDriverMode(ClangExecutable, ArgList.slice(1));
if (!DriverMode.empty())
setDriverMode(DriverMode);
// FIXME: What are we going to do with -V and -b?
// Arguments specified in command line.
bool ContainsError;
CLOptions = std::make_unique<InputArgList>(
ParseArgStrings(ArgList.slice(1), IsCLMode(), ContainsError));
// Try parsing configuration file.
if (!ContainsError)
ContainsError = loadConfigFile();
bool HasConfigFile = !ContainsError && (CfgOptions.get() != nullptr);
// All arguments, from both config file and command line.
InputArgList Args = std::move(HasConfigFile ? std::move(*CfgOptions)
: std::move(*CLOptions));
// The args for config files or /clang: flags belong to different InputArgList
// objects than Args. This copies an Arg from one of those other InputArgLists
// to the ownership of Args.
auto appendOneArg = [&Args](const Arg *Opt, const Arg *BaseArg) {
unsigned Index = Args.MakeIndex(Opt->getSpelling());
Arg *Copy = new llvm::opt::Arg(Opt->getOption(), Args.getArgString(Index),
Index, BaseArg);
Copy->getValues() = Opt->getValues();
if (Opt->isClaimed())
Copy->claim();
Copy->setOwnsValues(Opt->getOwnsValues());
Opt->setOwnsValues(false);
Args.append(Copy);
};
if (HasConfigFile)
for (auto *Opt : *CLOptions) {
if (Opt->getOption().matches(options::OPT_config))
continue;
const Arg *BaseArg = &Opt->getBaseArg();
if (BaseArg == Opt)
BaseArg = nullptr;
appendOneArg(Opt, BaseArg);
}
// In CL mode, look for any pass-through arguments
if (IsCLMode() && !ContainsError) {
SmallVector<const char *, 16> CLModePassThroughArgList;
for (const auto *A : Args.filtered(options::OPT__SLASH_clang)) {
A->claim();
CLModePassThroughArgList.push_back(A->getValue());
}
if (!CLModePassThroughArgList.empty()) {
// Parse any pass through args using default clang processing rather
// than clang-cl processing.
auto CLModePassThroughOptions = std::make_unique<InputArgList>(
ParseArgStrings(CLModePassThroughArgList, false, ContainsError));
if (!ContainsError)
for (auto *Opt : *CLModePassThroughOptions) {
appendOneArg(Opt, nullptr);
}
}
}
// Check for working directory option before accessing any files
if (Arg *WD = Args.getLastArg(options::OPT_working_directory))
if (VFS->setCurrentWorkingDirectory(WD->getValue()))
Diag(diag::err_drv_unable_to_set_working_directory) << WD->getValue();
// FIXME: This stuff needs to go into the Compilation, not the driver.
bool CCCPrintPhases;
// Silence driver warnings if requested
Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w));
// -no-canonical-prefixes is used very early in main.
Args.ClaimAllArgs(options::OPT_no_canonical_prefixes);
// -f(no-)integrated-cc1 is also used very early in main.
Args.ClaimAllArgs(options::OPT_fintegrated_cc1);
Args.ClaimAllArgs(options::OPT_fno_integrated_cc1);
// Ignore -pipe.
Args.ClaimAllArgs(options::OPT_pipe);
// Extract -ccc args.
//
// FIXME: We need to figure out where this behavior should live. Most of it
// should be outside in the client; the parts that aren't should have proper
// options, either by introducing new ones or by overloading gcc ones like -V
// or -b.
CCCPrintPhases = Args.hasArg(options::OPT_ccc_print_phases);
CCCPrintBindings = Args.hasArg(options::OPT_ccc_print_bindings);
if (const Arg *A = Args.getLastArg(options::OPT_ccc_gcc_name))
CCCGenericGCCName = A->getValue();
GenReproducer = Args.hasFlag(options::OPT_gen_reproducer,
options::OPT_fno_crash_diagnostics,
!!::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"));
// Process -fproc-stat-report options.
if (const Arg *A = Args.getLastArg(options::OPT_fproc_stat_report_EQ)) {
CCPrintProcessStats = true;
CCPrintStatReportFilename = A->getValue();
}
if (Args.hasArg(options::OPT_fproc_stat_report))
CCPrintProcessStats = true;
// FIXME: TargetTriple is used by the target-prefixed calls to as/ld
// and getToolChain is const.
if (IsCLMode()) {
// clang-cl targets MSVC-style Win32.
llvm::Triple T(TargetTriple);
T.setOS(llvm::Triple::Win32);
T.setVendor(llvm::Triple::PC);
T.setEnvironment(llvm::Triple::MSVC);
T.setObjectFormat(llvm::Triple::COFF);
TargetTriple = T.str();
}
if (const Arg *A = Args.getLastArg(options::OPT_target))
TargetTriple = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT_ccc_install_dir))
Dir = InstalledDir = A->getValue();
for (const Arg *A : Args.filtered(options::OPT_B)) {
A->claim();
PrefixDirs.push_back(A->getValue(0));
}
if (Optional<std::string> CompilerPathValue =
llvm::sys::Process::GetEnv("COMPILER_PATH")) {
StringRef CompilerPath = *CompilerPathValue;
while (!CompilerPath.empty()) {
std::pair<StringRef, StringRef> Split =
CompilerPath.split(llvm::sys::EnvPathSeparator);
PrefixDirs.push_back(std::string(Split.first));
CompilerPath = Split.second;
}
}
if (const Arg *A = Args.getLastArg(options::OPT__sysroot_EQ))
SysRoot = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT__dyld_prefix_EQ))
DyldPrefix = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT_resource_dir))
ResourceDir = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT_save_temps_EQ)) {
SaveTemps = llvm::StringSwitch<SaveTempsMode>(A->getValue())
.Case("cwd", SaveTempsCwd)
.Case("obj", SaveTempsObj)
.Default(SaveTempsCwd);
}
setLTOMode(Args);
// Process -fembed-bitcode= flags.
if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) {
StringRef Name = A->getValue();
unsigned Model = llvm::StringSwitch<unsigned>(Name)
.Case("off", EmbedNone)
.Case("all", EmbedBitcode)
.Case("bitcode", EmbedBitcode)
.Case("marker", EmbedMarker)
.Default(~0U);
if (Model == ~0U) {
Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
<< Name;
} else
BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
}
std::unique_ptr<llvm::opt::InputArgList> UArgs =
std::make_unique<InputArgList>(std::move(Args));
// Perform the default argument translations.
DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs);
// Owned by the host.
const ToolChain &TC = getToolChain(
*UArgs, computeTargetTriple(*this, TargetTriple, *UArgs));
// The compilation takes ownership of Args.
Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs,
ContainsError);
if (!HandleImmediateArgs(*C))
return C;
// Construct the list of inputs.
InputList Inputs;
BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs);
// Populate the tool chains for the offloading devices, if any.
CreateOffloadingDeviceToolChains(*C, Inputs);
// Construct the list of abstract actions to perform for this compilation. On
// MachO targets this uses the driver-driver and universal actions.
if (TC.getTriple().isOSBinFormatMachO())
BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
else
BuildActions(*C, C->getArgs(), Inputs, C->getActions());
if (CCCPrintPhases) {
PrintActions(*C);
return C;
}
BuildJobs(*C);
return C;
}
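// Print the arguments in \p Args as a single line, shell-quoting each one.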
static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) {
llvm::opt::ArgStringList ASL;
for (const auto *A : Args)
A->render(Args, ASL);
for (auto I = ASL.begin(), E = ASL.end(); I != E; ++I) {
if (I != ASL.begin())
OS << ' ';
llvm::sys::printArg(OS, *I, true);
}
OS << '\n';
}
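// Look in the Darwin DiagnosticReports directory for the most recent .crash
// file whose parent process matches this driver invocation and, if one is
// found, copy it to \p ReproCrashFilename. Returns true on success.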
bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename,
SmallString<128> &CrashDiagDir) {
using namespace llvm::sys;
assert(llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin() &&
"Only knows about .crash files on Darwin");
// The .crash file can be found at ~/Library/Logs/DiagnosticReports/
// (or /Library/Logs/DiagnosticReports for root) and has the filename pattern
// clang-<VERSION>_<YYYY-MM-DD-HHMMSS>_<hostname>.crash.
path::home_directory(CrashDiagDir);
if (CrashDiagDir.startswith("/var/root"))
CrashDiagDir = "/";
path::append(CrashDiagDir, "Library/Logs/DiagnosticReports");
int PID =
#if LLVM_ON_UNIX
getpid();
#else
0;
#endif
std::error_code EC;
fs::file_status FileStatus;
TimePoint<> LastAccessTime;
SmallString<128> CrashFilePath;
// Lookup the .crash files and get the one generated by a subprocess spawned
// by this driver invocation.
for (fs::directory_iterator File(CrashDiagDir, EC), FileEnd;
File != FileEnd && !EC; File.increment(EC)) {
StringRef FileName = path::filename(File->path());
if (!FileName.startswith(Name))
continue;
if (fs::status(File->path(), FileStatus))
continue;
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CrashFile =
llvm::MemoryBuffer::getFile(File->path());
if (!CrashFile)
continue;
// The first line should start with "Process:", otherwise this isn't a real
// .crash file.
StringRef Data = CrashFile.get()->getBuffer();
if (!Data.startswith("Process:"))
continue;
// Parse parent process pid line, e.g: "Parent Process: clang-4.0 [79141]"
size_t ParentProcPos = Data.find("Parent Process:");
if (ParentProcPos == StringRef::npos)
continue;
size_t LineEnd = Data.find_first_of("\n", ParentProcPos);
if (LineEnd == StringRef::npos)
continue;
StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim();
int OpenBracket = -1, CloseBracket = -1;
for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) {
if (ParentProcess[i] == '[')
OpenBracket = i;
if (ParentProcess[i] == ']')
CloseBracket = i;
}
// Extract the parent process PID from the .crash file and check whether
// it matches this driver invocation pid.
int CrashPID;
if (OpenBracket < 0 || CloseBracket < 0 ||
ParentProcess.slice(OpenBracket + 1, CloseBracket)
.getAsInteger(10, CrashPID) || CrashPID != PID) {
continue;
}
// Found a .crash file matching the driver pid. To avoid getting an older
// and misleading crash file, continue looking for the most recent.
// FIXME: the driver can dispatch multiple cc1 invocations, leading to
// multiple crashes pointing to the same parent process. Since the driver
// does not collect pid information for the dispatched invocation there's
// currently no way to distinguish among them.
const auto FileAccessTime = FileStatus.getLastModificationTime();
if (FileAccessTime > LastAccessTime) {
CrashFilePath.assign(File->path());
LastAccessTime = FileAccessTime;
}
}
// If found, copy it over to the location of other reproducer files.
if (!CrashFilePath.empty()) {
EC = fs::copy_file(CrashFilePath, ReproCrashFilename);
if (EC)
return false;
return true;
}
return false;
}
// When clang crashes, produce diagnostic information including the fully
// preprocessed source file(s). Request that the developer attach the
// diagnostic information to a bug report.
void Driver::generateCompilationDiagnostics(
Compilation &C, const Command &FailingCommand,
StringRef AdditionalInformation, CompilationDiagnosticReport *Report) {
if (C.getArgs().hasArg(options::OPT_fno_crash_diagnostics))
return;
// Don't try to generate diagnostics for link or dsymutil jobs.
if (FailingCommand.getCreator().isLinkJob() ||
FailingCommand.getCreator().isDsymutilJob())
return;
// Print the version of the compiler.
PrintVersion(C, llvm::errs());
// Suppress driver output and emit preprocessor output to temp file.
Mode = CPPMode;
CCGenDiagnostics = true;
// Save the original job command(s).
Command Cmd = FailingCommand;
// Keep track of whether we produce any errors while trying to produce
// preprocessed sources.
DiagnosticErrorTrap Trap(Diags);
// Suppress tool output.
C.initCompilationForDiagnostics();
// Construct the list of inputs.
InputList Inputs;
BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs);
for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) {
bool IgnoreInput = false;
// Ignore input from stdin or any inputs that cannot be preprocessed.
// Check type first as not all linker inputs have a value.
if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
IgnoreInput = true;
} else if (!strcmp(it->second->getValue(), "-")) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - "
"ignoring input from stdin.";
IgnoreInput = true;
}
if (IgnoreInput) {
it = Inputs.erase(it);
ie = Inputs.end();
} else {
++it;
}
}
if (Inputs.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - "
"no preprocessable inputs.";
return;
}
// Don't attempt to generate preprocessed files if multiple -arch options are
// used, unless they're all duplicates.
llvm::StringSet<> ArchNames;
for (const Arg *A : C.getArgs()) {
if (A->getOption().matches(options::OPT_arch)) {
StringRef ArchName = A->getValue();
ArchNames.insert(ArchName);
}
}
if (ArchNames.size() > 1) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - cannot generate "
"preprocessed source with multiple -arch options.";
return;
}
// Construct the list of abstract actions to perform for this compilation. On
// Darwin OSes this uses the driver-driver and builds universal actions.
const ToolChain &TC = C.getDefaultToolChain();
if (TC.getTriple().isOSBinFormatMachO())
BuildUniversalActions(C, TC, Inputs);
else
BuildActions(C, C.getArgs(), Inputs, C.getActions());
BuildJobs(C);
// If there were errors building the compilation, quit now.
if (Trap.hasErrorOccurred()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
// Generate preprocessed output.
SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
C.ExecuteJobs(C.getJobs(), FailingCommands);
// If any of the preprocessing commands failed, clean up and exit.
if (!FailingCommands.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
const ArgStringList &TempFiles = C.getTempFiles();
if (TempFiles.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "\n********************\n\n"
"PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n"
"Preprocessed source(s) and associated run script(s) are located at:";
SmallString<128> VFS;
SmallString<128> ReproCrashFilename;
for (const char *TempFile : TempFiles) {
Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile;
if (Report)
Report->TemporaryFiles.push_back(TempFile);
if (ReproCrashFilename.empty()) {
ReproCrashFilename = TempFile;
llvm::sys::path::replace_extension(ReproCrashFilename, ".crash");
}
if (StringRef(TempFile).endswith(".cache")) {
// In some cases (modules) we'll dump extra data to help with reproducing
// the crash into a directory next to the output.
VFS = llvm::sys::path::filename(TempFile);
llvm::sys::path::append(VFS, "vfs", "vfs.yaml");
}
}
// Assume associated files are based off of the first temporary file.
CrashReportInfo CrashInfo(TempFiles[0], VFS);
llvm::SmallString<128> Script(CrashInfo.Filename);
llvm::sys::path::replace_extension(Script, "sh");
std::error_code EC;
llvm::raw_fd_ostream ScriptOS(Script, EC, llvm::sys::fs::CD_CreateNew,
llvm::sys::fs::FA_Write,
llvm::sys::fs::OF_Text);
if (EC) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating run script: " << Script << " " << EC.message();
} else {
ScriptOS << "# Crash reproducer for " << getClangFullVersion() << "\n"
<< "# Driver args: ";
printArgList(ScriptOS, C.getInputArgs());
ScriptOS << "# Original command: ";
Cmd.Print(ScriptOS, "\n", /*Quote=*/true);
Cmd.Print(ScriptOS, "\n", /*Quote=*/true, &CrashInfo);
if (!AdditionalInformation.empty())
ScriptOS << "\n# Additional information: " << AdditionalInformation
<< "\n";
if (Report)
Report->TemporaryFiles.push_back(std::string(Script.str()));
Diag(clang::diag::note_drv_command_failed_diag_msg) << Script;
}
// On darwin, provide information about the .crash diagnostic report.
if (llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin()) {
SmallString<128> CrashDiagDir;
if (getCrashDiagnosticFile(ReproCrashFilename, CrashDiagDir)) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< ReproCrashFilename.str();
} else { // Suggest a directory for the user to look for .crash files.
llvm::sys::path::append(CrashDiagDir, Name);
CrashDiagDir += "_<YYYY-MM-DD-HHMMSS>_<hostname>.crash";
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Crash backtrace is located in";
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< CrashDiagDir.str();
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "(choose the .crash file that corresponds to your crash)";
}
}
for (const auto &A : C.getArgs().filtered(options::OPT_frewrite_map_file_EQ))
Diag(clang::diag::note_drv_command_failed_diag_msg) << A->getValue();
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "\n\n********************";
}
void Driver::setUpResponseFiles(Compilation &C, Command &Cmd) {
// Since commandLineFitsWithinSystemLimits() may underestimate the system's
// capacity if the tool does not support response files, there is a chance
// that things will just work without a response file, so we silently just
// skip it.
if (Cmd.getResponseFileSupport().ResponseKind ==
ResponseFileSupport::RF_None ||
llvm::sys::commandLineFitsWithinSystemLimits(Cmd.getExecutable(),
Cmd.getArguments()))
return;
std::string TmpName = GetTemporaryPath("response", "txt");
Cmd.setResponseFile(C.addTempFile(C.getArgs().MakeArgString(TmpName)));
}
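// Run the jobs in the compilation (or just print them for -###), recording
// any failing commands in \p FailingCommands and diagnosing abnormal
// failures.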
int Driver::ExecuteCompilation(
Compilation &C,
SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands) {
// Just print if -### was present.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
C.getJobs().Print(llvm::errs(), "\n", true);
return 0;
}
// If there were errors building the compilation, quit now.
if (Diags.hasErrorOccurred())
return 1;
// Set up response file names for each command, if necessary
for (auto &Job : C.getJobs())
setUpResponseFiles(C, Job);
C.ExecuteJobs(C.getJobs(), FailingCommands);
// If the command succeeded, we are done.
if (FailingCommands.empty())
return 0;
// Otherwise, remove result files and print extra information about abnormal
// failures.
int Res = 0;
for (const auto &CmdPair : FailingCommands) {
int CommandRes = CmdPair.first;
const Command *FailingCommand = CmdPair.second;
// Remove result files if we're not saving temps.
if (!isSaveTempsEnabled()) {
const JobAction *JA = cast<JobAction>(&FailingCommand->getSource());
C.CleanupFileMap(C.getResultFiles(), JA, true);
// Failure result files are valid unless we crashed.
if (CommandRes < 0)
C.CleanupFileMap(C.getFailureResultFiles(), JA, true);
}
#if LLVM_ON_UNIX
// llvm/lib/Support/Unix/Signals.inc will exit with a special return code
// for SIGPIPE. Do not print diagnostics for this case.
if (CommandRes == EX_IOERR) {
Res = CommandRes;
continue;
}
#endif
// Print extra information about abnormal failures, if possible.
//
// This is ad-hoc, but we don't want to be excessively noisy. If the result
// status was 1, assume the command failed normally. In particular, if it
// was the compiler then assume it gave a reasonable error code. Failures
// in other tools are less common, and they generally have worse
// diagnostics, so always print the diagnostic there.
const Tool &FailingTool = FailingCommand->getCreator();
if (!FailingCommand->getCreator().hasGoodDiagnostics() || CommandRes != 1) {
// FIXME: See FIXME above regarding result code interpretation.
if (CommandRes < 0)
Diag(clang::diag::err_drv_command_signalled)
<< FailingTool.getShortName();
else
Diag(clang::diag::err_drv_command_failed)
<< FailingTool.getShortName() << CommandRes;
}
}
return Res;
}
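// Print the --help listing, filtering options according to the current
// driver mode and whether hidden options were requested.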
void Driver::PrintHelp(bool ShowHidden) const {
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks(IsCLMode());
ExcludedFlagsBitmask |= options::NoDriverOption;
if (!ShowHidden)
ExcludedFlagsBitmask |= HelpHidden;
if (IsFlangMode())
IncludedFlagsBitmask |= options::FlangOption;
else
ExcludedFlagsBitmask |= options::FlangOnlyOption;
std::string Usage = llvm::formatv("{0} [options] file...", Name).str();
getOpts().printHelp(llvm::outs(), Usage.c_str(), DriverTitle.c_str(),
IncludedFlagsBitmask, ExcludedFlagsBitmask,
/*ShowAllAliases=*/false);
}
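// Print the compiler version, target triple, thread model and install
// directory; used by both --version and -v.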
void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
if (IsFlangMode()) {
OS << getClangToolFullVersion("flang-new") << '\n';
} else {
// FIXME: The following handlers should use a callback mechanism, we don't
// know what the client would like to do.
OS << getClangFullVersion() << '\n';
}
const ToolChain &TC = C.getDefaultToolChain();
OS << "Target: " << TC.getTripleString() << '\n';
// Print the threading model.
if (Arg *A = C.getArgs().getLastArg(options::OPT_mthread_model)) {
// Don't print if the ToolChain would have barfed on it already
if (TC.isThreadModelSupported(A->getValue()))
OS << "Thread model: " << A->getValue();
} else
OS << "Thread model: " << TC.getThreadModel();
OS << '\n';
// Print out the install directory.
OS << "InstalledDir: " << InstalledDir << '\n';
// If configuration file was used, print its path.
if (!ConfigFile.empty())
OS << "Configuration file: " << ConfigFile << '\n';
}
/// PrintDiagnosticCategories - Implement the --print-diagnostic-categories
/// option.
static void PrintDiagnosticCategories(raw_ostream &OS) {
// Skip the empty category.
for (unsigned i = 1, max = DiagnosticIDs::getNumberOfCategories(); i != max;
++i)
OS << i << ',' << DiagnosticIDs::getCategoryNameFromID(i) << '\n';
}
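// Implement --autocomplete: print the option or value completions for the
// comma-separated flags passed in by the shell completion script.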
void Driver::HandleAutocompletions(StringRef PassedFlags) const {
if (PassedFlags == "")
return;
// Print out all options that start with a given argument. This is used for
// shell autocompletion.
std::vector<std::string> SuggestedCompletions;
std::vector<std::string> Flags;
unsigned int DisableFlags =
options::NoDriverOption | options::Unsupported | options::Ignored;
// Make sure that Flang-only options don't pollute the Clang output
// TODO: Make sure that Clang-only options don't pollute Flang output
if (!IsFlangMode())
DisableFlags |= options::FlangOnlyOption;
// Distinguish "--autocomplete=-someflag" and "--autocomplete=-someflag,"
// because the latter indicates that the user put a space before pressing
// tab, which should end up in a file completion.
const bool HasSpace = PassedFlags.endswith(",");
// Parse PassedFlags by "," as all the command-line flags are passed to this
// function separated by ","
StringRef TargetFlags = PassedFlags;
while (TargetFlags != "") {
StringRef CurFlag;
std::tie(CurFlag, TargetFlags) = TargetFlags.split(",");
Flags.push_back(std::string(CurFlag));
}
// We want to show cc1-only options only when clang is invoked with -cc1 or
// -Xclang.
if (llvm::is_contained(Flags, "-Xclang") || llvm::is_contained(Flags, "-cc1"))
DisableFlags &= ~options::NoDriverOption;
const llvm::opt::OptTable &Opts = getOpts();
StringRef Cur;
Cur = Flags.at(Flags.size() - 1);
StringRef Prev;
if (Flags.size() >= 2) {
Prev = Flags.at(Flags.size() - 2);
SuggestedCompletions = Opts.suggestValueCompletions(Prev, Cur);
}
if (SuggestedCompletions.empty())
SuggestedCompletions = Opts.suggestValueCompletions(Cur, "");
// If Flags were empty, it means the user typed `clang [tab]` where we should
// list all possible flags. If there was no value completion and the user
// pressed tab after a space, we should fall back to a file completion.
// We're printing a newline to be consistent with what we print at the end of
// this function.
if (SuggestedCompletions.empty() && HasSpace && !Flags.empty()) {
llvm::outs() << '\n';
return;
}
// When flag ends with '=' and there was no value completion, return empty
// string and fall back to the file autocompletion.
if (SuggestedCompletions.empty() && !Cur.endswith("=")) {
// If the flag is in the form of "--autocomplete=-foo",
// we were requested to print out all option names that start with "-foo".
// For example, "--autocomplete=-fsyn" is expanded to "-fsyntax-only".
SuggestedCompletions = Opts.findByPrefix(Cur, DisableFlags);
// We have to query the -W flags manually as they're not in the OptTable.
// TODO: Find a good way to add them to OptTable instead and then remove
// this code.
for (StringRef S : DiagnosticIDs::getDiagnosticFlags())
if (S.startswith(Cur))
SuggestedCompletions.push_back(std::string(S));
}
// Sort the autocomplete candidates so that shells print them out in a
// deterministic order. We could sort in any way, but we chose
// case-insensitive sorting for consistency with the -help option,
// which prints out options in case-insensitive alphabetical order.
llvm::sort(SuggestedCompletions, [](StringRef A, StringRef B) {
if (int X = A.compare_insensitive(B))
return X < 0;
return A.compare(B) > 0;
});
llvm::outs() << llvm::join(SuggestedCompletions, "\n") << '\n';
}
bool Driver::HandleImmediateArgs(const Compilation &C) {
// The order these options are handled in gcc is all over the place, but we
// don't expect inconsistencies w.r.t. that to matter in practice.
if (C.getArgs().hasArg(options::OPT_dumpmachine)) {
llvm::outs() << C.getDefaultToolChain().getTripleString() << '\n';
return false;
}
if (C.getArgs().hasArg(options::OPT_dumpversion)) {
// Since -dumpversion is only implemented for pedantic GCC compatibility, we
// return an answer which matches our definition of __VERSION__.
llvm::outs() << CLANG_VERSION_STRING << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT__print_diagnostic_categories)) {
PrintDiagnosticCategories(llvm::outs());
return false;
}
if (C.getArgs().hasArg(options::OPT_help) ||
C.getArgs().hasArg(options::OPT__help_hidden)) {
PrintHelp(C.getArgs().hasArg(options::OPT__help_hidden));
return false;
}
if (C.getArgs().hasArg(options::OPT__version)) {
// Follow gcc behavior and use stdout for --version and stderr for -v.
PrintVersion(C, llvm::outs());
return false;
}
if (C.getArgs().hasArg(options::OPT_v) ||
C.getArgs().hasArg(options::OPT__HASH_HASH_HASH) ||
C.getArgs().hasArg(options::OPT_print_supported_cpus)) {
PrintVersion(C, llvm::errs());
SuppressMissingInputWarning = true;
}
if (C.getArgs().hasArg(options::OPT_v)) {
if (!SystemConfigDir.empty())
llvm::errs() << "System configuration file directory: "
<< SystemConfigDir << "\n";
if (!UserConfigDir.empty())
llvm::errs() << "User configuration file directory: "
<< UserConfigDir << "\n";
}
const ToolChain &TC = C.getDefaultToolChain();
if (C.getArgs().hasArg(options::OPT_v))
TC.printVerboseInfo(llvm::errs());
if (C.getArgs().hasArg(options::OPT_print_resource_dir)) {
llvm::outs() << ResourceDir << '\n';
return false;
}
if (C.getArgs().hasArg(options::OPT_print_search_dirs)) {
llvm::outs() << "programs: =";
bool separator = false;
// Print -B and COMPILER_PATH.
for (const std::string &Path : PrefixDirs) {
if (separator)
llvm::outs() << llvm::sys::EnvPathSeparator;
llvm::outs() << Path;
separator = true;
}
for (const std::string &Path : TC.getProgramPaths()) {
if (separator)
llvm::outs() << llvm::sys::EnvPathSeparator;
llvm::outs() << Path;
separator = true;
}
llvm::outs() << "\n";
llvm::outs() << "libraries: =" << ResourceDir;
StringRef sysroot = C.getSysRoot();
for (const std::string &Path : TC.getFilePaths()) {
// Always print a separator. ResourceDir was the first item shown.
llvm::outs() << llvm::sys::EnvPathSeparator;
// Interpretation of leading '=' is needed only for NetBSD.
if (Path[0] == '=')
llvm::outs() << sysroot << Path.substr(1);
else
llvm::outs() << Path;
}
llvm::outs() << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_runtime_dir)) {
std::string CandidateRuntimePath = TC.getRuntimePath();
if (getVFS().exists(CandidateRuntimePath))
llvm::outs() << CandidateRuntimePath << '\n';
else
llvm::outs() << TC.getCompilerRTPath() << '\n';
return false;
}
// FIXME: The following handlers should use a callback mechanism, we don't
// know what the client would like to do.
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_file_name_EQ)) {
llvm::outs() << GetFilePath(A->getValue(), TC) << "\n";
return false;
}
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_prog_name_EQ)) {
StringRef ProgName = A->getValue();
// Null program name cannot have a path.
if (! ProgName.empty())
llvm::outs() << GetProgramPath(ProgName, TC);
llvm::outs() << "\n";
return false;
}
if (Arg *A = C.getArgs().getLastArg(options::OPT_autocomplete)) {
StringRef PassedFlags = A->getValue();
HandleAutocompletions(PassedFlags);
return false;
}
if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
RegisterEffectiveTriple TripleRAII(TC, Triple);
switch (RLT) {
case ToolChain::RLT_CompilerRT:
llvm::outs() << TC.getCompilerRT(C.getArgs(), "builtins") << "\n";
break;
case ToolChain::RLT_Libgcc:
llvm::outs() << GetFilePath("libgcc.a", TC) << "\n";
break;
}
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multi_lib)) {
for (const Multilib &Multilib : TC.getMultilibs())
llvm::outs() << Multilib << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multi_directory)) {
const Multilib &Multilib = TC.getMultilib();
if (Multilib.gccSuffix().empty())
llvm::outs() << ".\n";
else {
StringRef Suffix(Multilib.gccSuffix());
assert(Suffix.front() == '/');
llvm::outs() << Suffix.substr(1) << "\n";
}
return false;
}
if (C.getArgs().hasArg(options::OPT_print_target_triple)) {
llvm::outs() << TC.getTripleString() << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_effective_triple)) {
const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
llvm::outs() << Triple.getTriple() << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multiarch)) {
llvm::outs() << TC.getMultiarchTriple(*this, TC.getTriple(), SysRoot)
<< "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_targets)) {
llvm::TargetRegistry::printRegisteredTargetsForVersion(llvm::outs());
return false;
}
return true;
}
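// Sibling relationship of an action to the one printed before it; used by
// PrintActions1 to pick the tree-drawing prefix for each line.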
enum {
TopLevelAction = 0,
HeadSibAction = 1,
OtherSibAction = 2,
};
// Display an action graph human-readably. Action A is the "sink" node
// and latest-occurring action. Traversal is in pre-order, visiting the
// inputs to each action before printing the action itself.
static unsigned PrintActions1(const Compilation &C, Action *A,
std::map<Action *, unsigned> &Ids,
Twine Indent = {}, int Kind = TopLevelAction) {
if (Ids.count(A)) // A was already visited.
return Ids[A];
std::string str;
llvm::raw_string_ostream os(str);
auto getSibIndent = [](int K) -> Twine {
return (K == HeadSibAction) ? " " : (K == OtherSibAction) ? "| " : "";
};
Twine SibIndent = Indent + getSibIndent(Kind);
int SibKind = HeadSibAction;
os << Action::getClassName(A->getKind()) << ", ";
if (InputAction *IA = dyn_cast<InputAction>(A)) {
os << "\"" << IA->getInputArg().getValue() << "\"";
} else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
os << '"' << BIA->getArchName() << '"' << ", {"
<< PrintActions1(C, *BIA->input_begin(), Ids, SibIndent, SibKind) << "}";
} else if (OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
bool IsFirst = true;
OA->doOnEachDependence(
[&](Action *A, const ToolChain *TC, const char *BoundArch) {
assert(TC && "Unknown host toolchain");
// E.g. for two CUDA device dependences whose bound arch is sm_20 and
// sm_35 this will generate:
// "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device"
// (nvptx64-nvidia-cuda:sm_35) {#ID}
if (!IsFirst)
os << ", ";
os << '"';
os << A->getOffloadingKindPrefix();
os << " (";
os << TC->getTriple().normalize();
if (BoundArch)
os << ":" << BoundArch;
os << ")";
os << '"';
os << " {" << PrintActions1(C, A, Ids, SibIndent, SibKind) << "}";
IsFirst = false;
SibKind = OtherSibAction;
});
} else {
const ActionList *AL = &A->getInputs();
if (AL->size()) {
const char *Prefix = "{";
for (Action *PreRequisite : *AL) {
os << Prefix << PrintActions1(C, PreRequisite, Ids, SibIndent, SibKind);
Prefix = ", ";
SibKind = OtherSibAction;
}
os << "}";
} else
os << "{}";
}
// Append offload info for all options other than the offloading action
// itself (e.g. (cuda-device, sm_20) or (cuda-host)).
std::string offload_str;
llvm::raw_string_ostream offload_os(offload_str);
if (!isa<OffloadAction>(A)) {
auto S = A->getOffloadingKindPrefix();
if (!S.empty()) {
offload_os << ", (" << S;
if (A->getOffloadingArch())
offload_os << ", " << A->getOffloadingArch();
offload_os << ")";
}
}
auto getSelfIndent = [](int K) -> Twine {
return (K == HeadSibAction) ? "+- " : (K == OtherSibAction) ? "|- " : "";
};
unsigned Id = Ids.size();
Ids[A] = Id;
llvm::errs() << Indent + getSelfIndent(Kind) << Id << ": " << os.str() << ", "
<< types::getTypeName(A->getType()) << offload_os.str() << "\n";
return Id;
}
// Print the action graphs in a compilation C.
// For example "clang -c file1.c file2.c" is composed of two subgraphs.
void Driver::PrintActions(const Compilation &C) const {
std::map<Action *, unsigned> Ids;
for (Action *A : C.getActions())
PrintActions1(C, A, Ids);
}
/// Check whether the given input tree contains any compilation or
/// assembly actions.
static bool ContainsCompileOrAssembleAction(const Action *A) {
if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A) ||
isa<AssembleJobAction>(A))
return true;
for (const Action *Input : A->inputs())
if (ContainsCompileOrAssembleAction(Input))
return true;
return false;
}
void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
const InputList &BAInputs) const {
DerivedArgList &Args = C.getArgs();
ActionList &Actions = C.getActions();
llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
// Collect the list of architectures. Duplicates are allowed, but should only
// be handled once (in the order seen).
llvm::StringSet<> ArchNames;
SmallVector<const char *, 4> Archs;
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_arch)) {
// Validate the option here; we don't save the type here because its
// particular spelling may participate in other driver choices.
llvm::Triple::ArchType Arch =
tools::darwin::getArchTypeForMachOArchName(A->getValue());
if (Arch == llvm::Triple::UnknownArch) {
Diag(clang::diag::err_drv_invalid_arch_name) << A->getAsString(Args);
continue;
}
A->claim();
if (ArchNames.insert(A->getValue()).second)
Archs.push_back(A->getValue());
}
}
// When there is no explicit arch for this platform, make sure we still bind
// the architecture (to the default) so that -Xarch_ is handled correctly.
if (!Archs.size())
Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
ActionList SingleActions;
BuildActions(C, Args, BAInputs, SingleActions);
// Add in arch bindings for every top level action, as well as lipo and
// dsymutil steps if needed.
for (Action* Act : SingleActions) {
// Make sure we can lipo this kind of output. If not (and it is an actual
// output) then we disallow, since we can't create an output file with the
// right name without overwriting it. We could remove this oddity by just
// changing the output names to include the arch, which would also fix
// -save-temps. Compatibility wins for now.
if (Archs.size() > 1 && !types::canLipoType(Act->getType()))
Diag(clang::diag::err_drv_invalid_output_with_multiple_archs)
<< types::getTypeName(Act->getType());
ActionList Inputs;
for (unsigned i = 0, e = Archs.size(); i != e; ++i)
Inputs.push_back(C.MakeAction<BindArchAction>(Act, Archs[i]));
// Lipo if necessary, we do it this way because we need to set the arch flag
// so that -Xarch_ gets overwritten.
if (Inputs.size() == 1 || Act->getType() == types::TY_Nothing)
Actions.append(Inputs.begin(), Inputs.end());
else
Actions.push_back(C.MakeAction<LipoJobAction>(Inputs, Act->getType()));
// Handle debug info queries.
Arg *A = Args.getLastArg(options::OPT_g_Group);
bool enablesDebugInfo = A && !A->getOption().matches(options::OPT_g0) &&
!A->getOption().matches(options::OPT_gstabs);
if ((enablesDebugInfo || willEmitRemarks(Args)) &&
ContainsCompileOrAssembleAction(Actions.back())) {
// Add a 'dsymutil' step if necessary, when debug info is enabled and we
// have a compile input. We need to run 'dsymutil' ourselves in such cases
// because the debug info will refer to a temporary object file which
// will be removed at the end of the compilation process.
if (Act->getType() == types::TY_Image) {
ActionList Inputs;
Inputs.push_back(Actions.back());
Actions.pop_back();
Actions.push_back(
C.MakeAction<DsymutilJobAction>(Inputs, types::TY_dSYM));
}
// Verify the debug info output.
if (Args.hasArg(options::OPT_verify_debug_info)) {
Action* LastAction = Actions.back();
Actions.pop_back();
Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>(
LastAction, types::TY_Nothing));
}
}
}
}
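// Check that the input file \p Value exists or can plausibly be found later
// (e.g. via %LIB% or /link arguments in CL mode); otherwise emit a driver
// error, possibly with a spelling suggestion, and return false.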
bool Driver::DiagnoseInputExistence(const DerivedArgList &Args, StringRef Value,
types::ID Ty, bool TypoCorrect) const {
if (!getCheckInputsExist())
return true;
// stdin always exists.
if (Value == "-")
return true;
if (getVFS().exists(Value))
return true;
if (IsCLMode()) {
if (!llvm::sys::path::is_absolute(Twine(Value)) &&
llvm::sys::Process::FindInEnvPath("LIB", Value, ';'))
return true;
if (Args.hasArg(options::OPT__SLASH_link) && Ty == types::TY_Object) {
// Arguments to the /link flag might cause the linker to search for object
// and library files in paths we don't know about. Don't error in such
// cases.
return true;
}
}
if (TypoCorrect) {
// Check if the filename is a typo for an option flag. OptTable thinks
// that all args that are not known options and that start with / are
// filenames, but e.g. `/diagnostic:caret` is more likely a typo for
// the option `/diagnostics:caret` than a reference to a file in the root
// directory.
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks(IsCLMode());
std::string Nearest;
if (getOpts().findNearest(Value, Nearest, IncludedFlagsBitmask,
ExcludedFlagsBitmask) <= 1) {
Diag(clang::diag::err_drv_no_such_file_with_suggestion)
<< Value << Nearest;
return false;
}
}
Diag(clang::diag::err_drv_no_such_file) << Value;
return false;
}
// Construct the list of inputs and their types.
void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
InputList &Inputs) const {
const llvm::opt::OptTable &Opts = getOpts();
// Track the current user specified (-x) input. We also explicitly track the
// argument used to set the type; we only want to claim the type when we
// actually use it, so we warn about unused -x arguments.
types::ID InputType = types::TY_Nothing;
Arg *InputTypeArg = nullptr;
// The last /TC or /TP option sets the input type to C or C++ globally.
if (Arg *TCTP = Args.getLastArgNoClaim(options::OPT__SLASH_TC,
options::OPT__SLASH_TP)) {
InputTypeArg = TCTP;
InputType = TCTP->getOption().matches(options::OPT__SLASH_TC)
? types::TY_C
: types::TY_CXX;
Arg *Previous = nullptr;
bool ShowNote = false;
for (Arg *A :
Args.filtered(options::OPT__SLASH_TC, options::OPT__SLASH_TP)) {
if (Previous) {
Diag(clang::diag::warn_drv_overriding_flag_option)
<< Previous->getSpelling() << A->getSpelling();
ShowNote = true;
}
Previous = A;
}
if (ShowNote)
Diag(clang::diag::note_drv_t_option_is_global);
// No driver mode exposes -x and /TC or /TP; we don't support mixing them.
assert(!Args.hasArg(options::OPT_x) && "-x and /TC or /TP is not allowed");
}
for (Arg *A : Args) {
if (A->getOption().getKind() == Option::InputClass) {
const char *Value = A->getValue();
types::ID Ty = types::TY_INVALID;
// Infer the input type if necessary.
if (InputType == types::TY_Nothing) {
// If there was an explicit arg for this, claim it.
if (InputTypeArg)
InputTypeArg->claim();
// stdin must be handled specially.
if (memcmp(Value, "-", 2) == 0) {
if (IsFlangMode()) {
Ty = types::TY_Fortran;
} else {
// If running with -E, treat as a C input (this changes the
// builtin macros, for example). This may be overridden by -ObjC
// below.
//
// Otherwise emit an error but still use a valid type to avoid
// spurious errors (e.g., no inputs).
if (!Args.hasArgNoClaim(options::OPT_E) && !CCCIsCPP())
Diag(IsCLMode() ? clang::diag::err_drv_unknown_stdin_type_clang_cl
: clang::diag::err_drv_unknown_stdin_type);
Ty = types::TY_C;
}
} else {
// Otherwise lookup by extension.
// Fallback is C if invoked as C preprocessor, C++ if invoked with
// clang-cl /E, or Object otherwise.
// We use a host hook here because Darwin at least has its own
// idea of what .s is.
if (const char *Ext = strrchr(Value, '.'))
Ty = TC.LookupTypeForExtension(Ext + 1);
if (Ty == types::TY_INVALID) {
if (CCCIsCPP())
Ty = types::TY_C;
else if (IsCLMode() && Args.hasArgNoClaim(options::OPT_E))
Ty = types::TY_CXX;
else
Ty = types::TY_Object;
}
// If the driver is invoked as C++ compiler (like clang++ or c++) it
// should autodetect some input files as C++ for g++ compatibility.
if (CCCIsCXX()) {
types::ID OldTy = Ty;
Ty = types::lookupCXXTypeForCType(Ty);
if (Ty != OldTy)
Diag(clang::diag::warn_drv_treating_input_as_cxx)
<< getTypeName(OldTy) << getTypeName(Ty);
}
// If running with -fthinlto-index=, extensions that normally identify
// native object files actually identify LLVM bitcode files.
if (Args.hasArgNoClaim(options::OPT_fthinlto_index_EQ) &&
Ty == types::TY_Object)
Ty = types::TY_LLVM_BC;
}
// -ObjC and -ObjC++ override the default language, but only for "source
// files". We just treat everything that isn't a linker input as a
// source file.
//
// FIXME: Clean this up if we move the phase sequence into the type.
if (Ty != types::TY_Object) {
if (Args.hasArg(options::OPT_ObjC))
Ty = types::TY_ObjC;
else if (Args.hasArg(options::OPT_ObjCXX))
Ty = types::TY_ObjCXX;
}
} else {
assert(InputTypeArg && "InputType set w/o InputTypeArg");
if (!InputTypeArg->getOption().matches(options::OPT_x)) {
// If emulating cl.exe, make sure that /TC and /TP don't affect input
// object files.
const char *Ext = strrchr(Value, '.');
if (Ext && TC.LookupTypeForExtension(Ext + 1) == types::TY_Object)
Ty = types::TY_Object;
}
if (Ty == types::TY_INVALID) {
Ty = InputType;
InputTypeArg->claim();
}
}
if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true))
Inputs.push_back(std::make_pair(Ty, A));
} else if (A->getOption().matches(options::OPT__SLASH_Tc)) {
StringRef Value = A->getValue();
if (DiagnoseInputExistence(Args, Value, types::TY_C,
/*TypoCorrect=*/false)) {
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_C, InputArg));
}
A->claim();
} else if (A->getOption().matches(options::OPT__SLASH_Tp)) {
StringRef Value = A->getValue();
if (DiagnoseInputExistence(Args, Value, types::TY_CXX,
/*TypoCorrect=*/false)) {
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_CXX, InputArg));
}
A->claim();
} else if (A->getOption().hasFlag(options::LinkerInput)) {
// Just treat as object type, we could make a special type for this if
// necessary.
Inputs.push_back(std::make_pair(types::TY_Object, A));
} else if (A->getOption().matches(options::OPT_x)) {
InputTypeArg = A;
InputType = types::lookupTypeForTypeSpecifier(A->getValue());
A->claim();
// Follow gcc behavior and treat as linker input for invalid -x
// options. Its not clear why we shouldn't just revert to unknown; but
// this isn't very important, we might as well be bug compatible.
if (!InputType) {
Diag(clang::diag::err_drv_unknown_language) << A->getValue();
InputType = types::TY_Object;
}
} else if (A->getOption().getID() == options::OPT_U) {
assert(A->getNumValues() == 1 && "The /U option has one value.");
StringRef Val = A->getValue(0);
if (Val.find_first_of("/\\") != StringRef::npos) {
// Warn about e.g. "/Users/me/myfile.c".
Diag(diag::warn_slash_u_filename) << Val;
Diag(diag::note_use_dashdash);
}
}
}
if (CCCIsCPP() && Inputs.empty()) {
// If called as standalone preprocessor, stdin is processed
// if no other input is present.
Arg *A = MakeInputArg(Args, Opts, "-");
Inputs.push_back(std::make_pair(types::TY_C, A));
}
}
namespace {
/// Provides a convenient interface for different programming models to generate
/// the required device actions.
class OffloadingActionBuilder final {
/// Flag used to trace errors in the builder.
bool IsValid = false;
/// The compilation that is using this builder.
Compilation &C;
/// Map between an input argument and the offload kinds used to process it.
std::map<const Arg *, unsigned> InputArgToOffloadKindMap;
/// Builder interface. It doesn't build anything or keep any state.
class DeviceActionBuilder {
public:
typedef const llvm::SmallVectorImpl<phases::ID> PhasesTy;
enum ActionBuilderReturnCode {
// The builder acted successfully on the current action.
ABRT_Success,
// The builder didn't have to act on the current action.
ABRT_Inactive,
// The builder was successful and requested the host action to not be
// generated.
ABRT_Ignore_Host,
};
protected:
/// Compilation associated with this builder.
Compilation &C;
/// Tool chains associated with this builder. The same programming
/// model may have associated one or more tool chains.
SmallVector<const ToolChain *, 2> ToolChains;
/// The derived arguments associated with this builder.
DerivedArgList &Args;
/// The inputs associated with this builder.
const Driver::InputList &Inputs;
/// The associated offload kind.
Action::OffloadKind AssociatedOffloadKind = Action::OFK_None;
public:
DeviceActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs,
Action::OffloadKind AssociatedOffloadKind)
: C(C), Args(Args), Inputs(Inputs),
AssociatedOffloadKind(AssociatedOffloadKind) {}
virtual ~DeviceActionBuilder() {}
/// Fill up the array \a DA with all the device dependences that should be
/// added to the provided host action \a HostAction. By default it is
/// inactive.
virtual ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) {
return ABRT_Inactive;
}
/// Update the state to include the provided host action \a HostAction as a
/// dependency of the current device action. By default it is inactive.
virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction) {
return ABRT_Inactive;
}
/// Append top level actions generated by the builder.
virtual void appendTopLevelActions(ActionList &AL) {}
/// Append linker device actions generated by the builder.
virtual void appendLinkDeviceActions(ActionList &AL) {}
/// Append linker host action generated by the builder.
virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; }
/// Append linker actions generated by the builder.
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
/// Initialize the builder. Return true if any initialization errors are
/// found.
virtual bool initialize() { return false; }
/// Return true if the builder can use bundling/unbundling.
virtual bool canUseBundlerUnbundler() const { return false; }
/// Return true if this builder is valid. We have a valid builder if we have
/// associated device tool chains.
bool isValid() { return !ToolChains.empty(); }
/// Return the associated offload kind.
Action::OffloadKind getAssociatedOffloadKind() {
return AssociatedOffloadKind;
}
};
/// Base class for CUDA/HIP action builder. It injects device code in
/// the host backend action.
class CudaActionBuilderBase : public DeviceActionBuilder {
protected:
/// Flags to signal if the user requested host-only or device-only
/// compilation.
bool CompileHostOnly = false;
bool CompileDeviceOnly = false;
bool EmitLLVM = false;
bool EmitAsm = false;
/// ID to identify each device compilation. For CUDA it is simply the
/// GPU arch string. For HIP it is either the GPU arch string or GPU
/// arch string plus feature strings delimited by a plus sign, e.g.
/// gfx906+xnack.
struct TargetID {
/// Target ID string which is persistent throughout the compilation.
const char *ID;
TargetID(CudaArch Arch) { ID = CudaArchToString(Arch); }
TargetID(const char *ID) : ID(ID) {}
operator const char *() { return ID; }
operator StringRef() { return StringRef(ID); }
};
/// List of GPU architectures to use in this compilation.
SmallVector<TargetID, 4> GpuArchList;
/// The CUDA actions for the current input.
ActionList CudaDeviceActions;
/// The CUDA fat binary if it was generated for the current input.
Action *CudaFatBinary = nullptr;
/// Flag that is set to true if this builder acted on the current input.
bool IsActive = false;
/// Flag for -fgpu-rdc.
bool Relocatable = false;
/// Default GPU architecture if there's no one specified.
CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
/// Method to generate compilation unit ID specified by option
/// '-fuse-cuid='.
enum UseCUIDKind { CUID_Hash, CUID_Random, CUID_None, CUID_Invalid };
UseCUIDKind UseCUID = CUID_Hash;
/// Compilation unit ID specified by option '-cuid='.
StringRef FixedCUID;
public:
CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs,
Action::OffloadKind OFKind)
: DeviceActionBuilder(C, Args, Inputs, OFKind) {}
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
// While generating code for CUDA, we only depend on the host input action
// to trigger the creation of all the CUDA device actions.
// If we are dealing with an input action, replicate it for each GPU
// architecture. If we are in host-only mode we return 'success' so that
// the host uses the CUDA offload kind.
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
assert(!GpuArchList.empty() &&
"We should have at least one GPU architecture.");
// If the host input is not CUDA or HIP, we don't need to bother about
// this input.
if (!(IA->getType() == types::TY_CUDA ||
IA->getType() == types::TY_HIP ||
IA->getType() == types::TY_PP_HIP)) {
// The builder will ignore this input.
IsActive = false;
return ABRT_Inactive;
}
// Set the flag to true, so that the builder acts on the current input.
IsActive = true;
if (CompileHostOnly)
return ABRT_Success;
// Replicate inputs for each GPU architecture.
auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE
: types::TY_CUDA_DEVICE;
std::string CUID = FixedCUID.str();
if (CUID.empty()) {
if (UseCUID == CUID_Random)
CUID = llvm::utohexstr(llvm::sys::Process::GetRandomNumber(),
/*LowerCase=*/true);
else if (UseCUID == CUID_Hash) {
llvm::MD5 Hasher;
llvm::MD5::MD5Result Hash;
SmallString<256> RealPath;
llvm::sys::fs::real_path(IA->getInputArg().getValue(), RealPath,
/*expand_tilde=*/true);
Hasher.update(RealPath);
for (auto *A : Args) {
if (A->getOption().matches(options::OPT_INPUT))
continue;
Hasher.update(A->getAsString(Args));
}
Hasher.final(Hash);
CUID = llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
}
}
IA->setId(CUID);
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
CudaDeviceActions.push_back(
C.MakeAction<InputAction>(IA->getInputArg(), Ty, IA->getId()));
}
return ABRT_Success;
}
// If this is an unbundling action use it as is for each CUDA toolchain.
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
// If -fgpu-rdc is disabled, should not unbundle since there is no
// device code to link.
if (UA->getType() == types::TY_Object && !Relocatable)
return ABRT_Inactive;
CudaDeviceActions.clear();
auto *IA = cast<InputAction>(UA->getInputs().back());
std::string FileName = IA->getInputArg().getAsString(Args);
// Check if the type of the file is the same as the action. Do not
// unbundle it if it is not. Do not unbundle .so files, for example,
// which are not object files.
if (IA->getType() == types::TY_Object &&
(!llvm::sys::path::has_extension(FileName) ||
types::lookupTypeForExtension(
llvm::sys::path::extension(FileName).drop_front()) !=
types::TY_Object))
return ABRT_Inactive;
for (auto Arch : GpuArchList) {
CudaDeviceActions.push_back(UA);
UA->registerDependentActionInfo(ToolChains[0], Arch,
AssociatedOffloadKind);
}
return ABRT_Success;
}
return IsActive ? ABRT_Success : ABRT_Inactive;
}
void appendTopLevelActions(ActionList &AL) override {
// Utility to append actions to the top level list.
auto AddTopLevel = [&](Action *A, TargetID TargetID) {
OffloadAction::DeviceDependences Dep;
Dep.add(*A, *ToolChains.front(), TargetID, AssociatedOffloadKind);
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
};
// If we have a fat binary, add it to the list.
if (CudaFatBinary) {
AddTopLevel(CudaFatBinary, CudaArch::UNUSED);
CudaDeviceActions.clear();
CudaFatBinary = nullptr;
return;
}
if (CudaDeviceActions.empty())
return;
// If we have CUDA actions at this point, that's because we have a
// partial compilation, so we should have an action for each GPU
// architecture.
assert(CudaDeviceActions.size() == GpuArchList.size() &&
"Expecting one action per GPU architecture.");
assert(ToolChains.size() == 1 &&
"Expecting to have a sing CUDA toolchain.");
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
AddTopLevel(CudaDeviceActions[I], GpuArchList[I]);
CudaDeviceActions.clear();
}
/// Get canonicalized offload arch option. \returns empty StringRef if the
/// option is invalid.
virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0;
virtual llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
getConflictOffloadArchCombination(const std::set<StringRef> &GpuArchs) = 0;
bool initialize() override {
assert(AssociatedOffloadKind == Action::OFK_Cuda ||
AssociatedOffloadKind == Action::OFK_HIP);
// We don't need to support CUDA.
if (AssociatedOffloadKind == Action::OFK_Cuda &&
!C.hasOffloadToolChain<Action::OFK_Cuda>())
return false;
// We don't need to support HIP.
if (AssociatedOffloadKind == Action::OFK_HIP &&
!C.hasOffloadToolChain<Action::OFK_HIP>())
return false;
Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
options::OPT_fno_gpu_rdc, /*Default=*/false);
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "No toolchain for host compilation.");
if (HostTC->getTriple().isNVPTX() ||
HostTC->getTriple().getArch() == llvm::Triple::amdgcn) {
// We do not support targeting NVPTX/AMDGCN for host compilation. Throw
// an error and abort pipeline construction early so we don't trip
// asserts that assume device-side compilation.
C.getDriver().Diag(diag::err_drv_cuda_host_arch)
<< HostTC->getTriple().getArchName();
return true;
}
ToolChains.push_back(
AssociatedOffloadKind == Action::OFK_Cuda
? C.getSingleOffloadToolChain<Action::OFK_Cuda>()
: C.getSingleOffloadToolChain<Action::OFK_HIP>());
Arg *PartialCompilationArg = Args.getLastArg(
options::OPT_cuda_host_only, options::OPT_cuda_device_only,
options::OPT_cuda_compile_host_device);
CompileHostOnly = PartialCompilationArg &&
PartialCompilationArg->getOption().matches(
options::OPT_cuda_host_only);
CompileDeviceOnly = PartialCompilationArg &&
PartialCompilationArg->getOption().matches(
options::OPT_cuda_device_only);
EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
EmitAsm = Args.getLastArg(options::OPT_S);
FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ);
if (Arg *A = Args.getLastArg(options::OPT_fuse_cuid_EQ)) {
StringRef UseCUIDStr = A->getValue();
UseCUID = llvm::StringSwitch<UseCUIDKind>(UseCUIDStr)
.Case("hash", CUID_Hash)
.Case("random", CUID_Random)
.Case("none", CUID_None)
.Default(CUID_Invalid);
if (UseCUID == CUID_Invalid) {
C.getDriver().Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << UseCUIDStr;
C.setContainsError();
return true;
}
}
// Collect all cuda_gpu_arch parameters, removing duplicates.
std::set<StringRef> GpuArchs;
bool Error = false;
for (Arg *A : Args) {
if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
A->getOption().matches(options::OPT_no_offload_arch_EQ)))
continue;
A->claim();
StringRef ArchStr = A->getValue();
if (A->getOption().matches(options::OPT_no_offload_arch_EQ) &&
ArchStr == "all") {
GpuArchs.clear();
continue;
}
ArchStr = getCanonicalOffloadArch(ArchStr);
if (ArchStr.empty()) {
Error = true;
} else if (A->getOption().matches(options::OPT_offload_arch_EQ))
GpuArchs.insert(ArchStr);
else if (A->getOption().matches(options::OPT_no_offload_arch_EQ))
GpuArchs.erase(ArchStr);
else
llvm_unreachable("Unexpected option.");
}
auto &&ConflictingArchs = getConflictOffloadArchCombination(GpuArchs);
if (ConflictingArchs) {
C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
<< ConflictingArchs.getValue().first
<< ConflictingArchs.getValue().second;
C.setContainsError();
return true;
}
// Collect list of GPUs remaining in the set.
for (auto Arch : GpuArchs)
GpuArchList.push_back(Arch.data());
// Default to sm_20 which is the lowest common denominator for
// supported GPUs. sm_20 code should work correctly, if
// suboptimally, on all newer GPUs.
if (GpuArchList.empty())
GpuArchList.push_back(DefaultCudaArch);
return Error;
}
};
/// \brief CUDA action builder. It injects device code in the host backend
/// action.
class CudaActionBuilder final : public CudaActionBuilderBase {
public:
CudaActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
DefaultCudaArch = CudaArch::SM_20;
}
StringRef getCanonicalOffloadArch(StringRef ArchStr) override {
CudaArch Arch = StringToCudaArch(ArchStr);
if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
return StringRef();
}
return CudaArchToString(Arch);
}
llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
getConflictOffloadArchCombination(
const std::set<StringRef> &GpuArchs) override {
return llvm::None;
}
ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {
if (!IsActive)
return ABRT_Inactive;
// If we don't have more CUDA actions, we don't have any dependences to
// create for the host.
if (CudaDeviceActions.empty())
return ABRT_Success;
assert(CudaDeviceActions.size() == GpuArchList.size() &&
"Expecting one action per GPU architecture.");
assert(!CompileHostOnly &&
"Not expecting CUDA actions in host-only compilation.");
// If we are generating code for the device or we are in a backend phase,
// we attempt to generate the fat binary. We compile each arch to ptx and
// assemble to cubin, then feed the cubin *and* the ptx into a device
// "link" action, which uses fatbinary to combine these cubins into one
// fatbin. The fatbin is then an input to the host action if not in
// device-only mode.
if (CompileDeviceOnly || CurPhase == phases::Backend) {
ActionList DeviceActions;
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
// Produce the device action from the current phase up to the assemble
// phase.
for (auto Ph : Phases) {
// Skip the phases that were already dealt with.
if (Ph < CurPhase)
continue;
// We have to be consistent with the host final phase.
if (Ph > FinalPhase)
break;
CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
C, Args, Ph, CudaDeviceActions[I], Action::OFK_Cuda);
if (Ph == phases::Assemble)
break;
}
// If we didn't reach the assemble phase, we can't generate the fat
// binary. We don't need to generate the fat binary if we are not in
// device-only mode.
if (!isa<AssembleJobAction>(CudaDeviceActions[I]) ||
CompileDeviceOnly)
continue;
Action *AssembleAction = CudaDeviceActions[I];
assert(AssembleAction->getType() == types::TY_Object);
assert(AssembleAction->getInputs().size() == 1);
Action *BackendAction = AssembleAction->getInputs()[0];
assert(BackendAction->getType() == types::TY_PP_Asm);
for (auto &A : {AssembleAction, BackendAction}) {
OffloadAction::DeviceDependences DDep;
DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda);
DeviceActions.push_back(
C.MakeAction<OffloadAction>(DDep, A->getType()));
}
}
// We generate the fat binary if we have device input actions.
if (!DeviceActions.empty()) {
CudaFatBinary =
C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
if (!CompileDeviceOnly) {
DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
Action::OFK_Cuda);
// Clear the fat binary, it is already a dependence of a host
// action.
CudaFatBinary = nullptr;
}
// Remove the CUDA actions as they are already connected to a host
// action or fat binary.
CudaDeviceActions.clear();
}
// We avoid creating host action in device-only mode.
return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
} else if (CurPhase > phases::Backend) {
// If we are past the backend phase and still have a device action, we
// don't have to do anything as this action is already a device
// top-level action.
return ABRT_Success;
}
assert(CurPhase < phases::Backend && "Generating single CUDA "
"instructions should only occur "
"before the backend phase!");
// By default, we produce an action for each device arch.
for (Action *&A : CudaDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
return ABRT_Success;
}
};
/// \brief HIP action builder. It injects device code in the host backend
/// action.
class HIPActionBuilder final : public CudaActionBuilderBase {
/// The linker inputs obtained for each device arch.
SmallVector<ActionList, 8> DeviceLinkerInputs;
bool GPUSanitize;
// The default bundling behavior depends on the type of output, therefore
// BundleOutput needs to be tri-value: None, true, or false.
// Bundle code objects unless --no-gpu-bundle-output is specified for
// device-only compilation. Bundle other types of output files only if
// --gpu-bundle-output is specified for device-only compilation.
Optional<bool> BundleOutput;
public:
HIPActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
DefaultCudaArch = CudaArch::GFX803;
GPUSanitize = Args.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, false);
if (Args.hasArg(options::OPT_gpu_bundle_output,
options::OPT_no_gpu_bundle_output))
BundleOutput = Args.hasFlag(options::OPT_gpu_bundle_output,
options::OPT_no_gpu_bundle_output);
}
bool canUseBundlerUnbundler() const override { return true; }
StringRef getCanonicalOffloadArch(StringRef IdStr) override {
llvm::StringMap<bool> Features;
auto ArchStr =
parseTargetID(getHIPOffloadTargetTriple(), IdStr, &Features);
if (!ArchStr) {
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr;
C.setContainsError();
return StringRef();
}
auto CanId = getCanonicalTargetID(ArchStr.getValue(), Features);
return Args.MakeArgStringRef(CanId);
};
llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
getConflictOffloadArchCombination(
const std::set<StringRef> &GpuArchs) override {
return getConflictTargetIDCombination(GpuArchs);
}
ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {
// amdgcn does not support linking of object files, therefore we skip
// the backend and assemble phases and output LLVM IR instead, except
// when generating non-relocatable device code, where we generate a fat
// binary for the device code and pass it to the host in the Backend
// phase.
if (CudaDeviceActions.empty())
return ABRT_Success;
assert(((CurPhase == phases::Link && Relocatable) ||
CudaDeviceActions.size() == GpuArchList.size()) &&
"Expecting one action per GPU architecture.");
assert(!CompileHostOnly &&
"Not expecting CUDA actions in host-only compilation.");
if (!Relocatable && CurPhase == phases::Backend && !EmitLLVM &&
!EmitAsm) {
// If we are in backend phase, we attempt to generate the fat binary.
// We compile each arch to IR and use a link action to generate code
// object containing ISA. Then we use a special "link" action to create
// a fat binary containing all the code objects for different GPU's.
// The fat binary is then an input to the host action.
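// Rough shape of the graph built below for two archs (illustrative sketch):
//
//   IR(gfx906) -> device link -> OffloadAction \
//                                               "link" (HIP fatbin) -> host
//   IR(gfx908) -> device link -> OffloadAction /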
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
if (C.getDriver().isUsingLTO(/*IsOffload=*/true)) {
// When LTO is enabled, skip the backend and assemble phases and
// use lld to link the bitcode.
ActionList AL;
AL.push_back(CudaDeviceActions[I]);
// Create a link action to link device IR with device library
// and generate ISA.
CudaDeviceActions[I] =
C.MakeAction<LinkJobAction>(AL, types::TY_Image);
} else {
// When LTO is not enabled, we follow the conventional
// compiler phases, including backend and assemble phases.
ActionList AL;
auto BackendAction = C.getDriver().ConstructPhaseAction(
C, Args, phases::Backend, CudaDeviceActions[I],
AssociatedOffloadKind);
auto AssembleAction = C.getDriver().ConstructPhaseAction(
C, Args, phases::Assemble, BackendAction,
AssociatedOffloadKind);
AL.push_back(AssembleAction);
// Create a link action to link device IR with device library
// and generate ISA.
CudaDeviceActions[I] =
C.MakeAction<LinkJobAction>(AL, types::TY_Image);
}
// OffloadingActionBuilder propagates the device arch until it reaches
// an offload action. Since the next action, which creates the fatbin,
// has no device arch, whereas the above link action and its input do,
// an offload action is needed here to stop the null device arch of the
// next action from being propagated to the above link action.
OffloadAction::DeviceDependences DDep;
DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I],
AssociatedOffloadKind);
CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
DDep, CudaDeviceActions[I]->getType());
}
if (!CompileDeviceOnly || !BundleOutput.hasValue() ||
BundleOutput.getValue()) {
// Create HIP fat binary with a special "link" action.
CudaFatBinary = C.MakeAction<LinkJobAction>(CudaDeviceActions,
types::TY_HIP_FATBIN);
if (!CompileDeviceOnly) {
DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
AssociatedOffloadKind);
// Clear the fat binary; it is already a dependence of a host
// action.
CudaFatBinary = nullptr;
}
// Remove the CUDA actions as they are already connected to a host
// action or the fat binary.
CudaDeviceActions.clear();
}
return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
} else if (CurPhase == phases::Link) {
// Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch.
// This happens for each device action originating from each input file.
// Later on, device actions in DeviceLinkerInputs are used to create
// device link actions in appendLinkDependences and the created device
// link actions are passed to the offload action as device dependence.
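// Illustrative example (assumed flags): compiling `a.hip b.hip` with
// `-fgpu-rdc --offload-arch=gfx906` accumulates the per-input device code
// for gfx906 here; appendLinkDeviceActions later emits one device link
// action per arch from these inputs.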
DeviceLinkerInputs.resize(CudaDeviceActions.size());
auto LI = DeviceLinkerInputs.begin();
for (auto *A : CudaDeviceActions) {
LI->push_back(A);
++LI;
}
// We will pass the device actions as a host dependence, so we don't
// need to do anything else with them here.
CudaDeviceActions.clear();
return ABRT_Success;
}
// By default, we produce an action for each device arch.
for (Action *&A : CudaDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
AssociatedOffloadKind);
if (CompileDeviceOnly && CurPhase == FinalPhase &&
BundleOutput.hasValue() && BundleOutput.getValue()) {
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
OffloadAction::DeviceDependences DDep;
DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I],
AssociatedOffloadKind);
CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
DDep, CudaDeviceActions[I]->getType());
}
CudaFatBinary =
C.MakeAction<OffloadBundlingJobAction>(CudaDeviceActions);
CudaDeviceActions.clear();
}
return (CompileDeviceOnly && CurPhase == FinalPhase) ? ABRT_Ignore_Host
: ABRT_Success;
}
void appendLinkDeviceActions(ActionList &AL) override {
if (DeviceLinkerInputs.size() == 0)
return;
assert(DeviceLinkerInputs.size() == GpuArchList.size() &&
"Linker inputs and GPU arch list sizes do not match.");
// Append a new link action for each device.
unsigned I = 0;
for (auto &LI : DeviceLinkerInputs) {
// Each entry in DeviceLinkerInputs corresponds to a GPU arch.
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
// Linking all inputs for the current GPU arch.
// LI contains all the inputs for the linker.
OffloadAction::DeviceDependences DeviceLinkDeps;
DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0],
GpuArchList[I], AssociatedOffloadKind);
AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
DeviceLinkAction->getType()));
++I;
}
DeviceLinkerInputs.clear();
// Create a host object from all the device images by embedding them
// in a fat binary.
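// Sketch of what is appended: the per-arch offload link actions already in
// AL feed a LinkJobAction producing a TY_Object, which is wrapped in an
// OffloadAction and handed to the host link as a regular input.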
OffloadAction::DeviceDependences DDeps;
auto *TopDeviceLinkAction =
C.MakeAction<LinkJobAction>(AL, types::TY_Object);
DDeps.add(*TopDeviceLinkAction, *ToolChains[0],
nullptr, AssociatedOffloadKind);
// Offload the host object to the host linker.
AL.push_back(C.MakeAction<OffloadAction>(DDeps, TopDeviceLinkAction->getType()));
}
Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); }
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
};
/// OpenMP action builder. The host bitcode is passed to the device frontend
/// and all the device linked images are passed to the host link phase.
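/// Illustrative (assumed) invocation: `clang -fopenmp
/// -fopenmp-targets=nvptx64-nvidia-cuda foo.c` creates one device pipeline
/// per target triple, and the resulting device images become dependences of
/// the host link.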
class OpenMPActionBuilder final : public DeviceActionBuilder {
/// The OpenMP actions for the current input.
ActionList OpenMPDeviceActions;
/// The linker inputs obtained for each toolchain.
SmallVector<ActionList, 8> DeviceLinkerInputs;
public:
OpenMPActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_OpenMP) {}
ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {
if (OpenMPDeviceActions.empty())
return ABRT_Inactive;
// We should always have an action for each input.
assert(OpenMPDeviceActions.size() == ToolChains.size() &&
"Number of OpenMP actions and toolchains do not match.");
// The host only depends on device action in the linking phase, when all
// the device images have to be embedded in the host image.
if (CurPhase == phases::Link) {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");
auto LI = DeviceLinkerInputs.begin();
for (auto *A : OpenMPDeviceActions) {
LI->push_back(A);
++LI;
}
// We passed the device actions as a host dependence, so we don't need to
// do anything else with them.
OpenMPDeviceActions.clear();
return ABRT_Success;
}
// By default, we produce an action for each device arch.
for (Action *&A : OpenMPDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
return ABRT_Success;
}
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
// If this is an input action replicate it for each OpenMP toolchain.
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
OpenMPDeviceActions.clear();
for (unsigned I = 0; I < ToolChains.size(); ++I)
OpenMPDeviceActions.push_back(
C.MakeAction<InputAction>(IA->getInputArg(), IA->getType()));
return ABRT_Success;
}
// If this is an unbundling action, use it as is for each OpenMP toolchain.
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
OpenMPDeviceActions.clear();
auto *IA = cast<InputAction>(UA->getInputs().back());
std::string FileName = IA->getInputArg().getAsString(Args);
// Check if the type of the file matches the type of the action. Do not
// unbundle it if it does not. For example, do not unbundle .so files,
// which are not object files.
if (IA->getType() == types::TY_Object &&
(!llvm::sys::path::has_extension(FileName) ||
types::lookupTypeForExtension(
llvm::sys::path::extension(FileName).drop_front()) !=
types::TY_Object))
return ABRT_Inactive;
for (unsigned I = 0; I < ToolChains.size(); ++I) {
OpenMPDeviceActions.push_back(UA);
UA->registerDependentActionInfo(
ToolChains[I], /*BoundArch=*/StringRef(), Action::OFK_OpenMP);
}
return ABRT_Success;
}
// When generating code for OpenMP we use the host compile phase result as
// a dependence of the device compile phase so that it can learn what
// declarations should be emitted. However, this is not the only use for
// the host action, so we prevent it from being collapsed.
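// Resulting shape (sketch):
//   host CompileJobAction --host dep--> OffloadAction <--device dep-- device CompileJobAction
// so the device frontend can consume the host bitcode when deciding which
// declarations to emit.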
if (isa<CompileJobAction>(HostAction)) {
HostAction->setCannotBeCollapsedWithNextDependentAction();
assert(ToolChains.size() == OpenMPDeviceActions.size() &&
"Toolchains and device action sizes do not match.");
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, Action::OFK_OpenMP);
auto TC = ToolChains.begin();
for (Action *&A : OpenMPDeviceActions) {
assert(isa<CompileJobAction>(A));
OffloadAction::DeviceDependences DDep;
DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
A = C.MakeAction<OffloadAction>(HDep, DDep);
++TC;
}
}
return ABRT_Success;
}
void appendTopLevelActions(ActionList &AL) override {
if (OpenMPDeviceActions.empty())
return;
// We should always have an action for each input.
assert(OpenMPDeviceActions.size() == ToolChains.size() &&
"Number of OpenMP actions and toolchains do not match.");
// Append all device actions followed by the proper offload action.
auto TI = ToolChains.begin();
for (auto *A : OpenMPDeviceActions) {
OffloadAction::DeviceDependences Dep;
Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
++TI;
}
// We no longer need the action stored in this builder.
OpenMPDeviceActions.clear();
}
void appendLinkDeviceActions(ActionList &AL) override {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");
// Append a new link action for each device.
auto TC = ToolChains.begin();
for (auto &LI : DeviceLinkerInputs) {
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
OffloadAction::DeviceDependences DeviceLinkDeps;
DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
Action::OFK_OpenMP);
AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
DeviceLinkAction->getType()));
++TC;
}
DeviceLinkerInputs.clear();
}
Action* appendLinkHostActions(ActionList &AL) override {
// Create wrapper bitcode from the result of device link actions and compile
// it to an object which will be added to the host link command.
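// Pipeline sketch: device link results -> OffloadWrapperJobAction (LLVM BC)
// -> BackendJobAction (assembly) -> AssembleJobAction (object); the object
// is returned so it can be added to the host link.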
auto *BC = C.MakeAction<OffloadWrapperJobAction>(AL, types::TY_LLVM_BC);
auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
}
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
bool initialize() override {
// Get the OpenMP toolchains. If we don't get any, the action builder will
// know there is nothing to do related to OpenMP offloading.
auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE;
++TI)
ToolChains.push_back(TI->second);
DeviceLinkerInputs.resize(ToolChains.size());
return false;
}
bool canUseBundlerUnbundler() const override {
// OpenMP should use bundled files whenever possible.
return true;
}
};
///
/// TODO: Add the implementation for other specialized builders here.
///
/// Specialized builders being used by this offloading action builder.
SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders;
/// Flag set to true if all valid builders allow file bundling/unbundling.
bool CanUseBundler;
public:
OffloadingActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: C(C) {
// Create a specialized builder for each device toolchain.
IsValid = true;
// Create a specialized builder for CUDA.
SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));
// Create a specialized builder for HIP.
SpecializedBuilders.push_back(new HIPActionBuilder(C, Args, Inputs));
// Create a specialized builder for OpenMP.
SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));
//
// TODO: Build other specialized builders here.
//
// Initialize all the builders, keeping track of errors. If all valid
// builders agree that we can use bundling, set the flag to true.
unsigned ValidBuilders = 0u;
unsigned ValidBuildersSupportingBundling = 0u;
for (auto *SB : SpecializedBuilders) {
IsValid = IsValid && !SB->initialize();
// Update the counters if the builder is valid.
if (SB->isValid()) {
++ValidBuilders;
if (SB->canUseBundlerUnbundler())
++ValidBuildersSupportingBundling;
}
}
CanUseBundler =
ValidBuilders && ValidBuilders == ValidBuildersSupportingBundling;
}
~OffloadingActionBuilder() {
for (auto *SB : SpecializedBuilders)
delete SB;
}
/// Generate an action that adds device dependences (if any) to a host action.
/// If no device dependence actions exist, just return the host action \a
/// HostAction. If an error is found or if no builder requires the host action
/// to be generated, return nullptr.
Action *
addDeviceDependencesToHostAction(Action *HostAction, const Arg *InputArg,
phases::ID CurPhase, phases::ID FinalPhase,
DeviceActionBuilder::PhasesTy &Phases) {
if (!IsValid)
return nullptr;
if (SpecializedBuilders.empty())
return HostAction;
assert(HostAction && "Invalid host action!");
OffloadAction::DeviceDependences DDeps;
// Check if all the programming models agree we should not emit the host
// action. Also, keep track of the offloading kinds employed.
auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
unsigned InactiveBuilders = 0u;
unsigned IgnoringBuilders = 0u;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid()) {
++InactiveBuilders;
continue;
}
auto RetCode =
SB->getDeviceDependences(DDeps, CurPhase, FinalPhase, Phases);
// If the builder explicitly says the host action should be ignored,
// we need to increment the variable that tracks the builders that request
// the host object to be ignored.
if (RetCode == DeviceActionBuilder::ABRT_Ignore_Host)
++IgnoringBuilders;
// Unless the builder was inactive for this action, we have to record the
// offload kind because the host will have to use it.
if (RetCode != DeviceActionBuilder::ABRT_Inactive)
OffloadKind |= SB->getAssociatedOffloadKind();
}
// If all builders agree that the host object should be ignored, just return
// nullptr.
if (IgnoringBuilders &&
SpecializedBuilders.size() == (InactiveBuilders + IgnoringBuilders))
return nullptr;
if (DDeps.getActions().empty())
return HostAction;
// We have dependences we need to bundle together. We use an offload action
// for that.
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, DDeps);
return C.MakeAction<OffloadAction>(HDep, DDeps);
}
/// Generate an action that adds a host dependence to a device action. The
/// results will be kept in this action builder. Return true if an error was
/// found.
bool addHostDependenceToDeviceActions(Action *&HostAction,
const Arg *InputArg) {
if (!IsValid)
return true;
// If we support bundling/unbundling and the current action is an input
// action for a non-source file, we replace the host action with the
// unbundling action. The bundler tool has the logic to detect whether an
// input is a bundle or not, and if the input is not a bundle it assumes
// it is a host file. Therefore it is safe to create an unbundling action
// even if the input is not a bundle.
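// For example (illustrative): an object produced earlier with offloading
// enabled is a bundle carrying one entry per target, while a plain host
// object passes through the unbundler unchanged as the host entry.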
if (CanUseBundler && isa<InputAction>(HostAction) &&
InputArg->getOption().getKind() == llvm::opt::Option::InputClass &&
(!types::isSrcFile(HostAction->getType()) ||
HostAction->getType() == types::TY_PP_HIP)) {
auto UnbundlingHostAction =
C.MakeAction<OffloadUnbundlingJobAction>(HostAction);
UnbundlingHostAction->registerDependentActionInfo(
C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/StringRef(), Action::OFK_Host);
HostAction = UnbundlingHostAction;
}
assert(HostAction && "Invalid host action!");
// Register the offload kinds that are used.
auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
auto RetCode = SB->addDeviceDepences(HostAction);
// Host dependences for device actions are not compatible with that same
// action being ignored.
assert(RetCode != DeviceActionBuilder::ABRT_Ignore_Host &&
"Host dependence not expected to be ignored.!");
// Unless the builder was inactive for this action, we have to record the
// offload kind because the host will have to use it.
if (RetCode != DeviceActionBuilder::ABRT_Inactive)
OffloadKind |= SB->getAssociatedOffloadKind();
}
// Do not use unbundler if the Host does not depend on device action.
if (OffloadKind == Action::OFK_None && CanUseBundler)
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction))
HostAction = UA->getInputs().back();
return false;
}
/// Add the offloading top level actions to the provided action list. This
/// function can replace the host action by a bundling action if the
/// programming models allow it.
bool appendTopLevelActions(ActionList &AL, Action *HostAction,
const Arg *InputArg) {
// Get the device actions to be appended.
ActionList OffloadAL;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendTopLevelActions(OffloadAL);
}
// If we can use the bundler, replace the host action with the bundling one
// in the resulting list. Otherwise, just append the device actions. For
// device-only compilation, HostAction is a null pointer, therefore only do
// this when HostAction is not a null pointer.
if (CanUseBundler && HostAction &&
HostAction->getType() != types::TY_Nothing && !OffloadAL.empty()) {
// Add the host action to the list in order to create the bundling action.
OffloadAL.push_back(HostAction);
// We expect that the host action was just appended to the action list
// before this method was called.
assert(HostAction == AL.back() && "Host action not in the list??");
HostAction = C.MakeAction<OffloadBundlingJobAction>(OffloadAL);
AL.back() = HostAction;
} else
AL.append(OffloadAL.begin(), OffloadAL.end());
// Propagate to the current host action (if any) the offload information
// associated with the current input.
if (HostAction)
HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg],
/*BoundArch=*/nullptr);
return false;
}
Action* makeHostLinkAction() {
// Build a list of device linking actions.
ActionList DeviceAL;
for (DeviceActionBuilder *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendLinkDeviceActions(DeviceAL);
}
if (DeviceAL.empty())
return nullptr;
// Let builders add host linking actions.
Action* HA = nullptr;
for (DeviceActionBuilder *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
HA = SB->appendLinkHostActions(DeviceAL);
}
return HA;
}
/// Processes the host linker action. This currently consists of replacing it
/// with an offload action if there are device link objects, and propagating to
/// the host action all the offload kinds used in the current compilation. The
/// resulting action is returned.
Action *processHostLinkAction(Action *HostAction) {
// Add all the dependences from the device linking actions.
OffloadAction::DeviceDependences DDeps;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendLinkDependences(DDeps);
}
// Calculate all the offload kinds used in the current compilation.
unsigned ActiveOffloadKinds = 0u;
for (auto &I : InputArgToOffloadKindMap)
ActiveOffloadKinds |= I.second;
// If we don't have device dependencies, we don't have to create an offload
// action.
if (DDeps.getActions().empty()) {
// Propagate all the active kinds to host action. Given that it is a link
// action it is assumed to depend on all actions generated so far.
HostAction->propagateHostOffloadInfo(ActiveOffloadKinds,
/*BoundArch=*/nullptr);
return HostAction;
}
// Create the offload action with all dependences. When an offload action
// is created the kinds are propagated to the host action, so we don't have
// to do that explicitly here.
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch*/ nullptr, ActiveOffloadKinds);
return C.MakeAction<OffloadAction>(HDep, DDeps);
}
};
} // anonymous namespace.
void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
const InputList &Inputs,
ActionList &Actions) const {
// Ignore /Yc/Yu if both /Yc and /Yu passed but with different filenames.
Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc);
Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu);
if (YcArg && YuArg && strcmp(YcArg->getValue(), YuArg->getValue()) != 0) {
Diag(clang::diag::warn_drv_ycyu_different_arg_clang_cl);
Args.eraseArg(options::OPT__SLASH_Yc);
Args.eraseArg(options::OPT__SLASH_Yu);
YcArg = YuArg = nullptr;
}
if (YcArg && Inputs.size() > 1) {
Diag(clang::diag::warn_drv_yc_multiple_inputs_clang_cl);
Args.eraseArg(options::OPT__SLASH_Yc);
YcArg = nullptr;
}
Arg *FinalPhaseArg;
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
if (FinalPhase == phases::Link) {
if (Args.hasArg(options::OPT_emit_llvm))
Diag(clang::diag::err_drv_emit_llvm_link);
if (IsCLMode() && LTOMode != LTOK_None &&
!Args.getLastArgValue(options::OPT_fuse_ld_EQ)
.equals_insensitive("lld"))
Diag(clang::diag::err_drv_lto_without_lld);
}
if (FinalPhase == phases::Preprocess || Args.hasArg(options::OPT__SLASH_Y_)) {
// If only preprocessing or /Y- is used, all pch handling is disabled.
// Rather than check for it everywhere, just remove clang-cl pch-related
// flags here.
Args.eraseArg(options::OPT__SLASH_Fp);
Args.eraseArg(options::OPT__SLASH_Yc);
Args.eraseArg(options::OPT__SLASH_Yu);
YcArg = YuArg = nullptr;
}
unsigned LastPLSize = 0;
for (auto &I : Inputs) {
types::ID InputType = I.first;
const Arg *InputArg = I.second;
auto PL = types::getCompilationPhases(InputType);
LastPLSize = PL.size();
// If the first step comes after the final phase we are doing as part of
// this compilation, warn the user about it.
phases::ID InitialPhase = PL[0];
if (InitialPhase > FinalPhase) {
if (InputArg->isClaimed())
continue;
// Claim here to avoid the more general unused warning.
InputArg->claim();
// Suppress all unused style warnings with -Qunused-arguments
if (Args.hasArg(options::OPT_Qunused_arguments))
continue;
// Special case when final phase determined by binary name, rather than
// by a command-line argument with a corresponding Arg.
if (CCCIsCPP())
Diag(clang::diag::warn_drv_input_file_unused_by_cpp)
<< InputArg->getAsString(Args) << getPhaseName(InitialPhase);
// Special case '-E' warning on a previously preprocessed file to make
// more sense.
else if (InitialPhase == phases::Compile &&
(Args.getLastArg(options::OPT__SLASH_EP,
options::OPT__SLASH_P) ||
Args.getLastArg(options::OPT_E) ||
Args.getLastArg(options::OPT_M, options::OPT_MM)) &&
getPreprocessedType(InputType) == types::TY_INVALID)
Diag(clang::diag::warn_drv_preprocessed_input_file_unused)
<< InputArg->getAsString(Args) << !!FinalPhaseArg
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
else
Diag(clang::diag::warn_drv_input_file_unused)
<< InputArg->getAsString(Args) << getPhaseName(InitialPhase)
<< !!FinalPhaseArg
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
continue;
}
if (YcArg) {
// Add a separate precompile phase for the compile phase.
if (FinalPhase >= phases::Compile) {
const types::ID HeaderType = lookupHeaderTypeForSourceType(InputType);
// Build the pipeline for the pch file.
Action *ClangClPch = C.MakeAction<InputAction>(*InputArg, HeaderType);
for (phases::ID Phase : types::getCompilationPhases(HeaderType))
ClangClPch = ConstructPhaseAction(C, Args, Phase, ClangClPch);
assert(ClangClPch);
Actions.push_back(ClangClPch);
// The driver currently exits after the first failed command. This
// relies on that behavior, to make sure if the pch generation fails,
// the main compilation won't run.
// FIXME: If the main compilation fails, the PCH generation should
// probably not be considered successful either.
}
}
}
// If we are linking, claim any options which are obviously only used for
// compilation.
// FIXME: Understand why the last Phase List length is used here.
if (FinalPhase == phases::Link && LastPLSize == 1) {
Args.ClaimAllArgs(options::OPT_CompileOnly_Group);
Args.ClaimAllArgs(options::OPT_cl_compile_Group);
}
}
void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
const InputList &Inputs, ActionList &Actions) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
if (!SuppressMissingInputWarning && Inputs.empty()) {
Diag(clang::diag::err_drv_no_input_files);
return;
}
// Reject -Z* at the top level, these options should never have been exposed
// by gcc.
if (Arg *A = Args.getLastArg(options::OPT_Z_Joined))
Diag(clang::diag::err_drv_use_of_Z_option) << A->getAsString(Args);
// Diagnose misuse of /Fo.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fo)) {
StringRef V = A->getValue();
if (Inputs.size() > 1 && !V.empty() &&
!llvm::sys::path::is_separator(V.back())) {
// Check whether /Fo tries to name an output file for multiple inputs.
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
<< A->getSpelling() << V;
Args.eraseArg(options::OPT__SLASH_Fo);
}
}
// Diagnose misuse of /Fa.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fa)) {
StringRef V = A->getValue();
if (Inputs.size() > 1 && !V.empty() &&
!llvm::sys::path::is_separator(V.back())) {
// Check whether /Fa tries to name an asm file for multiple inputs.
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
<< A->getSpelling() << V;
Args.eraseArg(options::OPT__SLASH_Fa);
}
}
// Diagnose misuse of /o.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_o)) {
if (A->getValue()[0] == '\0') {
// It has to have a value.
Diag(clang::diag::err_drv_missing_argument) << A->getSpelling() << 1;
Args.eraseArg(options::OPT__SLASH_o);
}
}
handleArguments(C, Args, Inputs, Actions);
// Builder to be used to build offloading actions.
OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
// Construct the actions to perform.
HeaderModulePrecompileJobAction *HeaderModuleAction = nullptr;
ActionList LinkerInputs;
ActionList MergerInputs;
for (auto &I : Inputs) {
types::ID InputType = I.first;
const Arg *InputArg = I.second;
auto PL = types::getCompilationPhases(*this, Args, InputType);
if (PL.empty())
continue;
auto FullPL = types::getCompilationPhases(InputType);
// Build the pipeline for this file.
Action *Current = C.MakeAction<InputAction>(*InputArg, InputType);
// Use the current host action in any of the offloading actions, if
// required.
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
break;
for (phases::ID Phase : PL) {
// Add any offload action the host action depends on.
Current = OffloadBuilder.addDeviceDependencesToHostAction(
Current, InputArg, Phase, PL.back(), FullPL);
if (!Current)
break;
// Queue linker inputs.
if (Phase == phases::Link) {
assert(Phase == PL.back() && "linking must be final compilation step.");
LinkerInputs.push_back(Current);
Current = nullptr;
break;
}
// TODO: Consider removing this because the merge may not end up being
// the final Phase in the pipeline. Perhaps the merger could just merge
// and then pass an artifact of some sort to the Link Phase.
// Queue merger inputs.
if (Phase == phases::IfsMerge) {
assert(Phase == PL.back() && "merging must be final compilation step.");
MergerInputs.push_back(Current);
Current = nullptr;
break;
}
// Each precompiled header file after a module file action is a module
// header of that same module file, rather than being compiled to a
// separate PCH.
if (Phase == phases::Precompile && HeaderModuleAction &&
getPrecompiledType(InputType) == types::TY_PCH) {
HeaderModuleAction->addModuleHeaderInput(Current);
Current = nullptr;
break;
}
// FIXME: Should we include any prior module file outputs as inputs of
// later actions in the same command line?
// Otherwise construct the appropriate action.
Action *NewCurrent = ConstructPhaseAction(C, Args, Phase, Current);
// We didn't create a new action, so we will just move to the next phase.
if (NewCurrent == Current)
continue;
if (auto *HMA = dyn_cast<HeaderModulePrecompileJobAction>(NewCurrent))
HeaderModuleAction = HMA;
Current = NewCurrent;
// Use the current host action in any of the offloading actions, if
// required.
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
break;
if (Current->getType() == types::TY_Nothing)
break;
}
// If we ended with something, add to the output list.
if (Current)
Actions.push_back(Current);
// Add any top level actions generated for offloading.
OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);
}
// Add a link action if necessary.
if (!LinkerInputs.empty()) {
if (Action *Wrapper = OffloadBuilder.makeHostLinkAction())
LinkerInputs.push_back(Wrapper);
Action *LA;
// Check if this Linker Job should emit a static library.
if (ShouldEmitStaticLibrary(Args)) {
LA = C.MakeAction<StaticLibJobAction>(LinkerInputs, types::TY_Image);
} else {
LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
}
LA = OffloadBuilder.processHostLinkAction(LA);
Actions.push_back(LA);
}
// Add an interface stubs merge action if necessary.
if (!MergerInputs.empty())
Actions.push_back(
C.MakeAction<IfsMergeJobAction>(MergerInputs, types::TY_Image));
if (Args.hasArg(options::OPT_emit_interface_stubs)) {
auto PhaseList = types::getCompilationPhases(
types::TY_IFS_CPP,
Args.hasArg(options::OPT_c) ? phases::Compile : phases::LastPhase);
ActionList MergerInputs;
for (auto &I : Inputs) {
types::ID InputType = I.first;
const Arg *InputArg = I.second;
// Currently clang and the llvm assembler do not support generating symbol
// stubs from assembly, so we skip asm-file inputs here. For ifs files we
// rely on the normal pipeline setup above.
if (InputType == types::TY_IFS || InputType == types::TY_PP_Asm ||
InputType == types::TY_Asm)
continue;
Action *Current = C.MakeAction<InputAction>(*InputArg, InputType);
for (auto Phase : PhaseList) {
switch (Phase) {
default:
llvm_unreachable(
"IFS Pipeline can only consist of Compile followed by IfsMerge.");
case phases::Compile: {
// Only IfsMerge (llvm-ifs) can handle .o files by looking for ifs
// files where the .o file is located. The compile action cannot
// handle this.
if (InputType == types::TY_Object)
break;
Current = C.MakeAction<CompileJobAction>(Current, types::TY_IFS_CPP);
break;
}
case phases::IfsMerge: {
assert(Phase == PhaseList.back() &&
"merging must be final compilation step.");
MergerInputs.push_back(Current);
Current = nullptr;
break;
}
}
}
// If we ended with something, add to the output list.
if (Current)
Actions.push_back(Current);
}
// Add an interface stubs merge action if necessary.
if (!MergerInputs.empty())
Actions.push_back(
C.MakeAction<IfsMergeJobAction>(MergerInputs, types::TY_Image));
}
// If --print-supported-cpus, -mcpu=? or -mtune=? is specified, build a custom
// Compile phase that prints out supported cpu models and quits.
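// (Illustrative: `clang --print-supported-cpus` and `clang -mcpu=? foo.c`
// are both expected to reach this path; any real inputs are claimed and the
// previously built actions are discarded.)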
if (Arg *A = Args.getLastArg(options::OPT_print_supported_cpus)) {
// Use the -mcpu=? flag as the dummy input to cc1.
Actions.clear();
Action *InputAc = C.MakeAction<InputAction>(*A, types::TY_C);
Actions.push_back(
C.MakeAction<PrecompileJobAction>(InputAc, types::TY_Nothing));
for (auto &I : Inputs)
I.second->claim();
}
// Claim ignored clang-cl options.
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
// Claim --cuda-host-only and --cuda-compile-host-device, which may be passed
// to non-CUDA compilations and should not trigger warnings there.
Args.ClaimAllArgs(options::OPT_cuda_host_only);
Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
}
Action *Driver::ConstructPhaseAction(
Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input,
Action::OffloadKind TargetDeviceOffloadKind) const {
llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
// Some types skip the assembler phase (e.g., llvm-bc), but we can't
// encode this in the steps because the intermediate type depends on
// arguments. Just special case here.
if (Phase == phases::Assemble && Input->getType() != types::TY_PP_Asm)
return Input;
// Build the appropriate action.
switch (Phase) {
case phases::Link:
llvm_unreachable("link action invalid here.");
case phases::IfsMerge:
llvm_unreachable("ifsmerge action invalid here.");
case phases::Preprocess: {
types::ID OutputTy;
// -M and -MM specify the dependency file name by altering the output type,
// if -MD and -MMD are not specified.
if (Args.hasArg(options::OPT_M, options::OPT_MM) &&
!Args.hasArg(options::OPT_MD, options::OPT_MMD)) {
OutputTy = types::TY_Dependencies;
} else {
OutputTy = Input->getType();
if (!Args.hasFlag(options::OPT_frewrite_includes,
options::OPT_fno_rewrite_includes, false) &&
!Args.hasFlag(options::OPT_frewrite_imports,
options::OPT_fno_rewrite_imports, false) &&
!CCGenDiagnostics)
OutputTy = types::getPreprocessedType(OutputTy);
assert(OutputTy != types::TY_INVALID &&
"Cannot preprocess this input type!");
}
return C.MakeAction<PreprocessJobAction>(Input, OutputTy);
}
case phases::Precompile: {
types::ID OutputTy = getPrecompiledType(Input->getType());
assert(OutputTy != types::TY_INVALID &&
"Cannot precompile this input type!");
// If we're given a module name, precompile header file inputs as a
// module, not as a precompiled header.
const char *ModName = nullptr;
if (OutputTy == types::TY_PCH) {
if (Arg *A = Args.getLastArg(options::OPT_fmodule_name_EQ))
ModName = A->getValue();
if (ModName)
OutputTy = types::TY_ModuleFile;
}
if (Args.hasArg(options::OPT_fsyntax_only)) {
// Syntax checks should not emit a PCH file
OutputTy = types::TY_Nothing;
}
if (ModName)
return C.MakeAction<HeaderModulePrecompileJobAction>(Input, OutputTy,
ModName);
return C.MakeAction<PrecompileJobAction>(Input, OutputTy);
}
case phases::Compile: {
if (Args.hasArg(options::OPT_fsyntax_only))
return C.MakeAction<CompileJobAction>(Input, types::TY_Nothing);
if (Args.hasArg(options::OPT_rewrite_objc))
return C.MakeAction<CompileJobAction>(Input, types::TY_RewrittenObjC);
if (Args.hasArg(options::OPT_rewrite_legacy_objc))
return C.MakeAction<CompileJobAction>(Input,
types::TY_RewrittenLegacyObjC);
if (Args.hasArg(options::OPT__analyze))
return C.MakeAction<AnalyzeJobAction>(Input, types::TY_Plist);
if (Args.hasArg(options::OPT__migrate))
return C.MakeAction<MigrateJobAction>(Input, types::TY_Remap);
if (Args.hasArg(options::OPT_emit_ast))
return C.MakeAction<CompileJobAction>(Input, types::TY_AST);
if (Args.hasArg(options::OPT_module_file_info))
return C.MakeAction<CompileJobAction>(Input, types::TY_ModuleFile);
if (Args.hasArg(options::OPT_verify_pch))
return C.MakeAction<VerifyPCHJobAction>(Input, types::TY_Nothing);
return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
}
case phases::Backend: {
if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
}
if (Args.hasArg(options::OPT_emit_llvm) ||
(TargetDeviceOffloadKind == Action::OFK_HIP &&
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
}
return C.MakeAction<BackendJobAction>(Input, types::TY_PP_Asm);
}
case phases::Assemble:
return C.MakeAction<AssembleJobAction>(std::move(Input), types::TY_Object);
}
llvm_unreachable("invalid phase in ConstructPhaseAction");
}
void Driver::BuildJobs(Compilation &C) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
// It is an error to provide a -o option if we are making multiple output
// files. There are exceptions:
//
// IfsMergeJob: when generating interface stubs is enabled we want to be able
// to generate the stub file at the same time that we generate the real
// library/a.out. So when a .o, .so, etc. are the output, with clang interface
// stubs there will also be a .ifs and .ifso at the same location.
//
// CompileJob of type TY_IFS_CPP: when generating interface stubs is enabled
// and -c is passed, we still want to be able to generate a .ifs file while
// we are also generating .o files. So we allow more than one output file in
// this case as well.
//
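// For example (illustrative): `clang -emit-interface-stubs -c -o foo.o foo.c`
// is expected to produce foo.o plus an interface-stub file for the same
// input, so the extra TY_IFS_CPP compile output is not counted against -o.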
if (FinalOutput) {
unsigned NumOutputs = 0;
unsigned NumIfsOutputs = 0;
for (const Action *A : C.getActions())
if (A->getType() != types::TY_Nothing &&
!(A->getKind() == Action::IfsMergeJobClass ||
(A->getType() == clang::driver::types::TY_IFS_CPP &&
A->getKind() == clang::driver::Action::CompileJobClass &&
0 == NumIfsOutputs++) ||
(A->getKind() == Action::BindArchClass && A->getInputs().size() &&
A->getInputs().front()->getKind() == Action::IfsMergeJobClass)))
++NumOutputs;
if (NumOutputs > 1) {
Diag(clang::diag::err_drv_output_argument_with_multiple_files);
FinalOutput = nullptr;
}
}
const llvm::Triple &RawTriple = C.getDefaultToolChain().getTriple();
if (RawTriple.isOSAIX()) {
if (Arg *A = C.getArgs().getLastArg(options::OPT_G))
Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
if (LTOMode == LTOK_Thin)
Diag(diag::err_drv_clang_unsupported) << "thinLTO on AIX";
}
// Collect the list of architectures.
llvm::StringSet<> ArchNames;
if (RawTriple.isOSBinFormatMachO())
for (const Arg *A : C.getArgs())
if (A->getOption().matches(options::OPT_arch))
ArchNames.insert(A->getValue());
// Set of (Action, canonical ToolChain triple) pairs we've built jobs for.
std::map<std::pair<const Action *, std::string>, InputInfo> CachedResults;
for (Action *A : C.getActions()) {
// If we are linking an image for multiple archs then the linker wants
// -arch_multiple and -final_output <final image name>. Unfortunately, this
// doesn't fit in cleanly because we have to pass this information down.
//
// FIXME: This is a hack; find a cleaner way to integrate this into the
// process.
const char *LinkingOutput = nullptr;
if (isa<LipoJobAction>(A)) {
if (FinalOutput)
LinkingOutput = FinalOutput->getValue();
else
LinkingOutput = getDefaultImageName();
}
BuildJobsForAction(C, A, &C.getDefaultToolChain(),
/*BoundArch*/ StringRef(),
/*AtTopLevel*/ true,
/*MultipleArchs*/ ArchNames.size() > 1,
/*LinkingOutput*/ LinkingOutput, CachedResults,
/*TargetDeviceOffloadKind*/ Action::OFK_None);
}
// If we have more than one job, then disable integrated-cc1 for now. Do this
// also when we need to report process execution statistics.
if (C.getJobs().size() > 1 || CCPrintProcessStats)
for (auto &J : C.getJobs())
J.InProcess = false;
if (CCPrintProcessStats) {
C.setPostCallback([=](const Command &Cmd, int Res) {
Optional<llvm::sys::ProcessStatistics> ProcStat =
Cmd.getProcessStatistics();
if (!ProcStat)
return;
const char *LinkingOutput = nullptr;
if (FinalOutput)
LinkingOutput = FinalOutput->getValue();
else if (!Cmd.getOutputFilenames().empty())
LinkingOutput = Cmd.getOutputFilenames().front().c_str();
else
LinkingOutput = getDefaultImageName();
if (CCPrintStatReportFilename.empty()) {
using namespace llvm;
// Human readable output.
outs() << sys::path::filename(Cmd.getExecutable()) << ": "
<< "output=" << LinkingOutput;
outs() << ", total="
<< format("%.3f", ProcStat->TotalTime.count() / 1000.) << " ms"
<< ", user="
<< format("%.3f", ProcStat->UserTime.count() / 1000.) << " ms"
<< ", mem=" << ProcStat->PeakMemory << " Kb\n";
} else {
// CSV format.
std::string Buffer;
llvm::raw_string_ostream Out(Buffer);
llvm::sys::printArg(Out, llvm::sys::path::filename(Cmd.getExecutable()),
/*Quote*/ true);
Out << ',';
llvm::sys::printArg(Out, LinkingOutput, true);
Out << ',' << ProcStat->TotalTime.count() << ','
<< ProcStat->UserTime.count() << ',' << ProcStat->PeakMemory
<< '\n';
Out.flush();
std::error_code EC;
llvm::raw_fd_ostream OS(CCPrintStatReportFilename.c_str(), EC,
llvm::sys::fs::OF_Append |
llvm::sys::fs::OF_Text);
if (EC)
return;
auto L = OS.lock();
if (!L) {
llvm::errs() << "ERROR: Cannot lock file "
<< CCPrintStatReportFilename << ": "
<< toString(L.takeError()) << "\n";
return;
}
OS << Buffer;
OS.flush();
}
});
}
// If the user passed -Qunused-arguments or there were errors, don't warn
// about any unused arguments.
if (Diags.hasErrorOccurred() ||
C.getArgs().hasArg(options::OPT_Qunused_arguments))
return;
// Claim -### here.
(void)C.getArgs().hasArg(options::OPT__HASH_HASH_HASH);
// Claim --driver-mode, --rsp-quoting, it was handled earlier.
(void)C.getArgs().hasArg(options::OPT_driver_mode);
(void)C.getArgs().hasArg(options::OPT_rsp_quoting);
for (Arg *A : C.getArgs()) {
// FIXME: It would be nice to be able to send the argument to the
// DiagnosticsEngine, so that extra values, position, and so on could be
// printed.
if (!A->isClaimed()) {
if (A->getOption().hasFlag(options::NoArgumentUnused))
continue;
// Suppress the warning automatically if this is just a flag, and it is an
// instance of an argument we already claimed.
const Option &Opt = A->getOption();
if (Opt.getKind() == Option::FlagClass) {
bool DuplicateClaimed = false;
for (const Arg *AA : C.getArgs().filtered(&Opt)) {
if (AA->isClaimed()) {
DuplicateClaimed = true;
break;
}
}
if (DuplicateClaimed)
continue;
}
// In clang-cl, don't mention unknown arguments here since they have
// already been warned about.
if (!IsCLMode() || !A->getOption().matches(options::OPT_UNKNOWN))
Diag(clang::diag::warn_drv_unused_argument)
<< A->getAsString(C.getArgs());
}
}
}
namespace {
/// Utility class to control the collapse of dependent actions and select the
/// tools accordingly.
class ToolSelector final {
/// The tool chain this selector refers to.
const ToolChain &TC;
/// The compilation this selector refers to.
const Compilation &C;
/// The base action this selector refers to.
const JobAction *BaseAction;
/// Set to true if the current toolchain refers to host actions.
bool IsHostSelector;
/// Set to true if save-temps and embed-bitcode functionalities are active.
bool SaveTemps;
bool EmbedBitcode;
/// Get previous dependent action or null if that does not exist. If
/// \a CanBeCollapsed is false, that action must be legal to collapse or
/// null will be returned.
const JobAction *getPrevDependentAction(const ActionList &Inputs,
ActionList &SavedOffloadAction,
bool CanBeCollapsed = true) {
// An action can be collapsed only if it has a single input.
if (Inputs.size() != 1)
return nullptr;
Action *CurAction = *Inputs.begin();
if (CanBeCollapsed &&
!CurAction->isCollapsingWithNextDependentActionLegal())
return nullptr;
// If the input action is an offload action, look through it and save any
// offload action that can be dropped in the event of a collapse.
if (auto *OA = dyn_cast<OffloadAction>(CurAction)) {
// If the dependent action is a device action, we will attempt to collapse
// only with other device actions. Otherwise, we would do the same but
// with host actions only.
if (!IsHostSelector) {
if (OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)) {
CurAction =
OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true);
if (CanBeCollapsed &&
!CurAction->isCollapsingWithNextDependentActionLegal())
return nullptr;
SavedOffloadAction.push_back(OA);
return dyn_cast<JobAction>(CurAction);
}
} else if (OA->hasHostDependence()) {
CurAction = OA->getHostDependence();
if (CanBeCollapsed &&
!CurAction->isCollapsingWithNextDependentActionLegal())
return nullptr;
SavedOffloadAction.push_back(OA);
return dyn_cast<JobAction>(CurAction);
}
return nullptr;
}
return dyn_cast<JobAction>(CurAction);
}
/// Return true if an assemble action can be collapsed.
bool canCollapseAssembleAction() const {
return TC.useIntegratedAs() && !SaveTemps &&
!C.getArgs().hasArg(options::OPT_via_file_asm) &&
!C.getArgs().hasArg(options::OPT__SLASH_FA) &&
!C.getArgs().hasArg(options::OPT__SLASH_Fa);
}
/// Return true if a preprocessor action can be collapsed.
bool canCollapsePreprocessorAction() const {
return !C.getArgs().hasArg(options::OPT_no_integrated_cpp) &&
!C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps &&
!C.getArgs().hasArg(options::OPT_rewrite_objc);
}
/// Struct that relates an action with the offload actions that would be
/// collapsed with it.
struct JobActionInfo final {
/// The action this info refers to.
const JobAction *JA = nullptr;
/// The offload actions we need to take care of if this action is
/// collapsed.
ActionList SavedOffloadAction;
};
/// Append collapsed offload actions from the given number of elements in the
/// action info array.
static void AppendCollapsedOffloadAction(ActionList &CollapsedOffloadAction,
ArrayRef<JobActionInfo> &ActionInfo,
unsigned ElementNum) {
assert(ElementNum <= ActionInfo.size() && "Invalid number of elements.");
for (unsigned I = 0; I < ElementNum; ++I)
CollapsedOffloadAction.append(ActionInfo[I].SavedOffloadAction.begin(),
ActionInfo[I].SavedOffloadAction.end());
}
/// Functions that attempt to perform the combining. They detect if that is
/// legal, and if so they update the inputs \a Inputs and the offload action
/// that were collapsed in \a CollapsedOffloadAction. A tool that deals with
/// the combined action is returned. If the combining is not legal or if the
/// tool does not exist, null is returned.
/// Currently three kinds of collapsing are supported:
/// - Assemble + Backend + Compile;
/// - Assemble + Backend;
/// - Backend + Compile.
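/// For example (illustrative): a plain `clang -c foo.c` with the integrated
/// assembler collapses Compile + Backend + Assemble into one clang job,
/// whereas -save-temps keeps the phases as separate jobs.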
const Tool *
combineAssembleBackendCompile(ArrayRef<JobActionInfo> ActionInfo,
ActionList &Inputs,
ActionList &CollapsedOffloadAction) {
if (ActionInfo.size() < 3 || !canCollapseAssembleAction())
return nullptr;
auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA);
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA);
auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[2].JA);
if (!AJ || !BJ || !CJ)
return nullptr;
// Get compiler tool.
const Tool *T = TC.SelectTool(*CJ);
if (!T)
return nullptr;
// When using -fembed-bitcode, it is required to have the same tool (clang)
// for both CompilerJA and BackendJA. Otherwise, combine two stages.
if (EmbedBitcode) {
const Tool *BT = TC.SelectTool(*BJ);
if (BT == T)
return nullptr;
}
if (!T->hasIntegratedAssembler())
return nullptr;
Inputs = CJ->getInputs();
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
/*NumElements=*/3);
return T;
}
const Tool *combineAssembleBackend(ArrayRef<JobActionInfo> ActionInfo,
ActionList &Inputs,
ActionList &CollapsedOffloadAction) {
if (ActionInfo.size() < 2 || !canCollapseAssembleAction())
return nullptr;
auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA);
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA);
if (!AJ || !BJ)
return nullptr;
// Get backend tool.
const Tool *T = TC.SelectTool(*BJ);
if (!T)
return nullptr;
if (!T->hasIntegratedAssembler())
return nullptr;
Inputs = BJ->getInputs();
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
/*NumElements=*/2);
return T;
}
const Tool *combineBackendCompile(ArrayRef<JobActionInfo> ActionInfo,
ActionList &Inputs,
ActionList &CollapsedOffloadAction) {
if (ActionInfo.size() < 2)
return nullptr;
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[0].JA);
auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[1].JA);
if (!BJ || !CJ)
return nullptr;
// Check if the initial input (to the compile job or its predecessor if one
// exists) is LLVM bitcode. In that case, no preprocessor step is required
// and we can still collapse the compile and backend jobs when we have
// -save-temps. I.e. there is no need for a separate compile job just to
// emit unoptimized bitcode.
bool InputIsBitcode = true;
for (size_t i = 1; i < ActionInfo.size(); i++)
if (ActionInfo[i].JA->getType() != types::TY_LLVM_BC &&
ActionInfo[i].JA->getType() != types::TY_LTO_BC) {
InputIsBitcode = false;
break;
}
if (!InputIsBitcode && !canCollapsePreprocessorAction())
return nullptr;
// Get compiler tool.
const Tool *T = TC.SelectTool(*CJ);
if (!T)
return nullptr;
if (T->canEmitIR() && ((SaveTemps && !InputIsBitcode) || EmbedBitcode))
return nullptr;
Inputs = CJ->getInputs();
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
/*NumElements=*/2);
return T;
}
/// Updates the inputs if the obtained tool supports combining with
/// preprocessor action, and the current input is indeed a preprocessor
/// action. If combining results in the collapse of offloading actions, those
/// are appended to \a CollapsedOffloadAction.
void combineWithPreprocessor(const Tool *T, ActionList &Inputs,
ActionList &CollapsedOffloadAction) {
if (!T || !canCollapsePreprocessorAction() || !T->hasIntegratedCPP())
return;
// Attempt to get a preprocessor action dependence.
ActionList PreprocessJobOffloadActions;
ActionList NewInputs;
for (Action *A : Inputs) {
auto *PJ = getPrevDependentAction({A}, PreprocessJobOffloadActions);
if (!PJ || !isa<PreprocessJobAction>(PJ)) {
NewInputs.push_back(A);
continue;
}
// This is legal to combine. Append any offload action we found and replace
// the current input with the preprocessor job's inputs.
CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(),
PreprocessJobOffloadActions.end());
NewInputs.append(PJ->input_begin(), PJ->input_end());
}
Inputs = NewInputs;
}
public:
ToolSelector(const JobAction *BaseAction, const ToolChain &TC,
const Compilation &C, bool SaveTemps, bool EmbedBitcode)
: TC(TC), C(C), BaseAction(BaseAction), SaveTemps(SaveTemps),
EmbedBitcode(EmbedBitcode) {
assert(BaseAction && "Invalid base action.");
IsHostSelector = BaseAction->getOffloadingDeviceKind() == Action::OFK_None;
}
/// Check if a chain of actions can be combined and return the tool that can
/// handle the combination of actions. The pointer to the current inputs \a
/// Inputs and the list of offload actions \a CollapsedOffloadActions
/// connected to collapsed actions are updated accordingly. The latter enables
/// the caller of the selector to process them afterwards instead of just
/// dropping them. If no suitable tool is found, null will be returned.
const Tool *getTool(ActionList &Inputs,
ActionList &CollapsedOffloadAction) {
//
// Get the largest chain of actions that we could combine.
//
SmallVector<JobActionInfo, 5> ActionChain(1);
ActionChain.back().JA = BaseAction;
while (ActionChain.back().JA) {
const Action *CurAction = ActionChain.back().JA;
// Grow the chain by one element.
ActionChain.resize(ActionChain.size() + 1);
JobActionInfo &AI = ActionChain.back();
// Attempt to fill it with the previous dependent action, if any.
AI.JA =
getPrevDependentAction(CurAction->getInputs(), AI.SavedOffloadAction);
}
// Pop the last action info as it could not be filled.
ActionChain.pop_back();
//
// Attempt to combine actions. If all combining attempts failed, just return
// the tool of the provided action. At the end we attempt to combine the
// action with any preprocessor action it may depend on.
//
const Tool *T = combineAssembleBackendCompile(ActionChain, Inputs,
CollapsedOffloadAction);
if (!T)
T = combineAssembleBackend(ActionChain, Inputs, CollapsedOffloadAction);
if (!T)
T = combineBackendCompile(ActionChain, Inputs, CollapsedOffloadAction);
if (!T) {
Inputs = BaseAction->getInputs();
T = TC.SelectTool(*BaseAction);
}
combineWithPreprocessor(T, Inputs, CollapsedOffloadAction);
return T;
}
};
}
/// Return a string that uniquely identifies the result of a job. The bound arch
/// is not necessarily represented in the toolchain's triple -- for example,
/// armv7 and armv7s both map to the same triple -- so we need both in our map.
/// Also, we need to add the offloading device kind, as the same tool chain can
/// be used for host and device for some programming models, e.g. OpenMP.
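/// The result looks like (illustrative) "nvptx64-nvidia-cuda-sm_70-cuda" for
/// a CUDA device job bound to sm_70.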
static std::string GetTriplePlusArchString(const ToolChain *TC,
StringRef BoundArch,
Action::OffloadKind OffloadKind) {
std::string TriplePlusArch = TC->getTriple().normalize();
if (!BoundArch.empty()) {
TriplePlusArch += "-";
TriplePlusArch += BoundArch;
}
TriplePlusArch += "-";
TriplePlusArch += Action::GetOffloadKindName(OffloadKind);
return TriplePlusArch;
}
InputInfo Driver::BuildJobsForAction(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
Action::OffloadKind TargetDeviceOffloadKind) const {
std::pair<const Action *, std::string> ActionTC = {
A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)};
auto CachedResult = CachedResults.find(ActionTC);
if (CachedResult != CachedResults.end()) {
return CachedResult->second;
}
InputInfo Result = BuildJobsForActionNoCache(
C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, TargetDeviceOffloadKind);
CachedResults[ActionTC] = Result;
return Result;
}
InputInfo Driver::BuildJobsForActionNoCache(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
Action::OffloadKind TargetDeviceOffloadKind) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
InputInfoList OffloadDependencesInputInfo;
bool BuildingForOffloadDevice = TargetDeviceOffloadKind != Action::OFK_None;
if (const OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
// The 'Darwin' toolchain is initialized only when its arguments are
// computed. Get the default arguments for OFK_None to ensure that
// initialization is performed before processing the offload action.
// FIXME: Remove when darwin's toolchain is initialized during construction.
C.getArgsForToolChain(TC, BoundArch, Action::OFK_None);
// The offload action is expected to be used in four different situations.
//
// a) Set a toolchain/architecture/kind for a host action:
// Host Action 1 -> OffloadAction -> Host Action 2
//
// b) Set a toolchain/architecture/kind for a device action;
// Device Action 1 -> OffloadAction -> Device Action 2
//
// c) Specify a device dependence to a host action;
// Device Action 1 _
// \
// Host Action 1 ---> OffloadAction -> Host Action 2
//
// d) Specify a host dependence to a device action.
// Host Action 1 _
// \
// Device Action 1 ---> OffloadAction -> Device Action 2
//
// For a) and b), we just return the job generated for the dependence. For
// c) and d) we override the current action with the host/device dependence
// if the current toolchain is host/device and set the offload dependences
// info with the jobs obtained from the device/host dependence(s).
// If there is a single device option, just generate the job for it.
if (OA->hasSingleDeviceDependence()) {
InputInfo DevA;
OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC,
const char *DepBoundArch) {
DevA =
BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel,
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput,
CachedResults, DepA->getOffloadingDeviceKind());
});
return DevA;
}
// If 'Action 2' is host, we generate jobs for the device dependences and
// override the current action with the host dependence. Otherwise, we
// generate the host dependences and override the action with the device
// dependence. The dependences can't therefore be a top-level action.
OA->doOnEachDependence(
/*IsHostDependence=*/BuildingForOffloadDevice,
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
OffloadDependencesInputInfo.push_back(BuildJobsForAction(
C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false,
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults,
DepA->getOffloadingDeviceKind()));
});
A = BuildingForOffloadDevice
? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)
: OA->getHostDependence();
}
if (const InputAction *IA = dyn_cast<InputAction>(A)) {
// FIXME: It would be nice to not claim this here; maybe the old scheme of
// just using Args was better?
const Arg &Input = IA->getInputArg();
Input.claim();
if (Input.getOption().matches(options::OPT_INPUT)) {
const char *Name = Input.getValue();
return InputInfo(A, Name, /* _BaseInput = */ Name);
}
return InputInfo(A, &Input, /* _BaseInput = */ "");
}
if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) {
const ToolChain *TC;
StringRef ArchName = BAA->getArchName();
if (!ArchName.empty())
TC = &getToolChain(C.getArgs(),
computeTargetTriple(*this, TargetTriple,
C.getArgs(), ArchName));
else
TC = &C.getDefaultToolChain();
return BuildJobsForAction(C, *BAA->input_begin(), TC, ArchName, AtTopLevel,
MultipleArchs, LinkingOutput, CachedResults,
TargetDeviceOffloadKind);
}
ActionList Inputs = A->getInputs();
const JobAction *JA = cast<JobAction>(A);
ActionList CollapsedOffloadActions;
ToolSelector TS(JA, *TC, C, isSaveTempsEnabled(),
embedBitcodeInObject() && !isUsingLTO());
const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions);
if (!T)
return InputInfo();
if (BuildingForOffloadDevice &&
A->getOffloadingDeviceKind() == Action::OFK_OpenMP) {
if (TC->getTriple().isAMDGCN()) {
// AMDGCN treats backend and assemble actions as no-ops because the
// linker does not support object files.
if (const BackendJobAction *BA = dyn_cast<BackendJobAction>(A)) {
return BuildJobsForAction(C, *BA->input_begin(), TC, BoundArch,
AtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, TargetDeviceOffloadKind);
}
if (const AssembleJobAction *AA = dyn_cast<AssembleJobAction>(A)) {
return BuildJobsForAction(C, *AA->input_begin(), TC, BoundArch,
AtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, TargetDeviceOffloadKind);
}
}
}
// If we've collapsed an action list that contained an OffloadAction, we
// need to build jobs for the host/device-side inputs it may have held.
for (const auto *OA : CollapsedOffloadActions)
cast<OffloadAction>(OA)->doOnEachDependence(
/*IsHostDependence=*/BuildingForOffloadDevice,
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
OffloadDependencesInputInfo.push_back(BuildJobsForAction(
C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false,
/*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults,
DepA->getOffloadingDeviceKind()));
});
// Only use pipes when there is exactly one input.
InputInfoList InputInfos;
for (const Action *Input : Inputs) {
// Treat dsymutil and verify sub-jobs as being at the top level too; they
// shouldn't get temporary output names.
// FIXME: Clean this up.
bool SubJobAtTopLevel =
AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A));
InputInfos.push_back(BuildJobsForAction(
C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, A->getOffloadingDeviceKind()));
}
// Always use the first input as the base input.
const char *BaseInput = InputInfos[0].getBaseInput();
// ... except dsymutil actions, which use their actual input as the base
// input.
if (JA->getType() == types::TY_dSYM)
BaseInput = InputInfos[0].getFilename();
// ... and in header module compilations, which use the module name.
if (auto *ModuleJA = dyn_cast<HeaderModulePrecompileJobAction>(JA))
BaseInput = ModuleJA->getModuleName();
// Append outputs of offload device jobs to the input list
if (!OffloadDependencesInputInfo.empty())
InputInfos.append(OffloadDependencesInputInfo.begin(),
OffloadDependencesInputInfo.end());
// Set the effective triple of the toolchain for the duration of this job.
llvm::Triple EffectiveTriple;
const ToolChain &ToolTC = T->getToolChain();
const ArgList &Args =
C.getArgsForToolChain(TC, BoundArch, A->getOffloadingDeviceKind());
if (InputInfos.size() != 1) {
EffectiveTriple = llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args));
} else {
// Pass along the input type if it can be unambiguously determined.
EffectiveTriple = llvm::Triple(
ToolTC.ComputeEffectiveClangTriple(Args, InputInfos[0].getType()));
}
RegisterEffectiveTriple TripleRAII(ToolTC, EffectiveTriple);
// Determine the place to write output to, if any.
InputInfo Result;
InputInfoList UnbundlingResults;
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(JA)) {
// If we have an unbundling job, we need to create results for all the
// outputs. We also update the results cache so that other actions using
// this unbundling action can get the right results.
for (auto &UI : UA->getDependentActionsInfo()) {
assert(UI.DependentOffloadKind != Action::OFK_None &&
"Unbundling with no offloading??");
// Unbundling actions are never at the top level. When we generate the
// offloading prefix, we also do that for the host file because the
// unbundling action does not change the type of the output, which can
// cause an overwrite.
std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
UI.DependentOffloadKind,
UI.DependentToolChain->getTriple().normalize(),
/*CreatePrefixForHost=*/true);
auto CurI = InputInfo(
UA,
GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch,
/*AtTopLevel=*/false,
MultipleArchs ||
UI.DependentOffloadKind == Action::OFK_HIP,
OffloadingPrefix),
BaseInput);
// Save the unbundling result.
UnbundlingResults.push_back(CurI);
// Get the unique string identifier for this dependence and cache the
// result.
StringRef Arch;
if (TargetDeviceOffloadKind == Action::OFK_HIP) {
if (UI.DependentOffloadKind == Action::OFK_Host)
Arch = StringRef();
else
Arch = UI.DependentBoundArch;
} else
Arch = BoundArch;
CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch,
UI.DependentOffloadKind)}] =
CurI;
}
// Now that all the results are generated, select the one that should be
// returned for the current dependent action.
std::pair<const Action *, std::string> ActionTC = {
A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)};
assert(CachedResults.find(ActionTC) != CachedResults.end() &&
"Result does not exist??");
Result = CachedResults[ActionTC];
} else if (JA->getType() == types::TY_Nothing)
Result = InputInfo(A, BaseInput);
else {
// We only have to generate a prefix for the host if this is not a top-level
// action.
std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
A->getOffloadingDeviceKind(), TC->getTriple().normalize(),
/*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
!AtTopLevel);
if (isa<OffloadWrapperJobAction>(JA)) {
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
BaseInput = FinalOutput->getValue();
else
BaseInput = getDefaultImageName();
BaseInput =
C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper");
}
Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
AtTopLevel, MultipleArchs,
OffloadingPrefix),
BaseInput);
}
if (CCCPrintBindings && !CCGenDiagnostics) {
llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"'
<< " - \"" << T->getName() << "\", inputs: [";
for (unsigned i = 0, e = InputInfos.size(); i != e; ++i) {
llvm::errs() << InputInfos[i].getAsString();
if (i + 1 != e)
llvm::errs() << ", ";
}
if (UnbundlingResults.empty())
llvm::errs() << "], output: " << Result.getAsString() << "\n";
else {
llvm::errs() << "], outputs: [";
for (unsigned i = 0, e = UnbundlingResults.size(); i != e; ++i) {
llvm::errs() << UnbundlingResults[i].getAsString();
if (i + 1 != e)
llvm::errs() << ", ";
}
llvm::errs() << "] \n";
}
} else {
if (UnbundlingResults.empty())
T->ConstructJob(
C, *JA, Result, InputInfos,
C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()),
LinkingOutput);
else
T->ConstructJobMultipleOutputs(
C, *JA, UnbundlingResults, InputInfos,
C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()),
LinkingOutput);
}
return Result;
}
const char *Driver::getDefaultImageName() const {
llvm::Triple Target(llvm::Triple::normalize(TargetTriple));
return Target.isOSWindows() ? "a.exe" : "a.out";
}
/// Create an output filename based on ArgValue, which could be either a
/// full filename, a filename without an extension, or a directory. If ArgValue
/// does not provide a filename, then use BaseName, and use the extension
/// suitable for FileType.
static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue,
StringRef BaseName,
types::ID FileType) {
SmallString<128> Filename = ArgValue;
if (ArgValue.empty()) {
// If the argument is empty, output to BaseName in the current dir.
Filename = BaseName;
} else if (llvm::sys::path::is_separator(Filename.back())) {
// If the argument is a directory, output to BaseName in that dir.
llvm::sys::path::append(Filename, BaseName);
}
if (!llvm::sys::path::has_extension(ArgValue)) {
// If the argument didn't provide an extension, then set it.
const char *Extension = types::getTypeTempSuffix(FileType, true);
if (FileType == types::TY_Image &&
Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd)) {
// The output file is a dll.
Extension = "dll";
}
llvm::sys::path::replace_extension(Filename, Extension);
}
return Args.MakeArgString(Filename.c_str());
}
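// Illustrative example (clang-cl semantics, assuming "obj" is the temp
// suffix for TY_Object): an ArgValue of "dir\" with BaseName "foo.c" and
// FileType TY_Object yields "dir\foo.obj"; an empty ArgValue yields
// "foo.obj" in the current directory.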
static bool HasPreprocessOutput(const Action &JA) {
if (isa<PreprocessJobAction>(JA))
return true;
if (isa<OffloadAction>(JA) && isa<PreprocessJobAction>(JA.getInputs()[0]))
return true;
if (isa<OffloadBundlingJobAction>(JA) &&
HasPreprocessOutput(*(JA.getInputs()[0])))
return true;
return false;
}
const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
const char *BaseInput,
StringRef OrigBoundArch, bool AtTopLevel,
bool MultipleArchs,
StringRef OffloadingPrefix) const {
std::string BoundArch = OrigBoundArch.str();
#if defined(_WIN32)
// BoundArch may contain ':', which is invalid in file names on Windows;
// replace it with '@'.
std::replace(BoundArch.begin(), BoundArch.end(), ':', '@');
#endif
llvm::PrettyStackTraceString CrashInfo("Computing output path");
// Output to a user requested destination?
if (AtTopLevel && !isa<DsymutilJobAction>(JA) && !isa<VerifyJobAction>(JA)) {
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
return C.addResultFile(FinalOutput->getValue(), &JA);
}
// For /P, preprocess to file named after BaseInput.
if (C.getArgs().hasArg(options::OPT__SLASH_P)) {
assert(AtTopLevel && isa<PreprocessJobAction>(JA));
StringRef BaseName = llvm::sys::path::filename(BaseInput);
StringRef NameArg;
if (Arg *A = C.getArgs().getLastArg(options::OPT__SLASH_Fi))
NameArg = A->getValue();
return C.addResultFile(
MakeCLOutputFilename(C.getArgs(), NameArg, BaseName, types::TY_PP_C),
&JA);
}
// Default to writing to stdout?
if (AtTopLevel && !CCGenDiagnostics && HasPreprocessOutput(JA)) {
return "-";
}
if (JA.getType() == types::TY_ModuleFile &&
C.getArgs().getLastArg(options::OPT_module_file_info)) {
return "-";
}
// Is this the assembly listing for /FA?
if (JA.getType() == types::TY_PP_Asm &&
(C.getArgs().hasArg(options::OPT__SLASH_FA) ||
C.getArgs().hasArg(options::OPT__SLASH_Fa))) {
// Use /Fa and the input filename to determine the asm file name.
StringRef BaseName = llvm::sys::path::filename(BaseInput);
StringRef FaValue = C.getArgs().getLastArgValue(options::OPT__SLASH_Fa);
return C.addResultFile(
MakeCLOutputFilename(C.getArgs(), FaValue, BaseName, JA.getType()),
&JA);
}
// Output to a temporary file?
if ((!AtTopLevel && !isSaveTempsEnabled() &&
!C.getArgs().hasArg(options::OPT__SLASH_Fo)) ||
CCGenDiagnostics) {
StringRef Name = llvm::sys::path::filename(BaseInput);
std::pair<StringRef, StringRef> Split = Name.split('.');
SmallString<128> TmpName;
const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());
Arg *A = C.getArgs().getLastArg(options::OPT_fcrash_diagnostics_dir);
if (CCGenDiagnostics && A) {
SmallString<128> CrashDirectory(A->getValue());
if (!getVFS().exists(CrashDirectory))
llvm::sys::fs::create_directories(CrashDirectory);
llvm::sys::path::append(CrashDirectory, Split.first);
const char *Middle = Suffix ? "-%%%%%%." : "-%%%%%%";
std::error_code EC = llvm::sys::fs::createUniqueFile(
CrashDirectory + Middle + Suffix, TmpName);
if (EC) {
Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return "";
}
} else {
TmpName = GetTemporaryPath(Split.first, Suffix);
}
return C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
SmallString<128> BasePath(BaseInput);
SmallString<128> ExternalPath("");
StringRef BaseName;
// Dsymutil actions should use the full path.
if (isa<DsymutilJobAction>(JA) && C.getArgs().hasArg(options::OPT_dsym_dir)) {
ExternalPath += C.getArgs().getLastArg(options::OPT_dsym_dir)->getValue();
// We use posix style here because the tests (specifically
// darwin-dsymutil.c) demonstrate that posix-style paths are acceptable
// even on Windows; if we don't, the corresponding test fails.
llvm::sys::path::append(ExternalPath, llvm::sys::path::Style::posix,
llvm::sys::path::filename(BasePath));
BaseName = ExternalPath;
} else if (isa<DsymutilJobAction>(JA) || isa<VerifyJobAction>(JA))
BaseName = BasePath;
else
BaseName = llvm::sys::path::filename(BasePath);
// Determine what the derived output name should be.
const char *NamedOutput;
if ((JA.getType() == types::TY_Object || JA.getType() == types::TY_LTO_BC) &&
C.getArgs().hasArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)) {
// The /Fo or /o flag decides the object filename.
StringRef Val =
C.getArgs()
.getLastArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)
->getValue();
NamedOutput =
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object);
} else if (JA.getType() == types::TY_Image &&
C.getArgs().hasArg(options::OPT__SLASH_Fe,
options::OPT__SLASH_o)) {
// The /Fe or /o flag names the linked file.
StringRef Val =
C.getArgs()
.getLastArg(options::OPT__SLASH_Fe, options::OPT__SLASH_o)
->getValue();
NamedOutput =
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Image);
} else if (JA.getType() == types::TY_Image) {
if (IsCLMode()) {
// clang-cl uses BaseName for the executable name.
NamedOutput =
MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image);
} else {
SmallString<128> Output(getDefaultImageName());
// HIP image for device compilation with -fno-gpu-rdc is per compilation
// unit.
bool IsHIPNoRDC = JA.getOffloadingDeviceKind() == Action::OFK_HIP &&
!C.getArgs().hasFlag(options::OPT_fgpu_rdc,
options::OPT_fno_gpu_rdc, false);
if (IsHIPNoRDC) {
Output = BaseName;
llvm::sys::path::replace_extension(Output, "");
}
Output += OffloadingPrefix;
if (MultipleArchs && !BoundArch.empty()) {
Output += "-";
Output.append(BoundArch);
}
if (IsHIPNoRDC)
Output += ".out";
NamedOutput = C.getArgs().MakeArgString(Output.c_str());
}
} else if (JA.getType() == types::TY_PCH && IsCLMode()) {
NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName));
} else {
const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());
assert(Suffix && "All types used for output should have a suffix.");
std::string::size_type End = std::string::npos;
if (!types::appendSuffixForType(JA.getType()))
End = BaseName.rfind('.');
SmallString<128> Suffixed(BaseName.substr(0, End));
Suffixed += OffloadingPrefix;
if (MultipleArchs && !BoundArch.empty()) {
Suffixed += "-";
Suffixed.append(BoundArch);
}
// When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for
// the unoptimized bitcode so that it does not get overwritten by the ".bc"
// optimized bitcode output.
auto IsHIPRDCInCompilePhase = [](const JobAction &JA,
const llvm::opt::DerivedArgList &Args) {
// The relocatable compilation in HIP implies -emit-llvm. Similarly, use a
// ".tmp.bc" suffix for the unoptimized bitcode (generated in the compile
// phase).
return isa<CompileJobAction>(JA) &&
JA.getOffloadingDeviceKind() == Action::OFK_HIP &&
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false);
};
if (!AtTopLevel && JA.getType() == types::TY_LLVM_BC &&
(C.getArgs().hasArg(options::OPT_emit_llvm) ||
IsHIPRDCInCompilePhase(JA, C.getArgs())))
Suffixed += ".tmp";
Suffixed += '.';
Suffixed += Suffix;
NamedOutput = C.getArgs().MakeArgString(Suffixed.c_str());
}
// Prepend object file path if -save-temps=obj
if (!AtTopLevel && isSaveTempsObj() && C.getArgs().hasArg(options::OPT_o) &&
JA.getType() != types::TY_PCH) {
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
SmallString<128> TempPath(FinalOutput->getValue());
llvm::sys::path::remove_filename(TempPath);
StringRef OutputFileName = llvm::sys::path::filename(NamedOutput);
llvm::sys::path::append(TempPath, OutputFileName);
NamedOutput = C.getArgs().MakeArgString(TempPath.c_str());
}
// If we're saving temps and the temp file conflicts with the input file,
// then avoid overwriting input file.
if (!AtTopLevel && isSaveTempsEnabled() && NamedOutput == BaseName) {
bool SameFile = false;
SmallString<256> Result;
llvm::sys::fs::current_path(Result);
llvm::sys::path::append(Result, BaseName);
llvm::sys::fs::equivalent(BaseInput, Result.c_str(), SameFile);
// Must share the same path to conflict.
if (SameFile) {
StringRef Name = llvm::sys::path::filename(BaseInput);
std::pair<StringRef, StringRef> Split = Name.split('.');
std::string TmpName = GetTemporaryPath(
Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
return C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
}
// As an annoying special case, PCH generation doesn't strip the pathname.
if (JA.getType() == types::TY_PCH && !IsCLMode()) {
llvm::sys::path::remove_filename(BasePath);
if (BasePath.empty())
BasePath = NamedOutput;
else
llvm::sys::path::append(BasePath, NamedOutput);
return C.addResultFile(C.getArgs().MakeArgString(BasePath.c_str()), &JA);
} else {
return C.addResultFile(NamedOutput, &JA);
}
}
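// Illustrative sketch of the common cases (assuming the usual suffix
// table): compiling "foo.c" with -c and no -o derives "foo.o"; with
// -save-temps=obj and -o out/foo.o, intermediate outputs are additionally
// placed next to the final object in "out/"; at the top level an explicit
// -o always wins (except for dsymutil and verify jobs).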
std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const {
// Search for Name in a list of paths.
auto SearchPaths = [&](const llvm::SmallVectorImpl<std::string> &P)
-> llvm::Optional<std::string> {
// Respect a limited subset of the '-Bprefix' functionality in GCC by
// attempting to use this prefix when looking for file paths.
for (const auto &Dir : P) {
if (Dir.empty())
continue;
SmallString<128> P(Dir[0] == '=' ? SysRoot + Dir.substr(1) : Dir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return std::string(P);
}
return None;
};
if (auto P = SearchPaths(PrefixDirs))
return *P;
SmallString<128> R(ResourceDir);
llvm::sys::path::append(R, Name);
if (llvm::sys::fs::exists(Twine(R)))
return std::string(R.str());
SmallString<128> P(TC.getCompilerRTPath());
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return std::string(P.str());
SmallString<128> D(Dir);
llvm::sys::path::append(D, "..", Name);
if (llvm::sys::fs::exists(Twine(D)))
return std::string(D.str());
if (auto P = SearchPaths(TC.getLibraryPaths()))
return *P;
if (auto P = SearchPaths(TC.getFilePaths()))
return *P;
return std::string(Name);
}
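// Illustrative example: GetFilePath("libclang_rt.builtins.a", TC) walks the
// -B prefix dirs (a leading '=' is rewritten to the sysroot), then the
// resource dir, the compiler-rt path, <driver dir>/.., and finally the
// toolchain library and file paths, returning the bare name if nothing is
// found. The file name here is only an example input.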
void Driver::generatePrefixedToolNames(
StringRef Tool, const ToolChain &TC,
SmallVectorImpl<std::string> &Names) const {
// FIXME: Needs a better variable than TargetTriple
Names.emplace_back((TargetTriple + "-" + Tool).str());
Names.emplace_back(Tool);
}
static bool ScanDirForExecutable(SmallString<128> &Dir, StringRef Name) {
llvm::sys::path::append(Dir, Name);
if (llvm::sys::fs::can_execute(Twine(Dir)))
return true;
llvm::sys::path::remove_filename(Dir);
return false;
}
std::string Driver::GetProgramPath(StringRef Name, const ToolChain &TC) const {
SmallVector<std::string, 2> TargetSpecificExecutables;
generatePrefixedToolNames(Name, TC, TargetSpecificExecutables);
// Respect a limited subset of the '-Bprefix' functionality in GCC by
// attempting to use this prefix when looking for program paths.
for (const auto &PrefixDir : PrefixDirs) {
if (llvm::sys::fs::is_directory(PrefixDir)) {
SmallString<128> P(PrefixDir);
if (ScanDirForExecutable(P, Name))
return std::string(P.str());
} else {
SmallString<128> P((PrefixDir + Name).str());
if (llvm::sys::fs::can_execute(Twine(P)))
return std::string(P.str());
}
}
const ToolChain::path_list &List = TC.getProgramPaths();
for (const auto &TargetSpecificExecutable : TargetSpecificExecutables) {
// For each possible name of the tool, look for it in the
// program paths first, then in PATH.
// Higher-priority names come first, so a higher-priority name found in
// PATH is used instead of a lower-priority name found in the program
// paths, e.g. <triple>-gcc found in PATH is used instead of plain gcc
// found in the program paths.
for (const auto &Path : List) {
SmallString<128> P(Path);
if (ScanDirForExecutable(P, TargetSpecificExecutable))
return std::string(P.str());
}
// Fall back to the path
if (llvm::ErrorOr<std::string> P =
llvm::sys::findProgramByName(TargetSpecificExecutable))
return *P;
}
return std::string(Name);
}
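// Illustrative example (the triple is only an example): looking up "ld"
// first checks the -B prefix dirs for plain "ld"; it then tries
// "x86_64-unknown-freebsd-ld" and finally plain "ld" in the toolchain
// program paths and in PATH, returning the bare name if nothing is found.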
std::string Driver::GetTemporaryPath(StringRef Prefix, StringRef Suffix) const {
SmallString<128> Path;
std::error_code EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, Path);
if (EC) {
Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return "";
}
return std::string(Path.str());
}
std::string Driver::GetTemporaryDirectory(StringRef Prefix) const {
SmallString<128> Path;
std::error_code EC = llvm::sys::fs::createUniqueDirectory(Prefix, Path);
if (EC) {
Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return "";
}
return std::string(Path.str());
}
std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const {
SmallString<128> Output;
if (Arg *FpArg = C.getArgs().getLastArg(options::OPT__SLASH_Fp)) {
// FIXME: If anybody needs it, implement this obscure rule:
// "If you specify a directory without a file name, the default file name
// is VCx0.pch., where x is the major version of Visual C++ in use."
Output = FpArg->getValue();
// "If you do not specify an extension as part of the path name, an
// extension of .pch is assumed. "
if (!llvm::sys::path::has_extension(Output))
Output += ".pch";
} else {
if (Arg *YcArg = C.getArgs().getLastArg(options::OPT__SLASH_Yc))
Output = YcArg->getValue();
if (Output.empty())
Output = BaseName;
llvm::sys::path::replace_extension(Output, ".pch");
}
return std::string(Output.str());
}
const ToolChain &Driver::getToolChain(const ArgList &Args,
const llvm::Triple &Target) const {
auto &TC = ToolChains[Target.str()];
if (!TC) {
switch (Target.getOS()) {
case llvm::Triple::AIX:
TC = std::make_unique<toolchains::AIX>(*this, Target, Args);
break;
case llvm::Triple::Haiku:
TC = std::make_unique<toolchains::Haiku>(*this, Target, Args);
break;
case llvm::Triple::Ananas:
TC = std::make_unique<toolchains::Ananas>(*this, Target, Args);
break;
case llvm::Triple::CloudABI:
TC = std::make_unique<toolchains::CloudABI>(*this, Target, Args);
break;
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX:
case llvm::Triple::IOS:
case llvm::Triple::TvOS:
case llvm::Triple::WatchOS:
TC = std::make_unique<toolchains::DarwinClang>(*this, Target, Args);
break;
case llvm::Triple::DragonFly:
TC = std::make_unique<toolchains::DragonFly>(*this, Target, Args);
break;
case llvm::Triple::OpenBSD:
TC = std::make_unique<toolchains::OpenBSD>(*this, Target, Args);
break;
case llvm::Triple::NetBSD:
TC = std::make_unique<toolchains::NetBSD>(*this, Target, Args);
break;
case llvm::Triple::FreeBSD:
TC = std::make_unique<toolchains::FreeBSD>(*this, Target, Args);
break;
case llvm::Triple::Minix:
TC = std::make_unique<toolchains::Minix>(*this, Target, Args);
break;
case llvm::Triple::Linux:
case llvm::Triple::ELFIAMCU:
if (Target.getArch() == llvm::Triple::hexagon)
TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target,
Args);
else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) &&
!Target.hasEnvironment())
TC = std::make_unique<toolchains::MipsLLVMToolChain>(*this, Target,
Args);
else if (Target.isPPC())
TC = std::make_unique<toolchains::PPCLinuxToolChain>(*this, Target,
Args);
else if (Target.getArch() == llvm::Triple::ve)
TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args);
else
TC = std::make_unique<toolchains::Linux>(*this, Target, Args);
break;
case llvm::Triple::NaCl:
TC = std::make_unique<toolchains::NaClToolChain>(*this, Target, Args);
break;
case llvm::Triple::Fuchsia:
TC = std::make_unique<toolchains::Fuchsia>(*this, Target, Args);
break;
case llvm::Triple::Solaris:
TC = std::make_unique<toolchains::Solaris>(*this, Target, Args);
break;
case llvm::Triple::AMDHSA:
TC = std::make_unique<toolchains::ROCMToolChain>(*this, Target, Args);
break;
case llvm::Triple::AMDPAL:
case llvm::Triple::Mesa3D:
TC = std::make_unique<toolchains::AMDGPUToolChain>(*this, Target, Args);
break;
case llvm::Triple::Win32:
switch (Target.getEnvironment()) {
default:
if (Target.isOSBinFormatELF())
TC = std::make_unique<toolchains::Generic_ELF>(*this, Target, Args);
else if (Target.isOSBinFormatMachO())
TC = std::make_unique<toolchains::MachO>(*this, Target, Args);
else
TC = std::make_unique<toolchains::Generic_GCC>(*this, Target, Args);
break;
case llvm::Triple::GNU:
TC = std::make_unique<toolchains::MinGW>(*this, Target, Args);
break;
case llvm::Triple::Itanium:
TC = std::make_unique<toolchains::CrossWindowsToolChain>(*this, Target,
Args);
break;
case llvm::Triple::MSVC:
case llvm::Triple::UnknownEnvironment:
if (Args.getLastArgValue(options::OPT_fuse_ld_EQ)
.startswith_insensitive("bfd"))
TC = std::make_unique<toolchains::CrossWindowsToolChain>(
*this, Target, Args);
else
TC =
std::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
break;
}
break;
case llvm::Triple::PS4:
TC = std::make_unique<toolchains::PS4CPU>(*this, Target, Args);
break;
case llvm::Triple::Contiki:
TC = std::make_unique<toolchains::Contiki>(*this, Target, Args);
break;
case llvm::Triple::Hurd:
TC = std::make_unique<toolchains::Hurd>(*this, Target, Args);
break;
case llvm::Triple::ZOS:
TC = std::make_unique<toolchains::ZOS>(*this, Target, Args);
break;
default:
// Of these targets, Hexagon is the only one that might have
// an OS of Linux, in which case it was already handled above.
switch (Target.getArch()) {
case llvm::Triple::tce:
TC = std::make_unique<toolchains::TCEToolChain>(*this, Target, Args);
break;
case llvm::Triple::tcele:
TC = std::make_unique<toolchains::TCELEToolChain>(*this, Target, Args);
break;
case llvm::Triple::hexagon:
TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target,
Args);
break;
case llvm::Triple::lanai:
TC = std::make_unique<toolchains::LanaiToolChain>(*this, Target, Args);
break;
case llvm::Triple::xcore:
TC = std::make_unique<toolchains::XCoreToolChain>(*this, Target, Args);
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
TC = std::make_unique<toolchains::WebAssembly>(*this, Target, Args);
break;
case llvm::Triple::avr:
TC = std::make_unique<toolchains::AVRToolChain>(*this, Target, Args);
break;
case llvm::Triple::msp430:
TC =
std::make_unique<toolchains::MSP430ToolChain>(*this, Target, Args);
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
if (toolchains::RISCVToolChain::hasGCCToolchain(*this, Args))
TC =
std::make_unique<toolchains::RISCVToolChain>(*this, Target, Args);
else
TC = std::make_unique<toolchains::BareMetal>(*this, Target, Args);
break;
case llvm::Triple::ve:
TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args);
break;
default:
if (Target.getVendor() == llvm::Triple::Myriad)
TC = std::make_unique<toolchains::MyriadToolChain>(*this, Target,
Args);
else if (toolchains::BareMetal::handlesTarget(Target))
TC = std::make_unique<toolchains::BareMetal>(*this, Target, Args);
else if (Target.isOSBinFormatELF())
TC = std::make_unique<toolchains::Generic_ELF>(*this, Target, Args);
else if (Target.isOSBinFormatMachO())
TC = std::make_unique<toolchains::MachO>(*this, Target, Args);
else
TC = std::make_unique<toolchains::Generic_GCC>(*this, Target, Args);
}
}
}
// Intentionally omitted from the switch above: llvm::Triple::CUDA. CUDA
// compiles always need two toolchains, the CUDA toolchain and the host
// toolchain. So the only valid way to create a CUDA toolchain is via
// CreateOffloadingDeviceToolChains.
return *TC;
}
bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
// Say "no" if there is not exactly one input of a type clang understands.
if (JA.size() != 1 ||
!types::isAcceptedByClang((*JA.input_begin())->getType()))
return false;
// And say "no" if this is not a kind of action clang understands.
if (!isa<PreprocessJobAction>(JA) && !isa<PrecompileJobAction>(JA) &&
!isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
return false;
return true;
}
bool Driver::ShouldUseFlangCompiler(const JobAction &JA) const {
// Say "no" if there is not exactly one input of a type flang understands.
if (JA.size() != 1 ||
!types::isFortran((*JA.input_begin())->getType()))
return false;
// And say "no" if this is not a kind of action flang understands.
if (!isa<PreprocessJobAction>(JA) && !isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
return false;
return true;
}
bool Driver::ShouldEmitStaticLibrary(const ArgList &Args) const {
// Only emit static library if the flag is set explicitly.
if (Args.hasArg(options::OPT_emit_static_lib))
return true;
return false;
}
/// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and return the
/// grouped values as integers. Numbers which are not provided are set to 0.
///
/// \return True if the entire string was parsed (9.2), or all groups were
/// parsed (10.3.5extrastuff).
bool Driver::GetReleaseVersion(StringRef Str, unsigned &Major, unsigned &Minor,
unsigned &Micro, bool &HadExtra) {
HadExtra = false;
Major = Minor = Micro = 0;
if (Str.empty())
return false;
if (Str.consumeInteger(10, Major))
return false;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
if (Str.consumeInteger(10, Minor))
return false;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
if (Str.consumeInteger(10, Micro))
return false;
if (!Str.empty())
HadExtra = true;
return true;
}
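// Illustrative usage (comment sketch):
//   unsigned Major, Minor, Micro; bool HadExtra;
//   Driver::GetReleaseVersion("10.3.5extra", Major, Minor, Micro, HadExtra);
//   // -> returns true with Major=10, Minor=3, Micro=5, HadExtra=true;
//   // "9.2" also returns true, with Micro left at 0 and HadExtra=false.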
/// Parse digits from a string \p Str and fill \p Digits with
/// the parsed numbers. This method assumes that the max number of
/// digits to look for is equal to Digits.size().
///
/// \return True if the entire string was parsed and there are
/// no extra characters remaining at the end.
bool Driver::GetReleaseVersion(StringRef Str,
MutableArrayRef<unsigned> Digits) {
if (Str.empty())
return false;
unsigned CurDigit = 0;
while (CurDigit < Digits.size()) {
unsigned Digit;
if (Str.consumeInteger(10, Digit))
return false;
Digits[CurDigit] = Digit;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
CurDigit++;
}
// More digits than requested, bail out...
return false;
}
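// Illustrative usage (comment sketch), with a two-element buffer:
//   unsigned Parts[2] = {0, 0};
//   Driver::GetReleaseVersion("4.2", Parts);   // true, Parts = {4, 2}
//   Driver::GetReleaseVersion("4.2.1", Parts); // false: more digits than requested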
std::pair<unsigned, unsigned>
Driver::getIncludeExcludeOptionFlagMasks(bool IsClCompatMode) const {
unsigned IncludedFlagsBitmask = 0;
unsigned ExcludedFlagsBitmask = options::NoDriverOption;
if (IsClCompatMode) {
// Include CL and Core options.
IncludedFlagsBitmask |= options::CLOption;
IncludedFlagsBitmask |= options::CoreOption;
} else {
ExcludedFlagsBitmask |= options::CLOption;
}
return std::make_pair(IncludedFlagsBitmask, ExcludedFlagsBitmask);
}
bool clang::driver::isOptimizationLevelFast(const ArgList &Args) {
return Args.hasFlag(options::OPT_Ofast, options::OPT_O_Group, false);
}
bool clang::driver::willEmitRemarks(const ArgList &Args) {
// -fsave-optimization-record enables it.
if (Args.hasFlag(options::OPT_fsave_optimization_record,
options::OPT_fno_save_optimization_record, false))
return true;
// -fsave-optimization-record=<format> enables it as well.
if (Args.hasFlag(options::OPT_fsave_optimization_record_EQ,
options::OPT_fno_save_optimization_record, false))
return true;
// -foptimization-record-file alone enables it too.
if (Args.hasFlag(options::OPT_foptimization_record_file_EQ,
options::OPT_fno_save_optimization_record, false))
return true;
// -foptimization-record-passes alone enables it too.
if (Args.hasFlag(options::OPT_foptimization_record_passes_EQ,
options::OPT_fno_save_optimization_record, false))
return true;
return false;
}
llvm::StringRef clang::driver::getDriverMode(StringRef ProgName,
ArrayRef<const char *> Args) {
static const std::string OptName =
getDriverOptTable().getOption(options::OPT_driver_mode).getPrefixedName();
llvm::StringRef Opt;
for (StringRef Arg : Args) {
if (!Arg.startswith(OptName))
continue;
Opt = Arg;
- break;
}
if (Opt.empty())
Opt = ToolChain::getTargetAndModeFromProgramName(ProgName).DriverMode;
return Opt.consume_front(OptName) ? Opt : "";
}
bool driver::IsClangCL(StringRef DriverMode) { return DriverMode.equals("cl"); }
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp
index d63c5e12c4af..4a7413112b55 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -1,895 +1,930 @@
//===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "CommonArgs.h"
#include "clang/Basic/TargetID.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <system_error>
#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
// Look for a sub-directory under the ROCm candidate path whose name starts
// with PackageName. If exactly one matching sub-directory is found, append
// it to the candidate path and return the resulting full package path. If
// there is no matching sub-directory, or more than one, diagnose the
// situation and return an empty string.
llvm::SmallString<0>
RocmInstallationDetector::findSPACKPackage(const Candidate &Cand,
StringRef PackageName) {
if (!Cand.isSPACK())
return {};
std::error_code EC;
std::string Prefix = Twine(PackageName + "-" + Cand.SPACKReleaseStr).str();
llvm::SmallVector<llvm::SmallString<0>> SubDirs;
for (llvm::vfs::directory_iterator File = D.getVFS().dir_begin(Cand.Path, EC),
FileEnd;
File != FileEnd && !EC; File.increment(EC)) {
llvm::StringRef FileName = llvm::sys::path::filename(File->path());
if (FileName.startswith(Prefix)) {
SubDirs.push_back(FileName);
if (SubDirs.size() > 1)
break;
}
}
if (SubDirs.size() == 1) {
auto PackagePath = Cand.Path;
llvm::sys::path::append(PackagePath, SubDirs[0]);
return PackagePath;
}
if (SubDirs.size() == 0 && Verbose) {
llvm::errs() << "SPACK package " << Prefix << " not found at " << Cand.Path
<< '\n';
return {};
}
if (SubDirs.size() > 1 && Verbose) {
llvm::errs() << "Cannot use SPACK package " << Prefix << " at " << Cand.Path
<< " due to multiple installations for the same version\n";
}
return {};
}
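// Illustrative example: with PackageName "hip" and a candidate whose
// SPACKReleaseStr is "4.0.0", this looks for exactly one sub-directory of
// Cand.Path whose name starts with "hip-4.0.0" (e.g. "hip-4.0.0-abc123",
// where the hash is made up for illustration) and returns its full path.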
void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
assert(!Path.empty());
const StringRef Suffix(".bc");
const StringRef Suffix2(".amdgcn.bc");
std::error_code EC;
for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
!EC && LI != LE; LI = LI.increment(EC)) {
StringRef FilePath = LI->path();
StringRef FileName = llvm::sys::path::filename(FilePath);
if (!FileName.endswith(Suffix))
continue;
StringRef BaseName;
if (FileName.endswith(Suffix2))
BaseName = FileName.drop_back(Suffix2.size());
else if (FileName.endswith(Suffix))
BaseName = FileName.drop_back(Suffix.size());
if (BaseName == "ocml") {
OCML = FilePath;
} else if (BaseName == "ockl") {
OCKL = FilePath;
} else if (BaseName == "opencl") {
OpenCL = FilePath;
} else if (BaseName == "hip") {
HIP = FilePath;
} else if (BaseName == "asanrtl") {
AsanRTL = FilePath;
} else if (BaseName == "oclc_finite_only_off") {
FiniteOnly.Off = FilePath;
} else if (BaseName == "oclc_finite_only_on") {
FiniteOnly.On = FilePath;
} else if (BaseName == "oclc_daz_opt_on") {
DenormalsAreZero.On = FilePath;
} else if (BaseName == "oclc_daz_opt_off") {
DenormalsAreZero.Off = FilePath;
} else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
CorrectlyRoundedSqrt.On = FilePath;
} else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
CorrectlyRoundedSqrt.Off = FilePath;
} else if (BaseName == "oclc_unsafe_math_on") {
UnsafeMath.On = FilePath;
} else if (BaseName == "oclc_unsafe_math_off") {
UnsafeMath.Off = FilePath;
} else if (BaseName == "oclc_wavefrontsize64_on") {
WavefrontSize64.On = FilePath;
} else if (BaseName == "oclc_wavefrontsize64_off") {
WavefrontSize64.Off = FilePath;
} else {
// Process all bitcode filenames that look like
// oclc_isa_version_XXX.amdgcn.bc
const StringRef DeviceLibPrefix = "oclc_isa_version_";
if (!BaseName.startswith(DeviceLibPrefix))
continue;
StringRef IsaVersionNumber =
BaseName.drop_front(DeviceLibPrefix.size());
llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
SmallString<8> Tmp;
LibDeviceMap.insert(
std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
}
}
}
// Parse and extract version numbers from `.hipVersion`. Return `true` if
// the parsing fails.
bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) {
SmallVector<StringRef, 4> VersionParts;
V.split(VersionParts, '\n');
unsigned Major = ~0U;
unsigned Minor = ~0U;
for (auto Part : VersionParts) {
auto Splits = Part.rtrim().split('=');
if (Splits.first == "HIP_VERSION_MAJOR") {
if (Splits.second.getAsInteger(0, Major))
return true;
} else if (Splits.first == "HIP_VERSION_MINOR") {
if (Splits.second.getAsInteger(0, Minor))
return true;
} else if (Splits.first == "HIP_VERSION_PATCH")
VersionPatch = Splits.second.str();
}
if (Major == ~0U || Minor == ~0U)
return true;
VersionMajorMinor = llvm::VersionTuple(Major, Minor);
DetectedVersion =
(Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
return false;
}
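// Illustrative example of a .hipVersion file this accepts:
//   HIP_VERSION_MAJOR=4
//   HIP_VERSION_MINOR=2
//   HIP_VERSION_PATCH=0
// yielding VersionMajorMinor = (4, 2) and DetectedVersion = "4.2.0".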
/// \returns a list of candidate directories for ROCm installation, which is
/// cached and populated only once.
const SmallVectorImpl<RocmInstallationDetector::Candidate> &
RocmInstallationDetector::getInstallationPathCandidates() {
// Return the cached candidate list if it has already been populated.
if (!ROCmSearchDirs.empty())
return ROCmSearchDirs;
auto DoPrintROCmSearchDirs = [&]() {
if (PrintROCmSearchDirs)
for (auto Cand : ROCmSearchDirs) {
llvm::errs() << "ROCm installation search path";
if (Cand.isSPACK())
llvm::errs() << " (Spack " << Cand.SPACKReleaseStr << ")";
llvm::errs() << ": " << Cand.Path << '\n';
}
};
// For the candidate specified by --rocm-path we do not do a strict check,
// i.e., we do not check for the existence of the HIP version file and the
// device library files.
if (!RocmPathArg.empty()) {
ROCmSearchDirs.emplace_back(RocmPathArg.str());
DoPrintROCmSearchDirs();
return ROCmSearchDirs;
} else if (const char *RocmPathEnv = ::getenv("ROCM_PATH")) {
if (!StringRef(RocmPathEnv).empty()) {
ROCmSearchDirs.emplace_back(RocmPathEnv);
DoPrintROCmSearchDirs();
return ROCmSearchDirs;
}
}
// Try to find relative to the compiler binary.
const char *InstallDir = D.getInstalledDir();
// Check both a normal Unix prefix position of the clang binary, as well as
// the Windows-esque layout the ROCm packages use with the host architecture
// subdirectory of bin.
auto DeduceROCmPath = [](StringRef ClangPath) {
// Strip off directory (usually bin)
StringRef ParentDir = llvm::sys::path::parent_path(ClangPath);
StringRef ParentName = llvm::sys::path::filename(ParentDir);
// Some builds use bin/{host arch}, so go up again.
if (ParentName == "bin") {
ParentDir = llvm::sys::path::parent_path(ParentDir);
ParentName = llvm::sys::path::filename(ParentDir);
}
// Detect ROCm packages built with SPACK.
// clang is installed in the
// <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory.
// We only consider the parent directory of the llvm-amdgpu package as a
// ROCm installation candidate for SPACK.
if (ParentName.startswith("llvm-amdgpu-")) {
auto SPACKPostfix =
ParentName.drop_front(strlen("llvm-amdgpu-")).split('-');
auto SPACKReleaseStr = SPACKPostfix.first;
if (!SPACKReleaseStr.empty()) {
ParentDir = llvm::sys::path::parent_path(ParentDir);
return Candidate(ParentDir.str(), /*StrictChecking=*/true,
SPACKReleaseStr);
}
}
// Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
// Some versions of the aomp package install to /opt/rocm/aomp/bin
if (ParentName == "llvm" || ParentName.startswith("aomp"))
ParentDir = llvm::sys::path::parent_path(ParentDir);
return Candidate(ParentDir.str(), /*StrictChecking=*/true);
};
// Deduce the ROCm path from the path used to invoke clang, without resolving
// clang's own symbolic link.
ROCmSearchDirs.emplace_back(DeduceROCmPath(InstallDir));
// Deduce the ROCm path from the real path of the invoked clang, resolving
// clang's own symbolic link.
llvm::SmallString<256> RealClangPath;
llvm::sys::fs::real_path(D.getClangProgramPath(), RealClangPath);
auto ParentPath = llvm::sys::path::parent_path(RealClangPath);
if (ParentPath != InstallDir)
ROCmSearchDirs.emplace_back(DeduceROCmPath(ParentPath));
// Device library may be installed in clang or resource directory.
auto ClangRoot = llvm::sys::path::parent_path(InstallDir);
auto RealClangRoot = llvm::sys::path::parent_path(ParentPath);
ROCmSearchDirs.emplace_back(ClangRoot.str(), /*StrictChecking=*/true);
if (RealClangRoot != ClangRoot)
ROCmSearchDirs.emplace_back(RealClangRoot.str(), /*StrictChecking=*/true);
ROCmSearchDirs.emplace_back(D.ResourceDir,
/*StrictChecking=*/true);
ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/rocm",
/*StrictChecking=*/true);
// Find the latest /opt/rocm-{release} directory.
std::error_code EC;
std::string LatestROCm;
llvm::VersionTuple LatestVer;
// Get ROCm version from ROCm directory name.
auto GetROCmVersion = [](StringRef DirName) {
llvm::VersionTuple V;
std::string VerStr = DirName.drop_front(strlen("rocm-")).str();
// The ROCm directory name follows the format of
// rocm-{major}.{minor}.{subMinor}[-{build}]
std::replace(VerStr.begin(), VerStr.end(), '-', '.');
V.tryParse(VerStr);
return V;
};
for (llvm::vfs::directory_iterator
File = D.getVFS().dir_begin(D.SysRoot + "/opt", EC),
FileEnd;
File != FileEnd && !EC; File.increment(EC)) {
llvm::StringRef FileName = llvm::sys::path::filename(File->path());
if (!FileName.startswith("rocm-"))
continue;
if (LatestROCm.empty()) {
LatestROCm = FileName.str();
LatestVer = GetROCmVersion(LatestROCm);
continue;
}
auto Ver = GetROCmVersion(FileName);
if (LatestVer < Ver) {
LatestROCm = FileName.str();
LatestVer = Ver;
}
}
if (!LatestROCm.empty())
ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/" + LatestROCm,
/*StrictChecking=*/true);
DoPrintROCmSearchDirs();
return ROCmSearchDirs;
}
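// Illustrative example of the version comparison above: a directory named
// "rocm-4.3.0" parses as version 4.3.0, while "rocm-4.3.0-1234" becomes
// "4.3.0.1234" after the '-' to '.' replacement, so a later build of the
// same release still compares as newer.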
RocmInstallationDetector::RocmInstallationDetector(
const Driver &D, const llvm::Triple &HostTriple,
const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
: D(D) {
Verbose = Args.hasArg(options::OPT_v);
RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
PrintROCmSearchDirs =
Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs);
RocmDeviceLibPathArg =
Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ);
if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
HIPVersionArg = A->getValue();
unsigned Major = ~0U;
unsigned Minor = ~0U;
SmallVector<StringRef, 3> Parts;
HIPVersionArg.split(Parts, '.');
if (Parts.size())
Parts[0].getAsInteger(0, Major);
if (Parts.size() > 1)
Parts[1].getAsInteger(0, Minor);
if (Parts.size() > 2)
VersionPatch = Parts[2].str();
if (VersionPatch.empty())
VersionPatch = "0";
if (Major != ~0U && Minor == ~0U)
Minor = 0;
if (Major == ~0U || Minor == ~0U)
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << HIPVersionArg;
VersionMajorMinor = llvm::VersionTuple(Major, Minor);
DetectedVersion =
(Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
} else {
VersionPatch = DefaultVersionPatch;
VersionMajorMinor =
llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
DetectedVersion = (Twine(DefaultVersionMajor) + "." +
Twine(DefaultVersionMinor) + "." + VersionPatch)
.str();
}
if (DetectHIPRuntime)
detectHIPRuntime();
if (DetectDeviceLib)
detectDeviceLibrary();
}
void RocmInstallationDetector::detectDeviceLibrary() {
assert(LibDevicePath.empty());
if (!RocmDeviceLibPathArg.empty())
LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1];
else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH"))
LibDevicePath = LibPathEnv;
auto &FS = D.getVFS();
if (!LibDevicePath.empty()) {
// Maintain compatibility with the HIP flag/envvar pointing directly at the
// bitcode library directory. This points directly at the library path
// instead of the rocm root installation.
if (!FS.exists(LibDevicePath))
return;
scanLibDevicePath(LibDevicePath);
HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty();
return;
}
// The install path situation in old versions of ROCm is a real mess: they
// use a different install layout. Multiple copies of the device libraries
// exist for each frontend project, and differ depending on which build
// system produced the packages. Standalone OpenCL builds also have a
// different directory structure from the ROCm OpenCL package.
auto &ROCmDirs = getInstallationPathCandidates();
for (const auto &Candidate : ROCmDirs) {
auto CandidatePath = Candidate.Path;
// Check device library exists at the given path.
auto CheckDeviceLib = [&](StringRef Path) {
bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
if (CheckLibDevice && !FS.exists(Path))
return false;
scanLibDevicePath(Path);
if (!NoBuiltinLibs) {
// Check that the required non-target libraries are all available.
if (!allGenericLibsValid())
return false;
// Check that we have found at least one libdevice that we can link in
// if -nobuiltinlib hasn't been specified.
if (LibDeviceMap.empty())
return false;
}
return true;
};
// The possible structures are:
// - ${ROCM_ROOT}/amdgcn/bitcode/*
// - ${ROCM_ROOT}/lib/*
// - ${ROCM_ROOT}/lib/bitcode/*
// so try to detect these layouts.
static constexpr std::array<const char *, 2> SubDirsList[] = {
{"amdgcn", "bitcode"},
{"lib", ""},
{"lib", "bitcode"},
};
// Make a path by appending sub-directories to InstallPath.
auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) {
auto Path = CandidatePath;
for (auto SubDir : SubDirs)
llvm::sys::path::append(Path, SubDir);
return Path;
};
for (auto SubDirs : SubDirsList) {
LibDevicePath = MakePath(SubDirs);
HasDeviceLibrary = CheckDeviceLib(LibDevicePath);
if (HasDeviceLibrary)
return;
}
}
}
void RocmInstallationDetector::detectHIPRuntime() {
SmallVector<Candidate, 4> HIPSearchDirs;
if (!HIPPathArg.empty())
HIPSearchDirs.emplace_back(HIPPathArg.str(), /*StrictChecking=*/true);
else
HIPSearchDirs.append(getInstallationPathCandidates());
auto &FS = D.getVFS();
for (const auto &Candidate : HIPSearchDirs) {
InstallPath = Candidate.Path;
if (InstallPath.empty() || !FS.exists(InstallPath))
continue;
// HIP runtime built by SPACK is installed to
// <rocm_root>/hip-<rocm_release_string>-<hash> directory.
auto SPACKPath = findSPACKPackage(Candidate, "hip");
InstallPath = SPACKPath.empty() ? InstallPath : SPACKPath;
BinPath = InstallPath;
llvm::sys::path::append(BinPath, "bin");
IncludePath = InstallPath;
llvm::sys::path::append(IncludePath, "include");
LibPath = InstallPath;
llvm::sys::path::append(LibPath, "lib");
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
FS.getBufferForFile(BinPath + "/.hipVersion");
if (!VersionFile && Candidate.StrictChecking)
continue;
if (HIPVersionArg.empty() && VersionFile)
if (parseHIPVersionFile((*VersionFile)->getBuffer()))
continue;
HasHIPRuntime = true;
return;
}
HasHIPRuntime = false;
}
void RocmInstallationDetector::print(raw_ostream &OS) const {
if (hasHIPRuntime())
OS << "Found HIP installation: " << InstallPath << ", version "
<< DetectedVersion << '\n';
}
void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5);
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
// The HIP headers include standard library wrapper headers under clang's
// cuda_wrappers directory. These wrapper headers include_next the standard
// C++ headers, and the libc++ headers in turn include_next other clang
// headers. The include paths therefore have to follow this order:
// - wrapper include path
// - standard C++ include path
// - other clang include path
// Since standard C++ and other clang include paths are added in other
// places after this function, here we only need to make sure wrapper
// include path is added.
//
// ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
// a workaround.
SmallString<128> P(D.ResourceDir);
if (UsesRuntimeWrapper)
llvm::sys::path::append(P, "include", "cuda_wrappers");
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(P));
}
if (DriverArgs.hasArg(options::OPT_nogpuinc))
return;
if (!hasHIPRuntime()) {
D.Diag(diag::err_drv_no_hip_runtime);
return;
}
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
if (UsesRuntimeWrapper)
CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
}
void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
std::string Linker = getToolChain().GetProgramPath(getShortName());
ArgStringList CmdArgs;
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
CmdArgs.push_back("-shared");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker),
CmdArgs, Inputs, Output));
}
void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
const llvm::Triple &Triple,
const llvm::opt::ArgList &Args,
std::vector<StringRef> &Features) {
// Add target ID features to the -target-feature options. No diagnostics
// should be emitted here since an invalid target ID is diagnosed elsewhere.
StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
if (!TargetID.empty()) {
llvm::StringMap<bool> FeatureMap;
auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
if (OptionalGpuArch) {
StringRef GpuArch = OptionalGpuArch.getValue();
// Iterate through all possible target ID features for the given GPU.
// If it is mapped to true, add +feature.
// If it is mapped to false, add -feature.
// If it is not in the map (default), do not add it
for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
auto Pos = FeatureMap.find(Feature);
if (Pos == FeatureMap.end())
continue;
Features.push_back(Args.MakeArgStringRef(
(Twine(Pos->second ? "+" : "-") + Feature).str()));
}
}
}
if (Args.hasFlag(options::OPT_mwavefrontsize64,
options::OPT_mno_wavefrontsize64, false))
Features.push_back("+wavefrontsize64");
handleTargetFeaturesGroup(
Args, Features, options::OPT_m_amdgpu_Features_Group);
}
/// AMDGPU Toolchain
AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: Generic_ELF(D, Triple, Args),
OptionsDefault(
{{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) {
// Check the code object version options. Emit warnings for legacy options
// and an error for the last invalid code object version option.
// This is done here to avoid repeating the warnings or errors for
// each tool invocation.
checkAMDGPUCodeObjectVersion(D, Args);
}
Tool *AMDGPUToolChain::buildLinker() const {
return new tools::amdgpu::Linker(*this);
}
DerivedArgList *
AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
DerivedArgList *DAL =
Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
const OptTable &Opts = getDriver().getOpts();
if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());
for (Arg *A : Args) {
if (!shouldSkipArgument(A))
DAL->append(A);
}
checkTargetID(*DAL);
if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
return DAL;
// Phase 1 (.cl -> .bc)
if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
? options::OPT_m64
: options::OPT_m32));
// Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
// as they are defined that way in Options.td.
if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
options::OPT_Ofast))
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
getOptionDefault(options::OPT_O));
}
return DAL;
}
bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
llvm::AMDGPU::GPUKind Kind) {
// Assume nothing without a specific target.
if (Kind == llvm::AMDGPU::GK_NONE)
return false;
const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
// Enable f32 denormals by default on subtargets where fma is fast with
// denormals.
const bool BothDenormAndFMAFast =
(ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
(ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
return !BothDenormAndFMAFast;
}
llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType) const {
// Denormals should always be enabled for f16 and f64.
if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
return llvm::DenormalMode::getIEEE();
if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
options::OPT_fno_gpu_flush_denormals_to_zero,
getDefaultDenormsAreZeroForTarget(Kind)))
return llvm::DenormalMode::getPreserveSign();
return llvm::DenormalMode::getIEEE();
}
const StringRef GpuArch = getGPUArch(DriverArgs);
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
// TODO: There are way too many flags that change this. Do we need to check
// them all?
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
getDefaultDenormsAreZeroForTarget(Kind);
// Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
// also implicitly treated as zero (DAZ).
return DAZ ? llvm::DenormalMode::getPreserveSign() :
llvm::DenormalMode::getIEEE();
}
bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
llvm::AMDGPU::GPUKind Kind) {
const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
return !HasWave32 || DriverArgs.hasFlag(
options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
}
/// ROCM Toolchain
ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: AMDGPUToolChain(D, Triple, Args) {
RocmInstallation.detectDeviceLibrary();
}
void AMDGPUToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
// Default to "hidden" visibility, as object level linking will not be
// supported for the foreseeable future.
if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat)) {
CC1Args.push_back("-fvisibility");
CC1Args.push_back("hidden");
CC1Args.push_back("-fapply-global-visibility-to-externs");
}
}
StringRef
AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
return getProcessorFromTargetID(
getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
}
AMDGPUToolChain::ParsedTargetIDType
AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const {
StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
if (TargetID.empty())
return {None, None, None};
llvm::StringMap<bool> FeatureMap;
auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
if (!OptionalGpuArch)
return {TargetID.str(), None, None};
return {TargetID.str(), OptionalGpuArch.getValue().str(), FeatureMap};
}
void AMDGPUToolChain::checkTargetID(
const llvm::opt::ArgList &DriverArgs) const {
auto PTID = getParsedTargetID(DriverArgs);
if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
getDriver().Diag(clang::diag::err_drv_bad_target_id)
<< PTID.OptionalTargetID.getValue();
}
}
llvm::Error
AMDGPUToolChain::detectSystemGPUs(const ArgList &Args,
SmallVector<std::string, 1> &GPUArchs) const {
std::string Program;
if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ))
Program = A->getValue();
else
Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME);
llvm::SmallString<64> OutputFile;
llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */,
OutputFile);
llvm::FileRemover OutputRemover(OutputFile.c_str());
llvm::Optional<llvm::StringRef> Redirects[] = {
{""},
OutputFile.str(),
{""},
};
std::string ErrorMessage;
if (int Result = llvm::sys::ExecuteAndWait(
Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0,
/*MemoryLimit*/ 0, &ErrorMessage)) {
if (Result > 0) {
ErrorMessage = "Exited with error code " + std::to_string(Result);
} else if (Result == -1) {
ErrorMessage = "Execute failed: " + ErrorMessage;
} else {
ErrorMessage = "Crashed: " + ErrorMessage;
}
return llvm::createStringError(std::error_code(),
Program + ": " + ErrorMessage);
}
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf =
llvm::MemoryBuffer::getFile(OutputFile.c_str());
if (!OutputBuf) {
return llvm::createStringError(OutputBuf.getError(),
"Failed to read stdout of " + Program +
": " + OutputBuf.getError().message());
}
for (llvm::line_iterator LineIt(**OutputBuf); !LineIt.is_at_end(); ++LineIt) {
GPUArchs.push_back(LineIt->str());
}
return llvm::Error::success();
}
llvm::Error AMDGPUToolChain::getSystemGPUArch(const ArgList &Args,
std::string &GPUArch) const {
// Detect the AMD GPUs installed in the system.
SmallVector<std::string, 1> GPUArchs;
auto Err = detectSystemGPUs(Args, GPUArchs);
if (Err) {
return Err;
}
if (GPUArchs.empty()) {
return llvm::createStringError(std::error_code(),
"No AMD GPU detected in the system");
}
GPUArch = GPUArchs[0];
if (GPUArchs.size() > 1) {
bool AllSame = std::all_of(
GPUArchs.begin(), GPUArchs.end(),
[&](const StringRef &GPUArch) { return GPUArch == GPUArchs.front(); });
if (!AllSame)
return llvm::createStringError(
std::error_code(), "Multiple AMD GPUs found with different archs");
}
return llvm::Error::success();
}
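For reference (example output only), the amdgpu_arch tool, or a user-supplied replacement given via the amdgpu-arch-tool option, is expected to print one architecture per line on stdout, e.g.

  gfx906
  gfx906

getSystemGPUArch then uses the first line and diagnoses the case where the detected GPUs disagree.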
void ROCMToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
DeviceOffloadingKind);
// For the OpenCL case where there is no offload target, accept -nostdlib to
// disable bitcode linking.
if (DeviceOffloadingKind == Action::OFK_None &&
DriverArgs.hasArg(options::OPT_nostdlib))
return;
if (DriverArgs.hasArg(options::OPT_nogpulib))
return;
if (!RocmInstallation.hasDeviceLibrary()) {
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
return;
}
// Get the device name and canonicalize it
const StringRef GpuArch = getGPUArch(DriverArgs);
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
if (LibDeviceFile.empty()) {
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
return;
}
bool Wave64 = isWave64(DriverArgs, Kind);
// TODO: There are way too many flags that change this. Do we need to check
// them all?
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
getDefaultDenormsAreZeroForTarget(Kind);
bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
bool UnsafeMathOpt =
DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
bool CorrectSqrt =
DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
// Add the OpenCL specific bitcode library.
llvm::SmallVector<std::string, 12> BCLibs;
BCLibs.push_back(RocmInstallation.getOpenCLPath().str());
// Add the generic set of libraries.
BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
FastRelaxedMath, CorrectSqrt));
llvm::for_each(BCLibs, [&](StringRef BCFile) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
});
}
llvm::SmallVector<std::string, 12>
RocmInstallationDetector::getCommonBitcodeLibs(
const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
bool CorrectSqrt) const {
llvm::SmallVector<std::string, 12> BCLibs;
auto AddBCLib = [&](StringRef BCFile) { BCLibs.push_back(BCFile.str()); };
AddBCLib(getOCMLPath());
AddBCLib(getOCKLPath());
AddBCLib(getDenormalsAreZeroPath(DAZ));
AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath));
AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath));
AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt));
AddBCLib(getWavefrontSize64Path(Wave64));
AddBCLib(LibDeviceFile);
return BCLibs;
}
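In a typical ROCm device-library layout (example only; exact file names depend on the installation), these getters resolve to bitcode files such as

  ocml.bc, ockl.bc,
  oclc_daz_opt_{on,off}.bc, oclc_unsafe_math_{on,off}.bc,
  oclc_finite_only_{on,off}.bc, oclc_correctly_rounded_sqrt_{on,off}.bc,
  oclc_wavefrontsize64_{on,off}.bc

plus the per-GPU library passed in as LibDeviceFile.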
bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
Option O = A->getOption();
if (O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie))
return true;
return false;
}
+
+llvm::SmallVector<std::string, 12>
+ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
+ const std::string &GPUArch) const {
+ auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
+ const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
+
+ std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
+ if (LibDeviceFile.empty()) {
+ getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
+ return {};
+ }
+
+ // If --hip-device-lib is not set, add the default bitcode libraries.
+ // TODO: There are way too many flags that change this. Do we need to check
+ // them all?
+ bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
+ options::OPT_fno_gpu_flush_denormals_to_zero,
+ getDefaultDenormsAreZeroForTarget(Kind));
+ bool FiniteOnly = DriverArgs.hasFlag(
+ options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
+ bool UnsafeMathOpt =
+ DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
+ options::OPT_fno_unsafe_math_optimizations, false);
+ bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
+ options::OPT_fno_fast_math, false);
+ bool CorrectSqrt = DriverArgs.hasFlag(
+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
+ bool Wave64 = isWave64(DriverArgs, Kind);
+
+ return RocmInstallation.getCommonBitcodeLibs(
+ DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
+ FastRelaxedMath, CorrectSqrt);
+}
\ No newline at end of file
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.h b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.h
index 50ed3b3ded9a..a4bcf315ca76 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -1,145 +1,150 @@
//===--- AMDGPU.h - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPU_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPU_H
#include "Gnu.h"
#include "ROCm.h"
#include "clang/Basic/TargetID.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/TargetParser.h"
#include <map>
namespace clang {
namespace driver {
namespace tools {
namespace amdgpu {
class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
public:
Linker(const ToolChain &TC) : Tool("amdgpu::Linker", "ld.lld", TC) {}
bool isLinkJob() const override { return true; }
bool hasIntegratedCPP() const override { return false; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
};
void getAMDGPUTargetFeatures(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args,
std::vector<StringRef> &Features);
} // end namespace amdgpu
} // end namespace tools
namespace toolchains {
class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
protected:
const std::map<options::ID, const StringRef> OptionsDefault;
Tool *buildLinker() const override;
const StringRef getOptionDefault(options::ID OptID) const {
auto opt = OptionsDefault.find(OptID);
assert(opt != OptionsDefault.end() && "No Default for Option");
return opt->second;
}
public:
AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
unsigned GetDefaultDwarfVersion() const override { return 4; }
bool IsIntegratedAssemblerDefault() const override { return true; }
bool IsMathErrnoDefault() const override { return false; }
bool useIntegratedAs() const override { return true; }
bool isCrossCompiling() const override { return true; }
bool isPICDefault() const override { return false; }
bool isPIEDefault() const override { return false; }
bool isPICDefaultForced() const override { return false; }
bool SupportsProfiling() const override { return false; }
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const override;
/// Return whether denormals should be flushed, and treated as 0 by default
/// for the subtarget.
static bool getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind GPUKind);
llvm::DenormalMode getDefaultDenormalModeForType(
const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType = nullptr) const override;
static bool isWave64(const llvm::opt::ArgList &DriverArgs,
llvm::AMDGPU::GPUKind Kind);
/// Needed for using lto.
bool HasNativeLLVMSupport() const override {
return true;
}
/// Needed for translating LTO options.
const char *getDefaultLinker() const override { return "ld.lld"; }
/// Should skip argument.
bool shouldSkipArgument(const llvm::opt::Arg *Arg) const;
/// Uses the amdgpu_arch tool to get the arch of the system GPU. Returns an
/// error if unable to find one.
llvm::Error getSystemGPUArch(const llvm::opt::ArgList &Args,
std::string &GPUArch) const;
protected:
/// Check and diagnose invalid target ID specified by -mcpu.
virtual void checkTargetID(const llvm::opt::ArgList &DriverArgs) const;
/// The struct type returned by getParsedTargetID.
struct ParsedTargetIDType {
Optional<std::string> OptionalTargetID;
Optional<std::string> OptionalGPUArch;
Optional<llvm::StringMap<bool>> OptionalFeatures;
};
/// Get target ID, GPU arch, and target ID features if the target ID is
/// specified and valid.
ParsedTargetIDType
getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const;
/// Get GPU arch from -mcpu without checking.
StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const;
llvm::Error detectSystemGPUs(const llvm::opt::ArgList &Args,
SmallVector<std::string, 1> &GPUArchs) const;
};
class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
public:
ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
void
addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const override;
+
+ // Returns a list of device library names shared by different languages
+ llvm::SmallVector<std::string, 12>
+ getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
+ const std::string &GPUArch) const;
};
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPU_H
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index fe1d19c2dd67..135e3694434d 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -1,304 +1,326 @@
//===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPUOpenMP.h"
#include "AMDGPU.h"
#include "CommonArgs.h"
+#include "ToolChains/ROCm.h"
#include "clang/Basic/DiagnosticDriver.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
namespace {
static const char *getOutputFileName(Compilation &C, StringRef Base,
const char *Postfix,
const char *Extension) {
const char *OutputFileName;
if (C.getDriver().isSaveTempsEnabled()) {
OutputFileName =
C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
} else {
std::string TmpName =
C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
return OutputFileName;
}
static void addLLCOptArg(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
StringRef OOpt = "0";
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
OOpt = "3";
else if (A->getOption().matches(options::OPT_O0))
OOpt = "0";
else if (A->getOption().matches(options::OPT_O)) {
// Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3
// so we map -Os/-Oz to -O2.
// Only clang supports -Og, and maps it to -O1.
// We map anything else to -O2.
OOpt = llvm::StringSwitch<const char *>(A->getValue())
.Case("1", "1")
.Case("2", "2")
.Case("3", "3")
.Case("s", "2")
.Case("z", "2")
.Case("g", "1")
.Default("0");
}
CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
}
}
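For reference, the resulting mapping from the host optimization level to the llc invocation is (illustrative):

  -O4 / -Ofast        ->  llc -O3
  -O1 / -O2 / -O3     ->  llc -O1 / -O2 / -O3
  -Os / -Oz           ->  llc -O2
  -Og                 ->  llc -O1
  -O0 / anything else ->  llc -O0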
static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
std::string &GPUArch) {
if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) {
std::string ErrMsg =
llvm::formatv("{0}", llvm::fmt_consume(std::move(Err)));
TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg;
return false;
}
return true;
}
} // namespace
const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
- Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
- const ArgList &Args, StringRef SubArchName,
- StringRef OutputFilePrefix) const {
+ const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
+ const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args,
+ StringRef SubArchName, StringRef OutputFilePrefix) const {
ArgStringList CmdArgs;
for (const auto &II : Inputs)
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
+
+ if (Args.hasArg(options::OPT_l)) {
+ auto Lm = Args.getAllArgValues(options::OPT_l);
+ bool HasLibm = false;
+ for (auto &Lib : Lm) {
+ if (Lib == "m") {
+ HasLibm = true;
+ break;
+ }
+ }
+
+ if (HasLibm) {
+ SmallVector<std::string, 12> BCLibs =
+ AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
+ llvm::for_each(BCLibs, [&](StringRef BCFile) {
+ CmdArgs.push_back(Args.MakeArgString(BCFile));
+ });
+ }
+ }
+
// Add an intermediate output file.
CmdArgs.push_back("-o");
const char *OutputFileName =
getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
CmdArgs.push_back(OutputFileName);
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(OutputFileName))));
return OutputFileName;
}
const char *AMDGCN::OpenMPLinker::constructLlcCommand(
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
llvm::StringRef OutputFilePrefix, const char *InputFileName,
bool OutputIsAsm) const {
// Construct llc command.
ArgStringList LlcArgs;
// The input to llc is the output from llvm-link.
LlcArgs.push_back(InputFileName);
// Pass optimization arg to llc.
addLLCOptArg(Args, LlcArgs);
LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
LlcArgs.push_back(
Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
LlcArgs.push_back(A->getValue(0));
}
// Add output filename
LlcArgs.push_back("-o");
const char *LlcOutputFile =
getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
LlcArgs.push_back(LlcOutputFile);
const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc"));
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(LlcOutputFile))));
return LlcOutputFile;
}
void AMDGCN::OpenMPLinker::constructLldCommand(
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
const InputInfo &Output, const llvm::opt::ArgList &Args,
const char *InputFileName) const {
// Construct lld command.
// The output from ld.lld is an HSA code object file.
ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined",
"-shared", "-o", Output.getFilename(),
InputFileName};
const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(Output.getFilename()))));
}
// For amdgcn, the inputs of the linker job are device bitcode and the output
// is an object file. It runs the llvm-link, llc, then lld steps.
void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const ToolChain &TC = getToolChain();
assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC =
static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC);
std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str();
if (GPUArch.empty()) {
if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch))
return;
}
// Prefix for temporary file name.
std::string Prefix;
for (const auto &II : Inputs)
if (II.isFilename())
Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch;
assert(Prefix.length() && "no linker inputs are files ");
// Each command outputs different files.
- const char *LLVMLinkCommand =
- constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix);
+ const char *LLVMLinkCommand = constructLLVMLinkCommand(
+ AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix);
// Produce readable assembly if save-temps is enabled.
if (C.getDriver().isSaveTempsEnabled())
constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand,
/*OutputIsAsm=*/true);
const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch,
Prefix, LLVMLinkCommand);
constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
}
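For a single device input and a gfx906 offload arch (sketch only; the temporary file names and the arch are illustrative), the constructed job sequence is roughly:

  llvm-link foo-gfx906.bc [common device libs if -lm was passed] -o foo-gfx906-linked.bc
  llc foo-gfx906-linked.bc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -filetype=obj -o foo-gfx906.o
  lld -flavor gnu --no-undefined -shared -o <output> foo-gfx906.o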
AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D,
const llvm::Triple &Triple,
const ToolChain &HostTC,
const ArgList &Args)
: ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
// Look up binaries in the driver directory; this is used to
// discover the clang-offload-bundler executable.
getProgramPaths().push_back(getDriver().Dir);
}
void AMDGPUOpenMPToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str();
if (GPUArch.empty()) {
if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch))
return;
}
assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
"Only OpenMP offloading kinds are supported.");
CC1Args.push_back("-target-cpu");
CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch));
CC1Args.push_back("-fcuda-is-device");
if (DriverArgs.hasArg(options::OPT_nogpulib))
return;
std::string BitcodeSuffix;
if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
options::OPT_fno_openmp_target_new_runtime, false))
BitcodeSuffix = "new-amdgcn-" + GPUArch;
else
BitcodeSuffix = "amdgcn-" + GPUArch;
addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
}
llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
DerivedArgList *DAL =
HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
if (DeviceOffloadKind != Action::OFK_OpenMP) {
for (Arg *A : Args) {
DAL->append(A);
}
}
if (!BoundArch.empty()) {
DAL->eraseArg(options::OPT_march_EQ);
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
BoundArch);
}
return DAL;
}
Tool *AMDGPUOpenMPToolChain::buildLinker() const {
assert(getTriple().isAMDGCN());
return new tools::AMDGCN::OpenMPLinker(*this);
}
void AMDGPUOpenMPToolChain::addClangWarningOptions(
ArgStringList &CC1Args) const {
HostTC.addClangWarningOptions(CC1Args);
}
ToolChain::CXXStdlibType
AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
return HostTC.GetCXXStdlibType(Args);
}
void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
}
void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
ArgStringList &CC1Args) const {
HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
}
SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const {
// The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it
// allows sanitizer arguments on the command line if they are supported by the
// host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command
// line arguments for any of these "supported" sanitizers. That means that no
// sanitization of device code is actually supported at this time.
//
// This behavior is necessary because the host and device toolchains
// invocations often share the command line, so the device toolchain must
// tolerate flags meant only for the host toolchain.
return HostTC.getSupportedSanitizers();
}
VersionTuple
AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
const ArgList &Args) const {
return HostTC.computeMSVCVersion(D, Args);
}
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
index effca7e212cc..233256bf7378 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -1,106 +1,110 @@
//===- AMDGPUOpenMP.h - AMDGPUOpenMP ToolChain Implementation -*- C++ -*---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H
#include "AMDGPU.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
namespace clang {
namespace driver {
+namespace toolchains {
+class AMDGPUOpenMPToolChain;
+}
+
namespace tools {
namespace AMDGCN {
// Runs llvm-link/llc/lld, which link multiple LLVM bitcode files together with
// the device library, then compile the result to ISA in a shared object.
class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool {
public:
OpenMPLinker(const ToolChain &TC)
: Tool("AMDGCN::OpenMPLinker", "amdgcn-link", TC) {}
bool hasIntegratedCPP() const override { return false; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
private:
/// \return llvm-link output file name.
- const char *constructLLVMLinkCommand(Compilation &C, const JobAction &JA,
- const InputInfoList &Inputs,
- const llvm::opt::ArgList &Args,
- llvm::StringRef SubArchName,
- llvm::StringRef OutputFilePrefix) const;
+ const char *constructLLVMLinkCommand(
+ const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
+ const JobAction &JA, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
+ llvm::StringRef OutputFilePrefix) const;
/// \return llc output file name.
const char *constructLlcCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const llvm::opt::ArgList &Args,
llvm::StringRef SubArchName,
llvm::StringRef OutputFilePrefix,
const char *InputFileName,
bool OutputIsAsm = false) const;
void constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs, const InputInfo &Output,
const llvm::opt::ArgList &Args,
const char *InputFileName) const;
};
} // end namespace AMDGCN
} // end namespace tools
namespace toolchains {
class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final
: public ROCMToolChain {
public:
AMDGPUOpenMPToolChain(const Driver &D, const llvm::Triple &Triple,
const ToolChain &HostTC,
const llvm::opt::ArgList &Args);
const llvm::Triple *getAuxTriple() const override {
return &HostTC.getTriple();
}
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
void
addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const override;
void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
SanitizerMask getSupportedSanitizers() const override;
VersionTuple
computeMSVCVersion(const Driver *D,
const llvm::opt::ArgList &Args) const override;
const ToolChain &HostTC;
protected:
Tool *buildLinker() const override;
};
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
index 4a7dc3a33a5f..cb38ab51327c 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1,7810 +1,7811 @@
//===-- Clang.cpp - Clang+LLVM ToolChain Implementations --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Clang.h"
#include "AMDGPU.h"
#include "Arch/AArch64.h"
#include "Arch/ARM.h"
#include "Arch/M68k.h"
#include "Arch/Mips.h"
#include "Arch/PPC.h"
#include "Arch/RISCV.h"
#include "Arch/Sparc.h"
#include "Arch/SystemZ.h"
#include "Arch/VE.h"
#include "Arch/X86.h"
#include "CommonArgs.h"
#include "Hexagon.h"
#include "MSP430.h"
#include "PS4CPU.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Driver/Distro.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/XRayArgs.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/YAMLParser.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
if (Arg *A =
Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC)) {
if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) &&
!Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) {
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< A->getBaseArg().getAsString(Args)
<< (D.IsCLMode() ? "/E, /P or /EP" : "-E");
}
}
}
static void CheckCodeGenerationOptions(const Driver &D, const ArgList &Args) {
// In gcc, only ARM checks this, but it seems reasonable to check universally.
if (Args.hasArg(options::OPT_static))
if (const Arg *A =
Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic))
D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
<< "-static";
}
// Add backslashes to escape spaces and other backslashes.
// This is used for the space-separated argument list specified with
// the -dwarf-debug-flags option.
static void EscapeSpacesAndBackslashes(const char *Arg,
SmallVectorImpl<char> &Res) {
for (; *Arg; ++Arg) {
switch (*Arg) {
default:
break;
case ' ':
case '\\':
Res.push_back('\\');
break;
}
Res.push_back(*Arg);
}
}
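As a worked example (illustrative only), an argument recorded for -dwarf-debug-flags such as

  -DMSG=hello world\n

is escaped to

  -DMSG=hello\ world\\n

i.e. every space and backslash gains a preceding backslash.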
// Quote target names for inclusion in GNU Make dependency files.
// Only the characters '$', '#', ' ', '\t' are quoted.
static void QuoteTarget(StringRef Target, SmallVectorImpl<char> &Res) {
for (unsigned i = 0, e = Target.size(); i != e; ++i) {
switch (Target[i]) {
case ' ':
case '\t':
// Escape the preceding backslashes
for (int j = i - 1; j >= 0 && Target[j] == '\\'; --j)
Res.push_back('\\');
// Escape the space/tab
Res.push_back('\\');
break;
case '$':
Res.push_back('$');
break;
case '#':
Res.push_back('\\');
break;
default:
break;
}
Res.push_back(Target[i]);
}
}
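As a worked example (illustrative only), a dependency target named

  my file$1#x

is quoted as

  my\ file$$1\#x

spaces and tabs get a backslash (after re-escaping any backslashes immediately before them), '$' is doubled, and '#' is backslash-escaped.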
/// Apply \a Work on the current tool chain \a RegularToolChain and any other
/// offloading tool chain that is associated with the current action \a JA.
static void
forAllAssociatedToolChains(Compilation &C, const JobAction &JA,
const ToolChain &RegularToolChain,
llvm::function_ref<void(const ToolChain &)> Work) {
// Apply Work on the current/regular tool chain.
Work(RegularToolChain);
// Apply Work on all the offloading tool chains associated with the current
// action.
if (JA.isHostOffloading(Action::OFK_Cuda))
Work(*C.getSingleOffloadToolChain<Action::OFK_Cuda>());
else if (JA.isDeviceOffloading(Action::OFK_Cuda))
Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
else if (JA.isHostOffloading(Action::OFK_HIP))
Work(*C.getSingleOffloadToolChain<Action::OFK_HIP>());
else if (JA.isDeviceOffloading(Action::OFK_HIP))
Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
if (JA.isHostOffloading(Action::OFK_OpenMP)) {
auto TCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
for (auto II = TCs.first, IE = TCs.second; II != IE; ++II)
Work(*II->second);
} else if (JA.isDeviceOffloading(Action::OFK_OpenMP))
Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
//
// TODO: Add support for other offloading programming models here.
//
}
/// This is a helper function for validating the optional refinement step
/// parameter in reciprocal argument strings. Return false if there is an error
/// parsing the refinement step. Otherwise, return true and set the Position
/// of the refinement step in the input string.
static bool getRefinementStep(StringRef In, const Driver &D,
const Arg &A, size_t &Position) {
const char RefinementStepToken = ':';
Position = In.find(RefinementStepToken);
if (Position != StringRef::npos) {
StringRef Option = A.getOption().getName();
StringRef RefStep = In.substr(Position + 1);
// Allow exactly one numeric character for the additional refinement
// step parameter. This is reasonable for all currently-supported
// operations and architectures because we would expect that a larger value
// of refinement steps would cause the estimate "optimization" to
// under-perform the native operation. Also, if the estimate does not
// converge quickly, it probably will not ever converge, so further
// refinement steps will not produce a better answer.
if (RefStep.size() != 1) {
D.Diag(diag::err_drv_invalid_value) << Option << RefStep;
return false;
}
char RefStepChar = RefStep[0];
if (RefStepChar < '0' || RefStepChar > '9') {
D.Diag(diag::err_drv_invalid_value) << Option << RefStep;
return false;
}
}
return true;
}
/// The -mrecip flag requires processing of many optional parameters.
static void ParseMRecip(const Driver &D, const ArgList &Args,
ArgStringList &OutStrings) {
StringRef DisabledPrefixIn = "!";
StringRef DisabledPrefixOut = "!";
StringRef EnabledPrefixOut = "";
StringRef Out = "-mrecip=";
Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ);
if (!A)
return;
unsigned NumOptions = A->getNumValues();
if (NumOptions == 0) {
// No option is the same as "all".
OutStrings.push_back(Args.MakeArgString(Out + "all"));
return;
}
// Pass through "all", "none", or "default" with an optional refinement step.
if (NumOptions == 1) {
StringRef Val = A->getValue(0);
size_t RefStepLoc;
if (!getRefinementStep(Val, D, *A, RefStepLoc))
return;
StringRef ValBase = Val.slice(0, RefStepLoc);
if (ValBase == "all" || ValBase == "none" || ValBase == "default") {
OutStrings.push_back(Args.MakeArgString(Out + Val));
return;
}
}
// Each reciprocal type may be enabled or disabled individually.
// Check each input value for validity, concatenate them all back together,
// and pass through.
llvm::StringMap<bool> OptionStrings;
OptionStrings.insert(std::make_pair("divd", false));
OptionStrings.insert(std::make_pair("divf", false));
OptionStrings.insert(std::make_pair("vec-divd", false));
OptionStrings.insert(std::make_pair("vec-divf", false));
OptionStrings.insert(std::make_pair("sqrtd", false));
OptionStrings.insert(std::make_pair("sqrtf", false));
OptionStrings.insert(std::make_pair("vec-sqrtd", false));
OptionStrings.insert(std::make_pair("vec-sqrtf", false));
for (unsigned i = 0; i != NumOptions; ++i) {
StringRef Val = A->getValue(i);
bool IsDisabled = Val.startswith(DisabledPrefixIn);
// Ignore the disablement token for string matching.
if (IsDisabled)
Val = Val.substr(1);
size_t RefStep;
if (!getRefinementStep(Val, D, *A, RefStep))
return;
StringRef ValBase = Val.slice(0, RefStep);
llvm::StringMap<bool>::iterator OptionIter = OptionStrings.find(ValBase);
if (OptionIter == OptionStrings.end()) {
// Try again specifying float suffix.
OptionIter = OptionStrings.find(ValBase.str() + 'f');
if (OptionIter == OptionStrings.end()) {
// The input name did not match any known option string.
D.Diag(diag::err_drv_unknown_argument) << Val;
return;
}
// The option was specified without a float or double suffix.
// Make sure that the double entry was not already specified.
// The float entry will be checked below.
if (OptionStrings[ValBase.str() + 'd']) {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
return;
}
}
if (OptionIter->second == true) {
// Duplicate option specified.
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
return;
}
// Mark the matched option as found. Do not allow duplicate specifiers.
OptionIter->second = true;
// If the precision was not specified, also mark the double entry as found.
if (ValBase.back() != 'f' && ValBase.back() != 'd')
OptionStrings[ValBase.str() + 'd'] = true;
// Build the output string.
StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut;
Out = Args.MakeArgString(Out + Prefix + Val);
if (i != NumOptions - 1)
Out = Args.MakeArgString(Out + ",");
}
OutStrings.push_back(Args.MakeArgString(Out));
}
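A few accepted spellings under this grammar (examples only): bare -mrecip is forwarded as -mrecip=all; -mrecip=all:1 passes a single refinement step for everything; -mrecip=vec-sqrtf,!divd:0 enables the vector float square-root estimate while explicitly disabling the scalar double divide estimate with zero refinement steps.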
/// The -mprefer-vector-width option accepts either a positive integer
/// or the string "none".
static void ParseMPreferVectorWidth(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
Arg *A = Args.getLastArg(options::OPT_mprefer_vector_width_EQ);
if (!A)
return;
StringRef Value = A->getValue();
if (Value == "none") {
CmdArgs.push_back("-mprefer-vector-width=none");
} else {
unsigned Width;
if (Value.getAsInteger(10, Width)) {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
return;
}
CmdArgs.push_back(Args.MakeArgString("-mprefer-vector-width=" + Value));
}
}
static void getWebAssemblyTargetFeatures(const ArgList &Args,
std::vector<StringRef> &Features) {
handleTargetFeaturesGroup(Args, Features, options::OPT_m_wasm_Features_Group);
}
static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args, ArgStringList &CmdArgs,
bool ForAS, bool IsAux = false) {
std::vector<StringRef> Features;
switch (Triple.getArch()) {
default:
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
mips::getMIPSTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
arm::getARMTargetFeatures(D, Triple, Args, CmdArgs, Features, ForAS);
break;
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
ppc::getPPCTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
riscv::getRISCVTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::systemz:
systemz::getSystemZTargetFeatures(D, Args, Features);
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
aarch64::getAArch64TargetFeatures(D, Triple, Args, Features, ForAS);
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
x86::getX86TargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::hexagon:
hexagon::getHexagonTargetFeatures(D, Args, Features);
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
getWebAssemblyTargetFeatures(Args, Features);
break;
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
sparc::getSparcTargetFeatures(D, Args, Features);
break;
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
amdgpu::getAMDGPUTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::m68k:
m68k::getM68kTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::msp430:
msp430::getMSP430TargetFeatures(D, Args, Features);
break;
case llvm::Triple::ve:
ve::getVETargetFeatures(D, Args, Features);
break;
}
for (auto Feature : unifyTargetFeatures(Features)) {
CmdArgs.push_back(IsAux ? "-aux-target-feature" : "-target-feature");
CmdArgs.push_back(Feature.data());
}
}
static bool
shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime,
const llvm::Triple &Triple) {
// We use the zero-cost exception tables for Objective-C if the non-fragile
// ABI is enabled or when compiling for x86_64 and ARM on Snow Leopard and
// later.
if (runtime.isNonFragile())
return true;
if (!Triple.isMacOSX())
return false;
return (!Triple.isMacOSXVersionLT(10, 5) &&
(Triple.getArch() == llvm::Triple::x86_64 ||
Triple.getArch() == llvm::Triple::arm));
}
/// Adds exception related arguments to the driver command arguments. There's a
/// master flag, -fexceptions, and also language-specific flags to enable/disable
/// C++ and Objective-C exceptions. This makes it possible to, for example,
/// disable C++ exceptions but enable Objective-C exceptions.
static bool addExceptionArgs(const ArgList &Args, types::ID InputType,
const ToolChain &TC, bool KernelOrKext,
const ObjCRuntime &objcRuntime,
ArgStringList &CmdArgs) {
const llvm::Triple &Triple = TC.getTriple();
if (KernelOrKext) {
// -mkernel and -fapple-kext imply no exceptions, so claim exception related
// arguments now to avoid warnings about unused arguments.
Args.ClaimAllArgs(options::OPT_fexceptions);
Args.ClaimAllArgs(options::OPT_fno_exceptions);
Args.ClaimAllArgs(options::OPT_fobjc_exceptions);
Args.ClaimAllArgs(options::OPT_fno_objc_exceptions);
Args.ClaimAllArgs(options::OPT_fcxx_exceptions);
Args.ClaimAllArgs(options::OPT_fno_cxx_exceptions);
Args.ClaimAllArgs(options::OPT_fasync_exceptions);
Args.ClaimAllArgs(options::OPT_fno_async_exceptions);
return false;
}
// See if the user explicitly enabled exceptions.
bool EH = Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions,
false);
bool EHa = Args.hasFlag(options::OPT_fasync_exceptions,
options::OPT_fno_async_exceptions, false);
if (EHa) {
CmdArgs.push_back("-fasync-exceptions");
EH = true;
}
// Obj-C exceptions are enabled by default, regardless of -fexceptions. This
// is not necessarily sensible, but follows GCC.
if (types::isObjC(InputType) &&
Args.hasFlag(options::OPT_fobjc_exceptions,
options::OPT_fno_objc_exceptions, true)) {
CmdArgs.push_back("-fobjc-exceptions");
EH |= shouldUseExceptionTablesForObjCExceptions(objcRuntime, Triple);
}
if (types::isCXX(InputType)) {
// Disable C++ EH by default on XCore and PS4.
bool CXXExceptionsEnabled =
Triple.getArch() != llvm::Triple::xcore && !Triple.isPS4CPU();
Arg *ExceptionArg = Args.getLastArg(
options::OPT_fcxx_exceptions, options::OPT_fno_cxx_exceptions,
options::OPT_fexceptions, options::OPT_fno_exceptions);
if (ExceptionArg)
CXXExceptionsEnabled =
ExceptionArg->getOption().matches(options::OPT_fcxx_exceptions) ||
ExceptionArg->getOption().matches(options::OPT_fexceptions);
if (CXXExceptionsEnabled) {
CmdArgs.push_back("-fcxx-exceptions");
EH = true;
}
}
// OPT_fignore_exceptions means exception could still be thrown,
// but no clean up or catch would happen in current module.
// So we do not set EH to false.
Args.AddLastArg(CmdArgs, options::OPT_fignore_exceptions);
if (EH)
CmdArgs.push_back("-fexceptions");
return EH;
}
static bool ShouldEnableAutolink(const ArgList &Args, const ToolChain &TC,
const JobAction &JA) {
bool Default = true;
if (TC.getTriple().isOSDarwin()) {
// The native darwin assembler doesn't support the linker_option directives,
// so we disable them if we think the .s file will be passed to it.
Default = TC.useIntegratedAs();
}
// The linker_option directives are intended for host compilation.
if (JA.isDeviceOffloading(Action::OFK_Cuda) ||
JA.isDeviceOffloading(Action::OFK_HIP))
Default = false;
return Args.hasFlag(options::OPT_fautolink, options::OPT_fno_autolink,
Default);
}
// Convert an arg of the form "-gN" or "-ggdbN" or one of their aliases
// to the corresponding DebugInfoKind.
static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) {
assert(A.getOption().matches(options::OPT_gN_Group) &&
"Not a -g option that specifies a debug-info level");
if (A.getOption().matches(options::OPT_g0) ||
A.getOption().matches(options::OPT_ggdb0))
return codegenoptions::NoDebugInfo;
if (A.getOption().matches(options::OPT_gline_tables_only) ||
A.getOption().matches(options::OPT_ggdb1))
return codegenoptions::DebugLineTablesOnly;
if (A.getOption().matches(options::OPT_gline_directives_only))
return codegenoptions::DebugDirectivesOnly;
return codegenoptions::DebugInfoConstructor;
}
static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) {
switch (Triple.getArch()){
default:
return false;
case llvm::Triple::arm:
case llvm::Triple::thumb:
// ARM Darwin targets require a frame pointer to be always present to aid
// offline debugging via backtraces.
return Triple.isOSDarwin();
}
}
static bool useFramePointerForTargetByDefault(const ArgList &Args,
const llvm::Triple &Triple) {
if (Args.hasArg(options::OPT_pg) && !Args.hasArg(options::OPT_mfentry))
return true;
switch (Triple.getArch()) {
case llvm::Triple::xcore:
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
case llvm::Triple::msp430:
// XCore never wants frame pointers, regardless of OS.
// WebAssembly never wants frame pointers.
return false;
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
case llvm::Triple::amdgcn:
case llvm::Triple::r600:
return !areOptimizationsEnabled(Args);
default:
break;
}
if (Triple.isOSNetBSD()) {
return !areOptimizationsEnabled(Args);
}
if (Triple.isOSLinux() || Triple.getOS() == llvm::Triple::CloudABI ||
Triple.isOSHurd()) {
switch (Triple.getArch()) {
// Don't use a frame pointer on linux if optimizing for certain targets.
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
if (Triple.isAndroid())
return true;
LLVM_FALLTHROUGH;
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::systemz:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return !areOptimizationsEnabled(Args);
default:
return true;
}
}
if (Triple.isOSWindows()) {
switch (Triple.getArch()) {
case llvm::Triple::x86:
return !areOptimizationsEnabled(Args);
case llvm::Triple::x86_64:
return Triple.isOSBinFormatMachO();
case llvm::Triple::arm:
case llvm::Triple::thumb:
// Windows on ARM builds with FPO disabled to aid fast stack walking
return true;
default:
// All other supported Windows ISAs use xdata unwind information, so frame
// pointers are not generally useful.
return false;
}
}
return true;
}
static CodeGenOptions::FramePointerKind
getFramePointerKind(const ArgList &Args, const llvm::Triple &Triple) {
// We have 4 states:
//
// 00) leaf retained, non-leaf retained
// 01) leaf retained, non-leaf omitted (this is invalid)
// 10) leaf omitted, non-leaf retained
// (what -momit-leaf-frame-pointer was designed for)
// 11) leaf omitted, non-leaf omitted
//
// "omit" options taking precedence over "no-omit" options is the only way
// to make 3 valid states representable
Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer,
options::OPT_fno_omit_frame_pointer);
bool OmitFP = A && A->getOption().matches(options::OPT_fomit_frame_pointer);
bool NoOmitFP =
A && A->getOption().matches(options::OPT_fno_omit_frame_pointer);
bool OmitLeafFP = Args.hasFlag(options::OPT_momit_leaf_frame_pointer,
options::OPT_mno_omit_leaf_frame_pointer,
Triple.isAArch64() || Triple.isPS4CPU());
if (NoOmitFP || mustUseNonLeafFramePointerForTarget(Triple) ||
(!OmitFP && useFramePointerForTargetByDefault(Args, Triple))) {
if (OmitLeafFP)
return CodeGenOptions::FramePointerKind::NonLeaf;
return CodeGenOptions::FramePointerKind::All;
}
return CodeGenOptions::FramePointerKind::None;
}
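Two worked cases (illustrative): -fno-omit-frame-pointer -momit-leaf-frame-pointer yields FramePointerKind::NonLeaf, while -fomit-frame-pointer on a target that does not force frame pointers yields FramePointerKind::None; with no flags at all, the target-default logic above decides between All/NonLeaf and None.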
/// Add a CC1 option to specify the debug compilation directory.
static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::vfs::FileSystem &VFS) {
if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
options::OPT_fdebug_compilation_dir_EQ)) {
if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("-fdebug-compilation-dir=") +
A->getValue()));
else
A->render(Args, CmdArgs);
} else if (llvm::ErrorOr<std::string> CWD =
VFS.getCurrentWorkingDirectory()) {
CmdArgs.push_back(Args.MakeArgString("-fdebug-compilation-dir=" + *CWD));
}
}
/// Add a CC1 and CC1AS option to specify the debug file path prefix map.
static void addDebugPrefixMapArg(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs) {
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fdebug_prefix_map_EQ)) {
StringRef Map = A->getValue();
if (Map.find('=') == StringRef::npos)
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
CmdArgs.push_back(Args.MakeArgString("-fdebug-prefix-map=" + Map));
A->claim();
}
}
/// Add a CC1 and CC1AS option to specify the macro file path prefix map.
static void addMacroPrefixMapArg(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fmacro_prefix_map_EQ)) {
StringRef Map = A->getValue();
if (Map.find('=') == StringRef::npos)
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
CmdArgs.push_back(Args.MakeArgString("-fmacro-prefix-map=" + Map));
A->claim();
}
}
/// Add a CC1 and CC1AS option to specify the coverage file path prefix map.
static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fcoverage_prefix_map_EQ)) {
StringRef Map = A->getValue();
if (Map.find('=') == StringRef::npos)
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
CmdArgs.push_back(Args.MakeArgString("-fcoverage-prefix-map=" + Map));
A->claim();
}
}
/// Vectorize at all optimization levels greater than 1 except for -Oz.
/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
/// enabled.
static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
return true;
if (A->getOption().matches(options::OPT_O0))
return false;
assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");
// Vectorize -Os.
StringRef S(A->getValue());
if (S == "s")
return true;
// Don't vectorize -Oz, unless it's the slp vectorizer.
if (S == "z")
return isSlpVec;
unsigned OptLevel = 0;
if (S.getAsInteger(10, OptLevel))
return false;
return OptLevel > 1;
}
return false;
}
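For example (derived from the check above): -O2, -O3, -O4, -Ofast and -Os enable both the loop and SLP vectorizers by default, -Oz enables only the SLP vectorizer, and -O0, -O1 and -Og enable neither.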
/// Add -x lang to \p CmdArgs for \p Input.
static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
ArgStringList &CmdArgs) {
// When using -verify-pch, we don't want to provide the type
// 'precompiled-header' if it was inferred from the file extension
if (Args.hasArg(options::OPT_verify_pch) && Input.getType() == types::TY_PCH)
return;
CmdArgs.push_back("-x");
if (Args.hasArg(options::OPT_rewrite_objc))
CmdArgs.push_back(types::getTypeName(types::TY_PP_ObjCXX));
else {
// Map the driver type to the frontend type. This is mostly an identity
// mapping, except that the distinction between module interface units
// and other source files does not exist at the frontend layer.
const char *ClangType;
switch (Input.getType()) {
case types::TY_CXXModule:
ClangType = "c++";
break;
case types::TY_PP_CXXModule:
ClangType = "c++-cpp-output";
break;
default:
ClangType = types::getTypeName(Input.getType());
break;
}
CmdArgs.push_back(ClangType);
}
}
static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
const Driver &D, const InputInfo &Output,
const ArgList &Args,
ArgStringList &CmdArgs) {
auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate,
options::OPT_fprofile_generate_EQ,
options::OPT_fno_profile_generate);
if (PGOGenerateArg &&
PGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
PGOGenerateArg = nullptr;
auto *CSPGOGenerateArg = Args.getLastArg(options::OPT_fcs_profile_generate,
options::OPT_fcs_profile_generate_EQ,
options::OPT_fno_profile_generate);
if (CSPGOGenerateArg &&
CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
CSPGOGenerateArg = nullptr;
auto *ProfileGenerateArg = Args.getLastArg(
options::OPT_fprofile_instr_generate,
options::OPT_fprofile_instr_generate_EQ,
options::OPT_fno_profile_instr_generate);
if (ProfileGenerateArg &&
ProfileGenerateArg->getOption().matches(
options::OPT_fno_profile_instr_generate))
ProfileGenerateArg = nullptr;
if (PGOGenerateArg && ProfileGenerateArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< PGOGenerateArg->getSpelling() << ProfileGenerateArg->getSpelling();
auto *ProfileUseArg = getLastProfileUseArg(Args);
if (PGOGenerateArg && ProfileUseArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< ProfileUseArg->getSpelling() << PGOGenerateArg->getSpelling();
if (ProfileGenerateArg && ProfileUseArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< ProfileGenerateArg->getSpelling() << ProfileUseArg->getSpelling();
if (CSPGOGenerateArg && PGOGenerateArg) {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< CSPGOGenerateArg->getSpelling() << PGOGenerateArg->getSpelling();
PGOGenerateArg = nullptr;
}
if (TC.getTriple().isOSAIX()) {
if (PGOGenerateArg)
if (!D.isUsingLTO(false /*IsDeviceOffloadAction */) ||
D.getLTOMode() != LTOK_Full)
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< PGOGenerateArg->getSpelling() << "-flto";
if (ProfileGenerateArg)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< ProfileGenerateArg->getSpelling() << TC.getTriple().str();
if (Arg *ProfileSampleUseArg = getLastProfileSampleUseArg(Args))
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< ProfileSampleUseArg->getSpelling() << TC.getTriple().str();
}
if (ProfileGenerateArg) {
if (ProfileGenerateArg->getOption().matches(
options::OPT_fprofile_instr_generate_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-instrument-path=") +
ProfileGenerateArg->getValue()));
// The default is to use Clang Instrumentation.
CmdArgs.push_back("-fprofile-instrument=clang");
if (TC.getTriple().isWindowsMSVCEnvironment()) {
// Add dependent lib for clang_rt.profile
CmdArgs.push_back(Args.MakeArgString(
"--dependent-lib=" + TC.getCompilerRTBasename(Args, "profile")));
}
}
Arg *PGOGenArg = nullptr;
if (PGOGenerateArg) {
assert(!CSPGOGenerateArg);
PGOGenArg = PGOGenerateArg;
CmdArgs.push_back("-fprofile-instrument=llvm");
}
if (CSPGOGenerateArg) {
assert(!PGOGenerateArg);
PGOGenArg = CSPGOGenerateArg;
CmdArgs.push_back("-fprofile-instrument=csllvm");
}
if (PGOGenArg) {
if (TC.getTriple().isWindowsMSVCEnvironment()) {
// Add dependent lib for clang_rt.profile
CmdArgs.push_back(Args.MakeArgString(
"--dependent-lib=" + TC.getCompilerRTBasename(Args, "profile")));
}
if (PGOGenArg->getOption().matches(
PGOGenerateArg ? options::OPT_fprofile_generate_EQ
: options::OPT_fcs_profile_generate_EQ)) {
SmallString<128> Path(PGOGenArg->getValue());
llvm::sys::path::append(Path, "default_%m.profraw");
CmdArgs.push_back(
Args.MakeArgString(Twine("-fprofile-instrument-path=") + Path));
}
}
if (ProfileUseArg) {
if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue()));
else if ((ProfileUseArg->getOption().matches(
options::OPT_fprofile_use_EQ) ||
ProfileUseArg->getOption().matches(
options::OPT_fprofile_instr_use))) {
SmallString<128> Path(
ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue());
if (Path.empty() || llvm::sys::fs::is_directory(Path))
llvm::sys::path::append(Path, "default.profdata");
CmdArgs.push_back(
Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path));
}
}
bool EmitCovNotes = Args.hasFlag(options::OPT_ftest_coverage,
options::OPT_fno_test_coverage, false) ||
Args.hasArg(options::OPT_coverage);
bool EmitCovData = TC.needsGCovInstrumentation(Args);
if (EmitCovNotes)
CmdArgs.push_back("-ftest-coverage");
if (EmitCovData)
CmdArgs.push_back("-fprofile-arcs");
if (Args.hasFlag(options::OPT_fcoverage_mapping,
options::OPT_fno_coverage_mapping, false)) {
if (!ProfileGenerateArg)
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "-fcoverage-mapping"
<< "-fprofile-instr-generate";
CmdArgs.push_back("-fcoverage-mapping");
}
if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
options::OPT_fcoverage_compilation_dir_EQ)) {
if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("-fcoverage-compilation-dir=") + A->getValue()));
else
A->render(Args, CmdArgs);
} else if (llvm::ErrorOr<std::string> CWD =
D.getVFS().getCurrentWorkingDirectory()) {
CmdArgs.push_back(Args.MakeArgString("-fcoverage-compilation-dir=" + *CWD));
}
if (Args.hasArg(options::OPT_fprofile_exclude_files_EQ)) {
auto *Arg = Args.getLastArg(options::OPT_fprofile_exclude_files_EQ);
if (!Args.hasArg(options::OPT_coverage))
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "-fprofile-exclude-files="
<< "--coverage";
StringRef v = Arg->getValue();
CmdArgs.push_back(
Args.MakeArgString(Twine("-fprofile-exclude-files=" + v)));
}
if (Args.hasArg(options::OPT_fprofile_filter_files_EQ)) {
auto *Arg = Args.getLastArg(options::OPT_fprofile_filter_files_EQ);
if (!Args.hasArg(options::OPT_coverage))
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "-fprofile-filter-files="
<< "--coverage";
StringRef v = Arg->getValue();
CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-filter-files=" + v)));
}
if (const auto *A = Args.getLastArg(options::OPT_fprofile_update_EQ)) {
StringRef Val = A->getValue();
if (Val == "atomic" || Val == "prefer-atomic")
CmdArgs.push_back("-fprofile-update=atomic");
else if (Val != "single")
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
} else if (TC.getSanitizerArgs().needsTsanRt()) {
CmdArgs.push_back("-fprofile-update=atomic");
}
// Leave -fprofile-dir= an unused argument unless .gcda emission is
// enabled. To be polite, with '-fprofile-arcs -fno-profile-arcs' consider
// the flag used. There is no -fno-profile-dir, so the user has no
// targeted way to suppress the warning.
Arg *FProfileDir = nullptr;
if (Args.hasArg(options::OPT_fprofile_arcs) ||
Args.hasArg(options::OPT_coverage))
FProfileDir = Args.getLastArg(options::OPT_fprofile_dir);
// Put the .gcno and .gcda files (if needed) next to the object file or
// bitcode file in the case of LTO.
// FIXME: There should be a simpler way to find the object file for this
// input, and this code probably does the wrong thing for commands that
// compile and link all at once.
if ((Args.hasArg(options::OPT_c) || Args.hasArg(options::OPT_S)) &&
(EmitCovNotes || EmitCovData) && Output.isFilename()) {
SmallString<128> OutputFilename;
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT__SLASH_Fo))
OutputFilename = FinalOutput->getValue();
else if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
OutputFilename = FinalOutput->getValue();
else
OutputFilename = llvm::sys::path::filename(Output.getBaseInput());
SmallString<128> CoverageFilename = OutputFilename;
if (llvm::sys::path::is_relative(CoverageFilename))
(void)D.getVFS().makeAbsolute(CoverageFilename);
llvm::sys::path::replace_extension(CoverageFilename, "gcno");
CmdArgs.push_back("-coverage-notes-file");
CmdArgs.push_back(Args.MakeArgString(CoverageFilename));
if (EmitCovData) {
if (FProfileDir) {
CoverageFilename = FProfileDir->getValue();
llvm::sys::path::append(CoverageFilename, OutputFilename);
}
llvm::sys::path::replace_extension(CoverageFilename, "gcda");
CmdArgs.push_back("-coverage-data-file");
CmdArgs.push_back(Args.MakeArgString(CoverageFilename));
}
}
}
/// Check whether the given input tree contains any compilation actions.
static bool ContainsCompileAction(const Action *A) {
if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A))
return true;
for (const auto &AI : A->inputs())
if (ContainsCompileAction(AI))
return true;
return false;
}
/// Check if -relax-all should be passed to the internal assembler.
/// This is done by default when compiling non-assembler source with -O0.
static bool UseRelaxAll(Compilation &C, const ArgList &Args) {
bool RelaxDefault = true;
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
RelaxDefault = A->getOption().matches(options::OPT_O0);
if (RelaxDefault) {
RelaxDefault = false;
for (const auto &Act : C.getActions()) {
if (ContainsCompileAction(Act)) {
RelaxDefault = true;
break;
}
}
}
return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all,
RelaxDefault);
}
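// For illustration: with -O0 (or no -O flag) and at least one compile or
// backend action, -mrelax-all is the default; at any other -O level the
// default is off. An explicit -mrelax-all / -mno-relax-all always wins.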
// Extract the integer N from a string spelled "-dwarf-N", returning 0
// on mismatch. The StringRef input (rather than an Arg) allows
// for use by the "-Xassembler" option parser.
static unsigned DwarfVersionNum(StringRef ArgValue) {
return llvm::StringSwitch<unsigned>(ArgValue)
.Case("-gdwarf-2", 2)
.Case("-gdwarf-3", 3)
.Case("-gdwarf-4", 4)
.Case("-gdwarf-5", 5)
.Default(0);
}
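// For illustration, given the cases above:
//   DwarfVersionNum("-gdwarf-4") == 4
//   DwarfVersionNum("-gdwarf-5") == 5
//   DwarfVersionNum("-g")        == 0   // not a "-gdwarf-N" spelling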
// Find a DWARF format version option.
// This function is complementary to DwarfVersionNum().
static const Arg *getDwarfNArg(const ArgList &Args) {
return Args.getLastArg(options::OPT_gdwarf_2, options::OPT_gdwarf_3,
options::OPT_gdwarf_4, options::OPT_gdwarf_5,
options::OPT_gdwarf);
}
static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs,
codegenoptions::DebugInfoKind DebugInfoKind,
unsigned DwarfVersion,
llvm::DebuggerKind DebuggerTuning) {
switch (DebugInfoKind) {
case codegenoptions::DebugDirectivesOnly:
CmdArgs.push_back("-debug-info-kind=line-directives-only");
break;
case codegenoptions::DebugLineTablesOnly:
CmdArgs.push_back("-debug-info-kind=line-tables-only");
break;
case codegenoptions::DebugInfoConstructor:
CmdArgs.push_back("-debug-info-kind=constructor");
break;
case codegenoptions::LimitedDebugInfo:
CmdArgs.push_back("-debug-info-kind=limited");
break;
case codegenoptions::FullDebugInfo:
CmdArgs.push_back("-debug-info-kind=standalone");
break;
case codegenoptions::UnusedTypeInfo:
CmdArgs.push_back("-debug-info-kind=unused-types");
break;
default:
break;
}
if (DwarfVersion > 0)
CmdArgs.push_back(
Args.MakeArgString("-dwarf-version=" + Twine(DwarfVersion)));
switch (DebuggerTuning) {
case llvm::DebuggerKind::GDB:
CmdArgs.push_back("-debugger-tuning=gdb");
break;
case llvm::DebuggerKind::LLDB:
CmdArgs.push_back("-debugger-tuning=lldb");
break;
case llvm::DebuggerKind::SCE:
CmdArgs.push_back("-debugger-tuning=sce");
break;
case llvm::DebuggerKind::DBX:
CmdArgs.push_back("-debugger-tuning=dbx");
break;
default:
break;
}
}
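// For illustration: (DebugInfoConstructor, /*DwarfVersion=*/5,
// DebuggerKind::GDB) appends "-debug-info-kind=constructor",
// "-dwarf-version=5" and "-debugger-tuning=gdb" to CmdArgs; a DwarfVersion
// of 0 suppresses the -dwarf-version flag entirely.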
static bool checkDebugInfoOption(const Arg *A, const ArgList &Args,
const Driver &D, const ToolChain &TC) {
assert(A && "Expected non-nullptr argument.");
if (TC.supportsDebugInfoOption(A))
return true;
D.Diag(diag::warn_drv_unsupported_debug_info_opt_for_target)
<< A->getAsString(Args) << TC.getTripleString();
return false;
}
static void RenderDebugInfoCompressionArgs(const ArgList &Args,
ArgStringList &CmdArgs,
const Driver &D,
const ToolChain &TC) {
const Arg *A = Args.getLastArg(options::OPT_gz_EQ);
if (!A)
return;
if (checkDebugInfoOption(A, Args, D, TC)) {
StringRef Value = A->getValue();
if (Value == "none") {
CmdArgs.push_back("--compress-debug-sections=none");
} else if (Value == "zlib" || Value == "zlib-gnu") {
if (llvm::zlib::isAvailable()) {
CmdArgs.push_back(
Args.MakeArgString("--compress-debug-sections=" + Twine(Value)));
} else {
D.Diag(diag::warn_debug_compression_unavailable);
}
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
static const char *RelocationModelName(llvm::Reloc::Model Model) {
switch (Model) {
case llvm::Reloc::Static:
return "static";
case llvm::Reloc::PIC_:
return "pic";
case llvm::Reloc::DynamicNoPIC:
return "dynamic-no-pic";
case llvm::Reloc::ROPI:
return "ropi";
case llvm::Reloc::RWPI:
return "rwpi";
case llvm::Reloc::ROPI_RWPI:
return "ropi-rwpi";
}
llvm_unreachable("Unknown Reloc::Model kind");
}
static void handleAMDGPUCodeObjectVersionOptions(const Driver &D,
const ArgList &Args,
ArgStringList &CmdArgs) {
// If no version was requested by the user, use the default value from the
// back end. This is consistent with the value returned from
// getAMDGPUCodeObjectVersion. This lets clang emit IR for amdgpu without
// requiring the corresponding llvm to have the AMDGPU target enabled,
// provided the user (e.g. front end tests) can use the default.
if (haveAMDGPUCodeObjectVersionArgument(D, Args)) {
unsigned CodeObjVer = getAMDGPUCodeObjectVersion(D, Args);
CmdArgs.insert(CmdArgs.begin() + 1,
Args.MakeArgString(Twine("--amdhsa-code-object-version=") +
Twine(CodeObjVer)));
CmdArgs.insert(CmdArgs.begin() + 1, "-mllvm");
}
}
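// For illustration: when a code object version is requested, the two inserts
// above splice "-mllvm" "--amdhsa-code-object-version=<N>" in right after the
// first element already present in CmdArgs.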
void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs,
const InputInfo &Output,
const InputInfoList &Inputs) const {
const bool IsIAMCU = getToolChain().getTriple().isOSIAMCU();
CheckPreprocessingOptions(D, Args);
Args.AddLastArg(CmdArgs, options::OPT_C);
Args.AddLastArg(CmdArgs, options::OPT_CC);
// Handle dependency file generation.
Arg *ArgM = Args.getLastArg(options::OPT_MM);
if (!ArgM)
ArgM = Args.getLastArg(options::OPT_M);
Arg *ArgMD = Args.getLastArg(options::OPT_MMD);
if (!ArgMD)
ArgMD = Args.getLastArg(options::OPT_MD);
// -M and -MM imply -w.
if (ArgM)
CmdArgs.push_back("-w");
else
ArgM = ArgMD;
if (ArgM) {
// Determine the output location.
const char *DepFile;
if (Arg *MF = Args.getLastArg(options::OPT_MF)) {
DepFile = MF->getValue();
C.addFailureResultFile(DepFile, &JA);
} else if (Output.getType() == types::TY_Dependencies) {
DepFile = Output.getFilename();
} else if (!ArgMD) {
DepFile = "-";
} else {
DepFile = getDependencyFileName(Args, Inputs);
C.addFailureResultFile(DepFile, &JA);
}
CmdArgs.push_back("-dependency-file");
CmdArgs.push_back(DepFile);
bool HasTarget = false;
for (const Arg *A : Args.filtered(options::OPT_MT, options::OPT_MQ)) {
HasTarget = true;
A->claim();
if (A->getOption().matches(options::OPT_MT)) {
A->render(Args, CmdArgs);
} else {
CmdArgs.push_back("-MT");
SmallString<128> Quoted;
QuoteTarget(A->getValue(), Quoted);
CmdArgs.push_back(Args.MakeArgString(Quoted));
}
}
// Add a default target if one wasn't specified.
if (!HasTarget) {
const char *DepTarget;
// If user provided -o, that is the dependency target, except
// when we are only generating a dependency file.
Arg *OutputOpt = Args.getLastArg(options::OPT_o);
if (OutputOpt && Output.getType() != types::TY_Dependencies) {
DepTarget = OutputOpt->getValue();
} else {
// Otherwise derive from the base input.
//
// FIXME: This should use the computed output file location.
SmallString<128> P(Inputs[0].getBaseInput());
llvm::sys::path::replace_extension(P, "o");
DepTarget = Args.MakeArgString(llvm::sys::path::filename(P));
}
CmdArgs.push_back("-MT");
SmallString<128> Quoted;
QuoteTarget(DepTarget, Quoted);
CmdArgs.push_back(Args.MakeArgString(Quoted));
}
if (ArgM->getOption().matches(options::OPT_M) ||
ArgM->getOption().matches(options::OPT_MD))
CmdArgs.push_back("-sys-header-deps");
if ((isa<PrecompileJobAction>(JA) &&
!Args.hasArg(options::OPT_fno_module_file_deps)) ||
Args.hasArg(options::OPT_fmodule_file_deps))
CmdArgs.push_back("-module-file-deps");
}
if (Args.hasArg(options::OPT_MG)) {
if (!ArgM || ArgM->getOption().matches(options::OPT_MD) ||
ArgM->getOption().matches(options::OPT_MMD))
D.Diag(diag::err_drv_mg_requires_m_or_mm);
CmdArgs.push_back("-MG");
}
Args.AddLastArg(CmdArgs, options::OPT_MP);
Args.AddLastArg(CmdArgs, options::OPT_MV);
// Add offload include arguments specific for CUDA/HIP. This must happen
// before we -I or -include anything else, because we must pick up the
// CUDA/HIP headers from the particular CUDA/ROCm installation, rather than
// from e.g. /usr/local/include.
if (JA.isOffloading(Action::OFK_Cuda))
getToolChain().AddCudaIncludeArgs(Args, CmdArgs);
if (JA.isOffloading(Action::OFK_HIP))
getToolChain().AddHIPIncludeArgs(Args, CmdArgs);
// If we are offloading to a target via OpenMP we need to include the
// openmp_wrappers folder which contains alternative system headers.
if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
- getToolChain().getTriple().isNVPTX()){
+ (getToolChain().getTriple().isNVPTX() ||
+ getToolChain().getTriple().isAMDGCN())) {
if (!Args.hasArg(options::OPT_nobuiltininc)) {
// Add openmp_wrappers/* to our system include path. This lets us wrap
// standard library headers.
SmallString<128> P(D.ResourceDir);
llvm::sys::path::append(P, "include");
llvm::sys::path::append(P, "openmp_wrappers");
CmdArgs.push_back("-internal-isystem");
CmdArgs.push_back(Args.MakeArgString(P));
}
CmdArgs.push_back("-include");
CmdArgs.push_back("__clang_openmp_device_functions.h");
}
// Add -i* options, and automatically translate to -include-pch/-include-pth
// for transparent PCH support. It's wonky, but we also look for .gch files so
// that we can support seamless replacement in a build system that is already
// set up to generate .gch files.
if (getToolChain().getDriver().IsCLMode()) {
const Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc);
const Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu);
if (YcArg && JA.getKind() >= Action::PrecompileJobClass &&
JA.getKind() <= Action::AssembleJobClass) {
CmdArgs.push_back(Args.MakeArgString("-building-pch-with-obj"));
// -fpch-instantiate-templates is the default when creating
// precomp using /Yc
if (Args.hasFlag(options::OPT_fpch_instantiate_templates,
options::OPT_fno_pch_instantiate_templates, true))
CmdArgs.push_back(Args.MakeArgString("-fpch-instantiate-templates"));
}
if (YcArg || YuArg) {
StringRef ThroughHeader = YcArg ? YcArg->getValue() : YuArg->getValue();
if (!isa<PrecompileJobAction>(JA)) {
CmdArgs.push_back("-include-pch");
CmdArgs.push_back(Args.MakeArgString(D.GetClPchPath(
C, !ThroughHeader.empty()
? ThroughHeader
: llvm::sys::path::filename(Inputs[0].getBaseInput()))));
}
if (ThroughHeader.empty()) {
CmdArgs.push_back(Args.MakeArgString(
Twine("-pch-through-hdrstop-") + (YcArg ? "create" : "use")));
} else {
CmdArgs.push_back(
Args.MakeArgString(Twine("-pch-through-header=") + ThroughHeader));
}
}
}
bool RenderedImplicitInclude = false;
for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) {
if (A->getOption().matches(options::OPT_include)) {
// Handling of gcc-style gch precompiled headers.
bool IsFirstImplicitInclude = !RenderedImplicitInclude;
RenderedImplicitInclude = true;
bool FoundPCH = false;
SmallString<128> P(A->getValue());
// We want the files to have a name like foo.h.pch. Add a dummy extension
// so that replace_extension does the right thing.
P += ".dummy";
llvm::sys::path::replace_extension(P, "pch");
if (llvm::sys::fs::exists(P))
FoundPCH = true;
if (!FoundPCH) {
llvm::sys::path::replace_extension(P, "gch");
if (llvm::sys::fs::exists(P)) {
FoundPCH = true;
}
}
if (FoundPCH) {
if (IsFirstImplicitInclude) {
A->claim();
CmdArgs.push_back("-include-pch");
CmdArgs.push_back(Args.MakeArgString(P));
continue;
} else {
// Ignore the PCH if not first on command line and emit warning.
D.Diag(diag::warn_drv_pch_not_first_include) << P
<< A->getAsString(Args);
}
}
} else if (A->getOption().matches(options::OPT_isystem_after)) {
// Handling of paths which must come late. These entries are handled by
// the toolchain itself after the resource dir is inserted in the right
// search order.
// Do not claim the argument so that the use of the argument does not
// silently go unnoticed on toolchains which do not honour the option.
continue;
} else if (A->getOption().matches(options::OPT_stdlibxx_isystem)) {
// Translated to -internal-isystem by the driver, no need to pass to cc1.
continue;
}
// Not translated, render as usual.
A->claim();
A->render(Args, CmdArgs);
}
Args.AddAllArgs(CmdArgs,
{options::OPT_D, options::OPT_U, options::OPT_I_Group,
options::OPT_F, options::OPT_index_header_map});
// Add -Wp, and -Xpreprocessor if using the preprocessor.
// FIXME: There is a very unfortunate problem here, some troubled
// souls abuse -Wp, to pass preprocessor options in gcc syntax. To
// really support that we would have to parse and then translate
// those options. :(
Args.AddAllArgValues(CmdArgs, options::OPT_Wp_COMMA,
options::OPT_Xpreprocessor);
// -I- is a deprecated GCC feature, reject it.
if (Arg *A = Args.getLastArg(options::OPT_I_))
D.Diag(diag::err_drv_I_dash_not_supported) << A->getAsString(Args);
// If we have a --sysroot, and don't have an explicit -isysroot flag, add an
// -isysroot to the CC1 invocation.
StringRef sysroot = C.getSysRoot();
if (sysroot != "") {
if (!Args.hasArg(options::OPT_isysroot)) {
CmdArgs.push_back("-isysroot");
CmdArgs.push_back(C.getArgs().MakeArgString(sysroot));
}
}
// Parse additional include paths from environment variables.
// FIXME: We should probably sink the logic for handling these from the
// frontend into the driver. It will allow deleting 4 otherwise unused flags.
// CPATH - included following the user specified includes (but prior to
// builtin and standard includes).
addDirectoryList(Args, CmdArgs, "-I", "CPATH");
// C_INCLUDE_PATH - system includes enabled when compiling C.
addDirectoryList(Args, CmdArgs, "-c-isystem", "C_INCLUDE_PATH");
// CPLUS_INCLUDE_PATH - system includes enabled when compiling C++.
addDirectoryList(Args, CmdArgs, "-cxx-isystem", "CPLUS_INCLUDE_PATH");
// OBJC_INCLUDE_PATH - system includes enabled when compiling ObjC.
addDirectoryList(Args, CmdArgs, "-objc-isystem", "OBJC_INCLUDE_PATH");
// OBJCPLUS_INCLUDE_PATH - system includes enabled when compiling ObjC++.
addDirectoryList(Args, CmdArgs, "-objcxx-isystem", "OBJCPLUS_INCLUDE_PATH");
// While adding the include arguments, we also attempt to retrieve the
// arguments of related offloading toolchains or arguments that are specific
// of an offloading programming model.
// Add C++ include arguments, if needed.
if (types::isCXX(Inputs[0].getType())) {
bool HasStdlibxxIsystem = Args.hasArg(options::OPT_stdlibxx_isystem);
forAllAssociatedToolChains(
C, JA, getToolChain(),
[&Args, &CmdArgs, HasStdlibxxIsystem](const ToolChain &TC) {
HasStdlibxxIsystem ? TC.AddClangCXXStdlibIsystemArgs(Args, CmdArgs)
: TC.AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
});
}
// Add system include arguments for all targets but IAMCU.
if (!IsIAMCU)
forAllAssociatedToolChains(C, JA, getToolChain(),
[&Args, &CmdArgs](const ToolChain &TC) {
TC.AddClangSystemIncludeArgs(Args, CmdArgs);
});
else {
// For IAMCU add special include arguments.
getToolChain().AddIAMCUIncludeArgs(Args, CmdArgs);
}
addMacroPrefixMapArg(D, Args, CmdArgs);
addCoveragePrefixMapArg(D, Args, CmdArgs);
}
// FIXME: Move to target hook.
static bool isSignedCharDefault(const llvm::Triple &Triple) {
switch (Triple.getArch()) {
default:
return true;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
if (Triple.isOSDarwin() || Triple.isOSWindows())
return true;
return false;
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
if (Triple.isOSDarwin())
return true;
return false;
case llvm::Triple::hexagon:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64le:
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
case llvm::Triple::systemz:
case llvm::Triple::xcore:
return false;
}
}
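// For illustration, per the switch above: plain 'char' defaults to signed on
// x86 (the default case) and on Darwin/Windows ARM and AArch64 targets, and
// to unsigned on e.g. aarch64-linux, ppc64le, riscv64 and systemz.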
static bool hasMultipleInvocations(const llvm::Triple &Triple,
const ArgList &Args) {
// Supported only on Darwin where we invoke the compiler multiple times
// followed by an invocation to lipo.
if (!Triple.isOSDarwin())
return false;
// If more than one "-arch <arch>" is specified, we're targeting multiple
// architectures resulting in a fat binary.
return Args.getAllArgValues(options::OPT_arch).size() > 1;
}
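// For illustration: on Darwin, "-arch x86_64 -arch arm64" yields two values
// for OPT_arch, so this returns true; on any non-Darwin triple it always
// returns false.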
static bool checkRemarksOptions(const Driver &D, const ArgList &Args,
const llvm::Triple &Triple) {
// When enabling remarks, we need to error if:
// * The remark file is specified but we're targeting multiple architectures,
// which means more than one remark file is being generated.
bool hasMultipleInvocations = ::hasMultipleInvocations(Triple, Args);
bool hasExplicitOutputFile =
Args.getLastArg(options::OPT_foptimization_record_file_EQ);
if (hasMultipleInvocations && hasExplicitOutputFile) {
D.Diag(diag::err_drv_invalid_output_with_multiple_archs)
<< "-foptimization-record-file";
return false;
}
return true;
}
static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::Triple &Triple,
const InputInfo &Input,
const InputInfo &Output, const JobAction &JA) {
StringRef Format = "yaml";
if (const Arg *A = Args.getLastArg(options::OPT_fsave_optimization_record_EQ))
Format = A->getValue();
CmdArgs.push_back("-opt-record-file");
const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ);
if (A) {
CmdArgs.push_back(A->getValue());
} else {
bool hasMultipleArchs =
Triple.isOSDarwin() && // Only supported on Darwin platforms.
Args.getAllArgValues(options::OPT_arch).size() > 1;
SmallString<128> F;
if (Args.hasArg(options::OPT_c) || Args.hasArg(options::OPT_S)) {
if (Arg *FinalOutput = Args.getLastArg(options::OPT_o))
F = FinalOutput->getValue();
} else {
if (Format != "yaml" && // For YAML, keep the original behavior.
Triple.isOSDarwin() && // Enable this only on Darwin, since it's the only
// platform supporting .dSYM bundles.
Output.isFilename())
F = Output.getFilename();
}
if (F.empty()) {
// Use the input filename.
F = llvm::sys::path::stem(Input.getBaseInput());
// If we're compiling for an offload architecture (i.e. a CUDA device),
// we need to make the file name for the device compilation different
// from the host compilation.
if (!JA.isDeviceOffloading(Action::OFK_None) &&
!JA.isDeviceOffloading(Action::OFK_Host)) {
llvm::sys::path::replace_extension(F, "");
F += Action::GetOffloadingFileNamePrefix(JA.getOffloadingDeviceKind(),
Triple.normalize());
F += "-";
F += JA.getOffloadingArch();
}
}
// If more than one "-arch" is given, name the files differently so that
// every cc1 invocation writes to a different file. We do that by appending
// "-<arch>", with "<arch>" being the arch name from the triple.
if (hasMultipleArchs) {
// First, remember the extension.
SmallString<64> OldExtension = llvm::sys::path::extension(F);
// then, remove it.
llvm::sys::path::replace_extension(F, "");
// attach -<arch> to it.
F += "-";
F += Triple.getArchName();
// put back the extension.
llvm::sys::path::replace_extension(F, OldExtension);
}
SmallString<32> Extension;
Extension += "opt.";
Extension += Format;
llvm::sys::path::replace_extension(F, Extension);
CmdArgs.push_back(Args.MakeArgString(F));
}
if (const Arg *A =
Args.getLastArg(options::OPT_foptimization_record_passes_EQ)) {
CmdArgs.push_back("-opt-record-passes");
CmdArgs.push_back(A->getValue());
}
if (!Format.empty()) {
CmdArgs.push_back("-opt-record-format");
CmdArgs.push_back(Format.data());
}
}
void AddAAPCSVolatileBitfieldArgs(const ArgList &Args, ArgStringList &CmdArgs) {
if (!Args.hasFlag(options::OPT_faapcs_bitfield_width,
options::OPT_fno_aapcs_bitfield_width, true))
CmdArgs.push_back("-fno-aapcs-bitfield-width");
if (Args.getLastArg(options::OPT_ForceAAPCSBitfieldLoad))
CmdArgs.push_back("-faapcs-bitfield-load");
}
namespace {
void RenderARMABI(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs) {
// Select the ABI to use.
// FIXME: Support -meabi.
// FIXME: Parts of this are duplicated in the backend, unify this somehow.
const char *ABIName = nullptr;
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
ABIName = A->getValue();
} else {
std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false);
ABIName = llvm::ARM::computeDefaultTargetABI(Triple, CPU).data();
}
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName);
}
}
void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs, bool KernelOrKext) const {
RenderARMABI(Triple, Args, CmdArgs);
// Determine floating point ABI from the options & target defaults.
arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args);
if (ABI == arm::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
// FIXME: This changes CPP defines, we need -target-soft-float.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else if (ABI == arm::FloatABI::SoftFP) {
// Floating point operations are hard, but argument passing is soft.
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(ABI == arm::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
// Forward the -mglobal-merge option for explicit control over the pass.
if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge,
options::OPT_mno_global_merge)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_mno_global_merge))
CmdArgs.push_back("-arm-global-merge=false");
else
CmdArgs.push_back("-arm-global-merge=true");
}
if (!Args.hasFlag(options::OPT_mimplicit_float,
options::OPT_mno_implicit_float, true))
CmdArgs.push_back("-no-implicit-float");
if (Args.getLastArg(options::OPT_mcmse))
CmdArgs.push_back("-mcmse");
AddAAPCSVolatileBitfieldArgs(Args, CmdArgs);
}
void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple,
const ArgList &Args, bool KernelOrKext,
ArgStringList &CmdArgs) const {
const ToolChain &TC = getToolChain();
// Add the target features
getTargetFeatures(TC.getDriver(), EffectiveTriple, Args, CmdArgs, false);
// Add target specific flags.
switch (TC.getArch()) {
default:
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
// Use the effective triple, which takes into account the deployment target.
AddARMTargetArgs(EffectiveTriple, Args, CmdArgs, KernelOrKext);
CmdArgs.push_back("-fallow-half-arguments-and-returns");
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
AddAArch64TargetArgs(Args, CmdArgs);
CmdArgs.push_back("-fallow-half-arguments-and-returns");
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
AddMIPSTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
AddPPCTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
AddRISCVTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
AddSparcTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::systemz:
AddSystemZTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
AddX86TargetArgs(Args, CmdArgs);
break;
case llvm::Triple::lanai:
AddLanaiTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::hexagon:
AddHexagonTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
AddWebAssemblyTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::ve:
AddVETargetArgs(Args, CmdArgs);
break;
}
}
namespace {
void RenderAArch64ABI(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs) {
const char *ABIName = nullptr;
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ))
ABIName = A->getValue();
else if (Triple.isOSDarwin())
ABIName = "darwinpcs";
else
ABIName = "aapcs";
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName);
}
}
void Clang::AddAArch64TargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) ||
Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext))
CmdArgs.push_back("-disable-red-zone");
if (!Args.hasFlag(options::OPT_mimplicit_float,
options::OPT_mno_implicit_float, true))
CmdArgs.push_back("-no-implicit-float");
RenderAArch64ABI(Triple, Args, CmdArgs);
if (Arg *A = Args.getLastArg(options::OPT_mfix_cortex_a53_835769,
options::OPT_mno_fix_cortex_a53_835769)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_mfix_cortex_a53_835769))
CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1");
else
CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=0");
} else if (Triple.isAndroid()) {
// Enable the A53 erratum (835769) workaround by default on Android.
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1");
}
// Forward the -mglobal-merge option for explicit control over the pass.
if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge,
options::OPT_mno_global_merge)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_mno_global_merge))
CmdArgs.push_back("-aarch64-enable-global-merge=false");
else
CmdArgs.push_back("-aarch64-enable-global-merge=true");
}
// Enable/disable return address signing and indirect branch targets.
if (Arg *A = Args.getLastArg(options::OPT_msign_return_address_EQ,
options::OPT_mbranch_protection_EQ)) {
const Driver &D = getToolChain().getDriver();
StringRef Scope, Key;
bool IndirectBranches;
if (A->getOption().matches(options::OPT_msign_return_address_EQ)) {
Scope = A->getValue();
if (!Scope.equals("none") && !Scope.equals("non-leaf") &&
!Scope.equals("all"))
D.Diag(diag::err_invalid_branch_protection)
<< Scope << A->getAsString(Args);
Key = "a_key";
IndirectBranches = false;
} else {
StringRef Err;
llvm::AArch64::ParsedBranchProtection PBP;
if (!llvm::AArch64::parseBranchProtection(A->getValue(), PBP, Err))
D.Diag(diag::err_invalid_branch_protection)
<< Err << A->getAsString(Args);
Scope = PBP.Scope;
Key = PBP.Key;
IndirectBranches = PBP.BranchTargetEnforcement;
}
CmdArgs.push_back(
Args.MakeArgString(Twine("-msign-return-address=") + Scope));
CmdArgs.push_back(
Args.MakeArgString(Twine("-msign-return-address-key=") + Key));
if (IndirectBranches)
CmdArgs.push_back("-mbranch-target-enforce");
}
// Handle -msve_vector_bits=<bits>
if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) {
StringRef Val = A->getValue();
const Driver &D = getToolChain().getDriver();
if (Val.equals("128") || Val.equals("256") || Val.equals("512") ||
Val.equals("1024") || Val.equals("2048"))
CmdArgs.push_back(
Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
// Silently drop requests for vector-length agnostic code as it's implied.
else if (!Val.equals("scalable"))
// Handle the unsupported values passed to msve-vector-bits.
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
}
AddAAPCSVolatileBitfieldArgs(Args, CmdArgs);
}
void Clang::AddMIPSTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
StringRef CPUName;
StringRef ABIName;
const llvm::Triple &Triple = getToolChain().getTriple();
mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
mips::FloatABI ABI = mips::getMipsFloatABI(D, Args, Triple);
if (ABI == mips::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(ABI == mips::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
if (Arg *A = Args.getLastArg(options::OPT_mldc1_sdc1,
options::OPT_mno_ldc1_sdc1)) {
if (A->getOption().matches(options::OPT_mno_ldc1_sdc1)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mno-ldc1-sdc1");
}
}
if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division,
options::OPT_mno_check_zero_division)) {
if (A->getOption().matches(options::OPT_mno_check_zero_division)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mno-check-zero-division");
}
}
if (Arg *A = Args.getLastArg(options::OPT_G)) {
StringRef v = A->getValue();
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-mips-ssection-threshold=" + v));
A->claim();
}
Arg *GPOpt = Args.getLastArg(options::OPT_mgpopt, options::OPT_mno_gpopt);
Arg *ABICalls =
Args.getLastArg(options::OPT_mabicalls, options::OPT_mno_abicalls);
// -mabicalls is the default for many MIPS environments, even with -fno-pic.
// -mgpopt is the default for static, -fno-pic environments but these two
// options conflict. We want to be certain that -mno-abicalls -mgpopt is
// the only case where -mllvm -mgpopt is passed.
// NOTE: We need a warning here or in the backend to warn when -mgpopt is
// passed explicitly when compiling something with -mabicalls
// (implicitly) in effect. Currently the warning is in the backend.
//
// When the ABI in use is N64, we also need to determine the PIC mode that
// is in use, as -fno-pic for N64 implies -mno-abicalls.
bool NoABICalls =
ABICalls && ABICalls->getOption().matches(options::OPT_mno_abicalls);
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) =
ParsePICArgs(getToolChain(), Args);
NoABICalls = NoABICalls ||
(RelocationModel == llvm::Reloc::Static && ABIName == "n64");
bool WantGPOpt = GPOpt && GPOpt->getOption().matches(options::OPT_mgpopt);
// We quietly ignore -mno-gpopt as the backend defaults to -mno-gpopt.
if (NoABICalls && (!GPOpt || WantGPOpt)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mgpopt");
Arg *LocalSData = Args.getLastArg(options::OPT_mlocal_sdata,
options::OPT_mno_local_sdata);
Arg *ExternSData = Args.getLastArg(options::OPT_mextern_sdata,
options::OPT_mno_extern_sdata);
Arg *EmbeddedData = Args.getLastArg(options::OPT_membedded_data,
options::OPT_mno_embedded_data);
if (LocalSData) {
CmdArgs.push_back("-mllvm");
if (LocalSData->getOption().matches(options::OPT_mlocal_sdata)) {
CmdArgs.push_back("-mlocal-sdata=1");
} else {
CmdArgs.push_back("-mlocal-sdata=0");
}
LocalSData->claim();
}
if (ExternSData) {
CmdArgs.push_back("-mllvm");
if (ExternSData->getOption().matches(options::OPT_mextern_sdata)) {
CmdArgs.push_back("-mextern-sdata=1");
} else {
CmdArgs.push_back("-mextern-sdata=0");
}
ExternSData->claim();
}
if (EmbeddedData) {
CmdArgs.push_back("-mllvm");
if (EmbeddedData->getOption().matches(options::OPT_membedded_data)) {
CmdArgs.push_back("-membedded-data=1");
} else {
CmdArgs.push_back("-membedded-data=0");
}
EmbeddedData->claim();
}
} else if ((!ABICalls || (!NoABICalls && ABICalls)) && WantGPOpt)
D.Diag(diag::warn_drv_unsupported_gpopt) << (ABICalls ? 0 : 1);
if (GPOpt)
GPOpt->claim();
if (Arg *A = Args.getLastArg(options::OPT_mcompact_branches_EQ)) {
StringRef Val = StringRef(A->getValue());
if (mips::hasCompactBranches(CPUName)) {
if (Val == "never" || Val == "always" || Val == "optimal") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-mips-compact-branches=" + Val));
} else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
} else
D.Diag(diag::warn_target_unsupported_compact_branches) << CPUName;
}
if (Arg *A = Args.getLastArg(options::OPT_mrelax_pic_calls,
options::OPT_mno_relax_pic_calls)) {
if (A->getOption().matches(options::OPT_mno_relax_pic_calls)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mips-jalr-reloc=0");
}
}
}
void Clang::AddPPCTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Select the ABI to use.
const char *ABIName = nullptr;
const llvm::Triple &T = getToolChain().getTriple();
if (T.isOSBinFormatELF()) {
switch (getToolChain().getArch()) {
case llvm::Triple::ppc64: {
if ((T.isOSFreeBSD() && T.getOSMajorVersion() >= 13) ||
T.isOSOpenBSD() || T.isMusl())
ABIName = "elfv2";
else
ABIName = "elfv1";
break;
}
case llvm::Triple::ppc64le:
ABIName = "elfv2";
break;
default:
break;
}
}
bool IEEELongDouble = false;
for (const Arg *A : Args.filtered(options::OPT_mabi_EQ)) {
StringRef V = A->getValue();
if (V == "ieeelongdouble")
IEEELongDouble = true;
else if (V == "ibmlongdouble")
IEEELongDouble = false;
else if (V != "altivec")
// The ppc64 Linux ABIs are all "altivec" ABIs by default. Accept and ignore
// the option if given, as we don't have backend support for any targets
// that don't use the altivec ABI.
ABIName = A->getValue();
}
if (IEEELongDouble)
CmdArgs.push_back("-mabi=ieeelongdouble");
ppc::FloatABI FloatABI =
ppc::getPPCFloatABI(getToolChain().getDriver(), Args);
if (FloatABI == ppc::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(FloatABI == ppc::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
if (ABIName) {
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName);
}
}
static void SetRISCVSmallDataLimit(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
const Driver &D = TC.getDriver();
const llvm::Triple &Triple = TC.getTriple();
// The default small data limit is eight bytes.
const char *SmallDataLimit = "8";
// Determine the small data limit from the options.
if (Args.getLastArg(options::OPT_shared, options::OPT_fpic,
options::OPT_fPIC)) {
// Linker relaxation is not supported for PIC.
SmallDataLimit = "0";
if (Args.hasArg(options::OPT_G)) {
D.Diag(diag::warn_drv_unsupported_sdata);
}
} else if (Args.getLastArgValue(options::OPT_mcmodel_EQ)
.equals_insensitive("large") &&
(Triple.getArch() == llvm::Triple::riscv64)) {
// Linker relaxation is not supported for RV64 with the large code model.
SmallDataLimit = "0";
if (Args.hasArg(options::OPT_G)) {
D.Diag(diag::warn_drv_unsupported_sdata);
}
} else if (Arg *A = Args.getLastArg(options::OPT_G)) {
SmallDataLimit = A->getValue();
}
// Forward the -msmall-data-limit= option.
CmdArgs.push_back("-msmall-data-limit");
CmdArgs.push_back(SmallDataLimit);
}
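// For illustration: with -fPIC (or -fpic/-shared), or with -mcmodel=large on
// riscv64, the limit is forced to "0" and any -G value triggers
// warn_drv_unsupported_sdata; otherwise "-G 16" forwards
// "-msmall-data-limit" "16", and the default remains "8".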
void Clang::AddRISCVTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &Triple = getToolChain().getTriple();
StringRef ABIName = riscv::getRISCVABI(Args, Triple);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
SetRISCVSmallDataLimit(getToolChain(), Args, CmdArgs);
std::string TuneCPU;
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
StringRef Name = A->getValue();
Name = llvm::RISCV::resolveTuneCPUAlias(Name, Triple.isArch64Bit());
TuneCPU = std::string(Name);
}
if (!TuneCPU.empty()) {
CmdArgs.push_back("-tune-cpu");
CmdArgs.push_back(Args.MakeArgString(TuneCPU));
}
}
void Clang::AddSparcTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
sparc::FloatABI FloatABI =
sparc::getSparcFloatABI(getToolChain().getDriver(), Args);
if (FloatABI == sparc::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(FloatABI == sparc::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
}
void Clang::AddSystemZTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
bool HasBackchain = Args.hasFlag(options::OPT_mbackchain,
options::OPT_mno_backchain, false);
bool HasPackedStack = Args.hasFlag(options::OPT_mpacked_stack,
options::OPT_mno_packed_stack, false);
systemz::FloatABI FloatABI =
systemz::getSystemZFloatABI(getToolChain().getDriver(), Args);
bool HasSoftFloat = (FloatABI == systemz::FloatABI::Soft);
if (HasBackchain && HasPackedStack && !HasSoftFloat) {
const Driver &D = getToolChain().getDriver();
D.Diag(diag::err_drv_unsupported_opt)
<< "-mpacked-stack -mbackchain -mhard-float";
}
if (HasBackchain)
CmdArgs.push_back("-mbackchain");
if (HasPackedStack)
CmdArgs.push_back("-mpacked-stack");
if (HasSoftFloat) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
}
}
void Clang::AddX86TargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
addX86AlignBranchArgs(D, Args, CmdArgs, /*IsLTO=*/false);
if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) ||
Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext))
CmdArgs.push_back("-disable-red-zone");
if (!Args.hasFlag(options::OPT_mtls_direct_seg_refs,
options::OPT_mno_tls_direct_seg_refs, true))
CmdArgs.push_back("-mno-tls-direct-seg-refs");
// Default to avoid implicit floating-point for kernel/kext code, but allow
// that to be overridden with -mno-soft-float.
bool NoImplicitFloat = (Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext));
if (Arg *A = Args.getLastArg(
options::OPT_msoft_float, options::OPT_mno_soft_float,
options::OPT_mimplicit_float, options::OPT_mno_implicit_float)) {
const Option &O = A->getOption();
NoImplicitFloat = (O.matches(options::OPT_mno_implicit_float) ||
O.matches(options::OPT_msoft_float));
}
if (NoImplicitFloat)
CmdArgs.push_back("-no-implicit-float");
if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) {
StringRef Value = A->getValue();
if (Value == "intel" || Value == "att") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value));
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
} else if (D.IsCLMode()) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-x86-asm-syntax=intel");
}
// Set flags to support MCU ABI.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
CmdArgs.push_back("-mstack-alignment=4");
}
// Handle -mtune.
// Default to "generic" unless -march is present or targetting the PS4.
std::string TuneCPU;
if (!Args.hasArg(clang::driver::options::OPT_march_EQ) &&
!getToolChain().getTriple().isPS4CPU())
TuneCPU = "generic";
// Override based on -mtune.
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
StringRef Name = A->getValue();
if (Name == "native") {
Name = llvm::sys::getHostCPUName();
if (!Name.empty())
TuneCPU = std::string(Name);
} else
TuneCPU = std::string(Name);
}
if (!TuneCPU.empty()) {
CmdArgs.push_back("-tune-cpu");
CmdArgs.push_back(Args.MakeArgString(TuneCPU));
}
}
void Clang::AddHexagonTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CmdArgs.push_back("-mqdsp6-compat");
CmdArgs.push_back("-Wreturn-type");
if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-hexagon-small-data-threshold=" +
Twine(G.getValue())));
}
if (!Args.hasArg(options::OPT_fno_short_enums))
CmdArgs.push_back("-fshort-enums");
if (Args.getLastArg(options::OPT_mieee_rnd_near)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-enable-hexagon-ieee-rnd-near");
}
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-machine-sink-split=0");
}
void Clang::AddLanaiTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef CPUName = A->getValue();
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPUName));
}
if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) {
StringRef Value = A->getValue();
// Only support mregparm=4 to support old usage. Report error for all other
// cases.
int Mregparm;
if (Value.getAsInteger(10, Mregparm) || Mregparm != 4) {
getToolChain().getDriver().Diag(
diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
void Clang::AddWebAssemblyTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Default to "hidden" visibility.
if (!Args.hasArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat)) {
CmdArgs.push_back("-fvisibility");
CmdArgs.push_back("hidden");
}
}
void Clang::AddVETargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const {
// Floating point operations and argument passing are hard.
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
void Clang::DumpCompilationDatabase(Compilation &C, StringRef Filename,
StringRef Target, const InputInfo &Output,
const InputInfo &Input, const ArgList &Args) const {
// If this is a dry run, do not create the compilation database file.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
return;
using llvm::yaml::escape;
const Driver &D = getToolChain().getDriver();
if (!CompilationDatabase) {
std::error_code EC;
auto File = std::make_unique<llvm::raw_fd_ostream>(
Filename, EC, llvm::sys::fs::OF_TextWithCRLF);
if (EC) {
D.Diag(clang::diag::err_drv_compilationdatabase) << Filename
<< EC.message();
return;
}
CompilationDatabase = std::move(File);
}
auto &CDB = *CompilationDatabase;
auto CWD = D.getVFS().getCurrentWorkingDirectory();
if (!CWD)
CWD = ".";
CDB << "{ \"directory\": \"" << escape(*CWD) << "\"";
CDB << ", \"file\": \"" << escape(Input.getFilename()) << "\"";
CDB << ", \"output\": \"" << escape(Output.getFilename()) << "\"";
CDB << ", \"arguments\": [\"" << escape(D.ClangExecutable) << "\"";
SmallString<128> Buf;
Buf = "-x";
Buf += types::getTypeName(Input.getType());
CDB << ", \"" << escape(Buf) << "\"";
if (!D.SysRoot.empty() && !Args.hasArg(options::OPT__sysroot_EQ)) {
Buf = "--sysroot=";
Buf += D.SysRoot;
CDB << ", \"" << escape(Buf) << "\"";
}
CDB << ", \"" << escape(Input.getFilename()) << "\"";
for (auto &A: Args) {
auto &O = A->getOption();
// Skip language selection, which is positional.
if (O.getID() == options::OPT_x)
continue;
// Skip writing dependency output and the compilation database itself.
if (O.getGroup().isValid() && O.getGroup().getID() == options::OPT_M_Group)
continue;
if (O.getID() == options::OPT_gen_cdb_fragment_path)
continue;
// Skip inputs.
if (O.getKind() == Option::InputClass)
continue;
// All other arguments are quoted and appended.
ArgStringList ASL;
A->render(Args, ASL);
for (auto &it: ASL)
CDB << ", \"" << escape(it) << "\"";
}
Buf = "--target=";
Buf += Target;
CDB << ", \"" << escape(Buf) << "\"]},\n";
}
void Clang::DumpCompilationDatabaseFragmentToDir(
StringRef Dir, Compilation &C, StringRef Target, const InputInfo &Output,
const InputInfo &Input, const llvm::opt::ArgList &Args) const {
// If this is a dry run, do not create the compilation database file.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
return;
if (CompilationDatabase)
DumpCompilationDatabase(C, "", Target, Output, Input, Args);
SmallString<256> Path = Dir;
const auto &Driver = C.getDriver();
Driver.getVFS().makeAbsolute(Path);
auto Err = llvm::sys::fs::create_directory(Path, /*IgnoreExisting=*/true);
if (Err) {
Driver.Diag(diag::err_drv_compilationdatabase) << Dir << Err.message();
return;
}
llvm::sys::path::append(
Path,
Twine(llvm::sys::path::filename(Input.getFilename())) + ".%%%%.json");
int FD;
SmallString<256> TempPath;
Err = llvm::sys::fs::createUniqueFile(Path, FD, TempPath,
llvm::sys::fs::OF_Text);
if (Err) {
Driver.Diag(diag::err_drv_compilationdatabase) << Path << Err.message();
return;
}
CompilationDatabase =
std::make_unique<llvm::raw_fd_ostream>(FD, /*shouldClose=*/true);
DumpCompilationDatabase(C, "", Target, Output, Input, Args);
}
static bool CheckARMImplicitITArg(StringRef Value) {
return Value == "always" || Value == "never" || Value == "arm" ||
Value == "thumb";
}
static void AddARMImplicitITArgs(const ArgList &Args, ArgStringList &CmdArgs,
StringRef Value) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-arm-implicit-it=" + Value));
}
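// For illustration: CheckARMImplicitITArg accepts exactly "always", "never",
// "arm" and "thumb"; a valid value such as "thumb" is forwarded to the
// backend as the pair "-mllvm" "-arm-implicit-it=thumb".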
static void CollectArgsForIntegratedAssembler(Compilation &C,
const ArgList &Args,
ArgStringList &CmdArgs,
const Driver &D) {
if (UseRelaxAll(C, Args))
CmdArgs.push_back("-mrelax-all");
// Only default to -mincremental-linker-compatible if we think we are
// targeting the MSVC linker.
bool DefaultIncrementalLinkerCompatible =
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment();
if (Args.hasFlag(options::OPT_mincremental_linker_compatible,
options::OPT_mno_incremental_linker_compatible,
DefaultIncrementalLinkerCompatible))
CmdArgs.push_back("-mincremental-linker-compatible");
// If you add more args here, also add them to the block below that
// starts with "// If CollectArgsForIntegratedAssembler() isn't called below".
// When passing -I arguments to the assembler we sometimes need to
// unconditionally take the next argument. For example, when parsing
// '-Wa,-I -Wa,foo' we need to accept the -Wa,foo arg after seeing the
// -Wa,-I arg and when parsing '-Wa,-I,foo' we need to accept the 'foo'
// arg after parsing the '-I' arg.
bool TakeNextArg = false;
bool UseRelaxRelocations = C.getDefaultToolChain().useRelaxRelocations();
bool UseNoExecStack = C.getDefaultToolChain().isNoExecStackDefault();
const char *MipsTargetFeature = nullptr;
StringRef ImplicitIt;
for (const Arg *A :
Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler,
options::OPT_mimplicit_it_EQ)) {
A->claim();
if (A->getOption().getID() == options::OPT_mimplicit_it_EQ) {
switch (C.getDefaultToolChain().getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
// Only store the value; the last value set takes effect.
ImplicitIt = A->getValue();
if (!CheckARMImplicitITArg(ImplicitIt))
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << ImplicitIt;
continue;
default:
break;
}
}
for (StringRef Value : A->getValues()) {
if (TakeNextArg) {
CmdArgs.push_back(Value.data());
TakeNextArg = false;
continue;
}
if (C.getDefaultToolChain().getTriple().isOSBinFormatCOFF() &&
Value == "-mbig-obj")
continue; // LLVM handles bigobj automatically
switch (C.getDefaultToolChain().getArch()) {
default:
break;
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::arm:
case llvm::Triple::armeb:
if (Value.startswith("-mimplicit-it=")) {
// Only store the value; the last value set takes effect.
ImplicitIt = Value.split("=").second;
if (CheckARMImplicitITArg(ImplicitIt))
continue;
}
if (Value == "-mthumb")
// -mthumb has already been processed in ComputeLLVMTriple();
// recognize it here but skip over it.
continue;
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
if (Value == "--trap") {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+use-tcc-in-div");
continue;
}
if (Value == "--break") {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("-use-tcc-in-div");
continue;
}
if (Value.startswith("-msoft-float")) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+soft-float");
continue;
}
if (Value.startswith("-mhard-float")) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("-soft-float");
continue;
}
MipsTargetFeature = llvm::StringSwitch<const char *>(Value)
.Case("-mips1", "+mips1")
.Case("-mips2", "+mips2")
.Case("-mips3", "+mips3")
.Case("-mips4", "+mips4")
.Case("-mips5", "+mips5")
.Case("-mips32", "+mips32")
.Case("-mips32r2", "+mips32r2")
.Case("-mips32r3", "+mips32r3")
.Case("-mips32r5", "+mips32r5")
.Case("-mips32r6", "+mips32r6")
.Case("-mips64", "+mips64")
.Case("-mips64r2", "+mips64r2")
.Case("-mips64r3", "+mips64r3")
.Case("-mips64r5", "+mips64r5")
.Case("-mips64r6", "+mips64r6")
.Default(nullptr);
if (MipsTargetFeature)
continue;
}
if (Value == "-force_cpusubtype_ALL") {
// Do nothing, this is the default and we don't support anything else.
} else if (Value == "-L") {
CmdArgs.push_back("-msave-temp-labels");
} else if (Value == "--fatal-warnings") {
CmdArgs.push_back("-massembler-fatal-warnings");
} else if (Value == "--no-warn" || Value == "-W") {
CmdArgs.push_back("-massembler-no-warn");
} else if (Value == "--noexecstack") {
UseNoExecStack = true;
} else if (Value.startswith("-compress-debug-sections") ||
Value.startswith("--compress-debug-sections") ||
Value == "-nocompress-debug-sections" ||
Value == "--nocompress-debug-sections") {
CmdArgs.push_back(Value.data());
} else if (Value == "-mrelax-relocations=yes" ||
Value == "--mrelax-relocations=yes") {
UseRelaxRelocations = true;
} else if (Value == "-mrelax-relocations=no" ||
Value == "--mrelax-relocations=no") {
UseRelaxRelocations = false;
} else if (Value.startswith("-I")) {
CmdArgs.push_back(Value.data());
// We need to consume the next argument if the current arg is a plain
// -I. The next arg will be the include directory.
if (Value == "-I")
TakeNextArg = true;
} else if (Value.startswith("-gdwarf-")) {
// "-gdwarf-N" options are not cc1as options.
unsigned DwarfVersion = DwarfVersionNum(Value);
if (DwarfVersion == 0) { // Send it onward, and let cc1as complain.
CmdArgs.push_back(Value.data());
} else {
RenderDebugEnablingArgs(Args, CmdArgs,
codegenoptions::DebugInfoConstructor,
DwarfVersion, llvm::DebuggerKind::Default);
}
} else if (Value.startswith("-mcpu") || Value.startswith("-mfpu") ||
Value.startswith("-mhwdiv") || Value.startswith("-march")) {
// Do nothing, we'll validate it later.
} else if (Value == "-defsym") {
if (A->getNumValues() != 2) {
D.Diag(diag::err_drv_defsym_invalid_format) << Value;
break;
}
const char *S = A->getValue(1);
auto Pair = StringRef(S).split('=');
auto Sym = Pair.first;
auto SVal = Pair.second;
if (Sym.empty() || SVal.empty()) {
D.Diag(diag::err_drv_defsym_invalid_format) << S;
break;
}
int64_t IVal;
if (SVal.getAsInteger(0, IVal)) {
D.Diag(diag::err_drv_defsym_invalid_symval) << SVal;
break;
}
CmdArgs.push_back(Value.data());
TakeNextArg = true;
} else if (Value == "-fdebug-compilation-dir") {
CmdArgs.push_back("-fdebug-compilation-dir");
TakeNextArg = true;
} else if (Value.consume_front("-fdebug-compilation-dir=")) {
// The flag is a -Wa / -Xassembler argument and Options doesn't
// parse the argument, so this isn't automatically aliased to
// -fdebug-compilation-dir (without '=') here.
CmdArgs.push_back("-fdebug-compilation-dir");
CmdArgs.push_back(Value.data());
} else if (Value == "--version") {
D.PrintVersion(C, llvm::outs());
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
if (ImplicitIt.size())
AddARMImplicitITArgs(Args, CmdArgs, ImplicitIt);
if (UseRelaxRelocations)
CmdArgs.push_back("--mrelax-relocations");
if (UseNoExecStack)
CmdArgs.push_back("-mnoexecstack");
if (MipsTargetFeature != nullptr) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back(MipsTargetFeature);
}
// Forward -fembed-bitcode to the assembler.
if (C.getDriver().embedBitcodeEnabled() ||
C.getDriver().embedBitcodeMarkerOnly())
Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
}
static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
bool OFastEnabled, const ArgList &Args,
ArgStringList &CmdArgs,
const JobAction &JA) {
// Handle various floating point optimization flags, mapping them to the
// appropriate LLVM code generation flags. This is complicated by several
// "umbrella" flags, so we do this by stepping through the flags incrementally
// adjusting what we think is enabled/disabled, then at the end setting the
// LLVM flags based on the final state.
bool HonorINFs = true;
bool HonorNaNs = true;
// -fmath-errno is the default on some platforms, e.g. BSD-derived OSes.
bool MathErrno = TC.IsMathErrnoDefault();
bool AssociativeMath = false;
bool ReciprocalMath = false;
bool SignedZeros = true;
bool TrappingMath = false; // Implemented via -ffp-exception-behavior
bool TrappingMathPresent = false; // Is trapping-math in args, and not
// overridden by -ffp-exception-behavior?
bool RoundingFPMath = false;
bool RoundingMathPresent = false; // Is rounding-math in args?
// -ffp-model values: strict, fast, precise
StringRef FPModel = "";
// -ffp-exception-behavior options: strict, maytrap, ignore
StringRef FPExceptionBehavior = "";
const llvm::DenormalMode DefaultDenormalFPMath =
TC.getDefaultDenormalModeForType(Args, JA);
const llvm::DenormalMode DefaultDenormalFP32Math =
TC.getDefaultDenormalModeForType(Args, JA, &llvm::APFloat::IEEEsingle());
llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath;
llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math;
StringRef FPContract = "";
bool StrictFPModel = false;
if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) {
CmdArgs.push_back("-mlimit-float-precision");
CmdArgs.push_back(A->getValue());
}
for (const Arg *A : Args) {
auto optID = A->getOption().getID();
bool PreciseFPModel = false;
switch (optID) {
default:
break;
case options::OPT_ffp_model_EQ: {
// If -ffp-model= is seen, reset to fno-fast-math
HonorINFs = true;
HonorNaNs = true;
// Turning *off* -ffast-math restores the toolchain default.
MathErrno = TC.IsMathErrnoDefault();
AssociativeMath = false;
ReciprocalMath = false;
SignedZeros = true;
// -fno-fast-math restores the default denormal and fp-contract handling.
FPContract = "";
DenormalFPMath = llvm::DenormalMode::getIEEE();
// FIXME: The target may have picked a non-IEEE default mode here based on
// -cl-denorms-are-zero. Should the target consider -fp-model interaction?
DenormalFP32Math = llvm::DenormalMode::getIEEE();
StringRef Val = A->getValue();
if (OFastEnabled && !Val.equals("fast")) {
// Only -ffp-model=fast is compatible with OFast, ignore.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-model=" + Val)
<< "-Ofast";
break;
}
StrictFPModel = false;
PreciseFPModel = true;
// -ffp-model= is a Driver option; it is entirely rewritten into more
// granular options before being passed to cc1.
// Use the GCC-style option in the switch below.
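// Roughly: fast    -> handled as -ffast-math plus -ffp-contract=fast,
//          precise -> -ffp-contract=fast,
//          strict  -> -frounding-math, -ffp-contract=off and
//                     -ffp-exception-behavior=strict.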
if (!FPModel.empty() && !FPModel.equals(Val)) {
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-model=" + FPModel)
<< Args.MakeArgString("-ffp-model=" + Val);
FPContract = "";
}
if (Val.equals("fast")) {
optID = options::OPT_ffast_math;
FPModel = Val;
FPContract = "fast";
} else if (Val.equals("precise")) {
optID = options::OPT_ffp_contract;
FPModel = Val;
FPContract = "fast";
PreciseFPModel = true;
} else if (Val.equals("strict")) {
StrictFPModel = true;
optID = options::OPT_frounding_math;
FPExceptionBehavior = "strict";
FPModel = Val;
FPContract = "off";
TrappingMath = true;
} else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
break;
}
}
switch (optID) {
// If this isn't an FP option, skip the claim below.
default: continue;
// Options controlling individual features
case options::OPT_fhonor_infinities: HonorINFs = true; break;
case options::OPT_fno_honor_infinities: HonorINFs = false; break;
case options::OPT_fhonor_nans: HonorNaNs = true; break;
case options::OPT_fno_honor_nans: HonorNaNs = false; break;
case options::OPT_fmath_errno: MathErrno = true; break;
case options::OPT_fno_math_errno: MathErrno = false; break;
case options::OPT_fassociative_math: AssociativeMath = true; break;
case options::OPT_fno_associative_math: AssociativeMath = false; break;
case options::OPT_freciprocal_math: ReciprocalMath = true; break;
case options::OPT_fno_reciprocal_math: ReciprocalMath = false; break;
case options::OPT_fsigned_zeros: SignedZeros = true; break;
case options::OPT_fno_signed_zeros: SignedZeros = false; break;
case options::OPT_ftrapping_math:
if (!TrappingMathPresent && !FPExceptionBehavior.empty() &&
!FPExceptionBehavior.equals("strict"))
// Warn that previous value of option is overridden.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
<< "-ftrapping-math";
TrappingMath = true;
TrappingMathPresent = true;
FPExceptionBehavior = "strict";
break;
case options::OPT_fno_trapping_math:
if (!TrappingMathPresent && !FPExceptionBehavior.empty() &&
!FPExceptionBehavior.equals("ignore"))
// Warn that previous value of option is overridden.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
<< "-fno-trapping-math";
TrappingMath = false;
TrappingMathPresent = true;
FPExceptionBehavior = "ignore";
break;
case options::OPT_frounding_math:
RoundingFPMath = true;
RoundingMathPresent = true;
break;
case options::OPT_fno_rounding_math:
RoundingFPMath = false;
RoundingMathPresent = false;
break;
case options::OPT_fdenormal_fp_math_EQ:
DenormalFPMath = llvm::parseDenormalFPAttribute(A->getValue());
if (!DenormalFPMath.isValid()) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
break;
case options::OPT_fdenormal_fp_math_f32_EQ:
DenormalFP32Math = llvm::parseDenormalFPAttribute(A->getValue());
if (!DenormalFP32Math.isValid()) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
break;
// Validate and pass through -ffp-contract option.
case options::OPT_ffp_contract: {
StringRef Val = A->getValue();
if (PreciseFPModel) {
// -ffp-model=precise enables -ffp-contract=fast as a side effect;
// the FPContract value has already been set to a string literal
// and the Val string isn't a pertinent value.
;
} else if (Val.equals("fast") || Val.equals("on") || Val.equals("off"))
FPContract = Val;
else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
break;
}
// Validate and pass through -ffp-model option.
case options::OPT_ffp_model_EQ:
// This should only occur in the error case
// since the optID has been replaced by a more granular
// floating point option.
break;
// Validate and pass through -ffp-exception-behavior option.
case options::OPT_ffp_exception_behavior_EQ: {
StringRef Val = A->getValue();
if (!TrappingMathPresent && !FPExceptionBehavior.empty() &&
!FPExceptionBehavior.equals(Val))
// Warn that previous value of option is overridden.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
<< Args.MakeArgString("-ffp-exception-behavior=" + Val);
TrappingMath = TrappingMathPresent = false;
if (Val.equals("ignore") || Val.equals("maytrap"))
FPExceptionBehavior = Val;
else if (Val.equals("strict")) {
FPExceptionBehavior = Val;
TrappingMath = TrappingMathPresent = true;
} else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
break;
}
case options::OPT_ffinite_math_only:
HonorINFs = false;
HonorNaNs = false;
break;
case options::OPT_fno_finite_math_only:
HonorINFs = true;
HonorNaNs = true;
break;
case options::OPT_funsafe_math_optimizations:
AssociativeMath = true;
ReciprocalMath = true;
SignedZeros = false;
TrappingMath = false;
FPExceptionBehavior = "";
break;
case options::OPT_fno_unsafe_math_optimizations:
AssociativeMath = false;
ReciprocalMath = false;
SignedZeros = true;
TrappingMath = true;
FPExceptionBehavior = "strict";
// The target may have opted to flush by default, so force IEEE.
DenormalFPMath = llvm::DenormalMode::getIEEE();
DenormalFP32Math = llvm::DenormalMode::getIEEE();
break;
case options::OPT_Ofast:
// If -Ofast is the optimization level, then -ffast-math should be enabled
if (!OFastEnabled)
continue;
LLVM_FALLTHROUGH;
case options::OPT_ffast_math:
HonorINFs = false;
HonorNaNs = false;
MathErrno = false;
AssociativeMath = true;
ReciprocalMath = true;
SignedZeros = false;
TrappingMath = false;
RoundingFPMath = false;
// If fast-math is set, then set the fp-contract mode to fast.
FPContract = "fast";
break;
case options::OPT_fno_fast_math:
HonorINFs = true;
HonorNaNs = true;
// Turning on -ffast-math (with either flag) removes the need for
// MathErrno. However, turning *off* -ffast-math merely restores the
// toolchain default (which may be false).
MathErrno = TC.IsMathErrnoDefault();
AssociativeMath = false;
ReciprocalMath = false;
SignedZeros = true;
TrappingMath = false;
RoundingFPMath = false;
// -fno-fast-math restores the default denormal and fp-contract handling.
DenormalFPMath = DefaultDenormalFPMath;
DenormalFP32Math = llvm::DenormalMode::getIEEE();
FPContract = "";
break;
}
if (StrictFPModel) {
// If -ffp-model=strict has been specified on the command line but
// subsequent options conflict, then emit a warning diagnostic.
if (HonorINFs && HonorNaNs &&
!AssociativeMath && !ReciprocalMath &&
SignedZeros && TrappingMath && RoundingFPMath &&
(FPContract.equals("off") || FPContract.empty()) &&
DenormalFPMath == llvm::DenormalMode::getIEEE() &&
DenormalFP32Math == llvm::DenormalMode::getIEEE())
// OK: Current Arg doesn't conflict with -ffp-model=strict
;
else {
StrictFPModel = false;
FPModel = "";
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< "-ffp-model=strict" <<
((A->getNumValues() == 0) ? A->getSpelling()
: Args.MakeArgString(A->getSpelling() + A->getValue()));
}
}
// If we handled this option, claim it.
A->claim();
}
if (!HonorINFs)
CmdArgs.push_back("-menable-no-infs");
if (!HonorNaNs)
CmdArgs.push_back("-menable-no-nans");
if (MathErrno)
CmdArgs.push_back("-fmath-errno");
if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
!TrappingMath)
CmdArgs.push_back("-menable-unsafe-fp-math");
if (!SignedZeros)
CmdArgs.push_back("-fno-signed-zeros");
if (AssociativeMath && !SignedZeros && !TrappingMath)
CmdArgs.push_back("-mreassociate");
if (ReciprocalMath)
CmdArgs.push_back("-freciprocal-math");
if (TrappingMath) {
// FP Exception Behavior is also set to strict
assert(FPExceptionBehavior.equals("strict"));
}
// The default is IEEE.
if (DenormalFPMath != llvm::DenormalMode::getIEEE()) {
llvm::SmallString<64> DenormFlag;
llvm::raw_svector_ostream ArgStr(DenormFlag);
ArgStr << "-fdenormal-fp-math=" << DenormalFPMath;
CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
}
// Add f32 specific denormal mode flag if it's different.
if (DenormalFP32Math != DenormalFPMath) {
llvm::SmallString<64> DenormFlag;
llvm::raw_svector_ostream ArgStr(DenormFlag);
ArgStr << "-fdenormal-fp-math-f32=" << DenormalFP32Math;
CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
}
if (!FPContract.empty())
CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract));
if (!RoundingFPMath)
CmdArgs.push_back(Args.MakeArgString("-fno-rounding-math"));
if (RoundingFPMath && RoundingMathPresent)
CmdArgs.push_back(Args.MakeArgString("-frounding-math"));
if (!FPExceptionBehavior.empty())
CmdArgs.push_back(Args.MakeArgString("-ffp-exception-behavior=" +
FPExceptionBehavior));
ParseMRecip(D, Args, CmdArgs);
// -ffast-math enables the __FAST_MATH__ preprocessor macro, but check for the
// individual features enabled by -ffast-math instead of the option itself as
// that's consistent with gcc's behaviour.
if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath &&
ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) {
CmdArgs.push_back("-ffast-math");
if (FPModel.equals("fast")) {
if (FPContract.equals("fast"))
// All set, do nothing.
;
else if (FPContract.empty())
// Enable -ffp-contract=fast
CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast"));
else
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< "-ffp-model=fast"
<< Args.MakeArgString("-ffp-contract=" + FPContract);
}
}
// Handle __FINITE_MATH_ONLY__ similarly.
if (!HonorINFs && !HonorNaNs)
CmdArgs.push_back("-ffinite-math-only");
if (const Arg *A = Args.getLastArg(options::OPT_mfpmath_EQ)) {
CmdArgs.push_back("-mfpmath");
CmdArgs.push_back(A->getValue());
}
// Disable a codegen optimization for floating-point casts.
if (Args.hasFlag(options::OPT_fno_strict_float_cast_overflow,
options::OPT_fstrict_float_cast_overflow, false))
CmdArgs.push_back("-fno-strict-float-cast-overflow");
}
static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::Triple &Triple,
const InputInfo &Input) {
// Enable region store model by default.
CmdArgs.push_back("-analyzer-store=region");
// Treat blocks as analysis entry points.
CmdArgs.push_back("-analyzer-opt-analyze-nested-blocks");
// Add default argument set.
if (!Args.hasArg(options::OPT__analyzer_no_default_checks)) {
CmdArgs.push_back("-analyzer-checker=core");
CmdArgs.push_back("-analyzer-checker=apiModeling");
if (!Triple.isWindowsMSVCEnvironment()) {
CmdArgs.push_back("-analyzer-checker=unix");
} else {
// Enable "unix" checkers that also work on Windows.
CmdArgs.push_back("-analyzer-checker=unix.API");
CmdArgs.push_back("-analyzer-checker=unix.Malloc");
CmdArgs.push_back("-analyzer-checker=unix.MallocSizeof");
CmdArgs.push_back("-analyzer-checker=unix.MismatchedDeallocator");
CmdArgs.push_back("-analyzer-checker=unix.cstring.BadSizeArg");
CmdArgs.push_back("-analyzer-checker=unix.cstring.NullArg");
}
// Disable some unix checkers for PS4.
if (Triple.isPS4CPU()) {
CmdArgs.push_back("-analyzer-disable-checker=unix.API");
CmdArgs.push_back("-analyzer-disable-checker=unix.Vfork");
}
if (Triple.isOSDarwin()) {
CmdArgs.push_back("-analyzer-checker=osx");
CmdArgs.push_back(
"-analyzer-checker=security.insecureAPI.decodeValueOfObjCType");
}
else if (Triple.isOSFuchsia())
CmdArgs.push_back("-analyzer-checker=fuchsia");
CmdArgs.push_back("-analyzer-checker=deadcode");
if (types::isCXX(Input.getType()))
CmdArgs.push_back("-analyzer-checker=cplusplus");
if (!Triple.isPS4CPU()) {
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.UncheckedReturn");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mkstemp");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.vfork");
}
// Default nullability checks.
CmdArgs.push_back("-analyzer-checker=nullability.NullPassedToNonnull");
CmdArgs.push_back("-analyzer-checker=nullability.NullReturnedFromNonnull");
}
// Set the output format. The default is plist, for (lame) historical reasons.
CmdArgs.push_back("-analyzer-output");
if (Arg *A = Args.getLastArg(options::OPT__analyzer_output))
CmdArgs.push_back(A->getValue());
else
CmdArgs.push_back("plist");
// Disable the presentation of standard compiler warnings when using
// --analyze. We only want to show static analyzer diagnostics or frontend
// errors.
CmdArgs.push_back("-w");
// Add -Xanalyzer arguments when running as analyzer.
Args.AddAllArgValues(CmdArgs, options::OPT_Xanalyzer);
}
static void RenderSSPOptions(const Driver &D, const ToolChain &TC,
const ArgList &Args, ArgStringList &CmdArgs,
bool KernelOrKext) {
const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
// NVPTX doesn't support stack protectors; from the compiler's perspective, it
// doesn't even have a stack!
if (EffectiveTriple.isNVPTX())
return;
// -stack-protector=0 is default.
LangOptions::StackProtectorMode StackProtectorLevel = LangOptions::SSPOff;
LangOptions::StackProtectorMode DefaultStackProtectorLevel =
TC.GetDefaultStackProtectorLevel(KernelOrKext);
if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
options::OPT_fstack_protector_all,
options::OPT_fstack_protector_strong,
options::OPT_fstack_protector)) {
if (A->getOption().matches(options::OPT_fstack_protector))
StackProtectorLevel =
std::max<>(LangOptions::SSPOn, DefaultStackProtectorLevel);
else if (A->getOption().matches(options::OPT_fstack_protector_strong))
StackProtectorLevel = LangOptions::SSPStrong;
else if (A->getOption().matches(options::OPT_fstack_protector_all))
StackProtectorLevel = LangOptions::SSPReq;
} else {
StackProtectorLevel = DefaultStackProtectorLevel;
}
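// The cc1 "-stack-protector" option takes the numeric value of the chosen
// LangOptions::StackProtectorMode level rather than a flag name.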
if (StackProtectorLevel) {
CmdArgs.push_back("-stack-protector");
CmdArgs.push_back(Args.MakeArgString(Twine(StackProtectorLevel)));
}
// --param ssp-buffer-size=
for (const Arg *A : Args.filtered(options::OPT__param)) {
StringRef Str(A->getValue());
if (Str.startswith("ssp-buffer-size=")) {
if (StackProtectorLevel) {
CmdArgs.push_back("-stack-protector-buffer-size");
// FIXME: Verify the argument is a valid integer.
CmdArgs.push_back(Args.MakeArgString(Str.drop_front(16)));
}
A->claim();
}
}
const std::string &TripleStr = EffectiveTriple.getTriple();
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_EQ)) {
StringRef Value = A->getValue();
if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
if (EffectiveTriple.isX86() && Value != "tls" && Value != "global") {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "tls global";
return;
}
if (EffectiveTriple.isAArch64() && Value != "sysreg" && Value != "global") {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "sysreg global";
return;
}
A->render(Args, CmdArgs);
}
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_offset_EQ)) {
StringRef Value = A->getValue();
if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
int Offset;
if (Value.getAsInteger(10, Offset)) {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
return;
}
A->render(Args, CmdArgs);
}
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_reg_EQ)) {
StringRef Value = A->getValue();
if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
if (EffectiveTriple.isX86() && (Value != "fs" && Value != "gs")) {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "fs gs";
return;
}
if (EffectiveTriple.isAArch64() && Value != "sp_el0") {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
return;
}
A->render(Args, CmdArgs);
}
}
static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
if (!EffectiveTriple.isOSFreeBSD() && !EffectiveTriple.isOSLinux())
return;
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
!EffectiveTriple.isPPC64())
return;
if (Args.hasFlag(options::OPT_fstack_clash_protection,
options::OPT_fno_stack_clash_protection, false))
CmdArgs.push_back("-fstack-clash-protection");
}
static void RenderTrivialAutoVarInitOptions(const Driver &D,
const ToolChain &TC,
const ArgList &Args,
ArgStringList &CmdArgs) {
auto DefaultTrivialAutoVarInit = TC.GetDefaultTrivialAutoVarInit();
StringRef TrivialAutoVarInit = "";
for (const Arg *A : Args) {
switch (A->getOption().getID()) {
default:
continue;
case options::OPT_ftrivial_auto_var_init: {
A->claim();
StringRef Val = A->getValue();
if (Val == "uninitialized" || Val == "zero" || Val == "pattern")
TrivialAutoVarInit = Val;
else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
break;
}
}
}
if (TrivialAutoVarInit.empty())
switch (DefaultTrivialAutoVarInit) {
case LangOptions::TrivialAutoVarInitKind::Uninitialized:
break;
case LangOptions::TrivialAutoVarInitKind::Pattern:
TrivialAutoVarInit = "pattern";
break;
case LangOptions::TrivialAutoVarInitKind::Zero:
TrivialAutoVarInit = "zero";
break;
}
if (!TrivialAutoVarInit.empty()) {
if (TrivialAutoVarInit == "zero" && !Args.hasArg(options::OPT_enable_trivial_var_init_zero))
D.Diag(diag::err_drv_trivial_auto_var_init_zero_disabled);
CmdArgs.push_back(
Args.MakeArgString("-ftrivial-auto-var-init=" + TrivialAutoVarInit));
}
if (Arg *A =
Args.getLastArg(options::OPT_ftrivial_auto_var_init_stop_after)) {
if (!Args.hasArg(options::OPT_ftrivial_auto_var_init) ||
StringRef(
Args.getLastArg(options::OPT_ftrivial_auto_var_init)->getValue()) ==
"uninitialized")
D.Diag(diag::err_drv_trivial_auto_var_init_stop_after_missing_dependency);
A->claim();
StringRef Val = A->getValue();
if (std::stoi(Val.str()) <= 0)
D.Diag(diag::err_drv_trivial_auto_var_init_stop_after_invalid_value);
CmdArgs.push_back(
Args.MakeArgString("-ftrivial-auto-var-init-stop-after=" + Val));
}
}
static void RenderOpenCLOptions(const ArgList &Args, ArgStringList &CmdArgs,
types::ID InputType) {
// cl-denorms-are-zero is not forwarded. It is translated into a generic flag
// for denormal flushing handling based on the target.
const unsigned ForwardedArguments[] = {
options::OPT_cl_opt_disable,
options::OPT_cl_strict_aliasing,
options::OPT_cl_single_precision_constant,
options::OPT_cl_finite_math_only,
options::OPT_cl_kernel_arg_info,
options::OPT_cl_unsafe_math_optimizations,
options::OPT_cl_fast_relaxed_math,
options::OPT_cl_mad_enable,
options::OPT_cl_no_signed_zeros,
options::OPT_cl_fp32_correctly_rounded_divide_sqrt,
options::OPT_cl_uniform_work_group_size
};
if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) {
std::string CLStdStr = std::string("-cl-std=") + A->getValue();
CmdArgs.push_back(Args.MakeArgString(CLStdStr));
}
for (const auto &Arg : ForwardedArguments)
if (const auto *A = Args.getLastArg(Arg))
CmdArgs.push_back(Args.MakeArgString(A->getOption().getPrefixedName()));
// Only add the default headers if we are compiling OpenCL sources.
if ((types::isOpenCL(InputType) ||
(Args.hasArg(options::OPT_cl_std_EQ) && types::isSrcFile(InputType))) &&
!Args.hasArg(options::OPT_cl_no_stdinc)) {
CmdArgs.push_back("-finclude-default-header");
CmdArgs.push_back("-fdeclare-opencl-builtins");
}
}
static void RenderARCMigrateToolOptions(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
bool ARCMTEnabled = false;
if (!Args.hasArg(options::OPT_fno_objc_arc, options::OPT_fobjc_arc)) {
if (const Arg *A = Args.getLastArg(options::OPT_ccc_arcmt_check,
options::OPT_ccc_arcmt_modify,
options::OPT_ccc_arcmt_migrate)) {
ARCMTEnabled = true;
switch (A->getOption().getID()) {
default: llvm_unreachable("missed a case");
case options::OPT_ccc_arcmt_check:
CmdArgs.push_back("-arcmt-action=check");
break;
case options::OPT_ccc_arcmt_modify:
CmdArgs.push_back("-arcmt-action=modify");
break;
case options::OPT_ccc_arcmt_migrate:
CmdArgs.push_back("-arcmt-action=migrate");
CmdArgs.push_back("-mt-migrate-directory");
CmdArgs.push_back(A->getValue());
Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_report_output);
Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_emit_arc_errors);
break;
}
}
} else {
Args.ClaimAllArgs(options::OPT_ccc_arcmt_check);
Args.ClaimAllArgs(options::OPT_ccc_arcmt_modify);
Args.ClaimAllArgs(options::OPT_ccc_arcmt_migrate);
}
if (const Arg *A = Args.getLastArg(options::OPT_ccc_objcmt_migrate)) {
if (ARCMTEnabled)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "-ccc-arcmt-migrate";
CmdArgs.push_back("-mt-migrate-directory");
CmdArgs.push_back(A->getValue());
if (!Args.hasArg(options::OPT_objcmt_migrate_literals,
options::OPT_objcmt_migrate_subscripting,
options::OPT_objcmt_migrate_property)) {
// None specified, means enable them all.
CmdArgs.push_back("-objcmt-migrate-literals");
CmdArgs.push_back("-objcmt-migrate-subscripting");
CmdArgs.push_back("-objcmt-migrate-property");
} else {
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property);
}
} else {
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_all);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readonly_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readwrite_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property_dot_syntax);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_annotation);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_instancetype);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_nsmacros);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_protocol_conformance);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_atomic_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_designated_init);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_whitelist_dir_path);
}
}
static void RenderBuiltinOptions(const ToolChain &TC, const llvm::Triple &T,
const ArgList &Args, ArgStringList &CmdArgs) {
// -fbuiltin is default unless -mkernel is used.
bool UseBuiltins =
Args.hasFlag(options::OPT_fbuiltin, options::OPT_fno_builtin,
!Args.hasArg(options::OPT_mkernel));
if (!UseBuiltins)
CmdArgs.push_back("-fno-builtin");
// -ffreestanding implies -fno-builtin.
if (Args.hasArg(options::OPT_ffreestanding))
UseBuiltins = false;
// Process the -fno-builtin-* options.
for (const auto &Arg : Args) {
const Option &O = Arg->getOption();
if (!O.matches(options::OPT_fno_builtin_))
continue;
Arg->claim();
// If -fno-builtin is specified, then there's no need to pass the option to
// the frontend.
if (!UseBuiltins)
continue;
StringRef FuncName = Arg->getValue();
CmdArgs.push_back(Args.MakeArgString("-fno-builtin-" + FuncName));
}
// le32-specific flags:
// -fno-math-builtin: clang should not convert math builtins to intrinsics
// by default.
if (TC.getArch() == llvm::Triple::le32)
CmdArgs.push_back("-fno-math-builtin");
}
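// Compute the default location for implicitly-built module files: the user's
// cache directory with "clang/ModuleCache" appended (platform dependent; on
// many Linux setups this resolves to something like ~/.cache/clang/ModuleCache).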
bool Driver::getDefaultModuleCachePath(SmallVectorImpl<char> &Result) {
if (llvm::sys::path::cache_directory(Result)) {
llvm::sys::path::append(Result, "clang");
llvm::sys::path::append(Result, "ModuleCache");
return true;
}
return false;
}
static void RenderModulesOptions(Compilation &C, const Driver &D,
const ArgList &Args, const InputInfo &Input,
const InputInfo &Output,
ArgStringList &CmdArgs, bool &HaveModules) {
// -fmodules enables the use of precompiled modules (off by default).
// Users can pass -fno-cxx-modules to turn off modules support for
// C++/Objective-C++ programs.
bool HaveClangModules = false;
if (Args.hasFlag(options::OPT_fmodules, options::OPT_fno_modules, false)) {
bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules,
options::OPT_fno_cxx_modules, true);
if (AllowedInCXX || !types::isCXX(Input.getType())) {
CmdArgs.push_back("-fmodules");
HaveClangModules = true;
}
}
HaveModules |= HaveClangModules;
if (Args.hasArg(options::OPT_fmodules_ts)) {
CmdArgs.push_back("-fmodules-ts");
HaveModules = true;
}
// -fmodule-maps enables implicit reading of module map files. By default,
// this is enabled if we are using Clang's flavor of precompiled modules.
if (Args.hasFlag(options::OPT_fimplicit_module_maps,
options::OPT_fno_implicit_module_maps, HaveClangModules))
CmdArgs.push_back("-fimplicit-module-maps");
// -fmodules-decluse checks that modules used are declared so (off by default)
if (Args.hasFlag(options::OPT_fmodules_decluse,
options::OPT_fno_modules_decluse, false))
CmdArgs.push_back("-fmodules-decluse");
// -fmodules-strict-decluse is like -fmodules-decluse, but also checks that
// all #included headers are part of modules.
if (Args.hasFlag(options::OPT_fmodules_strict_decluse,
options::OPT_fno_modules_strict_decluse, false))
CmdArgs.push_back("-fmodules-strict-decluse");
// -fno-implicit-modules turns off implicitly compiling modules on demand.
bool ImplicitModules = false;
if (!Args.hasFlag(options::OPT_fimplicit_modules,
options::OPT_fno_implicit_modules, HaveClangModules)) {
if (HaveModules)
CmdArgs.push_back("-fno-implicit-modules");
} else if (HaveModules) {
ImplicitModules = true;
// -fmodule-cache-path specifies where our implicitly-built module files
// should be written.
SmallString<128> Path;
if (Arg *A = Args.getLastArg(options::OPT_fmodules_cache_path))
Path = A->getValue();
bool HasPath = true;
if (C.isForDiagnostics()) {
// When generating crash reports, we want to emit the modules along with
// the reproduction sources, so we ignore any provided module path.
Path = Output.getFilename();
llvm::sys::path::replace_extension(Path, ".cache");
llvm::sys::path::append(Path, "modules");
} else if (Path.empty()) {
// No module path was provided: use the default.
HasPath = Driver::getDefaultModuleCachePath(Path);
}
// `HasPath` will only be false if getDefaultModuleCachePath() fails.
// That said, such a failure is unlikely, and not caching is harmless.
if (HasPath) {
const char Arg[] = "-fmodules-cache-path=";
Path.insert(Path.begin(), Arg, Arg + strlen(Arg));
CmdArgs.push_back(Args.MakeArgString(Path));
}
}
if (HaveModules) {
// -fprebuilt-module-path specifies where to load the prebuilt module files.
for (const Arg *A : Args.filtered(options::OPT_fprebuilt_module_path)) {
CmdArgs.push_back(Args.MakeArgString(
std::string("-fprebuilt-module-path=") + A->getValue()));
A->claim();
}
if (Args.hasFlag(options::OPT_fprebuilt_implicit_modules,
options::OPT_fno_prebuilt_implicit_modules, false))
CmdArgs.push_back("-fprebuilt-implicit-modules");
if (Args.hasFlag(options::OPT_fmodules_validate_input_files_content,
options::OPT_fno_modules_validate_input_files_content,
false))
CmdArgs.push_back("-fvalidate-ast-input-files-content");
}
// -fmodule-name specifies the module that is currently being built (or
// used for header checking by -fmodule-maps).
Args.AddLastArg(CmdArgs, options::OPT_fmodule_name_EQ);
// -fmodule-map-file can be used to specify files containing module
// definitions.
Args.AddAllArgs(CmdArgs, options::OPT_fmodule_map_file);
// -fbuiltin-module-map can be used to load the clang
// builtin headers modulemap file.
if (Args.hasArg(options::OPT_fbuiltin_module_map)) {
SmallString<128> BuiltinModuleMap(D.ResourceDir);
llvm::sys::path::append(BuiltinModuleMap, "include");
llvm::sys::path::append(BuiltinModuleMap, "module.modulemap");
if (llvm::sys::fs::exists(BuiltinModuleMap))
CmdArgs.push_back(
Args.MakeArgString("-fmodule-map-file=" + BuiltinModuleMap));
}
// The -fmodule-file=<name>=<file> form specifies the mapping of module
// names to precompiled module files (the module is loaded only if used).
// The -fmodule-file=<file> form can be used to unconditionally load
// precompiled module files (whether used or not).
if (HaveModules)
Args.AddAllArgs(CmdArgs, options::OPT_fmodule_file);
else
Args.ClaimAllArgs(options::OPT_fmodule_file);
// When building modules and generating crashdumps, we need to dump a module
// dependency VFS alongside the output.
if (HaveClangModules && C.isForDiagnostics()) {
SmallString<128> VFSDir(Output.getFilename());
llvm::sys::path::replace_extension(VFSDir, ".cache");
// Add the cache directory as a temp so the crash diagnostics pick it up.
C.addTempFile(Args.MakeArgString(VFSDir));
llvm::sys::path::append(VFSDir, "vfs");
CmdArgs.push_back("-module-dependency-dir");
CmdArgs.push_back(Args.MakeArgString(VFSDir));
}
if (HaveClangModules)
Args.AddLastArg(CmdArgs, options::OPT_fmodules_user_build_path);
// Pass through all -fmodules-ignore-macro arguments.
Args.AddAllArgs(CmdArgs, options::OPT_fmodules_ignore_macro);
Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_interval);
Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_after);
Args.AddLastArg(CmdArgs, options::OPT_fbuild_session_timestamp);
if (Arg *A = Args.getLastArg(options::OPT_fbuild_session_file)) {
if (Args.hasArg(options::OPT_fbuild_session_timestamp))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "-fbuild-session-timestamp";
llvm::sys::fs::file_status Status;
if (llvm::sys::fs::status(A->getValue(), Status))
D.Diag(diag::err_drv_no_such_file) << A->getValue();
CmdArgs.push_back(
Args.MakeArgString("-fbuild-session-timestamp=" +
Twine((uint64_t)Status.getLastModificationTime()
.time_since_epoch()
.count())));
}
if (Args.getLastArg(options::OPT_fmodules_validate_once_per_build_session)) {
if (!Args.getLastArg(options::OPT_fbuild_session_timestamp,
options::OPT_fbuild_session_file))
D.Diag(diag::err_drv_modules_validate_once_requires_timestamp);
Args.AddLastArg(CmdArgs,
options::OPT_fmodules_validate_once_per_build_session);
}
if (Args.hasFlag(options::OPT_fmodules_validate_system_headers,
options::OPT_fno_modules_validate_system_headers,
ImplicitModules))
CmdArgs.push_back("-fmodules-validate-system-headers");
Args.AddLastArg(CmdArgs, options::OPT_fmodules_disable_diagnostic_validation);
}
static void RenderCharacterOptions(const ArgList &Args, const llvm::Triple &T,
ArgStringList &CmdArgs) {
// -fsigned-char is default.
if (const Arg *A = Args.getLastArg(options::OPT_fsigned_char,
options::OPT_fno_signed_char,
options::OPT_funsigned_char,
options::OPT_fno_unsigned_char)) {
if (A->getOption().matches(options::OPT_funsigned_char) ||
A->getOption().matches(options::OPT_fno_signed_char)) {
CmdArgs.push_back("-fno-signed-char");
}
} else if (!isSignedCharDefault(T)) {
CmdArgs.push_back("-fno-signed-char");
}
// The default depends on the language standard.
Args.AddLastArg(CmdArgs, options::OPT_fchar8__t, options::OPT_fno_char8__t);
if (const Arg *A = Args.getLastArg(options::OPT_fshort_wchar,
options::OPT_fno_short_wchar)) {
if (A->getOption().matches(options::OPT_fshort_wchar)) {
CmdArgs.push_back("-fwchar-type=short");
CmdArgs.push_back("-fno-signed-wchar");
} else {
bool IsARM = T.isARM() || T.isThumb() || T.isAArch64();
CmdArgs.push_back("-fwchar-type=int");
if (T.isOSzOS() ||
(IsARM && !(T.isOSWindows() || T.isOSNetBSD() || T.isOSOpenBSD())))
CmdArgs.push_back("-fno-signed-wchar");
else
CmdArgs.push_back("-fsigned-wchar");
}
}
}
static void RenderObjCOptions(const ToolChain &TC, const Driver &D,
const llvm::Triple &T, const ArgList &Args,
ObjCRuntime &Runtime, bool InferCovariantReturns,
const InputInfo &Input, ArgStringList &CmdArgs) {
const llvm::Triple::ArchType Arch = TC.getArch();
// -fobjc-dispatch-method is only relevant with the nonfragile ABI, and legacy
// is the default. Except for a deployment target of 10.5, the NeXT runtime is
// always legacy dispatch, and -fno-objc-legacy-dispatch is silently ignored.
if (Runtime.isNonFragile()) {
if (!Args.hasFlag(options::OPT_fobjc_legacy_dispatch,
options::OPT_fno_objc_legacy_dispatch,
Runtime.isLegacyDispatchDefaultForArch(Arch))) {
if (TC.UseObjCMixedDispatch())
CmdArgs.push_back("-fobjc-dispatch-method=mixed");
else
CmdArgs.push_back("-fobjc-dispatch-method=non-legacy");
}
}
// When the Objective-C legacy runtime is in effect on macOS, turn on the
// option to do Array/Dictionary subscripting by default.
if (Arch == llvm::Triple::x86 && T.isMacOSX() &&
Runtime.getKind() == ObjCRuntime::FragileMacOSX && Runtime.isNeXTFamily())
CmdArgs.push_back("-fobjc-subscripting-legacy-runtime");
// Allow -fno-objc-arr to trump -fobjc-arr/-fobjc-arc.
// NOTE: This logic is duplicated in ToolChains.cpp.
if (isObjCAutoRefCount(Args)) {
TC.CheckObjCARC();
CmdArgs.push_back("-fobjc-arc");
// FIXME: It seems like this entire block, and several around it should be
// wrapped in isObjC, but for now we just use it here as this is where it
// was being used previously.
if (types::isCXX(Input.getType()) && types::isObjC(Input.getType())) {
if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx)
CmdArgs.push_back("-fobjc-arc-cxxlib=libc++");
else
CmdArgs.push_back("-fobjc-arc-cxxlib=libstdc++");
}
// Allow the user to enable full exceptions code emission.
// We default off for Objective-C, on for Objective-C++.
if (Args.hasFlag(options::OPT_fobjc_arc_exceptions,
options::OPT_fno_objc_arc_exceptions,
/*Default=*/types::isCXX(Input.getType())))
CmdArgs.push_back("-fobjc-arc-exceptions");
}
// Silence warning for full exception code emission options when explicitly
// set to use no ARC.
if (Args.hasArg(options::OPT_fno_objc_arc)) {
Args.ClaimAllArgs(options::OPT_fobjc_arc_exceptions);
Args.ClaimAllArgs(options::OPT_fno_objc_arc_exceptions);
}
// Allow the user to control whether messages can be converted to runtime
// functions.
if (types::isObjC(Input.getType())) {
auto *Arg = Args.getLastArg(
options::OPT_fobjc_convert_messages_to_runtime_calls,
options::OPT_fno_objc_convert_messages_to_runtime_calls);
if (Arg &&
Arg->getOption().matches(
options::OPT_fno_objc_convert_messages_to_runtime_calls))
CmdArgs.push_back("-fno-objc-convert-messages-to-runtime-calls");
}
// -fobjc-infer-related-result-type is the default, except in the Objective-C
// rewriter.
if (InferCovariantReturns)
CmdArgs.push_back("-fno-objc-infer-related-result-type");
// Pass down -fobjc-weak or -fno-objc-weak if present.
if (types::isObjC(Input.getType())) {
auto WeakArg =
Args.getLastArg(options::OPT_fobjc_weak, options::OPT_fno_objc_weak);
if (!WeakArg) {
// nothing to do
} else if (!Runtime.allowsWeak()) {
if (WeakArg->getOption().matches(options::OPT_fobjc_weak))
D.Diag(diag::err_objc_weak_unsupported);
} else {
WeakArg->render(Args, CmdArgs);
}
}
if (Args.hasArg(options::OPT_fobjc_disable_direct_methods_for_testing))
CmdArgs.push_back("-fobjc-disable-direct-methods-for-testing");
}
static void RenderDiagnosticsOptions(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
bool CaretDefault = true;
bool ColumnDefault = true;
if (const Arg *A = Args.getLastArg(options::OPT__SLASH_diagnostics_classic,
options::OPT__SLASH_diagnostics_column,
options::OPT__SLASH_diagnostics_caret)) {
switch (A->getOption().getID()) {
case options::OPT__SLASH_diagnostics_caret:
CaretDefault = true;
ColumnDefault = true;
break;
case options::OPT__SLASH_diagnostics_column:
CaretDefault = false;
ColumnDefault = true;
break;
case options::OPT__SLASH_diagnostics_classic:
CaretDefault = false;
ColumnDefault = false;
break;
}
}
// -fcaret-diagnostics is default.
if (!Args.hasFlag(options::OPT_fcaret_diagnostics,
options::OPT_fno_caret_diagnostics, CaretDefault))
CmdArgs.push_back("-fno-caret-diagnostics");
// -fdiagnostics-fixit-info is default, only pass non-default.
if (!Args.hasFlag(options::OPT_fdiagnostics_fixit_info,
options::OPT_fno_diagnostics_fixit_info))
CmdArgs.push_back("-fno-diagnostics-fixit-info");
// Enable -fdiagnostics-show-option by default.
if (!Args.hasFlag(options::OPT_fdiagnostics_show_option,
options::OPT_fno_diagnostics_show_option, true))
CmdArgs.push_back("-fno-diagnostics-show-option");
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) {
CmdArgs.push_back("-fdiagnostics-show-category");
CmdArgs.push_back(A->getValue());
}
if (Args.hasFlag(options::OPT_fdiagnostics_show_hotness,
options::OPT_fno_diagnostics_show_hotness, false))
CmdArgs.push_back("-fdiagnostics-show-hotness");
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_hotness_threshold_EQ)) {
std::string Opt =
std::string("-fdiagnostics-hotness-threshold=") + A->getValue();
CmdArgs.push_back(Args.MakeArgString(Opt));
}
if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) {
CmdArgs.push_back("-fdiagnostics-format");
CmdArgs.push_back(A->getValue());
}
if (const Arg *A = Args.getLastArg(
options::OPT_fdiagnostics_show_note_include_stack,
options::OPT_fno_diagnostics_show_note_include_stack)) {
const Option &O = A->getOption();
if (O.matches(options::OPT_fdiagnostics_show_note_include_stack))
CmdArgs.push_back("-fdiagnostics-show-note-include-stack");
else
CmdArgs.push_back("-fno-diagnostics-show-note-include-stack");
}
// Color diagnostics are parsed by the driver directly from argv and later
// re-parsed to construct this job; claim any possible color diagnostic here
// to avoid warn_drv_unused_argument and diagnose bad
// OPT_fdiagnostics_color_EQ values.
for (const Arg *A : Args) {
const Option &O = A->getOption();
if (!O.matches(options::OPT_fcolor_diagnostics) &&
!O.matches(options::OPT_fdiagnostics_color) &&
!O.matches(options::OPT_fno_color_diagnostics) &&
!O.matches(options::OPT_fno_diagnostics_color) &&
!O.matches(options::OPT_fdiagnostics_color_EQ))
continue;
if (O.matches(options::OPT_fdiagnostics_color_EQ)) {
StringRef Value(A->getValue());
if (Value != "always" && Value != "never" && Value != "auto")
D.Diag(diag::err_drv_clang_unsupported)
<< ("-fdiagnostics-color=" + Value).str();
}
A->claim();
}
if (D.getDiags().getDiagnosticOptions().ShowColors)
CmdArgs.push_back("-fcolor-diagnostics");
if (Args.hasArg(options::OPT_fansi_escape_codes))
CmdArgs.push_back("-fansi-escape-codes");
if (!Args.hasFlag(options::OPT_fshow_source_location,
options::OPT_fno_show_source_location))
CmdArgs.push_back("-fno-show-source-location");
if (Args.hasArg(options::OPT_fdiagnostics_absolute_paths))
CmdArgs.push_back("-fdiagnostics-absolute-paths");
if (!Args.hasFlag(options::OPT_fshow_column, options::OPT_fno_show_column,
ColumnDefault))
CmdArgs.push_back("-fno-show-column");
if (!Args.hasFlag(options::OPT_fspell_checking,
options::OPT_fno_spell_checking))
CmdArgs.push_back("-fno-spell-checking");
}
enum class DwarfFissionKind { None, Split, Single };
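// Map the -gsplit-dwarf family of flags to a fission kind: no flag or
// -gno-split-dwarf -> None, -gsplit-dwarf or -gsplit-dwarf=split -> Split,
// -gsplit-dwarf=single -> Single; any other value is diagnosed.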
static DwarfFissionKind getDebugFissionKind(const Driver &D,
const ArgList &Args, Arg *&Arg) {
Arg = Args.getLastArg(options::OPT_gsplit_dwarf, options::OPT_gsplit_dwarf_EQ,
options::OPT_gno_split_dwarf);
if (!Arg || Arg->getOption().matches(options::OPT_gno_split_dwarf))
return DwarfFissionKind::None;
if (Arg->getOption().matches(options::OPT_gsplit_dwarf))
return DwarfFissionKind::Split;
StringRef Value = Arg->getValue();
if (Value == "split")
return DwarfFissionKind::Split;
if (Value == "single")
return DwarfFissionKind::Single;
D.Diag(diag::err_drv_unsupported_option_argument)
<< Arg->getOption().getName() << Arg->getValue();
return DwarfFissionKind::None;
}
static void renderDwarfFormat(const Driver &D, const llvm::Triple &T,
const ArgList &Args, ArgStringList &CmdArgs,
unsigned DwarfVersion) {
auto *DwarfFormatArg =
Args.getLastArg(options::OPT_gdwarf64, options::OPT_gdwarf32);
if (!DwarfFormatArg)
return;
if (DwarfFormatArg->getOption().matches(options::OPT_gdwarf64)) {
if (DwarfVersion < 3)
D.Diag(diag::err_drv_argument_only_allowed_with)
<< DwarfFormatArg->getAsString(Args) << "DWARFv3 or greater";
else if (!T.isArch64Bit())
D.Diag(diag::err_drv_argument_only_allowed_with)
<< DwarfFormatArg->getAsString(Args) << "64 bit architecture";
else if (!T.isOSBinFormatELF())
D.Diag(diag::err_drv_argument_only_allowed_with)
<< DwarfFormatArg->getAsString(Args) << "ELF platforms";
}
DwarfFormatArg->render(Args, CmdArgs);
}
static void renderDebugOptions(const ToolChain &TC, const Driver &D,
const llvm::Triple &T, const ArgList &Args,
bool EmitCodeView, bool IRInput,
ArgStringList &CmdArgs,
codegenoptions::DebugInfoKind &DebugInfoKind,
DwarfFissionKind &DwarfFission) {
// These two forms of profiling info can't be used together.
if (const Arg *A1 = Args.getLastArg(options::OPT_fpseudo_probe_for_profiling))
if (const Arg *A2 = Args.getLastArg(options::OPT_fdebug_info_for_profiling))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A1->getAsString(Args) << A2->getAsString(Args);
if (Args.hasFlag(options::OPT_fdebug_info_for_profiling,
options::OPT_fno_debug_info_for_profiling, false) &&
checkDebugInfoOption(
Args.getLastArg(options::OPT_fdebug_info_for_profiling), Args, D, TC))
CmdArgs.push_back("-fdebug-info-for-profiling");
// The 'g' group of options involves a somewhat intricate sequence of decisions
// about what to pass from the driver to the frontend, but by the time they
// reach cc1 they've been factored into three well-defined orthogonal choices:
// * what level of debug info to generate
// * what dwarf version to write
// * what debugger tuning to use
// This avoids having to monkey around further in cc1 other than to disable
// codeview if not running in a Windows environment. Perhaps even that
// decision should be made in the driver as well though.
llvm::DebuggerKind DebuggerTuning = TC.getDefaultDebuggerTuning();
bool SplitDWARFInlining =
Args.hasFlag(options::OPT_fsplit_dwarf_inlining,
options::OPT_fno_split_dwarf_inlining, false);
// Normally -gsplit-dwarf is only useful with -gN. For IR input, Clang does
// object file generation and no IR generation, so -gN should not be needed.
// Thus, allow -gsplit-dwarf with either -gN or IR input.
if (IRInput || Args.hasArg(options::OPT_g_Group)) {
Arg *SplitDWARFArg;
DwarfFission = getDebugFissionKind(D, Args, SplitDWARFArg);
if (DwarfFission != DwarfFissionKind::None &&
!checkDebugInfoOption(SplitDWARFArg, Args, D, TC)) {
DwarfFission = DwarfFissionKind::None;
SplitDWARFInlining = false;
}
}
if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
DebugInfoKind = codegenoptions::DebugInfoConstructor;
// If the last option explicitly specified a debug-info level, use it.
if (checkDebugInfoOption(A, Args, D, TC) &&
A->getOption().matches(options::OPT_gN_Group)) {
DebugInfoKind = DebugLevelToInfoKind(*A);
// For -g0 or -gline-tables-only, drop -gsplit-dwarf. This gets a bit more
// complicated if you've disabled inline info in the skeleton CUs
// (SplitDWARFInlining) - then there's value in composing split-dwarf and
// line-tables-only, so let those compose naturally in that case.
if (DebugInfoKind == codegenoptions::NoDebugInfo ||
DebugInfoKind == codegenoptions::DebugDirectivesOnly ||
(DebugInfoKind == codegenoptions::DebugLineTablesOnly &&
SplitDWARFInlining))
DwarfFission = DwarfFissionKind::None;
}
}
// If a debugger tuning argument appeared, remember it.
if (const Arg *A =
Args.getLastArg(options::OPT_gTune_Group, options::OPT_ggdbN_Group)) {
if (checkDebugInfoOption(A, Args, D, TC)) {
if (A->getOption().matches(options::OPT_glldb))
DebuggerTuning = llvm::DebuggerKind::LLDB;
else if (A->getOption().matches(options::OPT_gsce))
DebuggerTuning = llvm::DebuggerKind::SCE;
else if (A->getOption().matches(options::OPT_gdbx))
DebuggerTuning = llvm::DebuggerKind::DBX;
else
DebuggerTuning = llvm::DebuggerKind::GDB;
}
}
// If a -gdwarf argument appeared, remember it.
const Arg *GDwarfN = getDwarfNArg(Args);
bool EmitDwarf = false;
if (GDwarfN) {
if (checkDebugInfoOption(GDwarfN, Args, D, TC))
EmitDwarf = true;
else
GDwarfN = nullptr;
}
if (const Arg *A = Args.getLastArg(options::OPT_gcodeview)) {
if (checkDebugInfoOption(A, Args, D, TC))
EmitCodeView = true;
}
// If the user asked for debug info but did not explicitly specify -gcodeview
// or -gdwarf, ask the toolchain for the default format.
if (!EmitCodeView && !EmitDwarf &&
DebugInfoKind != codegenoptions::NoDebugInfo) {
switch (TC.getDefaultDebugFormat()) {
case codegenoptions::DIF_CodeView:
EmitCodeView = true;
break;
case codegenoptions::DIF_DWARF:
EmitDwarf = true;
break;
}
}
unsigned RequestedDWARFVersion = 0; // DWARF version requested by the user
unsigned EffectiveDWARFVersion = 0; // DWARF version TC can generate. It may
// be lower than what the user wanted.
unsigned DefaultDWARFVersion = ParseDebugDefaultVersion(TC, Args);
if (EmitDwarf) {
// Start with the platform default DWARF version
RequestedDWARFVersion = TC.GetDefaultDwarfVersion();
assert(RequestedDWARFVersion &&
"toolchain default DWARF version must be nonzero");
// If the user specified a default DWARF version, that takes precedence
// over the platform default.
if (DefaultDWARFVersion)
RequestedDWARFVersion = DefaultDWARFVersion;
// Override with a user-specified DWARF version
if (GDwarfN)
if (auto ExplicitVersion = DwarfVersionNum(GDwarfN->getSpelling()))
RequestedDWARFVersion = ExplicitVersion;
// Clamp effective DWARF version to the max supported by the toolchain.
EffectiveDWARFVersion =
std::min(RequestedDWARFVersion, TC.getMaxDwarfVersion());
}
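// For example, with a toolchain default of DWARF v4, an explicit -gdwarf-5
// makes RequestedDWARFVersion 5, but EffectiveDWARFVersion stays at 4 if the
// toolchain's maximum supported version is 4.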
// -gline-directives-only is supported only for DWARF debug info.
if (RequestedDWARFVersion == 0 &&
DebugInfoKind == codegenoptions::DebugDirectivesOnly)
DebugInfoKind = codegenoptions::NoDebugInfo;
// Strict DWARF is false by default, but for DBX we need it to default to
// true.
if (const Arg *A = Args.getLastArg(options::OPT_gstrict_dwarf))
(void)checkDebugInfoOption(A, Args, D, TC);
if (Args.hasFlag(options::OPT_gstrict_dwarf, options::OPT_gno_strict_dwarf,
DebuggerTuning == llvm::DebuggerKind::DBX))
CmdArgs.push_back("-gstrict-dwarf");
// And we handle flag -grecord-gcc-switches later with DWARFDebugFlags.
Args.ClaimAllArgs(options::OPT_g_flags_Group);
// Column info is included by default for everything except SCE and
// CodeView. Clang doesn't track end columns, just starting columns, which,
// in theory, is fine for CodeView (and PDB). In practice, however, the
// Microsoft debuggers don't handle missing end columns well, and the AIX
// debugger DBX also doesn't handle the columns well, so it's better not to
// include any column info.
if (const Arg *A = Args.getLastArg(options::OPT_gcolumn_info))
(void)checkDebugInfoOption(A, Args, D, TC);
if (!Args.hasFlag(options::OPT_gcolumn_info, options::OPT_gno_column_info,
!EmitCodeView &&
(DebuggerTuning != llvm::DebuggerKind::SCE &&
DebuggerTuning != llvm::DebuggerKind::DBX)))
CmdArgs.push_back("-gno-column-info");
// FIXME: Move backend command line options to the module.
// If -gline-tables-only or -gline-directives-only is the last option, it wins.
if (const Arg *A = Args.getLastArg(options::OPT_gmodules))
if (checkDebugInfoOption(A, Args, D, TC)) {
if (DebugInfoKind != codegenoptions::DebugLineTablesOnly &&
DebugInfoKind != codegenoptions::DebugDirectivesOnly) {
DebugInfoKind = codegenoptions::DebugInfoConstructor;
CmdArgs.push_back("-dwarf-ext-refs");
CmdArgs.push_back("-fmodule-format=obj");
}
}
if (T.isOSBinFormatELF() && SplitDWARFInlining)
CmdArgs.push_back("-fsplit-dwarf-inlining");
// After we've dealt with all combinations of things that could
// make DebugInfoKind be other than None or DebugLineTablesOnly,
// figure out if we need to "upgrade" it to standalone debug info.
// We parse these two '-f' options whether or not they will be used,
// to claim them even if you wrote "-fstandalone-debug -gline-tables-only"
bool NeedFullDebug = Args.hasFlag(
options::OPT_fstandalone_debug, options::OPT_fno_standalone_debug,
DebuggerTuning == llvm::DebuggerKind::LLDB ||
TC.GetDefaultStandaloneDebug());
if (const Arg *A = Args.getLastArg(options::OPT_fstandalone_debug))
(void)checkDebugInfoOption(A, Args, D, TC);
if (DebugInfoKind == codegenoptions::LimitedDebugInfo ||
DebugInfoKind == codegenoptions::DebugInfoConstructor) {
if (Args.hasFlag(options::OPT_fno_eliminate_unused_debug_types,
options::OPT_feliminate_unused_debug_types, false))
DebugInfoKind = codegenoptions::UnusedTypeInfo;
else if (NeedFullDebug)
DebugInfoKind = codegenoptions::FullDebugInfo;
}
if (Args.hasFlag(options::OPT_gembed_source, options::OPT_gno_embed_source,
false)) {
// Source embedding is a vendor extension to DWARF v5. By now we have
// checked if a DWARF version was stated explicitly, and have otherwise
// fallen back to the target default, so if this is still not at least 5
// we emit an error.
const Arg *A = Args.getLastArg(options::OPT_gembed_source);
if (RequestedDWARFVersion < 5)
D.Diag(diag::err_drv_argument_only_allowed_with)
<< A->getAsString(Args) << "-gdwarf-5";
else if (EffectiveDWARFVersion < 5)
// The toolchain has a reduced maximum allowed DWARF version, so we can't
// enable -gembed-source.
D.Diag(diag::warn_drv_dwarf_version_limited_by_target)
<< A->getAsString(Args) << TC.getTripleString() << 5
<< EffectiveDWARFVersion;
else if (checkDebugInfoOption(A, Args, D, TC))
CmdArgs.push_back("-gembed-source");
}
if (EmitCodeView) {
CmdArgs.push_back("-gcodeview");
// Emit codeview type hashes if requested.
if (Args.hasFlag(options::OPT_gcodeview_ghash,
options::OPT_gno_codeview_ghash, false)) {
CmdArgs.push_back("-gcodeview-ghash");
}
}
// Omit inline line tables if requested.
if (Args.hasFlag(options::OPT_gno_inline_line_tables,
options::OPT_ginline_line_tables, false)) {
CmdArgs.push_back("-gno-inline-line-tables");
}
// When emitting remarks, we need at least debug lines in the output.
if (willEmitRemarks(Args) &&
DebugInfoKind <= codegenoptions::DebugDirectivesOnly)
DebugInfoKind = codegenoptions::DebugLineTablesOnly;
// Adjust the debug info kind for the given toolchain.
TC.adjustDebugInfoKind(DebugInfoKind, Args);
RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, EffectiveDWARFVersion,
DebuggerTuning);
// -fdebug-macro turns on macro debug info generation.
if (Args.hasFlag(options::OPT_fdebug_macro, options::OPT_fno_debug_macro,
false))
if (checkDebugInfoOption(Args.getLastArg(options::OPT_fdebug_macro), Args,
D, TC))
CmdArgs.push_back("-debug-info-macro");
// -ggnu-pubnames turns on gnu style pubnames in the backend.
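// With split DWARF (or an explicit pubnames request) pubnames are emitted:
// -gpubnames selects the standard form, otherwise the GNU form is used, and
// the -gno-gnu-pubnames / -gno-pubnames variants suppress both.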
const auto *PubnamesArg =
Args.getLastArg(options::OPT_ggnu_pubnames, options::OPT_gno_gnu_pubnames,
options::OPT_gpubnames, options::OPT_gno_pubnames);
if (DwarfFission != DwarfFissionKind::None ||
(PubnamesArg && checkDebugInfoOption(PubnamesArg, Args, D, TC)))
if (!PubnamesArg ||
(!PubnamesArg->getOption().matches(options::OPT_gno_gnu_pubnames) &&
!PubnamesArg->getOption().matches(options::OPT_gno_pubnames)))
CmdArgs.push_back(PubnamesArg && PubnamesArg->getOption().matches(
options::OPT_gpubnames)
? "-gpubnames"
: "-ggnu-pubnames");
if (Args.hasFlag(options::OPT_fdebug_ranges_base_address,
options::OPT_fno_debug_ranges_base_address, false)) {
CmdArgs.push_back("-fdebug-ranges-base-address");
}
// -gdwarf-aranges turns on the emission of the aranges section in the
// backend.
// Always enabled for SCE tuning.
bool NeedAranges = DebuggerTuning == llvm::DebuggerKind::SCE;
if (const Arg *A = Args.getLastArg(options::OPT_gdwarf_aranges))
NeedAranges = checkDebugInfoOption(A, Args, D, TC) || NeedAranges;
if (NeedAranges) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-generate-arange-section");
}
if (Args.hasFlag(options::OPT_fforce_dwarf_frame,
options::OPT_fno_force_dwarf_frame, false))
CmdArgs.push_back("-fforce-dwarf-frame");
if (Args.hasFlag(options::OPT_fdebug_types_section,
options::OPT_fno_debug_types_section, false)) {
if (!(T.isOSBinFormatELF() || T.isOSBinFormatWasm())) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_fdebug_types_section)
->getAsString(Args)
<< T.getTriple();
} else if (checkDebugInfoOption(
Args.getLastArg(options::OPT_fdebug_types_section), Args, D,
TC)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-generate-type-units");
}
}
// To avoid join/split of directory+filename, the integrated assembler prefers
// the directory form of .file on all DWARF versions. GNU as doesn't allow the
// form before DWARF v5.
if (!Args.hasFlag(options::OPT_fdwarf_directory_asm,
options::OPT_fno_dwarf_directory_asm,
TC.useIntegratedAs() || EffectiveDWARFVersion >= 5))
CmdArgs.push_back("-fno-dwarf-directory-asm");
// Decide how to render forward declarations of template instantiations.
// SCE wants full descriptions, others just get them in the name.
if (DebuggerTuning == llvm::DebuggerKind::SCE)
CmdArgs.push_back("-debug-forward-template-params");
// Do we need to explicitly import anonymous namespaces into the parent
// scope?
if (DebuggerTuning == llvm::DebuggerKind::SCE)
CmdArgs.push_back("-dwarf-explicit-import");
renderDwarfFormat(D, T, Args, CmdArgs, EffectiveDWARFVersion);
RenderDebugInfoCompressionArgs(Args, CmdArgs, D, TC);
}
void Clang::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const ArgList &Args, const char *LinkingOutput) const {
const auto &TC = getToolChain();
const llvm::Triple &RawTriple = TC.getTriple();
const llvm::Triple &Triple = TC.getEffectiveTriple();
const std::string &TripleStr = Triple.getTriple();
bool KernelOrKext =
Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
const Driver &D = TC.getDriver();
ArgStringList CmdArgs;
// Check number of inputs for sanity. We need at least one input.
assert(Inputs.size() >= 1 && "Must have at least one input.");
// CUDA/HIP compilation may have multiple inputs (source file + results of
// device-side compilations). OpenMP device jobs also take the host IR as a
// second input. Module precompilation accepts a list of header files to
// include as part of the module. All other jobs are expected to have exactly
// one input.
bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
bool IsHIP = JA.isOffloading(Action::OFK_HIP);
bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
bool IsHeaderModulePrecompile = isa<HeaderModulePrecompileJobAction>(JA);
bool IsDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) ||
JA.isDeviceOffloading(Action::OFK_Host));
bool IsUsingLTO = D.isUsingLTO(IsDeviceOffloadAction);
auto LTOMode = D.getLTOMode(IsDeviceOffloadAction);
// A header module compilation doesn't have a main input file, so invent a
// fake one as a placeholder.
const char *ModuleName = [&]{
auto *ModuleNameArg = Args.getLastArg(options::OPT_fmodule_name_EQ);
return ModuleNameArg ? ModuleNameArg->getValue() : "";
}();
InputInfo HeaderModuleInput(Inputs[0].getType(), ModuleName, ModuleName);
const InputInfo &Input =
IsHeaderModulePrecompile ? HeaderModuleInput : Inputs[0];
InputInfoList ModuleHeaderInputs;
const InputInfo *CudaDeviceInput = nullptr;
const InputInfo *OpenMPDeviceInput = nullptr;
for (const InputInfo &I : Inputs) {
if (&I == &Input) {
// This is the primary input.
} else if (IsHeaderModulePrecompile &&
types::getPrecompiledType(I.getType()) == types::TY_PCH) {
types::ID Expected = HeaderModuleInput.getType();
if (I.getType() != Expected) {
D.Diag(diag::err_drv_module_header_wrong_kind)
<< I.getFilename() << types::getTypeName(I.getType())
<< types::getTypeName(Expected);
}
ModuleHeaderInputs.push_back(I);
} else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
CudaDeviceInput = &I;
} else if (IsOpenMPDevice && !OpenMPDeviceInput) {
OpenMPDeviceInput = &I;
} else {
llvm_unreachable("unexpectedly given multiple inputs");
}
}
const llvm::Triple *AuxTriple =
(IsCuda || IsHIP) ? TC.getAuxTriple() : nullptr;
bool IsWindowsMSVC = RawTriple.isWindowsMSVCEnvironment();
bool IsIAMCU = RawTriple.isOSIAMCU();
// Adjust IsWindowsXYZ for CUDA/HIP compilations. Even when compiling in
// device mode (i.e., getToolchain().getTriple() is NVPTX/AMDGCN, not
// Windows), we need to pass Windows-specific flags to cc1.
if (IsCuda || IsHIP)
IsWindowsMSVC |= AuxTriple && AuxTriple->isWindowsMSVCEnvironment();
// C++ is not supported for IAMCU.
if (IsIAMCU && types::isCXX(Input.getType()))
D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU";
// Invoke ourselves in -cc1 mode.
//
// FIXME: Implement custom jobs for internal actions.
CmdArgs.push_back("-cc1");
// Add the "effective" target triple.
CmdArgs.push_back("-triple");
CmdArgs.push_back(Args.MakeArgString(TripleStr));
if (const Arg *MJ = Args.getLastArg(options::OPT_MJ)) {
DumpCompilationDatabase(C, MJ->getValue(), TripleStr, Output, Input, Args);
Args.ClaimAllArgs(options::OPT_MJ);
} else if (const Arg *GenCDBFragment =
Args.getLastArg(options::OPT_gen_cdb_fragment_path)) {
DumpCompilationDatabaseFragmentToDir(GenCDBFragment->getValue(), C,
TripleStr, Output, Input, Args);
Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
}
if (IsCuda || IsHIP) {
// We have to pass the triple of the host if compiling for a CUDA/HIP device
// and vice-versa.
std::string NormalizedTriple;
if (JA.isDeviceOffloading(Action::OFK_Cuda) ||
JA.isDeviceOffloading(Action::OFK_HIP))
NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Host>()
->getTriple()
.normalize();
else {
// Host-side compilation.
NormalizedTriple =
(IsCuda ? C.getSingleOffloadToolChain<Action::OFK_Cuda>()
: C.getSingleOffloadToolChain<Action::OFK_HIP>())
->getTriple()
.normalize();
if (IsCuda) {
// We need to figure out which CUDA version we're compiling for, as that
// determines how we load and launch GPU kernels.
auto *CTC = static_cast<const toolchains::CudaToolChain *>(
C.getSingleOffloadToolChain<Action::OFK_Cuda>());
assert(CTC && "Expected valid CUDA Toolchain.");
if (CTC && CTC->CudaInstallation.version() != CudaVersion::UNKNOWN)
CmdArgs.push_back(Args.MakeArgString(
Twine("-target-sdk-version=") +
CudaVersionToString(CTC->CudaInstallation.version())));
}
}
CmdArgs.push_back("-aux-triple");
CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
}
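// Illustrative note (editorial): for a CUDA/HIP device-side job on an
// x86_64-unknown-linux-gnu host (an assumed example triple), the block above
// forwards the *host* triple, so cc1 receives
// "-aux-triple x86_64-unknown-linux-gnu"; host-side jobs receive the device
// toolchain's triple instead.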
if (Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false)) {
CmdArgs.push_back("-fsycl-is-device");
if (Arg *A = Args.getLastArg(options::OPT_sycl_std_EQ)) {
A->render(Args, CmdArgs);
} else {
// Ensure the default version in SYCL mode is 2020.
CmdArgs.push_back("-sycl-std=2020");
}
}
if (IsOpenMPDevice) {
// We have to pass the triple of the host if compiling for an OpenMP device.
std::string NormalizedTriple =
C.getSingleOffloadToolChain<Action::OFK_Host>()
->getTriple()
.normalize();
CmdArgs.push_back("-aux-triple");
CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
}
if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
Triple.getArch() == llvm::Triple::thumb)) {
unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6;
unsigned Version = 0;
bool Failure =
Triple.getArchName().substr(Offset).consumeInteger(10, Version);
if (Failure || Version < 7)
D.Diag(diag::err_target_unsupported_arch) << Triple.getArchName()
<< TripleStr;
}
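// Illustrative note (editorial): for an arch name such as "armv7" (an assumed
// example value) the check above skips the 4-character "armv" prefix (6 for
// "thumbv") and parses the remaining digits; anything below v7, or an
// unparsable suffix, is rejected with err_target_unsupported_arch.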
// Push all default warning arguments that are specific to
// the given target. These are added before any user-provided warning
// options.
TC.addClangWarningOptions(CmdArgs);
// FIXME: Subclass ToolChain for SPIR and move this to addClangWarningOptions.
if (Triple.isSPIR())
CmdArgs.push_back("-Wspir-compat");
// Select the appropriate action.
RewriteKind rewriteKind = RK_None;
// If CollectArgsForIntegratedAssembler() isn't called below, claim the args
// it claims when not running an assembler. Otherwise, clang would emit
// "argument unused" warnings for assembler flags when e.g. adding "-E" to
// flags while debugging something. That'd be somewhat inconvenient, and it's
// also inconsistent with most other flags -- we don't warn on
// -ffunction-sections not being used in -E mode either for example, even
// though it's not really used either.
if (!isa<AssembleJobAction>(JA)) {
// The args claimed here should match the args used in
// CollectArgsForIntegratedAssembler().
if (TC.useIntegratedAs()) {
Args.ClaimAllArgs(options::OPT_mrelax_all);
Args.ClaimAllArgs(options::OPT_mno_relax_all);
Args.ClaimAllArgs(options::OPT_mincremental_linker_compatible);
Args.ClaimAllArgs(options::OPT_mno_incremental_linker_compatible);
switch (C.getDefaultToolChain().getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
Args.ClaimAllArgs(options::OPT_mimplicit_it_EQ);
break;
default:
break;
}
}
Args.ClaimAllArgs(options::OPT_Wa_COMMA);
Args.ClaimAllArgs(options::OPT_Xassembler);
}
if (isa<AnalyzeJobAction>(JA)) {
assert(JA.getType() == types::TY_Plist && "Invalid output type.");
CmdArgs.push_back("-analyze");
} else if (isa<MigrateJobAction>(JA)) {
CmdArgs.push_back("-migrate");
} else if (isa<PreprocessJobAction>(JA)) {
if (Output.getType() == types::TY_Dependencies)
CmdArgs.push_back("-Eonly");
else {
CmdArgs.push_back("-E");
if (Args.hasArg(options::OPT_rewrite_objc) &&
!Args.hasArg(options::OPT_g_Group))
CmdArgs.push_back("-P");
}
} else if (isa<AssembleJobAction>(JA)) {
CmdArgs.push_back("-emit-obj");
CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D);
// Also ignore explicit -force_cpusubtype_ALL option.
(void)Args.hasArg(options::OPT_force__cpusubtype__ALL);
} else if (isa<PrecompileJobAction>(JA)) {
if (JA.getType() == types::TY_Nothing)
CmdArgs.push_back("-fsyntax-only");
else if (JA.getType() == types::TY_ModuleFile)
CmdArgs.push_back(IsHeaderModulePrecompile
? "-emit-header-module"
: "-emit-module-interface");
else
CmdArgs.push_back("-emit-pch");
} else if (isa<VerifyPCHJobAction>(JA)) {
CmdArgs.push_back("-verify-pch");
} else {
assert((isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) &&
"Invalid action for clang tool.");
if (JA.getType() == types::TY_Nothing) {
CmdArgs.push_back("-fsyntax-only");
} else if (JA.getType() == types::TY_LLVM_IR ||
JA.getType() == types::TY_LTO_IR) {
CmdArgs.push_back("-emit-llvm");
} else if (JA.getType() == types::TY_LLVM_BC ||
JA.getType() == types::TY_LTO_BC) {
// Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S
if (Triple.isAMDGCN() && IsOpenMPDevice && Args.hasArg(options::OPT_S) &&
Args.hasArg(options::OPT_emit_llvm)) {
CmdArgs.push_back("-emit-llvm");
} else {
CmdArgs.push_back("-emit-llvm-bc");
}
} else if (JA.getType() == types::TY_IFS ||
JA.getType() == types::TY_IFS_CPP) {
StringRef ArgStr =
Args.hasArg(options::OPT_interface_stub_version_EQ)
? Args.getLastArgValue(options::OPT_interface_stub_version_EQ)
: "ifs-v1";
CmdArgs.push_back("-emit-interface-stubs");
CmdArgs.push_back(
Args.MakeArgString(Twine("-interface-stub-version=") + ArgStr.str()));
} else if (JA.getType() == types::TY_PP_Asm) {
CmdArgs.push_back("-S");
} else if (JA.getType() == types::TY_AST) {
CmdArgs.push_back("-emit-pch");
} else if (JA.getType() == types::TY_ModuleFile) {
CmdArgs.push_back("-module-file-info");
} else if (JA.getType() == types::TY_RewrittenObjC) {
CmdArgs.push_back("-rewrite-objc");
rewriteKind = RK_NonFragile;
} else if (JA.getType() == types::TY_RewrittenLegacyObjC) {
CmdArgs.push_back("-rewrite-objc");
rewriteKind = RK_Fragile;
} else {
assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!");
}
// Preserve use-list order by default when emitting bitcode, so that
// loading the bitcode up in 'opt' or 'llc' and running passes gives the
// same result as running passes here. For LTO, we don't need to preserve
// the use-list order, since serialization to bitcode is part of the flow.
if (JA.getType() == types::TY_LLVM_BC)
CmdArgs.push_back("-emit-llvm-uselists");
if (IsUsingLTO) {
if (!IsDeviceOffloadAction) {
if (Args.hasArg(options::OPT_flto))
CmdArgs.push_back("-flto");
else {
if (D.getLTOMode() == LTOK_Thin)
CmdArgs.push_back("-flto=thin");
else
CmdArgs.push_back("-flto=full");
}
CmdArgs.push_back("-flto-unit");
} else if (Triple.isAMDGPU()) {
// Only AMDGPU supports device-side LTO
assert(LTOMode == LTOK_Full || LTOMode == LTOK_Thin);
CmdArgs.push_back(Args.MakeArgString(
Twine("-flto=") + (LTOMode == LTOK_Thin ? "thin" : "full")));
CmdArgs.push_back("-flto-unit");
} else {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_foffload_lto,
options::OPT_foffload_lto_EQ)
->getAsString(Args)
<< Triple.getTriple();
}
}
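// Illustrative note (editorial): when LTO is enabled on the host side, a bare
// -flto forwards "-flto"; otherwise the driver's LTO mode selects
// "-flto=thin" or "-flto=full", and "-flto-unit" is appended in either case.
// For device offloading, only AMDGPU accepts these flags; other targets get
// err_drv_unsupported_opt_for_target.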
}
if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) {
if (!types::isLLVMIR(Input.getType()))
D.Diag(diag::err_drv_arg_requires_bitcode_input) << A->getAsString(Args);
Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ);
}
if (Args.getLastArg(options::OPT_fthin_link_bitcode_EQ))
Args.AddLastArg(CmdArgs, options::OPT_fthin_link_bitcode_EQ);
if (Args.getLastArg(options::OPT_save_temps_EQ))
Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ);
auto *MemProfArg = Args.getLastArg(options::OPT_fmemory_profile,
options::OPT_fmemory_profile_EQ,
options::OPT_fno_memory_profile);
if (MemProfArg &&
!MemProfArg->getOption().matches(options::OPT_fno_memory_profile))
MemProfArg->render(Args, CmdArgs);
// Embed-bitcode option.
// Only white-listed flags below are allowed to be embedded.
if (C.getDriver().embedBitcodeInObject() && !IsUsingLTO &&
(isa<BackendJobAction>(JA) || isa<AssembleJobAction>(JA))) {
// Add flags implied by -fembed-bitcode.
Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
// Disable all llvm IR level optimizations.
CmdArgs.push_back("-disable-llvm-passes");
// Render target options.
TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
// Reject options that shouldn't be supported in bitcode;
// also reject kernel/kext.
static const constexpr unsigned kBitcodeOptionBlacklist[] = {
options::OPT_mkernel,
options::OPT_fapple_kext,
options::OPT_ffunction_sections,
options::OPT_fno_function_sections,
options::OPT_fdata_sections,
options::OPT_fno_data_sections,
options::OPT_fbasic_block_sections_EQ,
options::OPT_funique_internal_linkage_names,
options::OPT_fno_unique_internal_linkage_names,
options::OPT_funique_section_names,
options::OPT_fno_unique_section_names,
options::OPT_funique_basic_block_section_names,
options::OPT_fno_unique_basic_block_section_names,
options::OPT_mrestrict_it,
options::OPT_mno_restrict_it,
options::OPT_mstackrealign,
options::OPT_mno_stackrealign,
options::OPT_mstack_alignment,
options::OPT_mcmodel_EQ,
options::OPT_mlong_calls,
options::OPT_mno_long_calls,
options::OPT_ggnu_pubnames,
options::OPT_gdwarf_aranges,
options::OPT_fdebug_types_section,
options::OPT_fno_debug_types_section,
options::OPT_fdwarf_directory_asm,
options::OPT_fno_dwarf_directory_asm,
options::OPT_mrelax_all,
options::OPT_mno_relax_all,
options::OPT_ftrap_function_EQ,
options::OPT_ffixed_r9,
options::OPT_mfix_cortex_a53_835769,
options::OPT_mno_fix_cortex_a53_835769,
options::OPT_ffixed_x18,
options::OPT_mglobal_merge,
options::OPT_mno_global_merge,
options::OPT_mred_zone,
options::OPT_mno_red_zone,
options::OPT_Wa_COMMA,
options::OPT_Xassembler,
options::OPT_mllvm,
};
for (const auto &A : Args)
if (llvm::find(kBitcodeOptionBlacklist, A->getOption().getID()) !=
std::end(kBitcodeOptionBlacklist))
D.Diag(diag::err_drv_unsupported_embed_bitcode) << A->getSpelling();
// Render the CodeGen options that need to be passed.
if (!Args.hasFlag(options::OPT_foptimize_sibling_calls,
options::OPT_fno_optimize_sibling_calls))
CmdArgs.push_back("-mdisable-tail-calls");
RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args,
CmdArgs, JA);
// Render ABI arguments
switch (TC.getArch()) {
default: break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
RenderARMABI(Triple, Args, CmdArgs);
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
RenderAArch64ABI(Triple, Args, CmdArgs);
break;
}
// Optimization level for CodeGen.
if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4)) {
CmdArgs.push_back("-O3");
D.Diag(diag::warn_O4_is_O3);
} else {
A->render(Args, CmdArgs);
}
}
// Input/Output file.
if (Output.getType() == types::TY_Dependencies) {
// Handled with other dependency code.
} else if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Input output.");
}
for (const auto &II : Inputs) {
addDashXForInput(Args, II, CmdArgs);
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else
II.getInputArg().renderAsInput(Args, CmdArgs);
}
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileUTF8(), D.getClangProgramPath(),
CmdArgs, Inputs, Output));
return;
}
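// Illustrative note (editorial): when -fembed-bitcode is active (and LTO is
// not), the job built above is intentionally minimal: options on the list
// above are rejected with err_drv_unsupported_embed_bitcode, IR-level
// optimizations are disabled via "-disable-llvm-passes", and ConstructJob
// returns early after emitting only target, ABI, optimization-level, and
// input/output arguments.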
if (C.getDriver().embedBitcodeMarkerOnly() && !IsUsingLTO)
CmdArgs.push_back("-fembed-bitcode=marker");
// We normally speed up the clang process a bit by skipping destructors at
// exit, but when we're generating diagnostics we can rely on some of the
// cleanup.
if (!C.isForDiagnostics())
CmdArgs.push_back("-disable-free");
#ifdef NDEBUG
const bool IsAssertBuild = false;
#else
const bool IsAssertBuild = true;
#endif
// Disable the verification pass in -asserts builds.
if (!IsAssertBuild)
CmdArgs.push_back("-disable-llvm-verifier");
// Discard value names in assert builds unless otherwise specified.
if (Args.hasFlag(options::OPT_fdiscard_value_names,
options::OPT_fno_discard_value_names, !IsAssertBuild)) {
if (Args.hasArg(options::OPT_fdiscard_value_names) &&
(std::any_of(Inputs.begin(), Inputs.end(),
[](const clang::driver::InputInfo &II) {
return types::isLLVMIR(II.getType());
}))) {
D.Diag(diag::warn_ignoring_fdiscard_for_bitcode);
}
CmdArgs.push_back("-discard-value-names");
}
// Set the main file name, so that debug info works even with
// -save-temps.
CmdArgs.push_back("-main-file-name");
CmdArgs.push_back(getBaseInputName(Args, Input));
// Some flags which affect the language (via preprocessor
// defines).
if (Args.hasArg(options::OPT_static))
CmdArgs.push_back("-static-define");
if (Args.hasArg(options::OPT_municode))
CmdArgs.push_back("-DUNICODE");
if (isa<AnalyzeJobAction>(JA))
RenderAnalyzerOptions(Args, CmdArgs, Triple, Input);
if (isa<AnalyzeJobAction>(JA) ||
(isa<PreprocessJobAction>(JA) && Args.hasArg(options::OPT__analyze)))
CmdArgs.push_back("-setup-static-analyzer");
// Enable compatibility mode to avoid analyzer-config-related errors.
// Since we can't access frontend flags through hasArg, let's manually iterate
// through them.
bool FoundAnalyzerConfig = false;
for (auto Arg : Args.filtered(options::OPT_Xclang))
if (StringRef(Arg->getValue()) == "-analyzer-config") {
FoundAnalyzerConfig = true;
break;
}
if (!FoundAnalyzerConfig)
for (auto Arg : Args.filtered(options::OPT_Xanalyzer))
if (StringRef(Arg->getValue()) == "-analyzer-config") {
FoundAnalyzerConfig = true;
break;
}
if (FoundAnalyzerConfig)
CmdArgs.push_back("-analyzer-config-compatibility-mode=true");
CheckCodeGenerationOptions(D, Args);
unsigned FunctionAlignment = ParseFunctionAlignment(TC, Args);
assert(FunctionAlignment <= 31 && "function alignment will be truncated!");
if (FunctionAlignment) {
CmdArgs.push_back("-function-alignment");
CmdArgs.push_back(Args.MakeArgString(std::to_string(FunctionAlignment)));
}
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(TC, Args);
bool IsROPI = RelocationModel == llvm::Reloc::ROPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI;
bool IsRWPI = RelocationModel == llvm::Reloc::RWPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI;
if (Args.hasArg(options::OPT_mcmse) &&
!Args.hasArg(options::OPT_fallow_unsupported)) {
if (IsROPI)
D.Diag(diag::err_cmse_pi_are_incompatible) << IsROPI;
if (IsRWPI)
D.Diag(diag::err_cmse_pi_are_incompatible) << !IsRWPI;
}
if (IsROPI && types::isCXX(Input.getType()) &&
!Args.hasArg(options::OPT_fallow_unsupported))
D.Diag(diag::err_drv_ropi_incompatible_with_cxx);
const char *RMName = RelocationModelName(RelocationModel);
if (RMName) {
CmdArgs.push_back("-mrelocation-model");
CmdArgs.push_back(RMName);
}
if (PICLevel > 0) {
CmdArgs.push_back("-pic-level");
CmdArgs.push_back(PICLevel == 1 ? "1" : "2");
if (IsPIE)
CmdArgs.push_back("-pic-is-pie");
}
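// Illustrative note (editorial): with a PIC level of 2 and PIE enabled, the
// block above yields "-pic-level 2 -pic-is-pie" in addition to the
// "-mrelocation-model" value chosen from ParsePICArgs; PIC level 0 emits
// neither of the two pic flags.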
if (RelocationModel == llvm::Reloc::ROPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI)
CmdArgs.push_back("-fropi");
if (RelocationModel == llvm::Reloc::RWPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI)
CmdArgs.push_back("-frwpi");
if (Arg *A = Args.getLastArg(options::OPT_meabi)) {
CmdArgs.push_back("-meabi");
CmdArgs.push_back(A->getValue());
}
// -fsemantic-interposition is forwarded to CC1: set the
// "SemanticInterposition" metadata to 1 (make some linkages interposable) and
// make default visibility external linkage definitions dso_preemptable.
//
// -fno-semantic-interposition: if the target supports .Lfoo$local local
// aliases (make default visibility external linkage definitions dso_local).
// This is the CC1 default for ELF to match COFF/Mach-O.
//
// Otherwise use Clang's traditional behavior: like
// -fno-semantic-interposition but local aliases are not used. So references
// can be interposed if not optimized out.
if (Triple.isOSBinFormatELF()) {
Arg *A = Args.getLastArg(options::OPT_fsemantic_interposition,
options::OPT_fno_semantic_interposition);
if (RelocationModel != llvm::Reloc::Static && !IsPIE) {
// The supported targets need to call AsmPrinter::getSymbolPreferLocal.
bool SupportsLocalAlias =
Triple.isAArch64() || Triple.isRISCV() || Triple.isX86();
if (!A)
CmdArgs.push_back("-fhalf-no-semantic-interposition");
else if (A->getOption().matches(options::OPT_fsemantic_interposition))
A->render(Args, CmdArgs);
else if (!SupportsLocalAlias)
CmdArgs.push_back("-fhalf-no-semantic-interposition");
}
}
{
std::string Model;
if (Arg *A = Args.getLastArg(options::OPT_mthread_model)) {
if (!TC.isThreadModelSupported(A->getValue()))
D.Diag(diag::err_drv_invalid_thread_model_for_target)
<< A->getValue() << A->getAsString(Args);
Model = A->getValue();
} else
Model = TC.getThreadModel();
if (Model != "posix") {
CmdArgs.push_back("-mthread-model");
CmdArgs.push_back(Args.MakeArgString(Model));
}
}
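// Illustrative note (editorial): the default "posix" thread model is left
// implicit; only a non-default model that the toolchain supports (e.g. an
// assumed "-mthread-model single") is forwarded as "-mthread-model single".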
Args.AddLastArg(CmdArgs, options::OPT_fveclib);
if (Args.hasFlag(options::OPT_fmerge_all_constants,
options::OPT_fno_merge_all_constants, false))
CmdArgs.push_back("-fmerge-all-constants");
if (Args.hasFlag(options::OPT_fno_delete_null_pointer_checks,
options::OPT_fdelete_null_pointer_checks, false))
CmdArgs.push_back("-fno-delete-null-pointer-checks");
// LLVM Code Generator Options.
for (const Arg *A : Args.filtered(options::OPT_frewrite_map_file_EQ)) {
StringRef Map = A->getValue();
if (!llvm::sys::fs::exists(Map)) {
D.Diag(diag::err_drv_no_such_file) << Map;
} else {
A->render(Args, CmdArgs);
A->claim();
}
}
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ_vec_extabi,
options::OPT_mabi_EQ_vec_default)) {
if (!Triple.isOSAIX())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
if (A->getOption().getID() == options::OPT_mabi_EQ_vec_extabi)
CmdArgs.push_back("-mabi=vec-extabi");
else
CmdArgs.push_back("-mabi=vec-default");
}
if (Arg *A = Args.getLastArg(options::OPT_mlong_double_128)) {
// Emit the unsupported-option error until Clang's library integration
// support for 128-bit long double is available for AIX.
if (Triple.isOSAIX())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
}
if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) {
StringRef v = A->getValue();
// FIXME: Validate the argument here so we don't produce meaningless errors
// about -fwarn-stack-size=.
if (v.empty())
D.Diag(diag::err_drv_missing_argument) << A->getSpelling() << 1;
else
CmdArgs.push_back(Args.MakeArgString("-fwarn-stack-size=" + v));
A->claim();
}
if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables,
true))
CmdArgs.push_back("-fno-jump-tables");
if (Args.hasFlag(options::OPT_fprofile_sample_accurate,
options::OPT_fno_profile_sample_accurate, false))
CmdArgs.push_back("-fprofile-sample-accurate");
if (!Args.hasFlag(options::OPT_fpreserve_as_comments,
options::OPT_fno_preserve_as_comments, true))
CmdArgs.push_back("-fno-preserve-as-comments");
if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) {
CmdArgs.push_back("-mregparm");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_maix_struct_return,
options::OPT_msvr4_struct_return)) {
if (!TC.getTriple().isPPC32()) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
} else if (A->getOption().matches(options::OPT_maix_struct_return)) {
CmdArgs.push_back("-maix-struct-return");
} else {
assert(A->getOption().matches(options::OPT_msvr4_struct_return));
CmdArgs.push_back("-msvr4-struct-return");
}
}
if (Arg *A = Args.getLastArg(options::OPT_fpcc_struct_return,
options::OPT_freg_struct_return)) {
if (TC.getArch() != llvm::Triple::x86) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
} else if (A->getOption().matches(options::OPT_fpcc_struct_return)) {
CmdArgs.push_back("-fpcc-struct-return");
} else {
assert(A->getOption().matches(options::OPT_freg_struct_return));
CmdArgs.push_back("-freg-struct-return");
}
}
if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false))
CmdArgs.push_back("-fdefault-calling-conv=stdcall");
if (Args.hasArg(options::OPT_fenable_matrix)) {
// -fenable-matrix is needed by both the LangOpts and LLVM.
CmdArgs.push_back("-fenable-matrix");
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-enable-matrix");
}
CodeGenOptions::FramePointerKind FPKeepKind =
getFramePointerKind(Args, RawTriple);
const char *FPKeepKindStr = nullptr;
switch (FPKeepKind) {
case CodeGenOptions::FramePointerKind::None:
FPKeepKindStr = "-mframe-pointer=none";
break;
case CodeGenOptions::FramePointerKind::NonLeaf:
FPKeepKindStr = "-mframe-pointer=non-leaf";
break;
case CodeGenOptions::FramePointerKind::All:
FPKeepKindStr = "-mframe-pointer=all";
break;
}
assert(FPKeepKindStr && "unknown FramePointerKind");
CmdArgs.push_back(FPKeepKindStr);
if (!Args.hasFlag(options::OPT_fzero_initialized_in_bss,
options::OPT_fno_zero_initialized_in_bss, true))
CmdArgs.push_back("-fno-zero-initialized-in-bss");
bool OFastEnabled = isOptimizationLevelFast(Args);
// If -Ofast is the optimization level, then -fstrict-aliasing should be
// enabled. This alias option is being used to simplify the hasFlag logic.
OptSpecifier StrictAliasingAliasOption =
OFastEnabled ? options::OPT_Ofast : options::OPT_fstrict_aliasing;
// We turn strict aliasing off by default if we're in CL mode, since MSVC
// doesn't do any TBAA.
bool TBAAOnByDefault = !D.IsCLMode();
if (!Args.hasFlag(options::OPT_fstrict_aliasing, StrictAliasingAliasOption,
options::OPT_fno_strict_aliasing, TBAAOnByDefault))
CmdArgs.push_back("-relaxed-aliasing");
if (!Args.hasFlag(options::OPT_fstruct_path_tbaa,
options::OPT_fno_struct_path_tbaa))
CmdArgs.push_back("-no-struct-path-tbaa");
if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums,
false))
CmdArgs.push_back("-fstrict-enums");
if (!Args.hasFlag(options::OPT_fstrict_return, options::OPT_fno_strict_return,
true))
CmdArgs.push_back("-fno-strict-return");
if (Args.hasFlag(options::OPT_fallow_editor_placeholders,
options::OPT_fno_allow_editor_placeholders, false))
CmdArgs.push_back("-fallow-editor-placeholders");
if (Args.hasFlag(options::OPT_fstrict_vtable_pointers,
options::OPT_fno_strict_vtable_pointers,
false))
CmdArgs.push_back("-fstrict-vtable-pointers");
if (Args.hasFlag(options::OPT_fforce_emit_vtables,
options::OPT_fno_force_emit_vtables,
false))
CmdArgs.push_back("-fforce-emit-vtables");
if (!Args.hasFlag(options::OPT_foptimize_sibling_calls,
options::OPT_fno_optimize_sibling_calls))
CmdArgs.push_back("-mdisable-tail-calls");
if (Args.hasFlag(options::OPT_fno_escaping_block_tail_calls,
options::OPT_fescaping_block_tail_calls, false))
CmdArgs.push_back("-fno-escaping-block-tail-calls");
Args.AddLastArg(CmdArgs, options::OPT_ffine_grained_bitfield_accesses,
options::OPT_fno_fine_grained_bitfield_accesses);
Args.AddLastArg(CmdArgs, options::OPT_fexperimental_relative_cxx_abi_vtables,
options::OPT_fno_experimental_relative_cxx_abi_vtables);
// Handle segmented stacks.
if (Args.hasFlag(options::OPT_fsplit_stack, options::OPT_fno_split_stack,
false))
CmdArgs.push_back("-fsplit-stack");
// -fprotect-parens=0 is default.
if (Args.hasFlag(options::OPT_fprotect_parens,
options::OPT_fno_protect_parens, false))
CmdArgs.push_back("-fprotect-parens");
RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA);
if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) {
const llvm::Triple::ArchType Arch = TC.getArch();
if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
StringRef V = A->getValue();
if (V == "64")
CmdArgs.push_back("-fextend-arguments=64");
else if (V != "32")
D.Diag(diag::err_drv_invalid_argument_to_option)
<< A->getValue() << A->getOption().getName();
} else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getOption().getName() << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_mdouble_EQ)) {
if (TC.getArch() == llvm::Triple::avr)
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_LongDouble_Group)) {
if (TC.getTriple().isX86())
A->render(Args, CmdArgs);
else if (TC.getTriple().isPPC() &&
(A->getOption().getID() != options::OPT_mlong_double_80))
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
// Decide whether to use verbose asm. Verbose assembly is the default on
// toolchains which have the integrated assembler on by default.
bool IsIntegratedAssemblerDefault = TC.IsIntegratedAssemblerDefault();
if (!Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm,
IsIntegratedAssemblerDefault))
CmdArgs.push_back("-fno-verbose-asm");
// Parse 'none' or '$major.$minor'. Disallow -fbinutils-version=0 because we
// use that to indicate the MC default in the backend.
if (Arg *A = Args.getLastArg(options::OPT_fbinutils_version_EQ)) {
StringRef V = A->getValue();
unsigned Num;
if (V == "none")
A->render(Args, CmdArgs);
else if (!V.consumeInteger(10, Num) && Num > 0 &&
(V.empty() || (V.consume_front(".") &&
!V.consumeInteger(10, Num) && V.empty())))
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_invalid_argument_to_option)
<< A->getValue() << A->getOption().getName();
}
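// Illustrative note (editorial): the parse above accepts "none", a bare major
// version such as "2", or "$major.$minor" such as "2.35" (assumed example
// values); "0", a trailing ".", or any other form is rejected with
// err_drv_invalid_argument_to_option.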
// If the toolchain chooses to use MCAsmParser for inline asm, don't explicitly
// pass the option to disable the integrated assembler.
if (!TC.useIntegratedAs() && !TC.parseInlineAsmUsingAsmParser())
CmdArgs.push_back("-no-integrated-as");
if (Args.hasArg(options::OPT_fdebug_pass_structure)) {
CmdArgs.push_back("-mdebug-pass");
CmdArgs.push_back("Structure");
}
if (Args.hasArg(options::OPT_fdebug_pass_arguments)) {
CmdArgs.push_back("-mdebug-pass");
CmdArgs.push_back("Arguments");
}
// Enable -mconstructor-aliases except on darwin, where we have to work around
// a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
// where aliases aren't supported.
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
CmdArgs.push_back("-mconstructor-aliases");
// Darwin's kernel doesn't support guard variables; just die if we
// try to use them.
if (KernelOrKext && RawTriple.isOSDarwin())
CmdArgs.push_back("-fforbid-guard-variables");
if (Args.hasFlag(options::OPT_mms_bitfields, options::OPT_mno_ms_bitfields,
Triple.isWindowsGNUEnvironment())) {
CmdArgs.push_back("-mms-bitfields");
}
// Non-PIC code defaults to -fdirect-access-external-data while PIC code
// defaults to -fno-direct-access-external-data. Pass the option if different
// from the default.
if (Arg *A = Args.getLastArg(options::OPT_fdirect_access_external_data,
options::OPT_fno_direct_access_external_data))
if (A->getOption().matches(options::OPT_fdirect_access_external_data) !=
(PICLevel == 0))
A->render(Args, CmdArgs);
if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) {
CmdArgs.push_back("-fno-plt");
}
// -fhosted is default.
// TODO: Audit uses of KernelOrKext and see where it'd be more appropriate to
// use Freestanding.
bool Freestanding =
Args.hasFlag(options::OPT_ffreestanding, options::OPT_fhosted, false) ||
KernelOrKext;
if (Freestanding)
CmdArgs.push_back("-ffreestanding");
// This is a coarse approximation of what llvm-gcc actually does;
// -fasynchronous-unwind-tables and -fnon-call-exceptions interact in more
// complicated ways.
bool UnwindTables =
Args.hasFlag(options::OPT_fasynchronous_unwind_tables,
options::OPT_fno_asynchronous_unwind_tables,
(TC.IsUnwindTablesDefault(Args) ||
TC.getSanitizerArgs().needsUnwindTables()) &&
!Freestanding);
UnwindTables = Args.hasFlag(options::OPT_funwind_tables,
options::OPT_fno_unwind_tables, UnwindTables);
if (UnwindTables)
CmdArgs.push_back("-munwind-tables");
// Prepare `-aux-target-cpu` and `-aux-target-feature` unless
// `--gpu-use-aux-triple-only` is specified.
if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) &&
(IsCudaDevice || IsHIPDevice)) {
const ArgList &HostArgs =
C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None);
std::string HostCPU =
getCPUName(HostArgs, *TC.getAuxTriple(), /*FromAs*/ false);
if (!HostCPU.empty()) {
CmdArgs.push_back("-aux-target-cpu");
CmdArgs.push_back(Args.MakeArgString(HostCPU));
}
getTargetFeatures(D, *TC.getAuxTriple(), HostArgs, CmdArgs,
/*ForAS*/ false, /*IsAux*/ true);
}
TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
// FIXME: Handle -mtune=.
(void)Args.hasArg(options::OPT_mtune_EQ);
if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) {
StringRef CM = A->getValue();
if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" ||
CM == "tiny") {
if (Triple.isOSAIX() && CM == "medium")
CmdArgs.push_back("-mcmodel=large");
else
A->render(Args, CmdArgs);
} else {
D.Diag(diag::err_drv_invalid_argument_to_option)
<< CM << A->getOption().getName();
}
}
if (Arg *A = Args.getLastArg(options::OPT_mtls_size_EQ)) {
StringRef Value = A->getValue();
unsigned TLSSize = 0;
Value.getAsInteger(10, TLSSize);
if (!Triple.isAArch64() || !Triple.isOSBinFormatELF())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getOption().getName() << TripleStr;
if (TLSSize != 12 && TLSSize != 24 && TLSSize != 32 && TLSSize != 48)
D.Diag(diag::err_drv_invalid_int_value)
<< A->getOption().getName() << Value;
Args.AddLastArg(CmdArgs, options::OPT_mtls_size_EQ);
}
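// Illustrative note (editorial): -mtls-size= is only supported on AArch64 ELF
// targets, and only the values 12, 24, 32, and 48 pass the check above; any
// other value produces err_drv_invalid_int_value.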
// Add the target cpu
std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false);
if (!CPU.empty()) {
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPU));
}
RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs);
// FIXME: For now we want to demote any errors to warnings, when they have
// been raised for asking the wrong question of scalable vectors, such as
// asking for the fixed number of elements. This may happen because code that
// is not yet ported to work for scalable vectors uses the wrong interfaces,
// whereas the behaviour is actually correct. Emitting a warning helps bring
// up scalable vector support in an incremental way. When scalable vector
// support is stable enough, all uses of wrong interfaces should be considered
// as errors, but until then, we can live with a warning being emitted by the
// compiler. This way, Clang can be used to compile code with scalable vectors
// and identify possible issues.
if (isa<BackendJobAction>(JA)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
}
// These two are potentially updated by AddClangCLArgs.
codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
bool EmitCodeView = false;
// Add clang-cl arguments.
types::ID InputType = Input.getType();
if (D.IsCLMode())
AddClangCLArgs(Args, InputType, CmdArgs, &DebugInfoKind, &EmitCodeView);
DwarfFissionKind DwarfFission = DwarfFissionKind::None;
renderDebugOptions(TC, D, RawTriple, Args, EmitCodeView,
types::isLLVMIR(InputType), CmdArgs, DebugInfoKind,
DwarfFission);
// Add the split debug info name to the command lines here so we
// can propagate it to the backend.
bool SplitDWARF = (DwarfFission != DwarfFissionKind::None) &&
(TC.getTriple().isOSBinFormatELF() ||
TC.getTriple().isOSBinFormatWasm()) &&
(isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) ||
isa<BackendJobAction>(JA));
if (SplitDWARF) {
const char *SplitDWARFOut = SplitDebugName(JA, Args, Input, Output);
CmdArgs.push_back("-split-dwarf-file");
CmdArgs.push_back(SplitDWARFOut);
if (DwarfFission == DwarfFissionKind::Split) {
CmdArgs.push_back("-split-dwarf-output");
CmdArgs.push_back(SplitDWARFOut);
}
}
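// Illustrative note (editorial): when DWARF fission is requested on an ELF or
// Wasm target, cc1 always receives "-split-dwarf-file <name>"; the additional
// "-split-dwarf-output <name>" is added only for full split mode
// (DwarfFissionKind::Split), not for single-file fission.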
// Pass the linker version in use.
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
CmdArgs.push_back("-target-linker-version");
CmdArgs.push_back(A->getValue());
}
// Explicitly error on some things we know we don't support and can't just
// ignore.
if (!Args.hasArg(options::OPT_fallow_unsupported)) {
Arg *Unsupported;
if (types::isCXX(InputType) && RawTriple.isOSDarwin() &&
TC.getArch() == llvm::Triple::x86) {
if ((Unsupported = Args.getLastArg(options::OPT_fapple_kext)) ||
(Unsupported = Args.getLastArg(options::OPT_mkernel)))
D.Diag(diag::err_drv_clang_unsupported_opt_cxx_darwin_i386)
<< Unsupported->getOption().getName();
}
// The faltivec option has been superseded by the maltivec option.
if ((Unsupported = Args.getLastArg(options::OPT_faltivec)))
D.Diag(diag::err_drv_clang_unsupported_opt_faltivec)
<< Unsupported->getOption().getName()
<< "please use -maltivec and include altivec.h explicitly";
if ((Unsupported = Args.getLastArg(options::OPT_fno_altivec)))
D.Diag(diag::err_drv_clang_unsupported_opt_faltivec)
<< Unsupported->getOption().getName() << "please use -mno-altivec";
}
Args.AddAllArgs(CmdArgs, options::OPT_v);
if (Args.getLastArg(options::OPT_H)) {
CmdArgs.push_back("-H");
CmdArgs.push_back("-sys-header-deps");
}
Args.AddAllArgs(CmdArgs, options::OPT_fshow_skipped_includes);
if (D.CCPrintHeaders && !D.CCGenDiagnostics) {
CmdArgs.push_back("-header-include-file");
CmdArgs.push_back(!D.CCPrintHeadersFilename.empty()
? D.CCPrintHeadersFilename.c_str()
: "-");
CmdArgs.push_back("-sys-header-deps");
}
Args.AddLastArg(CmdArgs, options::OPT_P);
Args.AddLastArg(CmdArgs, options::OPT_print_ivar_layout);
if (D.CCLogDiagnostics && !D.CCGenDiagnostics) {
CmdArgs.push_back("-diagnostic-log-file");
CmdArgs.push_back(!D.CCLogDiagnosticsFilename.empty()
? D.CCLogDiagnosticsFilename.c_str()
: "-");
}
// Give the gen diagnostics more chances to succeed, by avoiding intentional
// crashes.
if (D.CCGenDiagnostics)
CmdArgs.push_back("-disable-pragma-debug-crash");
// Allow backend to put its diagnostic files in the same place as frontend
// crash diagnostics files.
if (Args.hasArg(options::OPT_fcrash_diagnostics_dir)) {
StringRef Dir = Args.getLastArgValue(options::OPT_fcrash_diagnostics_dir);
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-crash-diagnostics-dir=" + Dir));
}
bool UseSeparateSections = isUseSeparateSections(Triple);
if (Args.hasFlag(options::OPT_ffunction_sections,
options::OPT_fno_function_sections, UseSeparateSections)) {
CmdArgs.push_back("-ffunction-sections");
}
if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_sections_EQ)) {
StringRef Val = A->getValue();
if (Triple.isX86() && Triple.isOSBinFormatELF()) {
if (Val != "all" && Val != "labels" && Val != "none" &&
!Val.startswith("list="))
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
else
A->render(Args, CmdArgs);
} else if (Triple.isNVPTX()) {
// Do not pass the option to the GPU compilation. We still want it enabled
// for the host-side compilation, so seeing it here is not an error.
} else if (Val != "none") {
// =none is allowed everywhere. It's useful for overriding the option
// and is the same as not specifying the option.
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
}
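// Illustrative note (editorial): on x86 ELF targets the accepted values for
// -fbasic-block-sections= are "all", "labels", "none", and "list=<file>";
// NVPTX silently drops the option, and every other target only tolerates
// "=none".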
bool HasDefaultDataSections = Triple.isOSBinFormatXCOFF();
if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections,
UseSeparateSections || HasDefaultDataSections)) {
CmdArgs.push_back("-fdata-sections");
}
if (!Args.hasFlag(options::OPT_funique_section_names,
options::OPT_fno_unique_section_names, true))
CmdArgs.push_back("-fno-unique-section-names");
if (Args.hasFlag(options::OPT_funique_internal_linkage_names,
options::OPT_fno_unique_internal_linkage_names, false))
CmdArgs.push_back("-funique-internal-linkage-names");
if (Args.hasFlag(options::OPT_funique_basic_block_section_names,
options::OPT_fno_unique_basic_block_section_names, false))
CmdArgs.push_back("-funique-basic-block-section-names");
if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
options::OPT_fno_split_machine_functions)) {
// This codegen pass is only available on x86-elf targets.
if (Triple.isX86() && Triple.isOSBinFormatELF()) {
if (A->getOption().matches(options::OPT_fsplit_machine_functions))
A->render(Args, CmdArgs);
} else {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
}
Args.AddLastArg(CmdArgs, options::OPT_finstrument_functions,
options::OPT_finstrument_functions_after_inlining,
options::OPT_finstrument_function_entry_bare);
// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
// for sampling, the overhead of call-arc collection is way too high, and
// there's no way to collect the output.
if (!Triple.isNVPTX() && !Triple.isAMDGCN())
addPGOAndCoverageFlags(TC, C, D, Output, Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);
// Add runtime flag for PS4 when PGO, coverage, or sanitizers are enabled.
if (RawTriple.isPS4CPU() &&
!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
PS4cpu::addProfileRTArgs(TC, Args, CmdArgs);
PS4cpu::addSanitizerArgs(TC, CmdArgs);
}
// Pass options for controlling the default header search paths.
if (Args.hasArg(options::OPT_nostdinc)) {
CmdArgs.push_back("-nostdsysteminc");
CmdArgs.push_back("-nobuiltininc");
} else {
if (Args.hasArg(options::OPT_nostdlibinc))
CmdArgs.push_back("-nostdsysteminc");
Args.AddLastArg(CmdArgs, options::OPT_nostdincxx);
Args.AddLastArg(CmdArgs, options::OPT_nobuiltininc);
}
// Pass the path to compiler resource files.
CmdArgs.push_back("-resource-dir");
CmdArgs.push_back(D.ResourceDir.c_str());
Args.AddLastArg(CmdArgs, options::OPT_working_directory);
RenderARCMigrateToolOptions(D, Args, CmdArgs);
// Add preprocessing options like -I, -D, etc. if we are using the
// preprocessor.
//
// FIXME: Support -fpreprocessed
if (types::getPreprocessedType(InputType) != types::TY_INVALID)
AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs);
// Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes
// that "The compiler can only warn and ignore the option if not recognized".
// When building with ccache, it will pass -D options to clang even on
// preprocessed inputs and configure concludes that -fPIC is not supported.
Args.ClaimAllArgs(options::OPT_D);
// Manually translate -O4 to -O3; let clang reject others.
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4)) {
CmdArgs.push_back("-O3");
D.Diag(diag::warn_O4_is_O3);
} else {
A->render(Args, CmdArgs);
}
}
// Warn about ignored options to clang.
for (const Arg *A :
Args.filtered(options::OPT_clang_ignored_gcc_optimization_f_Group)) {
D.Diag(diag::warn_ignored_gcc_optimization) << A->getAsString(Args);
A->claim();
}
for (const Arg *A :
Args.filtered(options::OPT_clang_ignored_legacy_options_Group)) {
D.Diag(diag::warn_ignored_clang_option) << A->getAsString(Args);
A->claim();
}
claimNoWarnArgs(Args);
Args.AddAllArgs(CmdArgs, options::OPT_R_Group);
Args.AddAllArgs(CmdArgs, options::OPT_W_Group);
if (Args.hasFlag(options::OPT_pedantic, options::OPT_no_pedantic, false))
CmdArgs.push_back("-pedantic");
Args.AddLastArg(CmdArgs, options::OPT_pedantic_errors);
Args.AddLastArg(CmdArgs, options::OPT_w);
// Fixed point flags
if (Args.hasFlag(options::OPT_ffixed_point, options::OPT_fno_fixed_point,
/*Default=*/false))
Args.AddLastArg(CmdArgs, options::OPT_ffixed_point);
if (Arg *A = Args.getLastArg(options::OPT_fcxx_abi_EQ))
A->render(Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fexperimental_relative_cxx_abi_vtables,
options::OPT_fno_experimental_relative_cxx_abi_vtables);
// Handle -{std, ansi, trigraphs} -- take the last of -{std, ansi}
// (-ansi is equivalent to -std=c89 or -std=c++98).
//
// If a std is supplied, only add -trigraphs if it follows the
// option.
bool ImplyVCPPCVer = false;
bool ImplyVCPPCXXVer = false;
const Arg *Std = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi);
if (Std) {
if (Std->getOption().matches(options::OPT_ansi))
if (types::isCXX(InputType))
CmdArgs.push_back("-std=c++98");
else
CmdArgs.push_back("-std=c89");
else
Std->render(Args, CmdArgs);
// If -f(no-)trigraphs appears after the language standard flag, honor it.
if (Arg *A = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi,
options::OPT_ftrigraphs,
options::OPT_fno_trigraphs))
if (A != Std)
A->render(Args, CmdArgs);
} else {
// Honor -std-default.
//
// FIXME: Clang doesn't correctly handle -std= when the input language
// doesn't match. For the time being just ignore this for C++ inputs;
// eventually we want to do all the standard defaulting here instead of
// splitting it between the driver and clang -cc1.
if (!types::isCXX(InputType)) {
if (!Args.hasArg(options::OPT__SLASH_std)) {
Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ, "-std=",
/*Joined=*/true);
} else
ImplyVCPPCVer = true;
}
else if (IsWindowsMSVC)
ImplyVCPPCXXVer = true;
Args.AddLastArg(CmdArgs, options::OPT_ftrigraphs,
options::OPT_fno_trigraphs);
// HIP headers have minimum C++ standard requirements, so set the default
// language standard accordingly.
if (IsHIP)
CmdArgs.push_back(IsWindowsMSVC ? "-std=c++14" : "-std=c++11");
}
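// Illustrative note (editorial): when no -std= or -ansi is given, a HIP
// compile defaults to "-std=c++14" under MSVC and "-std=c++11" elsewhere, per
// the branch above.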
// GCC's behavior for -Wwrite-strings is a bit strange:
// * In C, this "warning flag" changes the types of string literals from
// 'char[N]' to 'const char[N]', and thus triggers an unrelated warning
// for the discarded qualifier.
// * In C++, this is just a normal warning flag.
//
// Implementing this warning correctly in C is hard, so we follow GCC's
// behavior for now. FIXME: Directly diagnose uses of a string literal as
// a non-const char* in C, rather than using this crude hack.
if (!types::isCXX(InputType)) {
// FIXME: This should behave just like a warning flag, and thus should also
// respect -Weverything, -Wno-everything, -Werror=write-strings, and so on.
Arg *WriteStrings =
Args.getLastArg(options::OPT_Wwrite_strings,
options::OPT_Wno_write_strings, options::OPT_w);
if (WriteStrings &&
WriteStrings->getOption().matches(options::OPT_Wwrite_strings))
CmdArgs.push_back("-fconst-strings");
}
// GCC provides a macro definition '__DEPRECATED' when -Wdeprecated is active
// during C++ compilation, which it is by default. GCC keeps this define even
// in the presence of '-w'; match this behavior bug-for-bug.
if (types::isCXX(InputType) &&
Args.hasFlag(options::OPT_Wdeprecated, options::OPT_Wno_deprecated,
true)) {
CmdArgs.push_back("-fdeprecated-macro");
}
// Translate GCC's misnamed '-fasm' argument to '-fgnu-keywords'.
if (Arg *Asm = Args.getLastArg(options::OPT_fasm, options::OPT_fno_asm)) {
if (Asm->getOption().matches(options::OPT_fasm))
CmdArgs.push_back("-fgnu-keywords");
else
CmdArgs.push_back("-fno-gnu-keywords");
}
if (!ShouldEnableAutolink(Args, TC, JA))
CmdArgs.push_back("-fno-autolink");
// Add in -fdebug-compilation-dir if necessary.
addDebugCompDirArg(Args, CmdArgs, D.getVFS());
addDebugPrefixMapArg(D, Args, CmdArgs);
if (Arg *A = Args.getLastArg(options::OPT_ftemplate_depth_,
options::OPT_ftemplate_depth_EQ)) {
CmdArgs.push_back("-ftemplate-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_foperator_arrow_depth_EQ)) {
CmdArgs.push_back("-foperator-arrow-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_depth_EQ)) {
CmdArgs.push_back("-fconstexpr-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_steps_EQ)) {
CmdArgs.push_back("-fconstexpr-steps");
CmdArgs.push_back(A->getValue());
}
if (Args.hasArg(options::OPT_fexperimental_new_constant_interpreter))
CmdArgs.push_back("-fexperimental-new-constant-interpreter");
if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) {
CmdArgs.push_back("-fbracket-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_Wlarge_by_value_copy_EQ,
options::OPT_Wlarge_by_value_copy_def)) {
if (A->getNumValues()) {
StringRef bytes = A->getValue();
CmdArgs.push_back(Args.MakeArgString("-Wlarge-by-value-copy=" + bytes));
} else
CmdArgs.push_back("-Wlarge-by-value-copy=64"); // default value
}
if (Args.hasArg(options::OPT_relocatable_pch))
CmdArgs.push_back("-relocatable-pch");
if (const Arg *A = Args.getLastArg(options::OPT_fcf_runtime_abi_EQ)) {
static const char *kCFABIs[] = {
"standalone", "objc", "swift", "swift-5.0", "swift-4.2", "swift-4.1",
};
if (find(kCFABIs, StringRef(A->getValue())) == std::end(kCFABIs))
D.Diag(diag::err_drv_invalid_cf_runtime_abi) << A->getValue();
else
A->render(Args, CmdArgs);
}
if (Arg *A = Args.getLastArg(options::OPT_fconstant_string_class_EQ)) {
CmdArgs.push_back("-fconstant-string-class");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_ftabstop_EQ)) {
CmdArgs.push_back("-ftabstop");
CmdArgs.push_back(A->getValue());
}
if (Args.hasFlag(options::OPT_fstack_size_section,
options::OPT_fno_stack_size_section, RawTriple.isPS4()))
CmdArgs.push_back("-fstack-size-section");
if (Args.hasArg(options::OPT_fstack_usage)) {
CmdArgs.push_back("-stack-usage-file");
if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) {
SmallString<128> OutputFilename(OutputOpt->getValue());
llvm::sys::path::replace_extension(OutputFilename, "su");
CmdArgs.push_back(Args.MakeArgString(OutputFilename));
} else
CmdArgs.push_back(
Args.MakeArgString(Twine(getBaseInputStem(Args, Inputs)) + ".su"));
}
CmdArgs.push_back("-ferror-limit");
if (Arg *A = Args.getLastArg(options::OPT_ferror_limit_EQ))
CmdArgs.push_back(A->getValue());
else
CmdArgs.push_back("19");
if (Arg *A = Args.getLastArg(options::OPT_fmacro_backtrace_limit_EQ)) {
CmdArgs.push_back("-fmacro-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_ftemplate_backtrace_limit_EQ)) {
CmdArgs.push_back("-ftemplate-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_backtrace_limit_EQ)) {
CmdArgs.push_back("-fconstexpr-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fspell_checking_limit_EQ)) {
CmdArgs.push_back("-fspell-checking-limit");
CmdArgs.push_back(A->getValue());
}
// Pass -fmessage-length=.
unsigned MessageLength = 0;
if (Arg *A = Args.getLastArg(options::OPT_fmessage_length_EQ)) {
StringRef V(A->getValue());
if (V.getAsInteger(0, MessageLength))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< V << A->getOption().getName();
} else {
// If -fmessage-length=N was not specified, determine whether this is a
// terminal and, if so, implicitly define -fmessage-length appropriately.
MessageLength = llvm::sys::Process::StandardErrColumns();
}
if (MessageLength != 0)
CmdArgs.push_back(
Args.MakeArgString("-fmessage-length=" + Twine(MessageLength)));
// -fvisibility= and -fvisibility-ms-compat are of a piece.
if (const Arg *A = Args.getLastArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat)) {
if (A->getOption().matches(options::OPT_fvisibility_EQ)) {
CmdArgs.push_back("-fvisibility");
CmdArgs.push_back(A->getValue());
} else {
assert(A->getOption().matches(options::OPT_fvisibility_ms_compat));
CmdArgs.push_back("-fvisibility");
CmdArgs.push_back("hidden");
CmdArgs.push_back("-ftype-visibility");
CmdArgs.push_back("default");
}
}
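// Illustrative note (editorial): -fvisibility-ms-compat expands to the pair
// "-fvisibility hidden" plus "-ftype-visibility default", whereas
// -fvisibility=<value> is forwarded as "-fvisibility <value>".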
if (!RawTriple.isPS4())
if (const Arg *A =
Args.getLastArg(options::OPT_fvisibility_from_dllstorageclass,
options::OPT_fno_visibility_from_dllstorageclass)) {
if (A->getOption().matches(
options::OPT_fvisibility_from_dllstorageclass)) {
CmdArgs.push_back("-fvisibility-from-dllstorageclass");
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_dllexport_EQ);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_nodllstorageclass_EQ);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_externs_dllimport_EQ);
Args.AddLastArg(CmdArgs,
options::OPT_fvisibility_externs_nodllstorageclass_EQ);
}
}
if (const Arg *A = Args.getLastArg(options::OPT_mignore_xcoff_visibility)) {
if (Triple.isOSAIX())
CmdArgs.push_back("-mignore-xcoff-visibility");
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Args.hasFlag(options::OPT_fvisibility_inlines_hidden,
options::OPT_fno_visibility_inlines_hidden, false))
CmdArgs.push_back("-fvisibility-inlines-hidden");
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden_static_local_var,
options::OPT_fno_visibility_inlines_hidden_static_local_var);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_global_new_delete_hidden);
Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ);
if (Args.hasFlag(options::OPT_fno_operator_names,
options::OPT_foperator_names, false))
CmdArgs.push_back("-fno-operator-names");
// Forward -f (flag) options which we can pass directly.
Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls);
Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions);
Args.AddLastArg(CmdArgs, options::OPT_fdigraphs, options::OPT_fno_digraphs);
Args.AddLastArg(CmdArgs, options::OPT_femulated_tls,
options::OPT_fno_emulated_tls);
// AltiVec-like language extensions aren't relevant for assembling.
if (!isa<PreprocessJobAction>(JA) || Output.getType() != types::TY_PP_Asm)
Args.AddLastArg(CmdArgs, options::OPT_fzvector);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree);
Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type);
// Forward flags for OpenMP. We don't do this if the current action is a
// device offloading action other than OpenMP.
if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false) &&
(JA.isDeviceOffloading(Action::OFK_None) ||
JA.isDeviceOffloading(Action::OFK_OpenMP))) {
switch (D.getOpenMPRuntime(Args)) {
case Driver::OMPRT_OMP:
case Driver::OMPRT_IOMP5:
// Clang can generate useful OpenMP code for these two runtime libraries.
CmdArgs.push_back("-fopenmp");
// If no option regarding the use of TLS in OpenMP code generation is
// given, decide a default based on the target. Otherwise rely on the
// options and pass the right information to the frontend.
if (!Args.hasFlag(options::OPT_fopenmp_use_tls,
options::OPT_fnoopenmp_use_tls, /*Default=*/true))
CmdArgs.push_back("-fnoopenmp-use-tls");
Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
options::OPT_fno_openmp_simd);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_enable_irbuilder);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_number_of_sm_EQ);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_blocks_per_sm_EQ);
Args.AddAllArgs(CmdArgs,
options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ);
if (Args.hasFlag(options::OPT_fopenmp_optimistic_collapse,
options::OPT_fno_openmp_optimistic_collapse,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-optimistic-collapse");
// When in OpenMP offloading mode with an NVPTX target, forward the
// cuda-mode flag.
if (Args.hasFlag(options::OPT_fopenmp_cuda_mode,
options::OPT_fno_openmp_cuda_mode, /*Default=*/false))
CmdArgs.push_back("-fopenmp-cuda-mode");
// When in OpenMP offloading mode with an NVPTX target, check whether the
// full runtime is required.
if (Args.hasFlag(options::OPT_fopenmp_cuda_force_full_runtime,
options::OPT_fno_openmp_cuda_force_full_runtime,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-cuda-force-full-runtime");
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
// for a specific runtime library, we just don't pass the '-fopenmp' flag
// down to the actual compilation.
// FIXME: It would be better to have a mode which *only* omits IR
// generation based on the OpenMP support so that we get consistent
// semantic analysis, etc.
break;
}
} else {
Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
options::OPT_fno_openmp_simd);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
}
const SanitizerArgs &Sanitize = TC.getSanitizerArgs();
Sanitize.addArgs(TC, Args, CmdArgs, InputType);
const XRayArgs &XRay = TC.getXRayArgs();
XRay.addArgs(TC, Args, CmdArgs, InputType);
for (const auto &Filename :
Args.getAllArgValues(options::OPT_fprofile_list_EQ)) {
if (D.getVFS().exists(Filename))
CmdArgs.push_back(Args.MakeArgString("-fprofile-list=" + Filename));
else
D.Diag(clang::diag::err_drv_no_such_file) << Filename;
}
if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ)) {
StringRef S0 = A->getValue(), S = S0;
unsigned Size, Offset = 0;
if (!Triple.isAArch64() && !Triple.isRISCV() && !Triple.isX86())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
else if (S.consumeInteger(10, Size) ||
(!S.empty() && (!S.consume_front(",") ||
S.consumeInteger(10, Offset) || !S.empty())))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< S0 << A->getOption().getName();
else if (Size < Offset)
D.Diag(diag::err_drv_unsupported_fpatchable_function_entry_argument);
else {
CmdArgs.push_back(Args.MakeArgString(A->getSpelling() + Twine(Size)));
CmdArgs.push_back(Args.MakeArgString(
"-fpatchable-function-entry-offset=" + Twine(Offset)));
}
}
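// Illustrative note (editorial): -fpatchable-function-entry=N[,M] is only
// accepted on AArch64, RISC-V, and x86; a well-formed value with N >= M is
// split into "-fpatchable-function-entry=N" and
// "-fpatchable-function-entry-offset=M".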
if (TC.SupportsProfiling()) {
Args.AddLastArg(CmdArgs, options::OPT_pg);
llvm::Triple::ArchType Arch = TC.getArch();
if (Arg *A = Args.getLastArg(options::OPT_mfentry)) {
if (Arch == llvm::Triple::systemz || TC.getTriple().isX86())
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_mnop_mcount)) {
if (Arch == llvm::Triple::systemz)
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_mrecord_mcount)) {
if (Arch == llvm::Triple::systemz)
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
}
if (Args.getLastArg(options::OPT_fapple_kext) ||
(Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType)))
CmdArgs.push_back("-fapple-kext");
Args.AddLastArg(CmdArgs, options::OPT_altivec_src_compat);
Args.AddLastArg(CmdArgs, options::OPT_flax_vector_conversions_EQ);
Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits);
Args.AddLastArg(CmdArgs, options::OPT_ftime_report);
Args.AddLastArg(CmdArgs, options::OPT_ftime_report_EQ);
Args.AddLastArg(CmdArgs, options::OPT_ftime_trace);
Args.AddLastArg(CmdArgs, options::OPT_ftime_trace_granularity_EQ);
Args.AddLastArg(CmdArgs, options::OPT_ftrapv);
Args.AddLastArg(CmdArgs, options::OPT_malign_double);
Args.AddLastArg(CmdArgs, options::OPT_fno_temp_file);
if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) {
CmdArgs.push_back("-ftrapv-handler");
CmdArgs.push_back(A->getValue());
}
Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ);
// -fno-strict-overflow implies -fwrapv if it isn't disabled, but
// -fstrict-overflow won't turn off an explicitly enabled -fwrapv.
if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) {
if (A->getOption().matches(options::OPT_fwrapv))
CmdArgs.push_back("-fwrapv");
} else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow,
options::OPT_fno_strict_overflow)) {
if (A->getOption().matches(options::OPT_fno_strict_overflow))
CmdArgs.push_back("-fwrapv");
}
if (Arg *A = Args.getLastArg(options::OPT_freroll_loops,
options::OPT_fno_reroll_loops))
if (A->getOption().matches(options::OPT_freroll_loops))
CmdArgs.push_back("-freroll-loops");
Args.AddLastArg(CmdArgs, options::OPT_ffinite_loops,
options::OPT_fno_finite_loops);
Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings);
Args.AddLastArg(CmdArgs, options::OPT_funroll_loops,
options::OPT_fno_unroll_loops);
Args.AddLastArg(CmdArgs, options::OPT_pthread);
if (Args.hasFlag(options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening, false))
CmdArgs.push_back(Args.MakeArgString("-mspeculative-load-hardening"));
RenderSSPOptions(D, TC, Args, CmdArgs, KernelOrKext);
RenderSCPOptions(TC, Args, CmdArgs);
RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs);
// Translate -mstackrealign
if (Args.hasFlag(options::OPT_mstackrealign, options::OPT_mno_stackrealign,
false))
CmdArgs.push_back(Args.MakeArgString("-mstackrealign"));
if (Args.hasArg(options::OPT_mstack_alignment)) {
StringRef alignment = Args.getLastArgValue(options::OPT_mstack_alignment);
CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment));
}
if (Args.hasArg(options::OPT_mstack_probe_size)) {
StringRef Size = Args.getLastArgValue(options::OPT_mstack_probe_size);
if (!Size.empty())
CmdArgs.push_back(Args.MakeArgString("-mstack-probe-size=" + Size));
else
CmdArgs.push_back("-mstack-probe-size=0");
}
if (!Args.hasFlag(options::OPT_mstack_arg_probe,
options::OPT_mno_stack_arg_probe, true))
CmdArgs.push_back(Args.MakeArgString("-mno-stack-arg-probe"));
if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it,
options::OPT_mno_restrict_it)) {
if (A->getOption().matches(options::OPT_mrestrict_it)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-restrict-it");
} else {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-no-restrict-it");
}
} else if (Triple.isOSWindows() &&
(Triple.getArch() == llvm::Triple::arm ||
Triple.getArch() == llvm::Triple::thumb)) {
// Windows on ARM expects restricted IT blocks
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-restrict-it");
}
// Forward -cl options to -cc1
RenderOpenCLOptions(Args, CmdArgs, InputType);
if (IsHIP) {
if (Args.hasFlag(options::OPT_fhip_new_launch_api,
options::OPT_fno_hip_new_launch_api, true))
CmdArgs.push_back("-fhip-new-launch-api");
if (Args.hasFlag(options::OPT_fgpu_allow_device_init,
options::OPT_fno_gpu_allow_device_init, false))
CmdArgs.push_back("-fgpu-allow-device-init");
}
if (IsCuda || IsHIP) {
if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
CmdArgs.push_back("-fgpu-rdc");
if (Args.hasFlag(options::OPT_fgpu_defer_diag,
options::OPT_fno_gpu_defer_diag, false))
CmdArgs.push_back("-fgpu-defer-diag");
if (Args.hasFlag(options::OPT_fgpu_exclude_wrong_side_overloads,
options::OPT_fno_gpu_exclude_wrong_side_overloads,
false)) {
CmdArgs.push_back("-fgpu-exclude-wrong-side-overloads");
CmdArgs.push_back("-fgpu-defer-diag");
}
}
if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
}
// Forward -f options with positive and negative forms; we translate these by
// hand. Do not propagate PGO options to the GPU-side compilations as the
// profile info is for the host-side compilation only.
if (!(IsCudaDevice || IsHIPDevice)) {
if (Arg *A = getLastProfileSampleUseArg(Args)) {
auto *PGOArg = Args.getLastArg(
options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
options::OPT_fcs_profile_generate,
options::OPT_fcs_profile_generate_EQ, options::OPT_fprofile_use,
options::OPT_fprofile_use_EQ);
if (PGOArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "SampleUse with PGO options";
StringRef fname = A->getValue();
if (!llvm::sys::fs::exists(fname))
D.Diag(diag::err_drv_no_such_file) << fname;
else
A->render(Args, CmdArgs);
}
Args.AddLastArg(CmdArgs, options::OPT_fprofile_remapping_file_EQ);
if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
options::OPT_fno_pseudo_probe_for_profiling, false)) {
CmdArgs.push_back("-fpseudo-probe-for-profiling");
// Enforce -funique-internal-linkage-names if it's not explicitly turned
// off.
if (Args.hasFlag(options::OPT_funique_internal_linkage_names,
options::OPT_fno_unique_internal_linkage_names, true))
CmdArgs.push_back("-funique-internal-linkage-names");
}
}
RenderBuiltinOptions(TC, RawTriple, Args, CmdArgs);
if (!Args.hasFlag(options::OPT_fassume_sane_operator_new,
options::OPT_fno_assume_sane_operator_new))
CmdArgs.push_back("-fno-assume-sane-operator-new");
// -fblocks=0 is default.
if (Args.hasFlag(options::OPT_fblocks, options::OPT_fno_blocks,
TC.IsBlocksDefault()) ||
(Args.hasArg(options::OPT_fgnu_runtime) &&
Args.hasArg(options::OPT_fobjc_nonfragile_abi) &&
!Args.hasArg(options::OPT_fno_blocks))) {
CmdArgs.push_back("-fblocks");
if (!Args.hasArg(options::OPT_fgnu_runtime) && !TC.hasBlocksRuntime())
CmdArgs.push_back("-fblocks-runtime-optional");
}
// -fencode-extended-block-signature=1 is default.
if (TC.IsEncodeExtendedBlockSignatureDefault())
CmdArgs.push_back("-fencode-extended-block-signature");
if (Args.hasFlag(options::OPT_fcoroutines_ts, options::OPT_fno_coroutines_ts,
false) &&
types::isCXX(InputType)) {
CmdArgs.push_back("-fcoroutines-ts");
}
Args.AddLastArg(CmdArgs, options::OPT_fdouble_square_bracket_attributes,
options::OPT_fno_double_square_bracket_attributes);
// -faccess-control is default.
if (Args.hasFlag(options::OPT_fno_access_control,
options::OPT_faccess_control, false))
CmdArgs.push_back("-fno-access-control");
// -felide-constructors is the default.
if (Args.hasFlag(options::OPT_fno_elide_constructors,
options::OPT_felide_constructors, false))
CmdArgs.push_back("-fno-elide-constructors");
ToolChain::RTTIMode RTTIMode = TC.getRTTIMode();
if (KernelOrKext || (types::isCXX(InputType) &&
(RTTIMode == ToolChain::RM_Disabled)))
CmdArgs.push_back("-fno-rtti");
// -fshort-enums=0 is default for all architectures except Hexagon and z/OS.
if (Args.hasFlag(options::OPT_fshort_enums, options::OPT_fno_short_enums,
TC.getArch() == llvm::Triple::hexagon || Triple.isOSzOS()))
CmdArgs.push_back("-fshort-enums");
RenderCharacterOptions(Args, AuxTriple ? *AuxTriple : RawTriple, CmdArgs);
// -fuse-cxa-atexit is default.
if (!Args.hasFlag(
options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit,
!RawTriple.isOSAIX() && !RawTriple.isOSWindows() &&
TC.getArch() != llvm::Triple::xcore &&
((RawTriple.getVendor() != llvm::Triple::MipsTechnologies) ||
RawTriple.hasEnvironment())) ||
KernelOrKext)
CmdArgs.push_back("-fno-use-cxa-atexit");
if (Args.hasFlag(options::OPT_fregister_global_dtors_with_atexit,
options::OPT_fno_register_global_dtors_with_atexit,
RawTriple.isOSDarwin() && !KernelOrKext))
CmdArgs.push_back("-fregister-global-dtors-with-atexit");
// -fno-use-line-directives is default.
if (Args.hasFlag(options::OPT_fuse_line_directives,
options::OPT_fno_use_line_directives, false))
CmdArgs.push_back("-fuse-line-directives");
// -fms-extensions=0 is default.
if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
IsWindowsMSVC))
CmdArgs.push_back("-fms-extensions");
// -fms-compatibility=0 is default.
bool IsMSVCCompat = Args.hasFlag(
options::OPT_fms_compatibility, options::OPT_fno_ms_compatibility,
(IsWindowsMSVC && Args.hasFlag(options::OPT_fms_extensions,
options::OPT_fno_ms_extensions, true)));
if (IsMSVCCompat)
CmdArgs.push_back("-fms-compatibility");
// Handle -fgnuc-version, if present.
VersionTuple GNUCVer;
if (Arg *A = Args.getLastArg(options::OPT_fgnuc_version_EQ)) {
// Check that the version has 1 to 3 components and the minor and patch
// versions fit in two decimal digits.
StringRef Val = A->getValue();
Val = Val.empty() ? "0" : Val; // Treat "" as 0 or disable.
bool Invalid = GNUCVer.tryParse(Val);
unsigned Minor = GNUCVer.getMinor().getValueOr(0);
unsigned Patch = GNUCVer.getSubminor().getValueOr(0);
if (Invalid || GNUCVer.getBuild() || Minor >= 100 || Patch >= 100) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
} else if (!IsMSVCCompat) {
// Imitate GCC 4.2.1 by default if -fms-compatibility is not in effect.
GNUCVer = VersionTuple(4, 2, 1);
}
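// A non-empty GNUCVer is forwarded as -fgnuc-version=<V>, which in turn
// drives the __GNUC__ / __GNUC_MINOR__ / __GNUC_PATCHLEVEL__ predefines in
// the frontend.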
if (!GNUCVer.empty()) {
CmdArgs.push_back(
Args.MakeArgString("-fgnuc-version=" + GNUCVer.getAsString()));
}
VersionTuple MSVT = TC.computeMSVCVersion(&D, Args);
if (!MSVT.empty())
CmdArgs.push_back(
Args.MakeArgString("-fms-compatibility-version=" + MSVT.getAsString()));
bool IsMSVC2015Compatible = MSVT.getMajor() >= 19;
if (ImplyVCPPCVer) {
StringRef LanguageStandard;
if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) {
Std = StdArg;
LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue())
.Case("c11", "-std=c11")
.Case("c17", "-std=c17")
.Default("");
if (LanguageStandard.empty())
D.Diag(clang::diag::warn_drv_unused_argument)
<< StdArg->getAsString(Args);
}
CmdArgs.push_back(LanguageStandard.data());
}
if (ImplyVCPPCXXVer) {
StringRef LanguageStandard;
if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) {
Std = StdArg;
LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue())
.Case("c++14", "-std=c++14")
.Case("c++17", "-std=c++17")
.Case("c++20", "-std=c++20")
.Case("c++latest", "-std=c++2b")
.Default("");
if (LanguageStandard.empty())
D.Diag(clang::diag::warn_drv_unused_argument)
<< StdArg->getAsString(Args);
}
if (LanguageStandard.empty()) {
if (IsMSVC2015Compatible)
LanguageStandard = "-std=c++14";
else
LanguageStandard = "-std=c++11";
}
CmdArgs.push_back(LanguageStandard.data());
}
// -fno-borland-extensions is default.
if (Args.hasFlag(options::OPT_fborland_extensions,
options::OPT_fno_borland_extensions, false))
CmdArgs.push_back("-fborland-extensions");
// -fno-declspec is default, except for PS4.
if (Args.hasFlag(options::OPT_fdeclspec, options::OPT_fno_declspec,
RawTriple.isPS4()))
CmdArgs.push_back("-fdeclspec");
else if (Args.hasArg(options::OPT_fno_declspec))
CmdArgs.push_back("-fno-declspec"); // Explicitly disabling __declspec.
// -fthreadsafe-statics is the default, except for MSVC compatibility versions
// less than 19.
if (!Args.hasFlag(options::OPT_fthreadsafe_statics,
options::OPT_fno_threadsafe_statics,
!IsWindowsMSVC || IsMSVC2015Compatible))
CmdArgs.push_back("-fno-threadsafe-statics");
// -fno-delayed-template-parsing is default, except when targeting MSVC.
// Many old Windows SDK versions require this to parse.
// FIXME: MSVC introduced /Zc:twoPhase- to disable this behavior in their
// compiler. We should be able to disable this by default at some point.
if (Args.hasFlag(options::OPT_fdelayed_template_parsing,
options::OPT_fno_delayed_template_parsing, IsWindowsMSVC))
CmdArgs.push_back("-fdelayed-template-parsing");
// -fgnu-keywords default varies depending on language; only pass if
// specified.
Args.AddLastArg(CmdArgs, options::OPT_fgnu_keywords,
options::OPT_fno_gnu_keywords);
if (Args.hasFlag(options::OPT_fgnu89_inline, options::OPT_fno_gnu89_inline,
false))
CmdArgs.push_back("-fgnu89-inline");
if (Args.hasArg(options::OPT_fno_inline))
CmdArgs.push_back("-fno-inline");
Args.AddLastArg(CmdArgs, options::OPT_finline_functions,
options::OPT_finline_hint_functions,
options::OPT_fno_inline_functions);
// FIXME: Find a better way to determine whether the language has modules
// support by default, or just assume that all languages do.
bool HaveModules =
Std && (Std->containsValue("c++2a") || Std->containsValue("c++20") ||
Std->containsValue("c++latest"));
RenderModulesOptions(C, D, Args, Input, Output, CmdArgs, HaveModules);
if (Args.hasFlag(options::OPT_fpch_validate_input_files_content,
options::OPT_fno_pch_validate_input_files_content, false))
CmdArgs.push_back("-fvalidate-ast-input-files-content");
if (Args.hasFlag(options::OPT_fpch_instantiate_templates,
options::OPT_fno_pch_instantiate_templates, false))
CmdArgs.push_back("-fpch-instantiate-templates");
if (Args.hasFlag(options::OPT_fpch_codegen, options::OPT_fno_pch_codegen,
false))
CmdArgs.push_back("-fmodules-codegen");
if (Args.hasFlag(options::OPT_fpch_debuginfo, options::OPT_fno_pch_debuginfo,
false))
CmdArgs.push_back("-fmodules-debuginfo");
Args.AddLastArg(CmdArgs, options::OPT_flegacy_pass_manager,
options::OPT_fno_legacy_pass_manager);
ObjCRuntime Runtime = AddObjCRuntimeArgs(Args, Inputs, CmdArgs, rewriteKind);
RenderObjCOptions(TC, D, RawTriple, Args, Runtime, rewriteKind != RK_None,
Input, CmdArgs);
if (types::isObjC(Input.getType()) &&
Args.hasFlag(options::OPT_fobjc_encode_cxx_class_template_spec,
options::OPT_fno_objc_encode_cxx_class_template_spec,
!Runtime.isNeXTFamily()))
CmdArgs.push_back("-fobjc-encode-cxx-class-template-spec");
if (Args.hasFlag(options::OPT_fapplication_extension,
options::OPT_fno_application_extension, false))
CmdArgs.push_back("-fapplication-extension");
// Handle GCC-style exception args.
bool EH = false;
if (!C.getDriver().IsCLMode())
EH = addExceptionArgs(Args, InputType, TC, KernelOrKext, Runtime, CmdArgs);
// Handle exception personalities
Arg *A = Args.getLastArg(
options::OPT_fsjlj_exceptions, options::OPT_fseh_exceptions,
options::OPT_fdwarf_exceptions, options::OPT_fwasm_exceptions);
if (A) {
const Option &Opt = A->getOption();
if (Opt.matches(options::OPT_fsjlj_exceptions))
CmdArgs.push_back("-exception-model=sjlj");
if (Opt.matches(options::OPT_fseh_exceptions))
CmdArgs.push_back("-exception-model=seh");
if (Opt.matches(options::OPT_fdwarf_exceptions))
CmdArgs.push_back("-exception-model=dwarf");
if (Opt.matches(options::OPT_fwasm_exceptions))
CmdArgs.push_back("-exception-model=wasm");
} else {
switch (TC.GetExceptionModel(Args)) {
default:
break;
case llvm::ExceptionHandling::DwarfCFI:
CmdArgs.push_back("-exception-model=dwarf");
break;
case llvm::ExceptionHandling::SjLj:
CmdArgs.push_back("-exception-model=sjlj");
break;
case llvm::ExceptionHandling::WinEH:
CmdArgs.push_back("-exception-model=seh");
break;
}
}
// C++ "sane" operator new.
if (!Args.hasFlag(options::OPT_fassume_sane_operator_new,
options::OPT_fno_assume_sane_operator_new))
CmdArgs.push_back("-fno-assume-sane-operator-new");
// -frelaxed-template-template-args is off by default, as it is a severe
// breaking change until a corresponding change to template partial ordering
// is provided.
if (Args.hasFlag(options::OPT_frelaxed_template_template_args,
options::OPT_fno_relaxed_template_template_args, false))
CmdArgs.push_back("-frelaxed-template-template-args");
// -fsized-deallocation is off by default, as it is an ABI-breaking change for
// most platforms.
if (Args.hasFlag(options::OPT_fsized_deallocation,
options::OPT_fno_sized_deallocation, false))
CmdArgs.push_back("-fsized-deallocation");
// -faligned-allocation is on by default in C++17 onwards and otherwise off
// by default.
if (Arg *A = Args.getLastArg(options::OPT_faligned_allocation,
options::OPT_fno_aligned_allocation,
options::OPT_faligned_new_EQ)) {
if (A->getOption().matches(options::OPT_fno_aligned_allocation))
CmdArgs.push_back("-fno-aligned-allocation");
else
CmdArgs.push_back("-faligned-allocation");
}
// The default new alignment can be specified using a dedicated option or via
// a GCC-compatible option that also turns on aligned allocation.
if (Arg *A = Args.getLastArg(options::OPT_fnew_alignment_EQ,
options::OPT_faligned_new_EQ))
CmdArgs.push_back(
Args.MakeArgString(Twine("-fnew-alignment=") + A->getValue()));
// -fconstant-cfstrings is default, and may be subject to argument translation
// on Darwin.
if (!Args.hasFlag(options::OPT_fconstant_cfstrings,
options::OPT_fno_constant_cfstrings) ||
!Args.hasFlag(options::OPT_mconstant_cfstrings,
options::OPT_mno_constant_cfstrings))
CmdArgs.push_back("-fno-constant-cfstrings");
// -fno-pascal-strings is default, only pass non-default.
if (Args.hasFlag(options::OPT_fpascal_strings,
options::OPT_fno_pascal_strings, false))
CmdArgs.push_back("-fpascal-strings");
// Honor -fpack-struct= and -fpack-struct, if given. Note that
// -fno-pack-struct doesn't apply to -fpack-struct=.
if (Arg *A = Args.getLastArg(options::OPT_fpack_struct_EQ)) {
std::string PackStructStr = "-fpack-struct=";
PackStructStr += A->getValue();
CmdArgs.push_back(Args.MakeArgString(PackStructStr));
} else if (Args.hasFlag(options::OPT_fpack_struct,
options::OPT_fno_pack_struct, false)) {
CmdArgs.push_back("-fpack-struct=1");
}
// Handle -fmax-type-align=N and -fno-type-align
bool SkipMaxTypeAlign = Args.hasArg(options::OPT_fno_max_type_align);
if (Arg *A = Args.getLastArg(options::OPT_fmax_type_align_EQ)) {
if (!SkipMaxTypeAlign) {
std::string MaxTypeAlignStr = "-fmax-type-align=";
MaxTypeAlignStr += A->getValue();
CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr));
}
} else if (RawTriple.isOSDarwin()) {
if (!SkipMaxTypeAlign) {
std::string MaxTypeAlignStr = "-fmax-type-align=16";
CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr));
}
}
if (!Args.hasFlag(options::OPT_Qy, options::OPT_Qn, true))
CmdArgs.push_back("-Qn");
// -fno-common is the default, set -fcommon only when that flag is set.
if (Args.hasFlag(options::OPT_fcommon, options::OPT_fno_common, false))
CmdArgs.push_back("-fcommon");
// -fsigned-bitfields is default, and clang doesn't yet support
// -funsigned-bitfields.
if (!Args.hasFlag(options::OPT_fsigned_bitfields,
options::OPT_funsigned_bitfields))
D.Diag(diag::warn_drv_clang_unsupported)
<< Args.getLastArg(options::OPT_funsigned_bitfields)->getAsString(Args);
// -ffor-scope is the default, and clang doesn't support -fno-for-scope.
if (!Args.hasFlag(options::OPT_ffor_scope, options::OPT_fno_for_scope))
D.Diag(diag::err_drv_clang_unsupported)
<< Args.getLastArg(options::OPT_fno_for_scope)->getAsString(Args);
// -finput-charset=UTF-8 is the default; reject anything else.
if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) {
StringRef value = inputCharset->getValue();
if (!value.equals_insensitive("utf-8"))
D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args)
<< value;
}
// -fexec-charset=UTF-8 is the default; reject anything else.
if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
StringRef value = execCharset->getValue();
if (!value.equals_insensitive("utf-8"))
D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args)
<< value;
}
RenderDiagnosticsOptions(D, Args, CmdArgs);
// -fno-asm-blocks is default.
if (Args.hasFlag(options::OPT_fasm_blocks, options::OPT_fno_asm_blocks,
false))
CmdArgs.push_back("-fasm-blocks");
// -fgnu-inline-asm is default.
if (!Args.hasFlag(options::OPT_fgnu_inline_asm,
options::OPT_fno_gnu_inline_asm, true))
CmdArgs.push_back("-fno-gnu-inline-asm");
// Enable vectorization per default according to the optimization level
// selected. For optimization levels that want vectorization we use the alias
// option to simplify the hasFlag logic.
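// When the -O level already enables vectorization, any -O flag acts as the
// positive option here, so e.g. `-fno-vectorize -O2` still enables the loop
// vectorizer (the last relevant flag wins).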
bool EnableVec = shouldEnableVectorizerAtOLevel(Args, false);
OptSpecifier VectorizeAliasOption =
EnableVec ? options::OPT_O_Group : options::OPT_fvectorize;
if (Args.hasFlag(options::OPT_fvectorize, VectorizeAliasOption,
options::OPT_fno_vectorize, EnableVec))
CmdArgs.push_back("-vectorize-loops");
// -fslp-vectorize is enabled based on the optimization level selected.
bool EnableSLPVec = shouldEnableVectorizerAtOLevel(Args, true);
OptSpecifier SLPVectAliasOption =
EnableSLPVec ? options::OPT_O_Group : options::OPT_fslp_vectorize;
if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption,
options::OPT_fno_slp_vectorize, EnableSLPVec))
CmdArgs.push_back("-vectorize-slp");
ParseMPreferVectorWidth(D, Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fshow_overloads_EQ);
Args.AddLastArg(CmdArgs,
options::OPT_fsanitize_undefined_strip_path_components_EQ);
// -fdollars-in-identifiers default varies depending on platform and
// language; only pass if specified.
if (Arg *A = Args.getLastArg(options::OPT_fdollars_in_identifiers,
options::OPT_fno_dollars_in_identifiers)) {
if (A->getOption().matches(options::OPT_fdollars_in_identifiers))
CmdArgs.push_back("-fdollars-in-identifiers");
else
CmdArgs.push_back("-fno-dollars-in-identifiers");
}
// -funit-at-a-time is default, and we don't support -fno-unit-at-a-time for
// practical purposes.
if (Arg *A = Args.getLastArg(options::OPT_funit_at_a_time,
options::OPT_fno_unit_at_a_time)) {
if (A->getOption().matches(options::OPT_fno_unit_at_a_time))
D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args);
}
if (Args.hasFlag(options::OPT_fapple_pragma_pack,
options::OPT_fno_apple_pragma_pack, false))
CmdArgs.push_back("-fapple-pragma-pack");
if (Args.hasFlag(options::OPT_fxl_pragma_pack,
options::OPT_fno_xl_pragma_pack, RawTriple.isOSAIX()))
CmdArgs.push_back("-fxl-pragma-pack");
// Remarks can be enabled with any of the `-f.*optimization-record.*` flags.
if (willEmitRemarks(Args) && checkRemarksOptions(D, Args, Triple))
renderRemarksOptions(Args, CmdArgs, Triple, Input, Output, JA);
bool RewriteImports = Args.hasFlag(options::OPT_frewrite_imports,
options::OPT_fno_rewrite_imports, false);
if (RewriteImports)
CmdArgs.push_back("-frewrite-imports");
// Enable rewrite includes if the user's asked for it or if we're generating
// diagnostics.
// TODO: Once -module-dependency-dir works with -frewrite-includes it'd be
// nice to enable this when doing a crashdump for modules as well.
if (Args.hasFlag(options::OPT_frewrite_includes,
options::OPT_fno_rewrite_includes, false) ||
(C.isForDiagnostics() && !HaveModules))
CmdArgs.push_back("-frewrite-includes");
// Only allow -traditional or -traditional-cpp in preprocessing modes.
if (Arg *A = Args.getLastArg(options::OPT_traditional,
options::OPT_traditional_cpp)) {
if (isa<PreprocessJobAction>(JA))
CmdArgs.push_back("-traditional-cpp");
else
D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}
Args.AddLastArg(CmdArgs, options::OPT_dM);
Args.AddLastArg(CmdArgs, options::OPT_dD);
Args.AddLastArg(CmdArgs, options::OPT_fmax_tokens_EQ);
// Handle serialized diagnostics.
if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) {
CmdArgs.push_back("-serialize-diagnostic-file");
CmdArgs.push_back(Args.MakeArgString(A->getValue()));
}
if (Args.hasArg(options::OPT_fretain_comments_from_system_headers))
CmdArgs.push_back("-fretain-comments-from-system-headers");
// Forward -fcomment-block-commands to -cc1.
Args.AddAllArgs(CmdArgs, options::OPT_fcomment_block_commands);
// Forward -fparse-all-comments to -cc1.
Args.AddAllArgs(CmdArgs, options::OPT_fparse_all_comments);
// Turn -fplugin=name.so into -load name.so
for (const Arg *A : Args.filtered(options::OPT_fplugin_EQ)) {
CmdArgs.push_back("-load");
CmdArgs.push_back(A->getValue());
A->claim();
}
// Forward -fpass-plugin=name.so to -cc1.
for (const Arg *A : Args.filtered(options::OPT_fpass_plugin_EQ)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-fpass-plugin=") + A->getValue()));
A->claim();
}
// Setup statistics file output.
SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
if (!StatsFile.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-stats-file=") + StatsFile));
// Forward -Xclang arguments to -cc1, and -mllvm arguments to the LLVM option
// parser.
// The -finclude-default-header flag is for the preprocessor; do not pass it
// to other cc1 commands when save-temps is enabled.
if (C.getDriver().isSaveTempsEnabled() &&
!isa<PreprocessJobAction>(JA)) {
for (auto Arg : Args.filtered(options::OPT_Xclang)) {
Arg->claim();
if (StringRef(Arg->getValue()) != "-finclude-default-header")
CmdArgs.push_back(Arg->getValue());
}
}
else {
Args.AddAllArgValues(CmdArgs, options::OPT_Xclang);
}
for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
A->claim();
// We translate this by hand to the -cc1 argument, since nightly test uses
// it and developers have been trained to spell it with -mllvm. Both
// spellings are now deprecated and should be removed.
if (StringRef(A->getValue(0)) == "-disable-llvm-optzns") {
CmdArgs.push_back("-disable-llvm-optzns");
} else {
A->render(Args, CmdArgs);
}
}
// With -save-temps, we want to save the unoptimized bitcode output from the
// CompileJobAction, use -disable-llvm-passes to get pristine IR generated
// by the frontend.
// When -fembed-bitcode is enabled, optimized bitcode is emitted because it
// has slightly different breakdown between stages.
// FIXME: -fembed-bitcode -save-temps will save optimized bitcode instead of
// pristine IR generated by the frontend. Ideally, a new compile action should
// be added so both IR can be captured.
if ((C.getDriver().isSaveTempsEnabled() ||
JA.isHostOffloading(Action::OFK_OpenMP)) &&
!(C.getDriver().embedBitcodeInObject() && !IsUsingLTO) &&
isa<CompileJobAction>(JA))
CmdArgs.push_back("-disable-llvm-passes");
Args.AddAllArgs(CmdArgs, options::OPT_undef);
const char *Exec = D.getClangProgramPath();
// Optionally embed the -cc1 level arguments into the debug info or a
// section, for build analysis.
// Also record the command line arguments into the debug info if
// -grecord-gcc-switches is set; by default -gno-record-gcc-switches is in
// effect and nothing is recorded.
auto GRecordSwitches =
Args.hasFlag(options::OPT_grecord_command_line,
options::OPT_gno_record_command_line, false);
auto FRecordSwitches =
Args.hasFlag(options::OPT_frecord_command_line,
options::OPT_fno_record_command_line, false);
if (FRecordSwitches && !Triple.isOSBinFormatELF())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_frecord_command_line)->getAsString(Args)
<< TripleStr;
if (TC.UseDwarfDebugFlags() || GRecordSwitches || FRecordSwitches) {
ArgStringList OriginalArgs;
for (const auto &Arg : Args)
Arg->render(Args, OriginalArgs);
SmallString<256> Flags;
EscapeSpacesAndBackslashes(Exec, Flags);
for (const char *OriginalArg : OriginalArgs) {
SmallString<128> EscapedArg;
EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
Flags += " ";
Flags += EscapedArg;
}
auto FlagsArgString = Args.MakeArgString(Flags);
if (TC.UseDwarfDebugFlags() || GRecordSwitches) {
CmdArgs.push_back("-dwarf-debug-flags");
CmdArgs.push_back(FlagsArgString);
}
if (FRecordSwitches) {
CmdArgs.push_back("-record-command-line");
CmdArgs.push_back(FlagsArgString);
}
}
// Host-side cuda compilation receives all device-side outputs in a single
// fatbin as Inputs[1]. Include the binary with -fcuda-include-gpubinary.
if ((IsCuda || IsHIP) && CudaDeviceInput) {
CmdArgs.push_back("-fcuda-include-gpubinary");
CmdArgs.push_back(CudaDeviceInput->getFilename());
if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
CmdArgs.push_back("-fgpu-rdc");
}
if (IsCuda) {
if (Args.hasFlag(options::OPT_fcuda_short_ptr,
options::OPT_fno_cuda_short_ptr, false))
CmdArgs.push_back("-fcuda-short-ptr");
}
if (IsCuda || IsHIP) {
// Determine the original source input.
const Action *SourceAction = &JA;
while (SourceAction->getKind() != Action::InputClass) {
assert(!SourceAction->getInputs().empty() && "unexpected root action!");
SourceAction = SourceAction->getInputs()[0];
}
auto CUID = cast<InputAction>(SourceAction)->getId();
if (!CUID.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-cuid=") + Twine(CUID)));
}
if (IsHIP)
CmdArgs.push_back("-fcuda-allow-variadic-functions");
if (IsCudaDevice || IsHIPDevice) {
StringRef InlineThresh =
Args.getLastArgValue(options::OPT_fgpu_inline_threshold_EQ);
if (!InlineThresh.empty()) {
std::string ArgStr =
std::string("-inline-threshold=") + InlineThresh.str();
CmdArgs.append({"-mllvm", Args.MakeArgStringRef(ArgStr)});
}
}
// OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
// to specify the result of the compile phase on the host, so the meaningful
// device declarations can be identified. Also, -fopenmp-is-device is passed
// along to tell the frontend that it is generating code for a device, so that
// only the relevant declarations are emitted.
if (IsOpenMPDevice) {
CmdArgs.push_back("-fopenmp-is-device");
if (OpenMPDeviceInput) {
CmdArgs.push_back("-fopenmp-host-ir-file-path");
CmdArgs.push_back(Args.MakeArgString(OpenMPDeviceInput->getFilename()));
}
}
if (Triple.isAMDGPU()) {
handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);
if (Args.hasFlag(options::OPT_munsafe_fp_atomics,
options::OPT_mno_unsafe_fp_atomics, /*Default=*/false))
CmdArgs.push_back("-munsafe-fp-atomics");
}
// For all the host OpenMP offloading compile jobs we need to pass the targets
// information using -fopenmp-targets= option.
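// e.g. -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa (illustrative
// target triples).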
if (JA.isHostOffloading(Action::OFK_OpenMP)) {
SmallString<128> TargetInfo("-fopenmp-targets=");
Arg *Tgts = Args.getLastArg(options::OPT_fopenmp_targets_EQ);
assert(Tgts && Tgts->getNumValues() &&
"OpenMP offloading has to have targets specified.");
for (unsigned i = 0; i < Tgts->getNumValues(); ++i) {
if (i)
TargetInfo += ',';
// We need to get the string from the triple because it may not be exactly
// the same as the one we get directly from the arguments.
llvm::Triple T(Tgts->getValue(i));
TargetInfo += T.getTriple();
}
CmdArgs.push_back(Args.MakeArgString(TargetInfo.str()));
}
bool VirtualFunctionElimination =
Args.hasFlag(options::OPT_fvirtual_function_elimination,
options::OPT_fno_virtual_function_elimination, false);
if (VirtualFunctionElimination) {
// VFE requires full LTO (currently, this might be relaxed to allow ThinLTO
// in the future).
if (LTOMode != LTOK_Full)
D.Diag(diag::err_drv_argument_only_allowed_with)
<< "-fvirtual-function-elimination"
<< "-flto=full";
CmdArgs.push_back("-fvirtual-function-elimination");
}
// VFE requires whole-program-vtables, and enables it by default.
bool WholeProgramVTables = Args.hasFlag(
options::OPT_fwhole_program_vtables,
options::OPT_fno_whole_program_vtables, VirtualFunctionElimination);
if (VirtualFunctionElimination && !WholeProgramVTables) {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "-fno-whole-program-vtables"
<< "-fvirtual-function-elimination";
}
if (WholeProgramVTables) {
// Propagate -fwhole-program-vtables if this is an LTO compile.
if (IsUsingLTO)
CmdArgs.push_back("-fwhole-program-vtables");
// Check if we passed LTO options but they were suppressed because this is a
// device offloading action, or we passed device offload LTO options which
// were suppressed because this is not the device offload action.
// Otherwise, issue an error.
else if (!D.isUsingLTO(!IsDeviceOffloadAction))
D.Diag(diag::err_drv_argument_only_allowed_with)
<< "-fwhole-program-vtables"
<< "-flto";
}
bool DefaultsSplitLTOUnit =
(WholeProgramVTables || Sanitize.needsLTO()) &&
(LTOMode == LTOK_Full || TC.canSplitThinLTOUnit());
bool SplitLTOUnit =
Args.hasFlag(options::OPT_fsplit_lto_unit,
options::OPT_fno_split_lto_unit, DefaultsSplitLTOUnit);
if (Sanitize.needsLTO() && !SplitLTOUnit)
D.Diag(diag::err_drv_argument_not_allowed_with) << "-fno-split-lto-unit"
<< "-fsanitize=cfi";
if (SplitLTOUnit)
CmdArgs.push_back("-fsplit-lto-unit");
if (Arg *A = Args.getLastArg(options::OPT_fglobal_isel,
options::OPT_fno_global_isel)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_fglobal_isel)) {
CmdArgs.push_back("-global-isel=1");
// GISel is on by default on AArch64 -O0, so don't bother adding
// the fallback remarks for it. Other combinations will add a warning of
// some kind.
bool IsArchSupported = Triple.getArch() == llvm::Triple::aarch64;
bool IsOptLevelSupported = false;
Arg *A = Args.getLastArg(options::OPT_O_Group);
if (Triple.getArch() == llvm::Triple::aarch64) {
if (!A || A->getOption().matches(options::OPT_O0))
IsOptLevelSupported = true;
}
if (!IsArchSupported || !IsOptLevelSupported) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-global-isel-abort=2");
if (!IsArchSupported)
D.Diag(diag::warn_drv_global_isel_incomplete) << Triple.getArchName();
else
D.Diag(diag::warn_drv_global_isel_incomplete_opt);
}
} else {
CmdArgs.push_back("-global-isel=0");
}
}
if (Args.hasArg(options::OPT_forder_file_instrumentation)) {
CmdArgs.push_back("-forder-file-instrumentation");
// Enable order file instrumentation when ThinLTO is not on. When ThinLTO is
// on, we need to pass these flags as linker flags and that will be handled
// outside of the compiler.
if (!IsUsingLTO) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-enable-order-file-instrumentation");
}
}
if (Arg *A = Args.getLastArg(options::OPT_fforce_enable_int128,
options::OPT_fno_force_enable_int128)) {
if (A->getOption().matches(options::OPT_fforce_enable_int128))
CmdArgs.push_back("-fforce-enable-int128");
}
if (Args.hasFlag(options::OPT_fkeep_static_consts,
options::OPT_fno_keep_static_consts, false))
CmdArgs.push_back("-fkeep-static-consts");
if (Args.hasFlag(options::OPT_fcomplete_member_pointers,
options::OPT_fno_complete_member_pointers, false))
CmdArgs.push_back("-fcomplete-member-pointers");
if (!Args.hasFlag(options::OPT_fcxx_static_destructors,
options::OPT_fno_cxx_static_destructors, true))
CmdArgs.push_back("-fno-c++-static-destructors");
addMachineOutlinerArgs(D, Args, CmdArgs, Triple, /*IsLTO=*/false);
if (Arg *A = Args.getLastArg(options::OPT_moutline_atomics,
options::OPT_mno_outline_atomics)) {
if (A->getOption().matches(options::OPT_moutline_atomics)) {
// Option -moutline-atomics supported for AArch64 target only.
if (!Triple.isAArch64()) {
D.Diag(diag::warn_drv_moutline_atomics_unsupported_opt)
<< Triple.getArchName();
} else {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+outline-atomics");
}
} else {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("-outline-atomics");
}
} else if (Triple.isAArch64() &&
getToolChain().IsAArch64OutlineAtomicsDefault(Args)) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+outline-atomics");
}
if (Args.hasFlag(options::OPT_faddrsig, options::OPT_fno_addrsig,
(TC.getTriple().isOSBinFormatELF() ||
TC.getTriple().isOSBinFormatCOFF()) &&
!TC.getTriple().isPS4() && !TC.getTriple().isVE() &&
!TC.getTriple().isOSNetBSD() &&
!Distro(D.getVFS(), TC.getTriple()).IsGentoo() &&
!TC.getTriple().isAndroid() && TC.useIntegratedAs()))
CmdArgs.push_back("-faddrsig");
if ((Triple.isOSBinFormatELF() || Triple.isOSBinFormatMachO()) &&
(EH || UnwindTables || DebugInfoKind != codegenoptions::NoDebugInfo))
CmdArgs.push_back("-D__GCC_HAVE_DWARF2_CFI_ASM=1");
if (Arg *A = Args.getLastArg(options::OPT_fsymbol_partition_EQ)) {
std::string Str = A->getAsString(Args);
if (!TC.getTriple().isOSBinFormatELF())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Str << TC.getTripleString();
CmdArgs.push_back(Args.MakeArgString(Str));
}
// Add the "-o out -x type src.c" flags last. This is done primarily to make
// the -cc1 command easier to edit when reproducing compiler crashes.
if (Output.getType() == types::TY_Dependencies) {
// Handled with other dependency code.
} else if (Output.isFilename()) {
if (Output.getType() == clang::driver::types::TY_IFS_CPP ||
Output.getType() == clang::driver::types::TY_IFS) {
SmallString<128> OutputFilename(Output.getFilename());
llvm::sys::path::replace_extension(OutputFilename, "ifs");
CmdArgs.push_back("-o");
CmdArgs.push_back(Args.MakeArgString(OutputFilename));
} else {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
}
} else {
assert(Output.isNothing() && "Invalid output.");
}
addDashXForInput(Args, Input, CmdArgs);
ArrayRef<InputInfo> FrontendInputs = Input;
if (IsHeaderModulePrecompile)
FrontendInputs = ModuleHeaderInputs;
else if (Input.isNothing())
FrontendInputs = {};
for (const InputInfo &Input : FrontendInputs) {
if (Input.isFilename())
CmdArgs.push_back(Input.getFilename());
else
Input.getInputArg().renderAsInput(Args, CmdArgs);
}
if (D.CC1Main && !D.CCGenDiagnostics) {
// Invoke the CC1 directly in this process
C.addCommand(std::make_unique<CC1Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
} else {
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
}
// Make the compile command echo its inputs for /showFilenames.
if (Output.getType() == types::TY_Object &&
Args.hasFlag(options::OPT__SLASH_showFilenames,
options::OPT__SLASH_showFilenames_, false)) {
C.getJobs().getJobs().back()->PrintInputFilenames = true;
}
if (Arg *A = Args.getLastArg(options::OPT_pg))
if (FPKeepKind == CodeGenOptions::FramePointerKind::None &&
!Args.hasArg(options::OPT_mfentry))
D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer"
<< A->getAsString(Args);
// Claim some arguments which clang supports automatically.
// -fpch-preprocess is used with gcc to add a special marker in the output to
// include the PCH file.
Args.ClaimAllArgs(options::OPT_fpch_preprocess);
// Claim some arguments which clang doesn't support, but we don't
// care to warn the user about.
Args.ClaimAllArgs(options::OPT_clang_ignored_f_Group);
Args.ClaimAllArgs(options::OPT_clang_ignored_m_Group);
// Disable warnings for clang -E -emit-llvm foo.c
Args.ClaimAllArgs(options::OPT_emit_llvm);
}
Clang::Clang(const ToolChain &TC)
// CAUTION! The first constructor argument ("clang") is not arbitrary,
// as it is for other tools. Some operations on a Tool actually test
// whether that tool is Clang based on the Tool's Name as a string.
: Tool("clang", "clang frontend", TC) {}
Clang::~Clang() {}
/// Add options related to the Objective-C runtime/ABI.
///
/// Returns the selected Objective-C runtime.
ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args,
const InputInfoList &inputs,
ArgStringList &cmdArgs,
RewriteKind rewriteKind) const {
// Look for the controlling runtime option.
Arg *runtimeArg =
args.getLastArg(options::OPT_fnext_runtime, options::OPT_fgnu_runtime,
options::OPT_fobjc_runtime_EQ);
// Just forward -fobjc-runtime= to the frontend. This supersedes
// options about fragility.
if (runtimeArg &&
runtimeArg->getOption().matches(options::OPT_fobjc_runtime_EQ)) {
ObjCRuntime runtime;
StringRef value = runtimeArg->getValue();
if (runtime.tryParse(value)) {
getToolChain().getDriver().Diag(diag::err_drv_unknown_objc_runtime)
<< value;
}
if ((runtime.getKind() == ObjCRuntime::GNUstep) &&
(runtime.getVersion() >= VersionTuple(2, 0)))
if (!getToolChain().getTriple().isOSBinFormatELF() &&
!getToolChain().getTriple().isOSBinFormatCOFF()) {
getToolChain().getDriver().Diag(
diag::err_drv_gnustep_objc_runtime_incompatible_binary)
<< runtime.getVersion().getMajor();
}
runtimeArg->render(args, cmdArgs);
return runtime;
}
// Otherwise, we'll need the ABI "version". Version numbers are
// slightly confusing for historical reasons:
// 1 - Traditional "fragile" ABI
// 2 - Non-fragile ABI, version 1
// 3 - Non-fragile ABI, version 2
unsigned objcABIVersion = 1;
// If -fobjc-abi-version= is present, use that to set the version.
if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_abi_version_EQ)) {
StringRef value = abiArg->getValue();
if (value == "1")
objcABIVersion = 1;
else if (value == "2")
objcABIVersion = 2;
else if (value == "3")
objcABIVersion = 3;
else
getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value;
} else {
// Otherwise, determine if we are using the non-fragile ABI.
bool nonFragileABIIsDefault =
(rewriteKind == RK_NonFragile ||
(rewriteKind == RK_None &&
getToolChain().IsObjCNonFragileABIDefault()));
if (args.hasFlag(options::OPT_fobjc_nonfragile_abi,
options::OPT_fno_objc_nonfragile_abi,
nonFragileABIIsDefault)) {
// Determine the non-fragile ABI version to use.
#ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO
unsigned nonFragileABIVersion = 1;
#else
unsigned nonFragileABIVersion = 2;
#endif
if (Arg *abiArg =
args.getLastArg(options::OPT_fobjc_nonfragile_abi_version_EQ)) {
StringRef value = abiArg->getValue();
if (value == "1")
nonFragileABIVersion = 1;
else if (value == "2")
nonFragileABIVersion = 2;
else
getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported)
<< value;
}
objcABIVersion = 1 + nonFragileABIVersion;
} else {
objcABIVersion = 1;
}
}
// We don't actually care about the ABI version other than whether
// it's non-fragile.
bool isNonFragile = objcABIVersion != 1;
// If we have no runtime argument, ask the toolchain for its default runtime.
// However, the rewriter only really supports the Mac runtime, so assume that.
ObjCRuntime runtime;
if (!runtimeArg) {
switch (rewriteKind) {
case RK_None:
runtime = getToolChain().getDefaultObjCRuntime(isNonFragile);
break;
case RK_Fragile:
runtime = ObjCRuntime(ObjCRuntime::FragileMacOSX, VersionTuple());
break;
case RK_NonFragile:
runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple());
break;
}
// -fnext-runtime
} else if (runtimeArg->getOption().matches(options::OPT_fnext_runtime)) {
// On Darwin, make this use the default behavior for the toolchain.
if (getToolChain().getTriple().isOSDarwin()) {
runtime = getToolChain().getDefaultObjCRuntime(isNonFragile);
// Otherwise, build for a generic macosx port.
} else {
runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple());
}
// -fgnu-runtime
} else {
assert(runtimeArg->getOption().matches(options::OPT_fgnu_runtime));
// Legacy behaviour is to target the gnustep runtime if we are in
// non-fragile mode or the GCC runtime in fragile mode.
if (isNonFragile)
runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple(2, 0));
else
runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple());
}
if (llvm::any_of(inputs, [](const InputInfo &input) {
return types::isObjC(input.getType());
}))
cmdArgs.push_back(
args.MakeArgString("-fobjc-runtime=" + runtime.getAsString()));
return runtime;
}
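// Helper for parsing /EH values: returns true if the modifier at EH[I] is
// enabled, i.e. it is not followed by a '-'; a trailing '-' is consumed by
// advancing I.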
static bool maybeConsumeDash(const std::string &EH, size_t &I) {
bool HaveDash = (I + 1 < EH.size() && EH[I + 1] == '-');
I += HaveDash;
return !HaveDash;
}
namespace {
struct EHFlags {
bool Synch = false;
bool Asynch = false;
bool NoUnwindC = false;
};
} // end anonymous namespace
/// /EH controls whether to run destructor cleanups when exceptions are
/// thrown. There are three modifiers:
/// - s: Cleanup after "synchronous" exceptions, aka C++ exceptions.
/// - a: Cleanup after "asynchronous" exceptions, aka structured exceptions.
/// The 'a' modifier is unimplemented and fundamentally hard in LLVM IR.
/// - c: Assume that extern "C" functions are implicitly nounwind.
/// The default is /EHs-c-, meaning cleanups are disabled.
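/// For example, /EHsc sets Synch and NoUnwindC, /EHa sets Asynch (clearing
/// Synch), and /EHs-c- leaves all three flags false.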
static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) {
EHFlags EH;
std::vector<std::string> EHArgs =
Args.getAllArgValues(options::OPT__SLASH_EH);
for (auto EHVal : EHArgs) {
for (size_t I = 0, E = EHVal.size(); I != E; ++I) {
switch (EHVal[I]) {
case 'a':
EH.Asynch = maybeConsumeDash(EHVal, I);
if (EH.Asynch)
EH.Synch = false;
continue;
case 'c':
EH.NoUnwindC = maybeConsumeDash(EHVal, I);
continue;
case 's':
EH.Synch = maybeConsumeDash(EHVal, I);
if (EH.Synch)
EH.Asynch = false;
continue;
default:
break;
}
D.Diag(clang::diag::err_drv_invalid_value) << "/EH" << EHVal;
break;
}
}
// The /GX and /GX- flags are only processed if there are no /EH flags.
// The default is that /GX is not specified.
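// (/GX is MSVC's legacy spelling of /EHsc, hence Synch + NoUnwindC here.)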
if (EHArgs.empty() &&
Args.hasFlag(options::OPT__SLASH_GX, options::OPT__SLASH_GX_,
/*Default=*/false)) {
EH.Synch = true;
EH.NoUnwindC = true;
}
return EH;
}
void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
ArgStringList &CmdArgs,
codegenoptions::DebugInfoKind *DebugInfoKind,
bool *EmitCodeView) const {
unsigned RTOptionID = options::OPT__SLASH_MT;
bool isNVPTX = getToolChain().getTriple().isNVPTX();
if (Args.hasArg(options::OPT__SLASH_LDd))
// The /LDd option implies /MTd. The dependent lib part can be overridden,
// but defining _DEBUG is sticky.
RTOptionID = options::OPT__SLASH_MTd;
if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group))
RTOptionID = A->getOption().getID();
StringRef FlagForCRT;
switch (RTOptionID) {
case options::OPT__SLASH_MD:
if (Args.hasArg(options::OPT__SLASH_LDd))
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-D_DLL");
FlagForCRT = "--dependent-lib=msvcrt";
break;
case options::OPT__SLASH_MDd:
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-D_DLL");
FlagForCRT = "--dependent-lib=msvcrtd";
break;
case options::OPT__SLASH_MT:
if (Args.hasArg(options::OPT__SLASH_LDd))
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-flto-visibility-public-std");
FlagForCRT = "--dependent-lib=libcmt";
break;
case options::OPT__SLASH_MTd:
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-flto-visibility-public-std");
FlagForCRT = "--dependent-lib=libcmtd";
break;
default:
llvm_unreachable("Unexpected option ID.");
}
if (Args.hasArg(options::OPT__SLASH_Zl)) {
CmdArgs.push_back("-D_VC_NODEFAULTLIB");
} else {
CmdArgs.push_back(FlagForCRT.data());
// This provides POSIX compatibility (maps 'open' to '_open'), which most
// users want. The /Za flag to cl.exe turns this off, but it's not
// implemented in clang.
CmdArgs.push_back("--dependent-lib=oldnames");
}
if (Arg *ShowIncludes =
Args.getLastArg(options::OPT__SLASH_showIncludes,
options::OPT__SLASH_showIncludes_user)) {
CmdArgs.push_back("--show-includes");
if (ShowIncludes->getOption().matches(options::OPT__SLASH_showIncludes))
CmdArgs.push_back("-sys-header-deps");
}
// This controls whether or not we emit RTTI data for polymorphic types.
if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
/*Default=*/false))
CmdArgs.push_back("-fno-rtti-data");
// This controls whether or not we emit stack-protector instrumentation.
// In MSVC, Buffer Security Check (/GS) is on by default.
if (!isNVPTX && Args.hasFlag(options::OPT__SLASH_GS, options::OPT__SLASH_GS_,
/*Default=*/true)) {
CmdArgs.push_back("-stack-protector");
CmdArgs.push_back(Args.MakeArgString(Twine(LangOptions::SSPStrong)));
}
// Emit CodeView if -Z7 or -gline-tables-only are present.
if (Arg *DebugInfoArg = Args.getLastArg(options::OPT__SLASH_Z7,
options::OPT_gline_tables_only)) {
*EmitCodeView = true;
if (DebugInfoArg->getOption().matches(options::OPT__SLASH_Z7))
*DebugInfoKind = codegenoptions::DebugInfoConstructor;
else
*DebugInfoKind = codegenoptions::DebugLineTablesOnly;
} else {
*EmitCodeView = false;
}
const Driver &D = getToolChain().getDriver();
EHFlags EH = parseClangCLEHFlags(D, Args);
if (!isNVPTX && (EH.Synch || EH.Asynch)) {
if (types::isCXX(InputType))
CmdArgs.push_back("-fcxx-exceptions");
CmdArgs.push_back("-fexceptions");
}
if (types::isCXX(InputType) && EH.Synch && EH.NoUnwindC)
CmdArgs.push_back("-fexternc-nounwind");
// /EP should expand to -E -P.
if (Args.hasArg(options::OPT__SLASH_EP)) {
CmdArgs.push_back("-E");
CmdArgs.push_back("-P");
}
unsigned VolatileOptionID;
if (getToolChain().getTriple().isX86())
VolatileOptionID = options::OPT__SLASH_volatile_ms;
else
VolatileOptionID = options::OPT__SLASH_volatile_iso;
if (Arg *A = Args.getLastArg(options::OPT__SLASH_volatile_Group))
VolatileOptionID = A->getOption().getID();
if (VolatileOptionID == options::OPT__SLASH_volatile_ms)
CmdArgs.push_back("-fms-volatile");
if (Args.hasFlag(options::OPT__SLASH_Zc_dllexportInlines_,
options::OPT__SLASH_Zc_dllexportInlines,
false)) {
CmdArgs.push_back("-fno-dllexport-inlines");
}
Arg *MostGeneralArg = Args.getLastArg(options::OPT__SLASH_vmg);
Arg *BestCaseArg = Args.getLastArg(options::OPT__SLASH_vmb);
if (MostGeneralArg && BestCaseArg)
D.Diag(clang::diag::err_drv_argument_not_allowed_with)
<< MostGeneralArg->getAsString(Args) << BestCaseArg->getAsString(Args);
if (MostGeneralArg) {
Arg *SingleArg = Args.getLastArg(options::OPT__SLASH_vms);
Arg *MultipleArg = Args.getLastArg(options::OPT__SLASH_vmm);
Arg *VirtualArg = Args.getLastArg(options::OPT__SLASH_vmv);
Arg *FirstConflict = SingleArg ? SingleArg : MultipleArg;
Arg *SecondConflict = VirtualArg ? VirtualArg : MultipleArg;
if (FirstConflict && SecondConflict && FirstConflict != SecondConflict)
D.Diag(clang::diag::err_drv_argument_not_allowed_with)
<< FirstConflict->getAsString(Args)
<< SecondConflict->getAsString(Args);
if (SingleArg)
CmdArgs.push_back("-fms-memptr-rep=single");
else if (MultipleArg)
CmdArgs.push_back("-fms-memptr-rep=multiple");
else
CmdArgs.push_back("-fms-memptr-rep=virtual");
}
// Parse the default calling convention options.
if (Arg *CCArg =
Args.getLastArg(options::OPT__SLASH_Gd, options::OPT__SLASH_Gr,
options::OPT__SLASH_Gz, options::OPT__SLASH_Gv,
options::OPT__SLASH_Gregcall)) {
unsigned DCCOptId = CCArg->getOption().getID();
const char *DCCFlag = nullptr;
bool ArchSupported = !isNVPTX;
llvm::Triple::ArchType Arch = getToolChain().getArch();
switch (DCCOptId) {
case options::OPT__SLASH_Gd:
DCCFlag = "-fdefault-calling-conv=cdecl";
break;
case options::OPT__SLASH_Gr:
ArchSupported = Arch == llvm::Triple::x86;
DCCFlag = "-fdefault-calling-conv=fastcall";
break;
case options::OPT__SLASH_Gz:
ArchSupported = Arch == llvm::Triple::x86;
DCCFlag = "-fdefault-calling-conv=stdcall";
break;
case options::OPT__SLASH_Gv:
ArchSupported = Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64;
DCCFlag = "-fdefault-calling-conv=vectorcall";
break;
case options::OPT__SLASH_Gregcall:
ArchSupported = Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64;
DCCFlag = "-fdefault-calling-conv=regcall";
break;
}
// MSVC doesn't warn if /Gr or /Gz is used on x64, so we don't either.
if (ArchSupported && DCCFlag)
CmdArgs.push_back(DCCFlag);
}
Args.AddLastArg(CmdArgs, options::OPT_vtordisp_mode_EQ);
if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) {
CmdArgs.push_back("-fdiagnostics-format");
CmdArgs.push_back("msvc");
}
if (Arg *A = Args.getLastArg(options::OPT__SLASH_guard)) {
StringRef GuardArgs = A->getValue();
// The only valid options are "cf", "cf,nochecks", "cf-", "ehcont" and
// "ehcont-".
if (GuardArgs.equals_insensitive("cf")) {
// Emit CFG instrumentation and the table of address-taken functions.
CmdArgs.push_back("-cfguard");
} else if (GuardArgs.equals_insensitive("cf,nochecks")) {
// Emit only the table of address-taken functions.
CmdArgs.push_back("-cfguard-no-checks");
} else if (GuardArgs.equals_insensitive("ehcont")) {
// Emit EH continuation table.
CmdArgs.push_back("-ehcontguard");
} else if (GuardArgs.equals_insensitive("cf-") ||
GuardArgs.equals_insensitive("ehcont-")) {
// Do nothing, but we might want to emit a security warning in future.
} else {
D.Diag(diag::err_drv_invalid_value) << A->getSpelling() << GuardArgs;
}
}
}
const char *Clang::getBaseInputName(const ArgList &Args,
const InputInfo &Input) {
return Args.MakeArgString(llvm::sys::path::filename(Input.getBaseInput()));
}
const char *Clang::getBaseInputStem(const ArgList &Args,
const InputInfoList &Inputs) {
const char *Str = getBaseInputName(Args, Inputs[0]);
if (const char *End = strrchr(Str, '.'))
return Args.MakeArgString(std::string(Str, End));
return Str;
}
const char *Clang::getDependencyFileName(const ArgList &Args,
const InputInfoList &Inputs) {
// FIXME: Think about this more.
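// For example, `-o build/foo.o` yields build/foo.d here; without -o the name
// is derived from the input stem instead (foo.c -> foo.d).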
if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) {
SmallString<128> OutputFilename(OutputOpt->getValue());
llvm::sys::path::replace_extension(OutputFilename, llvm::Twine('d'));
return Args.MakeArgString(OutputFilename);
}
return Args.MakeArgString(Twine(getBaseInputStem(Args, Inputs)) + ".d");
}
// Begin ClangAs
void ClangAs::AddMIPSTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
StringRef CPUName;
StringRef ABIName;
const llvm::Triple &Triple = getToolChain().getTriple();
mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
}
void ClangAs::AddX86TargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
addX86AlignBranchArgs(getToolChain().getDriver(), Args, CmdArgs,
/*IsLTO=*/false);
if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) {
StringRef Value = A->getValue();
if (Value == "intel" || Value == "att") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value));
} else {
getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
void ClangAs::AddRISCVTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &Triple = getToolChain().getTriple();
StringRef ABIName = riscv::getRISCVABI(Args, Triple);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
}
void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Unexpected number of inputs.");
const InputInfo &Input = Inputs[0];
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
const std::string &TripleStr = Triple.getTriple();
const auto &D = getToolChain().getDriver();
// Don't warn about "clang -w -c foo.s"
Args.ClaimAllArgs(options::OPT_w);
// and "clang -emit-llvm -c foo.s"
Args.ClaimAllArgs(options::OPT_emit_llvm);
claimNoWarnArgs(Args);
// Invoke ourselves in -cc1as mode.
//
// FIXME: Implement custom jobs for internal actions.
CmdArgs.push_back("-cc1as");
// Add the "effective" target triple.
CmdArgs.push_back("-triple");
CmdArgs.push_back(Args.MakeArgString(TripleStr));
// Set the output mode; we currently only expect to be used as a real
// assembler.
CmdArgs.push_back("-filetype");
CmdArgs.push_back("obj");
// Set the main file name, so that debug info works even with
// -save-temps or preprocessed assembly.
CmdArgs.push_back("-main-file-name");
CmdArgs.push_back(Clang::getBaseInputName(Args, Input));
// Add the target cpu
std::string CPU = getCPUName(Args, Triple, /*FromAs*/ true);
if (!CPU.empty()) {
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPU));
}
// Add the target features
getTargetFeatures(D, Triple, Args, CmdArgs, true);
// Ignore explicit -force_cpusubtype_ALL option.
(void)Args.hasArg(options::OPT_force__cpusubtype__ALL);
// Pass along any -I options so we get proper .include search paths.
Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
// Determine the original source input.
const Action *SourceAction = &JA;
while (SourceAction->getKind() != Action::InputClass) {
assert(!SourceAction->getInputs().empty() && "unexpected root action!");
SourceAction = SourceAction->getInputs()[0];
}
// Forward -g and handle debug info related flags, assuming we are dealing
// with an actual assembly file.
bool WantDebug = false;
Args.ClaimAllArgs(options::OPT_g_Group);
if (Arg *A = Args.getLastArg(options::OPT_g_Group))
WantDebug = !A->getOption().matches(options::OPT_g0) &&
!A->getOption().matches(options::OPT_ggdb0);
unsigned DwarfVersion = ParseDebugDefaultVersion(getToolChain(), Args);
if (const Arg *GDwarfN = getDwarfNArg(Args))
DwarfVersion = DwarfVersionNum(GDwarfN->getSpelling());
if (DwarfVersion == 0)
DwarfVersion = getToolChain().GetDefaultDwarfVersion();
codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
if (SourceAction->getType() == types::TY_Asm ||
SourceAction->getType() == types::TY_PP_Asm) {
// You might think that it would be ok to set DebugInfoKind outside of
// the guard for source type, however there is a test which asserts
// that some assembler invocation receives no -debug-info-kind,
// and it's not clear whether that test is just overly restrictive.
DebugInfoKind = (WantDebug ? codegenoptions::DebugInfoConstructor
: codegenoptions::NoDebugInfo);
// Add the -fdebug-compilation-dir flag if needed.
addDebugCompDirArg(Args, CmdArgs, C.getDriver().getVFS());
addDebugPrefixMapArg(getToolChain().getDriver(), Args, CmdArgs);
// Set the AT_producer to the clang version when using the integrated
// assembler on assembly source files.
CmdArgs.push_back("-dwarf-debug-producer");
CmdArgs.push_back(Args.MakeArgString(getClangFullVersion()));
// And pass along -I options
Args.AddAllArgs(CmdArgs, options::OPT_I);
}
RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion,
llvm::DebuggerKind::Default);
renderDwarfFormat(D, Triple, Args, CmdArgs, DwarfVersion);
RenderDebugInfoCompressionArgs(Args, CmdArgs, D, getToolChain());
// Handle -fPIC et al -- the relocation-model affects the assembler
// for some targets.
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) =
ParsePICArgs(getToolChain(), Args);
const char *RMName = RelocationModelName(RelocationModel);
if (RMName) {
CmdArgs.push_back("-mrelocation-model");
CmdArgs.push_back(RMName);
}
// Optionally embed the -cc1as level arguments into the debug info, for build
// analysis.
if (getToolChain().UseDwarfDebugFlags()) {
ArgStringList OriginalArgs;
for (const auto &Arg : Args)
Arg->render(Args, OriginalArgs);
SmallString<256> Flags;
const char *Exec = getToolChain().getDriver().getClangProgramPath();
EscapeSpacesAndBackslashes(Exec, Flags);
for (const char *OriginalArg : OriginalArgs) {
SmallString<128> EscapedArg;
EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
Flags += " ";
Flags += EscapedArg;
}
CmdArgs.push_back("-dwarf-debug-flags");
CmdArgs.push_back(Args.MakeArgString(Flags));
}
// FIXME: Add -static support, once we have it.
// Add target specific flags.
switch (getToolChain().getArch()) {
default:
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
AddMIPSTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
AddX86TargetArgs(Args, CmdArgs);
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
// This isn't in AddARMTargetArgs because we want to do this for assembly
// only, not C/C++.
if (Args.hasFlag(options::OPT_mdefault_build_attributes,
options::OPT_mno_default_build_attributes, true)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-add-build-attributes");
}
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
if (Args.hasArg(options::OPT_mmark_bti_property)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-aarch64-mark-bti-property");
}
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
AddRISCVTargetArgs(Args, CmdArgs);
break;
}
// Consume all the warning flags. Usually this would be handled more
// gracefully by -cc1 (warning about unknown warning flags, etc.), but -cc1as
// doesn't handle that, so rather than warning about unused flags that are
// actually used, we'll lie by omission instead.
// FIXME: Stop lying and consume only the appropriate driver flags
Args.ClaimAllArgs(options::OPT_W_Group);
CollectArgsForIntegratedAssembler(C, Args, CmdArgs,
getToolChain().getDriver());
Args.AddAllArgs(CmdArgs, options::OPT_mllvm);
assert(Output.isFilename() && "Unexpected lipo output.");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
const llvm::Triple &T = getToolChain().getTriple();
Arg *A;
if (getDebugFissionKind(D, Args, A) == DwarfFissionKind::Split &&
T.isOSBinFormatELF()) {
CmdArgs.push_back("-split-dwarf-output");
CmdArgs.push_back(SplitDebugName(JA, Args, Input, Output));
}
if (Triple.isAMDGPU())
handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);
assert(Input.isFilename() && "Invalid input.");
CmdArgs.push_back(Input.getFilename());
const char *Exec = getToolChain().getDriver().getClangProgramPath();
if (D.CC1Main && !D.CCGenDiagnostics) {
// Invoke cc1as directly in this process.
C.addCommand(std::make_unique<CC1Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
} else {
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
}
}
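// For illustration only (a sketch of the flags assembled above, not verbatim
// driver output): assembling foo.s with -g and -fPIC would lead to a -cc1as
// invocation roughly like
//   clang -cc1as ... -dwarf-debug-producer "clang version ..."
//     -mrelocation-model pic -o foo.o foo.s
// with -dwarf-debug-flags and -split-dwarf-output appended only when the
// corresponding options are in effect.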
// Begin OffloadBundler
void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const {
// The version with only one output is expected to refer to a bundling job.
assert(isa<OffloadBundlingJobAction>(JA) && "Expecting bundling job!");
// The bundling command looks like this:
// clang-offload-bundler -type=bc
// -targets=host-triple,openmp-triple1,openmp-triple2
// -outputs=bundled_file
// -inputs=unbundled_file_host,unbundled_file_tgt1,unbundled_file_tgt2
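// For example (illustrative file names and triples), bundling an x86_64 host
// module with one OpenMP device module might produce:
//   clang-offload-bundler -type=bc
//     -targets=host-x86_64-unknown-linux-gnu,openmp-nvptx64-nvidia-cuda
//     -outputs=a.bc -inputs=a-host.bc,a-openmp.bc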
ArgStringList CmdArgs;
// Get the type.
CmdArgs.push_back(TCArgs.MakeArgString(
Twine("-type=") + types::getTypeTempSuffix(Output.getType())));
assert(JA.getInputs().size() == Inputs.size() &&
"Not have inputs for all dependence actions??");
// Get the targets.
SmallString<128> Triples;
Triples += "-targets=";
for (unsigned I = 0; I < Inputs.size(); ++I) {
if (I)
Triples += ',';
// Find ToolChain for this input.
Action::OffloadKind CurKind = Action::OFK_Host;
const ToolChain *CurTC = &getToolChain();
const Action *CurDep = JA.getInputs()[I];
if (const auto *OA = dyn_cast<OffloadAction>(CurDep)) {
CurTC = nullptr;
OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) {
assert(CurTC == nullptr && "Expected one dependence!");
CurKind = A->getOffloadingDeviceKind();
CurTC = TC;
});
}
Triples += Action::GetOffloadKindName(CurKind);
Triples += "-";
std::string NormalizedTriple = CurTC->getTriple().normalize();
Triples += NormalizedTriple;
if (CurDep->getOffloadingArch() != nullptr) {
// If OffloadArch is present it can only appear as the 6th hyphen-separated
// field of the Bundle Entry ID. So, pad the required number of hyphens in
// the Triple.
for (int i = 4 - StringRef(NormalizedTriple).count("-"); i > 0; i--)
Triples += "-";
Triples += CurDep->getOffloadingArch();
}
}
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
// Get bundled file command.
CmdArgs.push_back(
TCArgs.MakeArgString(Twine("-outputs=") + Output.getFilename()));
// Get unbundled files command.
SmallString<128> UB;
UB += "-inputs=";
for (unsigned I = 0; I < Inputs.size(); ++I) {
if (I)
UB += ',';
// Find ToolChain for this input.
const ToolChain *CurTC = &getToolChain();
if (const auto *OA = dyn_cast<OffloadAction>(JA.getInputs()[I])) {
CurTC = nullptr;
OA->doOnEachDependence([&](Action *, const ToolChain *TC, const char *) {
assert(CurTC == nullptr && "Expected one dependence!");
CurTC = TC;
});
UB += C.addTempFile(
C.getArgs().MakeArgString(CurTC->getInputFilename(Inputs[I])));
} else {
UB += CurTC->getInputFilename(Inputs[I]);
}
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, None, Output));
}
void OffloadBundler::ConstructJobMultipleOutputs(
Compilation &C, const JobAction &JA, const InputInfoList &Outputs,
const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const {
// The version with multiple outputs is expected to refer to an unbundling job.
auto &UA = cast<OffloadUnbundlingJobAction>(JA);
// The unbundling command looks like this:
// clang-offload-bundler -type=bc
// -targets=host-triple,openmp-triple1,openmp-triple2
// -inputs=input_file
// -outputs=unbundled_file_host,unbundled_file_tgt1,unbundled_file_tgt2
// -unbundle
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Expecting to unbundle a single file!");
InputInfo Input = Inputs.front();
// Get the type.
CmdArgs.push_back(TCArgs.MakeArgString(
Twine("-type=") + types::getTypeTempSuffix(Input.getType())));
// Get the targets.
SmallString<128> Triples;
Triples += "-targets=";
auto DepInfo = UA.getDependentActionsInfo();
for (unsigned I = 0; I < DepInfo.size(); ++I) {
if (I)
Triples += ',';
auto &Dep = DepInfo[I];
Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind);
Triples += "-";
std::string NormalizedTriple =
Dep.DependentToolChain->getTriple().normalize();
Triples += NormalizedTriple;
if (!Dep.DependentBoundArch.empty()) {
// If OffloadArch is present it can only appear as the 6th hyphen-separated
// field of the Bundle Entry ID. So, pad the required number of hyphens in
// the Triple.
for (int i = 4 - StringRef(NormalizedTriple).count("-"); i > 0; i--)
Triples += "-";
Triples += Dep.DependentBoundArch;
}
}
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
// Get bundled file command.
CmdArgs.push_back(
TCArgs.MakeArgString(Twine("-inputs=") + Input.getFilename()));
// Get unbundled files command.
SmallString<128> UB;
UB += "-outputs=";
for (unsigned I = 0; I < Outputs.size(); ++I) {
if (I)
UB += ',';
UB += DepInfo[I].DependentToolChain->getInputFilename(Outputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
CmdArgs.push_back("-unbundle");
CmdArgs.push_back("-allow-missing-bundles");
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, None, Outputs));
}
void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
// Add the "effective" target triple.
CmdArgs.push_back("-target");
CmdArgs.push_back(Args.MakeArgString(Triple.getTriple()));
// Add the output file name.
assert(Output.isFilename() && "Invalid output.");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
// Add inputs.
for (const InputInfo &I : Inputs) {
assert(I.isFilename() && "Invalid input.");
CmdArgs.push_back(I.getFilename());
}
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
Args.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, Inputs, Output));
}
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 83cab3ac00cb..0ffe95795381 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1,1739 +1,1740 @@
//===--- CommonArgs.cpp - Args handling for multiple toolchains -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "CommonArgs.h"
#include "Arch/AArch64.h"
#include "Arch/ARM.h"
#include "Arch/M68k.h"
#include "Arch/Mips.h"
#include "Arch/PPC.h"
#include "Arch/SystemZ.h"
#include "Arch/VE.h"
#include "Arch/X86.h"
#include "HIP.h"
#include "Hexagon.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/ToolChain.h"
#include "clang/Driver/Util.h"
#include "clang/Driver/XRayArgs.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/YAMLParser.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
static void renderRpassOptions(const ArgList &Args, ArgStringList &CmdArgs) {
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("--plugin-opt=-pass-remarks=") +
A->getValue()));
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_missed_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=-pass-remarks-missed=") + A->getValue()));
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_analysis_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=-pass-remarks-analysis=") + A->getValue()));
}
static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::Triple &Triple,
const InputInfo &Input,
const InputInfo &Output) {
StringRef Format = "yaml";
if (const Arg *A = Args.getLastArg(options::OPT_fsave_optimization_record_EQ))
Format = A->getValue();
SmallString<128> F;
const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ);
if (A)
F = A->getValue();
else if (Output.isFilename())
F = Output.getFilename();
assert(!F.empty() && "Cannot determine remarks output name.");
// Append "opt.ld.<format>" to the end of the file name.
CmdArgs.push_back(
Args.MakeArgString(Twine("--plugin-opt=opt-remarks-filename=") + F +
Twine(".opt.ld.") + Format));
if (const Arg *A =
Args.getLastArg(options::OPT_foptimization_record_passes_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=opt-remarks-passes=") + A->getValue()));
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=opt-remarks-format=") + Format.data()));
}
static void renderRemarksHotnessOptions(const ArgList &Args,
ArgStringList &CmdArgs) {
if (Args.hasFlag(options::OPT_fdiagnostics_show_hotness,
options::OPT_fno_diagnostics_show_hotness, false))
CmdArgs.push_back("--plugin-opt=opt-remarks-with-hotness");
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_hotness_threshold_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=opt-remarks-hotness-threshold=") + A->getValue()));
}
void tools::addPathIfExists(const Driver &D, const Twine &Path,
ToolChain::path_list &Paths) {
if (D.getVFS().exists(Path))
Paths.push_back(Path.str());
}
void tools::handleTargetFeaturesGroup(const ArgList &Args,
std::vector<StringRef> &Features,
OptSpecifier Group) {
for (const Arg *A : Args.filtered(Group)) {
StringRef Name = A->getOption().getName();
A->claim();
// Skip over "-m".
assert(Name.startswith("m") && "Invalid feature name.");
Name = Name.substr(1);
bool IsNegative = Name.startswith("no-");
if (IsNegative)
Name = Name.substr(3);
Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
}
}
std::vector<StringRef>
tools::unifyTargetFeatures(const std::vector<StringRef> &Features) {
std::vector<StringRef> UnifiedFeatures;
// Find the last of each feature.
llvm::StringMap<unsigned> LastOpt;
for (unsigned I = 0, N = Features.size(); I < N; ++I) {
StringRef Name = Features[I];
assert(Name[0] == '-' || Name[0] == '+');
LastOpt[Name.drop_front(1)] = I;
}
for (unsigned I = 0, N = Features.size(); I < N; ++I) {
// If this feature was overridden, ignore it.
StringRef Name = Features[I];
llvm::StringMap<unsigned>::iterator LastI = LastOpt.find(Name.drop_front(1));
assert(LastI != LastOpt.end());
unsigned Last = LastI->second;
if (Last != I)
continue;
UnifiedFeatures.push_back(Name);
}
return UnifiedFeatures;
}
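// For example, unifying {"+sse2", "-sse2", "+avx"} keeps only the last
// occurrence of each feature name, yielding {"-sse2", "+avx"} with the order
// of the surviving entries preserved.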
void tools::addDirectoryList(const ArgList &Args, ArgStringList &CmdArgs,
const char *ArgName, const char *EnvVar) {
const char *DirList = ::getenv(EnvVar);
bool CombinedArg = false;
if (!DirList)
return; // Nothing to do.
StringRef Name(ArgName);
if (Name.equals("-I") || Name.equals("-L") || Name.empty())
CombinedArg = true;
StringRef Dirs(DirList);
if (Dirs.empty()) // Empty string should not add '.'.
return;
StringRef::size_type Delim;
while ((Delim = Dirs.find(llvm::sys::EnvPathSeparator)) != StringRef::npos) {
if (Delim == 0) { // Leading colon.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + "."));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(".");
}
} else {
if (CombinedArg) {
CmdArgs.push_back(
Args.MakeArgString(std::string(ArgName) + Dirs.substr(0, Delim)));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(Args.MakeArgString(Dirs.substr(0, Delim)));
}
}
Dirs = Dirs.substr(Delim + 1);
}
if (Dirs.empty()) { // Trailing colon.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + "."));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(".");
}
} else { // Add the last path.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + Dirs));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(Args.MakeArgString(Dirs));
}
}
}
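// For example, with ArgName "-L" (so CombinedArg is true), ':' as the path
// separator and LIBRARY_PATH=":/opt/lib:/usr/local/lib", this emits
// "-L." "-L/opt/lib" "-L/usr/local/lib"; a trailing separator would likewise
// append "-L." at the end.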
void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs,
const ArgList &Args, ArgStringList &CmdArgs,
const JobAction &JA) {
const Driver &D = TC.getDriver();
// Add extra linker input arguments which are not treated as inputs
// (constructed via -Xarch_).
Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input);
// LIBRARY_PATH is included before user inputs and is only supported on
// native toolchains.
if (!TC.isCrossCompiling())
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
for (const auto &II : Inputs) {
// If the current tool chain refers to an OpenMP offloading host, we
// should ignore inputs that refer to OpenMP offloading devices -
// they will be embedded according to a proper linker script.
if (auto *IA = II.getAction())
if ((JA.isHostOffloading(Action::OFK_OpenMP) &&
IA->isDeviceOffloading(Action::OFK_OpenMP)))
continue;
if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType()))
// Don't try to pass LLVM inputs unless we have native support.
D.Diag(diag::err_drv_no_linker_llvm_support) << TC.getTripleString();
// Add filenames immediately.
if (II.isFilename()) {
CmdArgs.push_back(II.getFilename());
continue;
}
// Otherwise, this is a linker input argument.
const Arg &A = II.getInputArg();
// Handle reserved library options.
if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx))
TC.AddCXXStdlibLibArgs(Args, CmdArgs);
else if (A.getOption().matches(options::OPT_Z_reserved_lib_cckext))
TC.AddCCKextLibArgs(Args, CmdArgs);
else if (A.getOption().matches(options::OPT_z)) {
// Pass -z prefix for gcc linker compatibility.
A.claim();
A.render(Args, CmdArgs);
} else {
A.renderAsInput(Args, CmdArgs);
}
}
}
void tools::addLinkerCompressDebugSectionsOption(
const ToolChain &TC, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) {
// GNU ld supports --compress-debug-sections=none|zlib|zlib-gnu|zlib-gabi
// whereas zlib is an alias to zlib-gabi. Therefore -gz=none|zlib|zlib-gnu
// are translated to --compress-debug-sections=none|zlib|zlib-gnu.
// -gz is not translated since ld's --compress-debug-sections option requires
// an argument.
if (const Arg *A = Args.getLastArg(options::OPT_gz_EQ)) {
StringRef V = A->getValue();
if (V == "none" || V == "zlib" || V == "zlib-gnu")
CmdArgs.push_back(Args.MakeArgString("--compress-debug-sections=" + V));
else
TC.getDriver().Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << V;
}
}
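// For example, -gz=zlib is forwarded as --compress-debug-sections=zlib, while
// any other value (say, -gz=zstd) is rejected with a diagnostic by the check
// above.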
void tools::AddTargetFeature(const ArgList &Args,
std::vector<StringRef> &Features,
OptSpecifier OnOpt, OptSpecifier OffOpt,
StringRef FeatureName) {
if (Arg *A = Args.getLastArg(OnOpt, OffOpt)) {
if (A->getOption().matches(OnOpt))
Features.push_back(Args.MakeArgString("+" + FeatureName));
else
Features.push_back(Args.MakeArgString("-" + FeatureName));
}
}
/// Get the (LLVM) name of the AMDGPU gpu we are targeting.
static std::string getAMDGPUTargetGPU(const llvm::Triple &T,
const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
auto GPUName = getProcessorFromTargetID(T, A->getValue());
return llvm::StringSwitch<std::string>(GPUName)
.Cases("rv630", "rv635", "r600")
.Cases("rv610", "rv620", "rs780", "rs880")
.Case("rv740", "rv770")
.Case("palm", "cedar")
.Cases("sumo", "sumo2", "sumo")
.Case("hemlock", "cypress")
.Case("aruba", "cayman")
.Default(GPUName.str());
}
return "";
}
static std::string getLanaiTargetCPU(const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
return A->getValue();
}
return "";
}
/// Get the (LLVM) name of the WebAssembly cpu we are targeting.
static StringRef getWebAssemblyTargetCPU(const ArgList &Args) {
// If we have -mcpu=, use that.
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef CPU = A->getValue();
#ifdef __wasm__
// Handle "native" by examining the host. "native" isn't meaningful when
// cross compiling, so only support this when the host is also WebAssembly.
if (CPU == "native")
return llvm::sys::getHostCPUName();
#endif
return CPU;
}
return "generic";
}
std::string tools::getCPUName(const ArgList &Args, const llvm::Triple &T,
bool FromAs) {
Arg *A;
switch (T.getArch()) {
default:
return "";
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
return aarch64::getAArch64TargetCPU(Args, T, A);
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb: {
StringRef MArch, MCPU;
arm::getARMArchCPUFromArgs(Args, MArch, MCPU, FromAs);
return arm::getARMTargetCPU(MCPU, MArch, T);
}
case llvm::Triple::avr:
if (const Arg *A = Args.getLastArg(options::OPT_mmcu_EQ))
return A->getValue();
return "";
case llvm::Triple::m68k:
return m68k::getM68kTargetCPU(Args);
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, T, CPUName, ABIName);
return std::string(CPUName);
}
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
return A->getValue();
return "";
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le: {
std::string TargetCPUName = ppc::getPPCTargetCPU(Args);
// LLVM may default to generating code for the native CPU,
// but, like gcc, we default to a more generic option for
// each architecture (except on AIX).
if (!TargetCPUName.empty())
return TargetCPUName;
if (T.isOSAIX()) {
unsigned major, minor, unused_micro;
T.getOSVersion(major, minor, unused_micro);
// The minimal arch level moved from pwr4 for AIX7.1 to
// pwr7 for AIX7.2.
TargetCPUName =
(major < 7 || (major == 7 && minor < 2)) ? "pwr4" : "pwr7";
} else if (T.getArch() == llvm::Triple::ppc64le)
TargetCPUName = "ppc64le";
else if (T.getArch() == llvm::Triple::ppc64)
TargetCPUName = "ppc64";
else
TargetCPUName = "ppc";
return TargetCPUName;
}
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
return A->getValue();
return "";
case llvm::Triple::bpfel:
case llvm::Triple::bpfeb:
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
return A->getValue();
if (T.getArch() == llvm::Triple::sparc && T.isOSSolaris())
return "v9";
return "";
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return x86::getX86TargetCPU(Args, T);
case llvm::Triple::hexagon:
return "hexagon" +
toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str();
case llvm::Triple::lanai:
return getLanaiTargetCPU(Args);
case llvm::Triple::systemz:
return systemz::getSystemZTargetCPU(Args);
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
return getAMDGPUTargetGPU(T, Args);
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
return std::string(getWebAssemblyTargetCPU(Args));
}
}
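// For example (following the cases above), ppc64le with no -mcpu yields
// "ppc64le", 32-bit SPARC on Solaris defaults to "v9", and WebAssembly
// targets fall back to "generic" unless -mcpu= is given.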
llvm::StringRef tools::getLTOParallelism(const ArgList &Args, const Driver &D) {
Arg *LtoJobsArg = Args.getLastArg(options::OPT_flto_jobs_EQ);
if (!LtoJobsArg)
return {};
if (!llvm::get_threadpool_strategy(LtoJobsArg->getValue()))
D.Diag(diag::err_drv_invalid_int_value)
<< LtoJobsArg->getAsString(Args) << LtoJobsArg->getValue();
return LtoJobsArg->getValue();
}
// CloudABI uses -ffunction-sections and -fdata-sections by default.
bool tools::isUseSeparateSections(const llvm::Triple &Triple) {
return Triple.getOS() == llvm::Triple::CloudABI;
}
void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
ArgStringList &CmdArgs, const InputInfo &Output,
const InputInfo &Input, bool IsThinLTO) {
const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath());
const Driver &D = ToolChain.getDriver();
if (llvm::sys::path::filename(Linker) != "ld.lld" &&
llvm::sys::path::stem(Linker) != "ld.lld") {
// Tell the linker to load the plugin. This has to come before
// AddLinkerInputs as gold requires -plugin to come before any -plugin-opt
// that -Wl might forward.
CmdArgs.push_back("-plugin");
#if defined(_WIN32)
const char *Suffix = ".dll";
#elif defined(__APPLE__)
const char *Suffix = ".dylib";
#else
const char *Suffix = ".so";
#endif
SmallString<1024> Plugin;
llvm::sys::path::native(
Twine(D.Dir) + "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold" + Suffix,
Plugin);
CmdArgs.push_back(Args.MakeArgString(Plugin));
}
// Try to pass driver level flags relevant to LTO code generation down to
// the plugin.
// Handle flags for selecting CPU variants.
std::string CPU = getCPUName(Args, ToolChain.getTriple());
if (!CPU.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU));
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
// The optimization level matches
// CompilerInvocation.cpp:getOptimizationLevel().
StringRef OOpt;
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
OOpt = "3";
else if (A->getOption().matches(options::OPT_O)) {
OOpt = A->getValue();
if (OOpt == "g")
OOpt = "1";
else if (OOpt == "s" || OOpt == "z")
OOpt = "2";
} else if (A->getOption().matches(options::OPT_O0))
OOpt = "0";
if (!OOpt.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=O") + OOpt));
}
if (Args.hasArg(options::OPT_gsplit_dwarf)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=dwo_dir=") +
Output.getFilename() + "_dwo"));
}
if (IsThinLTO)
CmdArgs.push_back("-plugin-opt=thinlto");
StringRef Parallelism = getLTOParallelism(Args, D);
if (!Parallelism.empty())
CmdArgs.push_back(
Args.MakeArgString("-plugin-opt=jobs=" + Twine(Parallelism)));
// If an explicit debugger tuning argument appeared, pass it along.
if (Arg *A = Args.getLastArg(options::OPT_gTune_Group,
options::OPT_ggdbN_Group)) {
if (A->getOption().matches(options::OPT_glldb))
CmdArgs.push_back("-plugin-opt=-debugger-tune=lldb");
else if (A->getOption().matches(options::OPT_gsce))
CmdArgs.push_back("-plugin-opt=-debugger-tune=sce");
else if (A->getOption().matches(options::OPT_gdbx))
CmdArgs.push_back("-plugin-opt=-debugger-tune=dbx");
else
CmdArgs.push_back("-plugin-opt=-debugger-tune=gdb");
}
bool UseSeparateSections =
isUseSeparateSections(ToolChain.getEffectiveTriple());
if (Args.hasFlag(options::OPT_ffunction_sections,
options::OPT_fno_function_sections, UseSeparateSections)) {
CmdArgs.push_back("-plugin-opt=-function-sections");
}
if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections,
UseSeparateSections)) {
CmdArgs.push_back("-plugin-opt=-data-sections");
}
if (Arg *A = getLastProfileSampleUseArg(Args)) {
StringRef FName = A->getValue();
if (!llvm::sys::fs::exists(FName))
D.Diag(diag::err_drv_no_such_file) << FName;
else
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName));
}
auto *CSPGOGenerateArg = Args.getLastArg(options::OPT_fcs_profile_generate,
options::OPT_fcs_profile_generate_EQ,
options::OPT_fno_profile_generate);
if (CSPGOGenerateArg &&
CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
CSPGOGenerateArg = nullptr;
auto *ProfileUseArg = getLastProfileUseArg(Args);
if (CSPGOGenerateArg) {
CmdArgs.push_back(Args.MakeArgString("-plugin-opt=cs-profile-generate"));
if (CSPGOGenerateArg->getOption().matches(
options::OPT_fcs_profile_generate_EQ)) {
SmallString<128> Path(CSPGOGenerateArg->getValue());
llvm::sys::path::append(Path, "default_%m.profraw");
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=cs-profile-path=") + Path));
} else
CmdArgs.push_back(
Args.MakeArgString("-plugin-opt=cs-profile-path=default_%m.profraw"));
} else if (ProfileUseArg) {
SmallString<128> Path(
ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue());
if (Path.empty() || llvm::sys::fs::is_directory(Path))
llvm::sys::path::append(Path, "default.profdata");
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=cs-profile-path=") +
Path));
}
// Pass an option to enable/disable the new pass manager.
if (auto *A = Args.getLastArg(options::OPT_flegacy_pass_manager,
options::OPT_fno_legacy_pass_manager)) {
if (A->getOption().matches(options::OPT_flegacy_pass_manager))
CmdArgs.push_back("-plugin-opt=legacy-pass-manager");
else
CmdArgs.push_back("-plugin-opt=new-pass-manager");
}
// Pass an option to enable pseudo probe emission.
if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
options::OPT_fno_pseudo_probe_for_profiling, false))
CmdArgs.push_back("-plugin-opt=pseudo-probe-for-profiling");
// Setup statistics file output.
SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
if (!StatsFile.empty())
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=stats-file=") + StatsFile));
addX86AlignBranchArgs(D, Args, CmdArgs, /*IsLTO=*/true);
// Handle remark diagnostics on screen options: '-Rpass-*'.
renderRpassOptions(Args, CmdArgs);
// Handle serialized remarks options: '-fsave-optimization-record'
// and '-foptimization-record-*'.
if (willEmitRemarks(Args))
renderRemarksOptions(Args, CmdArgs, ToolChain.getEffectiveTriple(), Input,
Output);
// Handle remarks hotness/threshold related options.
renderRemarksHotnessOptions(Args, CmdArgs);
addMachineOutlinerArgs(D, Args, CmdArgs, ToolChain.getEffectiveTriple(),
/*IsLTO=*/true);
}
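// For illustration, an LTO link with -flto=thin and -O2 adds, among other
// things, -plugin-opt=thinlto and -plugin-opt=O2, plus -plugin and the
// LLVMgold plugin path whenever the linker is not lld.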
void tools::addArchSpecificRPath(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
// Enable -frtlib-add-rpath by default for the case of VE.
const bool IsVE = TC.getTriple().isVE();
bool DefaultValue = IsVE;
if (!Args.hasFlag(options::OPT_frtlib_add_rpath,
options::OPT_fno_rtlib_add_rpath, DefaultValue))
return;
std::string CandidateRPath = TC.getArchSpecificLibPath();
if (TC.getVFS().exists(CandidateRPath)) {
CmdArgs.push_back("-rpath");
CmdArgs.push_back(Args.MakeArgString(CandidateRPath.c_str()));
}
}
bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC,
const ArgList &Args, bool ForceStaticHostRuntime,
bool IsOffloadingHost, bool GompNeedsRT) {
if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false))
return false;
Driver::OpenMPRuntimeKind RTKind = TC.getDriver().getOpenMPRuntime(Args);
if (RTKind == Driver::OMPRT_Unknown)
// Already diagnosed.
return false;
if (ForceStaticHostRuntime)
CmdArgs.push_back("-Bstatic");
switch (RTKind) {
case Driver::OMPRT_OMP:
CmdArgs.push_back("-lomp");
break;
case Driver::OMPRT_GOMP:
CmdArgs.push_back("-lgomp");
break;
case Driver::OMPRT_IOMP5:
CmdArgs.push_back("-liomp5");
break;
case Driver::OMPRT_Unknown:
break;
}
if (ForceStaticHostRuntime)
CmdArgs.push_back("-Bdynamic");
if (RTKind == Driver::OMPRT_GOMP && GompNeedsRT)
CmdArgs.push_back("-lrt");
if (IsOffloadingHost)
CmdArgs.push_back("-lomptarget");
addArchSpecificRPath(TC, Args, CmdArgs);
return true;
}
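// For example, when RTKind is Driver::OMPRT_OMP this adds -lomp (plus
// -lomptarget if IsOffloadingHost), wrapped in -Bstatic/-Bdynamic when
// ForceStaticHostRuntime is set.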
static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs, StringRef Sanitizer,
bool IsShared, bool IsWhole) {
// Wrap any static runtimes that must be forced into the executable in
// whole-archive.
if (IsWhole) CmdArgs.push_back("--whole-archive");
CmdArgs.push_back(TC.getCompilerRTArgString(
Args, Sanitizer, IsShared ? ToolChain::FT_Shared : ToolChain::FT_Static));
if (IsWhole) CmdArgs.push_back("--no-whole-archive");
if (IsShared) {
addArchSpecificRPath(TC, Args, CmdArgs);
}
}
// Tries to use a file with the list of dynamic symbols that need to be exported
// from the runtime library. Returns true if the file was found.
static bool addSanitizerDynamicList(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs,
StringRef Sanitizer) {
// Solaris ld defaults to --export-dynamic behaviour but doesn't support
// the option, so don't try to pass it.
if (TC.getTriple().getOS() == llvm::Triple::Solaris)
return true;
SmallString<128> SanRT(TC.getCompilerRT(Args, Sanitizer));
if (llvm::sys::fs::exists(SanRT + ".syms")) {
CmdArgs.push_back(Args.MakeArgString("--dynamic-list=" + SanRT + ".syms"));
return true;
}
return false;
}
static const char *getAsNeededOption(const ToolChain &TC, bool as_needed) {
assert(!TC.getTriple().isOSAIX() &&
"AIX linker does not support any form of --as-needed option yet.");
// While the Solaris 11.2 ld added --as-needed/--no-as-needed as aliases
// for the native forms -z ignore/-z record, they are missing in Illumos,
// so always use the native form.
if (TC.getTriple().isOSSolaris())
return as_needed ? "-zignore" : "-zrecord";
else
return as_needed ? "--as-needed" : "--no-as-needed";
}
void tools::linkSanitizerRuntimeDeps(const ToolChain &TC,
ArgStringList &CmdArgs) {
// Fuchsia never needs these. Any sanitizer runtimes with system
// dependencies use the `.deplibs` feature instead.
if (TC.getTriple().isOSFuchsia())
return;
// Force linking against the system libraries sanitizers depend on
// (see PR15823 for why this is necessary).
CmdArgs.push_back(getAsNeededOption(TC, false));
// There's no libpthread or librt on RTEMS & Android.
if (TC.getTriple().getOS() != llvm::Triple::RTEMS &&
!TC.getTriple().isAndroid()) {
CmdArgs.push_back("-lpthread");
if (!TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-lrt");
}
CmdArgs.push_back("-lm");
// Not all OSes have libdl.
if (!TC.getTriple().isOSFreeBSD() && !TC.getTriple().isOSNetBSD() &&
!TC.getTriple().isOSOpenBSD() &&
TC.getTriple().getOS() != llvm::Triple::RTEMS)
CmdArgs.push_back("-ldl");
// Required for backtrace on some OSes
if (TC.getTriple().isOSFreeBSD() ||
- TC.getTriple().isOSNetBSD())
+ TC.getTriple().isOSNetBSD() ||
+ TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-lexecinfo");
}
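// For example, on FreeBSD the list above typically expands to
//   --no-as-needed -lpthread -lrt -lm -lexecinfo
// (and no -ldl); with the change above, OpenBSD now also links -lexecinfo.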
static void
collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
SmallVectorImpl<StringRef> &SharedRuntimes,
SmallVectorImpl<StringRef> &StaticRuntimes,
SmallVectorImpl<StringRef> &NonWholeStaticRuntimes,
SmallVectorImpl<StringRef> &HelperStaticRuntimes,
SmallVectorImpl<StringRef> &RequiredSymbols) {
const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
// Collect shared runtimes.
if (SanArgs.needsSharedRt()) {
if (SanArgs.needsAsanRt() && SanArgs.linkRuntimes()) {
SharedRuntimes.push_back("asan");
if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid())
HelperStaticRuntimes.push_back("asan-preinit");
}
if (SanArgs.needsMemProfRt() && SanArgs.linkRuntimes()) {
SharedRuntimes.push_back("memprof");
if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid())
HelperStaticRuntimes.push_back("memprof-preinit");
}
if (SanArgs.needsUbsanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime())
SharedRuntimes.push_back("ubsan_minimal");
else
SharedRuntimes.push_back("ubsan_standalone");
}
if (SanArgs.needsScudoRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime())
SharedRuntimes.push_back("scudo_minimal");
else
SharedRuntimes.push_back("scudo");
}
if (SanArgs.needsTsanRt() && SanArgs.linkRuntimes())
SharedRuntimes.push_back("tsan");
if (SanArgs.needsHwasanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.needsHwasanAliasesRt())
SharedRuntimes.push_back("hwasan_aliases");
else
SharedRuntimes.push_back("hwasan");
}
}
// The stats_client library is also statically linked into DSOs.
if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("stats_client");
// Collect static runtimes.
if (Args.hasArg(options::OPT_shared)) {
// Don't link static runtimes into DSOs.
return;
}
// Each static runtime that has a DSO counterpart above is excluded below,
// but runtimes that exist only as static are not affected by needsSharedRt.
if (!SanArgs.needsSharedRt() && SanArgs.needsAsanRt() && SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("asan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("asan_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsMemProfRt() &&
SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("memprof");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("memprof_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsHwasanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.needsHwasanAliasesRt()) {
StaticRuntimes.push_back("hwasan_aliases");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("hwasan_aliases_cxx");
} else {
StaticRuntimes.push_back("hwasan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("hwasan_cxx");
}
}
if (SanArgs.needsDfsanRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("dfsan");
if (SanArgs.needsLsanRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("lsan");
if (SanArgs.needsMsanRt() && SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("msan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("msan_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsTsanRt() &&
SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("tsan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("tsan_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsUbsanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime()) {
StaticRuntimes.push_back("ubsan_minimal");
} else {
StaticRuntimes.push_back("ubsan_standalone");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("ubsan_standalone_cxx");
}
}
if (SanArgs.needsSafeStackRt() && SanArgs.linkRuntimes()) {
NonWholeStaticRuntimes.push_back("safestack");
RequiredSymbols.push_back("__safestack_init");
}
if (!(SanArgs.needsSharedRt() && SanArgs.needsUbsanRt() && SanArgs.linkRuntimes())) {
if (SanArgs.needsCfiRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("cfi");
if (SanArgs.needsCfiDiagRt() && SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("cfi_diag");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("ubsan_standalone_cxx");
}
}
if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes()) {
NonWholeStaticRuntimes.push_back("stats");
RequiredSymbols.push_back("__sanitizer_stats_register");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsScudoRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime()) {
StaticRuntimes.push_back("scudo_minimal");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("scudo_cxx_minimal");
} else {
StaticRuntimes.push_back("scudo");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("scudo_cxx");
}
}
}
// Should be called before we add system libraries (C++ ABI, libstdc++/libc++,
// C runtime, etc). Returns true if sanitizer system deps need to be linked in.
bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
SmallVector<StringRef, 4> SharedRuntimes, StaticRuntimes,
NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols;
collectSanitizerRuntimes(TC, Args, SharedRuntimes, StaticRuntimes,
NonWholeStaticRuntimes, HelperStaticRuntimes,
RequiredSymbols);
const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
// Inject libfuzzer dependencies.
if (SanArgs.needsFuzzer() && SanArgs.linkRuntimes() &&
!Args.hasArg(options::OPT_shared)) {
addSanitizerRuntime(TC, Args, CmdArgs, "fuzzer", false, true);
if (SanArgs.needsFuzzerInterceptors())
addSanitizerRuntime(TC, Args, CmdArgs, "fuzzer_interceptors", false,
true);
if (!Args.hasArg(clang::driver::options::OPT_nostdlibxx)) {
bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
!Args.hasArg(options::OPT_static);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bstatic");
TC.AddCXXStdlibLibArgs(Args, CmdArgs);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bdynamic");
}
}
for (auto RT : SharedRuntimes)
addSanitizerRuntime(TC, Args, CmdArgs, RT, true, false);
for (auto RT : HelperStaticRuntimes)
addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true);
bool AddExportDynamic = false;
for (auto RT : StaticRuntimes) {
addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true);
AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT);
}
for (auto RT : NonWholeStaticRuntimes) {
addSanitizerRuntime(TC, Args, CmdArgs, RT, false, false);
AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT);
}
for (auto S : RequiredSymbols) {
CmdArgs.push_back("-u");
CmdArgs.push_back(Args.MakeArgString(S));
}
// If there is a static runtime with no dynamic list, force all the symbols
// to be dynamic to be sure we export sanitizer interface functions.
if (AddExportDynamic)
CmdArgs.push_back("--export-dynamic");
if (SanArgs.hasCrossDsoCfi() && !AddExportDynamic)
CmdArgs.push_back("--export-dynamic-symbol=__cfi_check");
return !StaticRuntimes.empty() || !NonWholeStaticRuntimes.empty();
}
bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringList &CmdArgs) {
if (Args.hasArg(options::OPT_shared))
return false;
if (TC.getXRayArgs().needsXRayRt()) {
CmdArgs.push_back("-whole-archive");
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray"));
for (const auto &Mode : TC.getXRayArgs().modeList())
CmdArgs.push_back(TC.getCompilerRTArgString(Args, Mode));
CmdArgs.push_back("-no-whole-archive");
return true;
}
return false;
}
void tools::linkXRayRuntimeDeps(const ToolChain &TC, ArgStringList &CmdArgs) {
CmdArgs.push_back(getAsNeededOption(TC, false));
CmdArgs.push_back("-lpthread");
if (!TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-lrt");
CmdArgs.push_back("-lm");
if (!TC.getTriple().isOSFreeBSD() &&
!TC.getTriple().isOSNetBSD() &&
!TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-ldl");
}
bool tools::areOptimizationsEnabled(const ArgList &Args) {
// Find the last -O arg and see if it is non-zero.
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
return !A->getOption().matches(options::OPT_O0);
// Defaults to -O0.
return false;
}
const char *tools::SplitDebugName(const JobAction &JA, const ArgList &Args,
const InputInfo &Input,
const InputInfo &Output) {
auto AddPostfix = [JA](auto &F) {
if (JA.getOffloadingDeviceKind() == Action::OFK_HIP)
F += (Twine("_") + JA.getOffloadingArch()).str();
F += ".dwo";
};
if (Arg *A = Args.getLastArg(options::OPT_gsplit_dwarf_EQ))
if (StringRef(A->getValue()) == "single")
return Args.MakeArgString(Output.getFilename());
Arg *FinalOutput = Args.getLastArg(options::OPT_o);
if (FinalOutput && Args.hasArg(options::OPT_c)) {
SmallString<128> T(FinalOutput->getValue());
llvm::sys::path::remove_filename(T);
llvm::sys::path::append(T, llvm::sys::path::stem(FinalOutput->getValue()));
AddPostfix(T);
return Args.MakeArgString(T);
} else {
// Use the compilation dir.
Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
options::OPT_fdebug_compilation_dir_EQ);
SmallString<128> T(A ? A->getValue() : "");
SmallString<128> F(llvm::sys::path::stem(Input.getBaseInput()));
AddPostfix(F);
T += F;
return Args.MakeArgString(T);
}
}
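// For example, "clang -c -gsplit-dwarf -o build/foo.o foo.c" names the DWARF
// output "build/foo.dwo"; for a HIP device compile the bound arch is folded
// in as well, e.g. "foo_gfx906.dwo" (gfx906 being purely illustrative).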
void tools::SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T,
const JobAction &JA, const ArgList &Args,
const InputInfo &Output, const char *OutFile) {
ArgStringList ExtractArgs;
ExtractArgs.push_back("--extract-dwo");
ArgStringList StripArgs;
StripArgs.push_back("--strip-dwo");
// Grabbing the output of the earlier compile step.
StripArgs.push_back(Output.getFilename());
ExtractArgs.push_back(Output.getFilename());
ExtractArgs.push_back(OutFile);
const char *Exec =
Args.MakeArgString(TC.GetProgramPath(CLANG_DEFAULT_OBJCOPY));
InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename());
// First extract the dwo sections.
C.addCommand(std::make_unique<Command>(JA, T,
ResponseFileSupport::AtFileCurCP(),
Exec, ExtractArgs, II, Output));
// Then remove them from the original .o file.
C.addCommand(std::make_unique<Command>(
JA, T, ResponseFileSupport::AtFileCurCP(), Exec, StripArgs, II, Output));
}
// Claim options we don't want to warn about if they are unused. We do this
// for options that build systems might add but that are unused when, for
// example, assembling or only running the preprocessor.
void tools::claimNoWarnArgs(const ArgList &Args) {
// Don't warn about unused -f(no-)?lto. This can happen when we're
// preprocessing, precompiling or assembling.
Args.ClaimAllArgs(options::OPT_flto_EQ);
Args.ClaimAllArgs(options::OPT_flto);
Args.ClaimAllArgs(options::OPT_fno_lto);
}
Arg *tools::getLastProfileUseArg(const ArgList &Args) {
auto *ProfileUseArg = Args.getLastArg(
options::OPT_fprofile_instr_use, options::OPT_fprofile_instr_use_EQ,
options::OPT_fprofile_use, options::OPT_fprofile_use_EQ,
options::OPT_fno_profile_instr_use);
if (ProfileUseArg &&
ProfileUseArg->getOption().matches(options::OPT_fno_profile_instr_use))
ProfileUseArg = nullptr;
return ProfileUseArg;
}
Arg *tools::getLastProfileSampleUseArg(const ArgList &Args) {
auto *ProfileSampleUseArg = Args.getLastArg(
options::OPT_fprofile_sample_use, options::OPT_fprofile_sample_use_EQ,
options::OPT_fauto_profile, options::OPT_fauto_profile_EQ,
options::OPT_fno_profile_sample_use, options::OPT_fno_auto_profile);
if (ProfileSampleUseArg &&
(ProfileSampleUseArg->getOption().matches(
options::OPT_fno_profile_sample_use) ||
ProfileSampleUseArg->getOption().matches(options::OPT_fno_auto_profile)))
return nullptr;
return Args.getLastArg(options::OPT_fprofile_sample_use_EQ,
options::OPT_fauto_profile_EQ);
}
/// Parses the various -fpic/-fPIC/-fpie/-fPIE arguments. Then,
/// smooshes them together with platform defaults, to decide whether
/// this compile should be using PIC mode or not. Returns a tuple of
/// (RelocationModel, PICLevel, IsPIE).
std::tuple<llvm::Reloc::Model, unsigned, bool>
tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) {
const llvm::Triple &EffectiveTriple = ToolChain.getEffectiveTriple();
const llvm::Triple &Triple = ToolChain.getTriple();
bool PIE = ToolChain.isPIEDefault();
bool PIC = PIE || ToolChain.isPICDefault();
// The Darwin/MachO default to use PIC does not apply when using -static.
if (Triple.isOSBinFormatMachO() && Args.hasArg(options::OPT_static))
PIE = PIC = false;
bool IsPICLevelTwo = PIC;
bool KernelOrKext =
Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
// Android-specific defaults for PIC/PIE
if (Triple.isAndroid()) {
switch (Triple.getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::aarch64:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
PIC = true; // "-fpic"
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
PIC = true; // "-fPIC"
IsPICLevelTwo = true;
break;
default:
break;
}
}
// OpenBSD-specific defaults for PIE
if (Triple.isOSOpenBSD()) {
switch (ToolChain.getArch()) {
case llvm::Triple::arm:
case llvm::Triple::aarch64:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
IsPICLevelTwo = false; // "-fpie"
break;
case llvm::Triple::ppc:
case llvm::Triple::sparcv9:
IsPICLevelTwo = true; // "-fPIE"
break;
default:
break;
}
}
// AMDGPU-specific defaults for PIC.
if (Triple.getArch() == llvm::Triple::amdgcn)
PIC = true;
// The last argument relating to either PIC or PIE wins, and no
// other argument is used. If the last argument is any flavor of the
// '-fno-...' arguments, both PIC and PIE are disabled. Any PIE
// option implicitly enables PIC at the same level.
Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
options::OPT_fpic, options::OPT_fno_pic,
options::OPT_fPIE, options::OPT_fno_PIE,
options::OPT_fpie, options::OPT_fno_pie);
if (Triple.isOSWindows() && LastPICArg &&
LastPICArg ==
Args.getLastArg(options::OPT_fPIC, options::OPT_fpic,
options::OPT_fPIE, options::OPT_fpie)) {
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< LastPICArg->getSpelling() << Triple.str();
if (Triple.getArch() == llvm::Triple::x86_64)
return std::make_tuple(llvm::Reloc::PIC_, 2U, false);
return std::make_tuple(llvm::Reloc::Static, 0U, false);
}
// Check whether the tool chain trumps the PIC-ness decision. If the PIC-ness
// is forced, then neither PIC nor PIE flags will have any effect.
if (!ToolChain.isPICDefaultForced()) {
if (LastPICArg) {
Option O = LastPICArg->getOption();
if (O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic) ||
O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) {
PIE = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie);
PIC =
PIE || O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic);
IsPICLevelTwo =
O.matches(options::OPT_fPIE) || O.matches(options::OPT_fPIC);
} else {
PIE = PIC = false;
if (EffectiveTriple.isPS4CPU()) {
Arg *ModelArg = Args.getLastArg(options::OPT_mcmodel_EQ);
StringRef Model = ModelArg ? ModelArg->getValue() : "";
if (Model != "kernel") {
PIC = true;
ToolChain.getDriver().Diag(diag::warn_drv_ps4_force_pic)
<< LastPICArg->getSpelling();
}
}
}
}
}
// Introduce a Darwin and PS4-specific hack. If the default is PIC, but the
// PIC level would've been set to level 1, force it back to level 2 PIC
// instead.
if (PIC && (Triple.isOSDarwin() || EffectiveTriple.isPS4CPU()))
IsPICLevelTwo |= ToolChain.isPICDefault();
// These kernel flags are a trump card: they will disable PIC/PIE
// generation, independent of the argument order.
if (KernelOrKext &&
((!EffectiveTriple.isiOS() || EffectiveTriple.isOSVersionLT(6)) &&
!EffectiveTriple.isWatchOS()))
PIC = PIE = false;
if (Arg *A = Args.getLastArg(options::OPT_mdynamic_no_pic)) {
// This is a very special mode. It trumps the other modes, almost no one
// uses it, and it isn't even valid on any OS but Darwin.
if (!Triple.isOSDarwin())
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << Triple.str();
// FIXME: Warn when this flag trumps some other PIC or PIE flag.
// Only a forced PIC mode can cause the actual compile to have PIC defines
// etc., no flags are sufficient. This behavior was selected to closely
// match that of llvm-gcc and Apple GCC before that.
PIC = ToolChain.isPICDefault() && ToolChain.isPICDefaultForced();
return std::make_tuple(llvm::Reloc::DynamicNoPIC, PIC ? 2U : 0U, false);
}
bool EmbeddedPISupported;
switch (Triple.getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
EmbeddedPISupported = true;
break;
default:
EmbeddedPISupported = false;
break;
}
bool ROPI = false, RWPI = false;
Arg* LastROPIArg = Args.getLastArg(options::OPT_fropi, options::OPT_fno_ropi);
if (LastROPIArg && LastROPIArg->getOption().matches(options::OPT_fropi)) {
if (!EmbeddedPISupported)
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< LastROPIArg->getSpelling() << Triple.str();
ROPI = true;
}
Arg *LastRWPIArg = Args.getLastArg(options::OPT_frwpi, options::OPT_fno_rwpi);
if (LastRWPIArg && LastRWPIArg->getOption().matches(options::OPT_frwpi)) {
if (!EmbeddedPISupported)
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< LastRWPIArg->getSpelling() << Triple.str();
RWPI = true;
}
// ROPI and RWPI are not compatible with PIC or PIE.
if ((ROPI || RWPI) && (PIC || PIE))
ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic);
if (Triple.isMIPS()) {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
// When targeting the N64 ABI, PIC is the default, except when the
// -mno-abicalls option is used. In that case we exit at the next check
// regardless of PIC being set below.
if (ABIName == "n64")
PIC = true;
// When targeting MIPS with -mno-abicalls, it's always static.
if (Args.hasArg(options::OPT_mno_abicalls))
return std::make_tuple(llvm::Reloc::Static, 0U, false);
// Unlike other architectures, MIPS, even with -fPIC/-mxgot/multigot,
// does not use PIC level 2 for historical reasons.
IsPICLevelTwo = false;
}
if (PIC)
return std::make_tuple(llvm::Reloc::PIC_, IsPICLevelTwo ? 2U : 1U, PIE);
llvm::Reloc::Model RelocM = llvm::Reloc::Static;
if (ROPI && RWPI)
RelocM = llvm::Reloc::ROPI_RWPI;
else if (ROPI)
RelocM = llvm::Reloc::ROPI;
else if (RWPI)
RelocM = llvm::Reloc::RWPI;
return std::make_tuple(RelocM, 0U, false);
}
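// For illustration: on a toolchain that is neither PIC nor PIE by default,
// -fPIC yields (PIC_, 2, false), -fpie yields (PIC_, 1, true), and no PIC/PIE
// flag at all yields (Static, 0, false); actual results also depend on the
// target-specific overrides handled above.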
// `-falign-functions` indicates that the functions should be aligned to a
// 16-byte boundary.
//
// `-falign-functions=1` is the same as `-fno-align-functions`.
//
// The scalar `n` in `-falign-functions=n` must be an integral value in the
// range [0, 65536]. If the value is not a power of two, it will be rounded
// up to the nearest power of two.
//
// If we return `0`, the frontend will default to the backend's preferred
// alignment.
//
// NOTE: icc only allows values in the range [0, 4096]. icc uses
// `-falign-functions` to mean `-falign-functions=16`. GCC defaults to the
// backend's preferred alignment. For unaligned functions, we default to the
// backend's preferred alignment.
unsigned tools::ParseFunctionAlignment(const ToolChain &TC,
const ArgList &Args) {
const Arg *A = Args.getLastArg(options::OPT_falign_functions,
options::OPT_falign_functions_EQ,
options::OPT_fno_align_functions);
if (!A || A->getOption().matches(options::OPT_fno_align_functions))
return 0;
if (A->getOption().matches(options::OPT_falign_functions))
return 0;
unsigned Value = 0;
if (StringRef(A->getValue()).getAsInteger(10, Value) || Value > 65536)
TC.getDriver().Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << A->getValue();
return Value ? llvm::Log2_32_Ceil(std::min(Value, 65536u)) : Value;
}
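// For example, -falign-functions=32 returns log2(32) == 5, a non-power-of-two
// value such as 48 rounds up and returns 6, and both -falign-functions=1 and
// -fno-align-functions return 0.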
unsigned tools::ParseDebugDefaultVersion(const ToolChain &TC,
const ArgList &Args) {
const Arg *A = Args.getLastArg(options::OPT_fdebug_default_version);
if (!A)
return 0;
unsigned Value = 0;
if (StringRef(A->getValue()).getAsInteger(10, Value) || Value > 5 ||
Value < 2)
TC.getDriver().Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << A->getValue();
return Value;
}
void tools::AddAssemblerKPIC(const ToolChain &ToolChain, const ArgList &Args,
ArgStringList &CmdArgs) {
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(ToolChain, Args);
if (RelocationModel != llvm::Reloc::Static)
CmdArgs.push_back("-KPIC");
}
/// Determine whether Objective-C automated reference counting is
/// enabled.
bool tools::isObjCAutoRefCount(const ArgList &Args) {
return Args.hasFlag(options::OPT_fobjc_arc, options::OPT_fno_objc_arc, false);
}
enum class LibGccType { UnspecifiedLibGcc, StaticLibGcc, SharedLibGcc };
static LibGccType getLibGccType(const ToolChain &TC, const Driver &D,
const ArgList &Args) {
if (Args.hasArg(options::OPT_static_libgcc) ||
Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_static_pie))
return LibGccType::StaticLibGcc;
if (Args.hasArg(options::OPT_shared_libgcc))
return LibGccType::SharedLibGcc;
// The Android NDK only provides libunwind.a, not libunwind.so.
if (TC.getTriple().isAndroid())
return LibGccType::StaticLibGcc;
// For MinGW, don't imply a shared libgcc here; we only want to return
// SharedLibGcc if that was explicitly requested.
if (D.CCCIsCXX() && !TC.getTriple().isOSCygMing())
return LibGccType::SharedLibGcc;
return LibGccType::UnspecifiedLibGcc;
}
// Gcc adds libgcc arguments in various ways:
//
// gcc <none>: -lgcc --as-needed -lgcc_s --no-as-needed
// g++ <none>: -lgcc_s -lgcc
// gcc shared: -lgcc_s -lgcc
// g++ shared: -lgcc_s -lgcc
// gcc static: -lgcc -lgcc_eh
// g++ static: -lgcc -lgcc_eh
// gcc static-pie: -lgcc -lgcc_eh
// g++ static-pie: -lgcc -lgcc_eh
//
// Also, certain targets need additional adjustments.
static void AddUnwindLibrary(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
ToolChain::UnwindLibType UNW = TC.GetUnwindLibType(Args);
// Targets that don't use unwind libraries.
if ((TC.getTriple().isAndroid() && UNW == ToolChain::UNW_Libgcc) ||
TC.getTriple().isOSIAMCU() || TC.getTriple().isOSBinFormatWasm() ||
UNW == ToolChain::UNW_None)
return;
LibGccType LGT = getLibGccType(TC, D, Args);
bool AsNeeded = LGT == LibGccType::UnspecifiedLibGcc &&
!TC.getTriple().isAndroid() &&
!TC.getTriple().isOSCygMing() && !TC.getTriple().isOSAIX();
if (AsNeeded)
CmdArgs.push_back(getAsNeededOption(TC, true));
switch (UNW) {
case ToolChain::UNW_None:
return;
case ToolChain::UNW_Libgcc: {
if (LGT == LibGccType::StaticLibGcc)
CmdArgs.push_back("-lgcc_eh");
else
CmdArgs.push_back("-lgcc_s");
break;
}
case ToolChain::UNW_CompilerRT:
if (TC.getTriple().isOSAIX()) {
// AIX only has libunwind as a shared library. So do not pass
// anything in if -static is specified.
if (LGT != LibGccType::StaticLibGcc)
CmdArgs.push_back("-lunwind");
} else if (LGT == LibGccType::StaticLibGcc) {
CmdArgs.push_back("-l:libunwind.a");
} else if (TC.getTriple().isOSCygMing()) {
if (LGT == LibGccType::SharedLibGcc)
CmdArgs.push_back("-l:libunwind.dll.a");
else
// Let the linker choose between libunwind.dll.a and libunwind.a
// depending on what's available, and depending on the -static flag
CmdArgs.push_back("-lunwind");
} else {
CmdArgs.push_back("-l:libunwind.so");
}
break;
}
if (AsNeeded)
CmdArgs.push_back(getAsNeededOption(TC, false));
}
static void AddLibgcc(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
LibGccType LGT = getLibGccType(TC, D, Args);
if (LGT != LibGccType::SharedLibGcc)
CmdArgs.push_back("-lgcc");
AddUnwindLibrary(TC, D, CmdArgs, Args);
if (LGT == LibGccType::SharedLibGcc)
CmdArgs.push_back("-lgcc");
}
void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
// Make use of compiler-rt if --rtlib option is used
ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(Args);
switch (RLT) {
case ToolChain::RLT_CompilerRT:
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins"));
AddUnwindLibrary(TC, D, CmdArgs, Args);
break;
case ToolChain::RLT_Libgcc:
// Make sure libgcc is not used under MSVC environment by default
if (TC.getTriple().isKnownWindowsMSVCEnvironment()) {
// Issue error diagnostic if libgcc is explicitly specified
// through command line as --rtlib option argument.
if (Args.hasArg(options::OPT_rtlib_EQ)) {
TC.getDriver().Diag(diag::err_drv_unsupported_rtlib_for_platform)
<< Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "MSVC";
}
} else
AddLibgcc(TC, D, CmdArgs, Args);
break;
}
// On Android, the unwinder uses dl_iterate_phdr (or one of
// dl_unwind_find_exidx/__gnu_Unwind_Find_exidx on arm32) from libdl.so. For
// statically-linked executables, these functions come from libc.a instead.
if (TC.getTriple().isAndroid() && !Args.hasArg(options::OPT_static) &&
!Args.hasArg(options::OPT_static_pie))
CmdArgs.push_back("-ldl");
}
SmallString<128> tools::getStatsFileName(const llvm::opt::ArgList &Args,
const InputInfo &Output,
const InputInfo &Input,
const Driver &D) {
const Arg *A = Args.getLastArg(options::OPT_save_stats_EQ);
if (!A)
return {};
StringRef SaveStats = A->getValue();
SmallString<128> StatsFile;
if (SaveStats == "obj" && Output.isFilename()) {
StatsFile.assign(Output.getFilename());
llvm::sys::path::remove_filename(StatsFile);
} else if (SaveStats != "cwd") {
D.Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << SaveStats;
return {};
}
StringRef BaseName = llvm::sys::path::filename(Input.getBaseInput());
llvm::sys::path::append(StatsFile, BaseName);
llvm::sys::path::replace_extension(StatsFile, "stats");
return StatsFile;
}
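// Illustrative examples, assuming a compilation of foo.c:
//   -save-stats=obj with "-o build/foo.o"  ->  build/foo.stats
//   -save-stats=cwd                        ->  foo.stats (in the current directory)
// Any other value is rejected with err_drv_invalid_value.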
void tools::addMultilibFlag(bool Enabled, const char *const Flag,
Multilib::flags_list &Flags) {
Flags.push_back(std::string(Enabled ? "+" : "-") + Flag);
}
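// Usage sketch: addMultilibFlag(true, "m64", Flags) appends "+m64", while
// addMultilibFlag(false, "m64", Flags) appends "-m64".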
void tools::addX86AlignBranchArgs(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs, bool IsLTO) {
auto addArg = [&, IsLTO](const Twine &Arg) {
if (IsLTO) {
CmdArgs.push_back(Args.MakeArgString("-plugin-opt=" + Arg));
} else {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString(Arg));
}
};
if (Args.hasArg(options::OPT_mbranches_within_32B_boundaries)) {
addArg(Twine("-x86-branches-within-32B-boundaries"));
}
if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_boundary_EQ)) {
StringRef Value = A->getValue();
unsigned Boundary;
if (Value.getAsInteger(10, Boundary) || Boundary < 16 ||
!llvm::isPowerOf2_64(Boundary)) {
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Value << A->getOption().getName();
} else {
addArg("-x86-align-branch-boundary=" + Twine(Boundary));
}
}
if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_EQ)) {
std::string AlignBranch;
for (StringRef T : A->getValues()) {
if (T != "fused" && T != "jcc" && T != "jmp" && T != "call" &&
T != "ret" && T != "indirect")
D.Diag(diag::err_drv_invalid_malign_branch_EQ)
<< T << "fused, jcc, jmp, call, ret, indirect";
if (!AlignBranch.empty())
AlignBranch += '+';
AlignBranch += T;
}
addArg("-x86-align-branch=" + Twine(AlignBranch));
}
if (const Arg *A = Args.getLastArg(options::OPT_mpad_max_prefix_size_EQ)) {
StringRef Value = A->getValue();
unsigned PrefixSize;
if (Value.getAsInteger(10, PrefixSize)) {
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Value << A->getOption().getName();
} else {
addArg("-x86-pad-max-prefix-size=" + Twine(PrefixSize));
}
}
}
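// Illustrative mapping for a non-LTO compilation:
//   -mbranches-within-32B-boundaries -> -mllvm -x86-branches-within-32B-boundaries
//   -malign-branch-boundary=32       -> -mllvm -x86-align-branch-boundary=32
//   -malign-branch=fused+jcc         -> -mllvm -x86-align-branch=fused+jcc
// Under LTO each value is passed as -plugin-opt=<value> instead.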
static llvm::opt::Arg *
getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
// The last of -mcode-object-v3, -mno-code-object-v3 and
// -mcode-object-version=<version> wins.
return Args.getLastArg(options::OPT_mcode_object_v3_legacy,
options::OPT_mno_code_object_v3_legacy,
options::OPT_mcode_object_version_EQ);
}
void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
const unsigned MinCodeObjVer = 2;
const unsigned MaxCodeObjVer = 4;
// Emit warnings for legacy options even if they are overridden.
if (Args.hasArg(options::OPT_mno_code_object_v3_legacy))
D.Diag(diag::warn_drv_deprecated_arg) << "-mno-code-object-v3"
<< "-mcode-object-version=2";
if (Args.hasArg(options::OPT_mcode_object_v3_legacy))
D.Diag(diag::warn_drv_deprecated_arg) << "-mcode-object-v3"
<< "-mcode-object-version=3";
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
if (CodeObjArg->getOption().getID() ==
options::OPT_mcode_object_version_EQ) {
unsigned CodeObjVer = MaxCodeObjVer;
auto Remnant =
StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
if (Remnant || CodeObjVer < MinCodeObjVer || CodeObjVer > MaxCodeObjVer)
D.Diag(diag::err_drv_invalid_int_value)
<< CodeObjArg->getAsString(Args) << CodeObjArg->getValue();
}
}
}
unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
unsigned CodeObjVer = 4; // default
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
if (CodeObjArg->getOption().getID() ==
options::OPT_mno_code_object_v3_legacy) {
CodeObjVer = 2;
} else if (CodeObjArg->getOption().getID() ==
options::OPT_mcode_object_v3_legacy) {
CodeObjVer = 3;
} else {
StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
}
}
return CodeObjVer;
}
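// Illustrative mapping of driver options to the returned version:
//   -mno-code-object-v3        -> 2
//   -mcode-object-v3           -> 3
//   -mcode-object-version=<n>  -> n
//   (no option)                -> 4 (the default above)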
bool tools::haveAMDGPUCodeObjectVersionArgument(
const Driver &D, const llvm::opt::ArgList &Args) {
return getAMDGPUCodeObjectArgument(D, Args) != nullptr;
}
void tools::addMachineOutlinerArgs(const Driver &D,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
const llvm::Triple &Triple, bool IsLTO) {
auto addArg = [&, IsLTO](const Twine &Arg) {
if (IsLTO) {
CmdArgs.push_back(Args.MakeArgString("-plugin-opt=" + Arg));
} else {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString(Arg));
}
};
if (Arg *A = Args.getLastArg(options::OPT_moutline,
options::OPT_mno_outline)) {
if (A->getOption().matches(options::OPT_moutline)) {
// We only support -moutline on AArch64 and ARM targets right now. If
// we are not compiling for one of these, emit a warning and ignore the flag.
// Otherwise, add the proper -mllvm flags.
if (!(Triple.isARM() || Triple.isThumb() ||
Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32)) {
D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName();
} else {
addArg(Twine("-enable-machine-outliner"));
}
} else {
// Disable all outlining behaviour.
addArg(Twine("-enable-machine-outliner=never"));
}
}
}
void tools::addOpenMPDeviceRTL(const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef BitcodeSuffix,
const llvm::Triple &Triple) {
SmallVector<StringRef, 8> LibraryPaths;
// Add user defined library paths from LIBRARY_PATH.
llvm::Optional<std::string> LibPath =
llvm::sys::Process::GetEnv("LIBRARY_PATH");
if (LibPath) {
SmallVector<StringRef, 8> Frags;
const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
for (StringRef Path : Frags)
LibraryPaths.emplace_back(Path.trim());
}
// Add path to lib / lib64 folder.
SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
LibraryPaths.emplace_back(DefaultLibPath.c_str());
OptSpecifier LibomptargetBCPathOpt =
Triple.isAMDGCN() ? options::OPT_libomptarget_amdgcn_bc_path_EQ
: options::OPT_libomptarget_nvptx_bc_path_EQ;
StringRef ArchPrefix = Triple.isAMDGCN() ? "amdgcn" : "nvptx";
// First, check whether the user explicitly specified a bitcode library.
if (const Arg *A = DriverArgs.getLastArg(LibomptargetBCPathOpt)) {
std::string LibOmpTargetName(A->getValue());
if (llvm::sys::fs::exists(LibOmpTargetName)) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetName));
} else {
D.Diag(diag::err_drv_omp_offload_target_bcruntime_not_found)
<< LibOmpTargetName;
}
} else {
bool FoundBCLibrary = false;
std::string LibOmpTargetName =
"libomptarget-" + BitcodeSuffix.str() + ".bc";
for (StringRef LibraryPath : LibraryPaths) {
SmallString<128> LibOmpTargetFile(LibraryPath);
llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
if (llvm::sys::fs::exists(LibOmpTargetFile)) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
FoundBCLibrary = true;
break;
}
}
if (!FoundBCLibrary)
D.Diag(diag::err_drv_omp_offload_target_missingbcruntime)
<< LibOmpTargetName << ArchPrefix;
}
}
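// Illustrative sketch with a hypothetical BitcodeSuffix of "nvptx64-sm_70":
// the directories from LIBRARY_PATH and <driver dir>/../lib<CLANG_LIBDIR_SUFFIX>
// are searched for "libomptarget-nvptx64-sm_70.bc", and the first match is
// passed to the frontend via -mlink-builtin-bitcode.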
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/HIP.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/HIP.cpp
index 59d58aadb687..c4e840de86e1 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/HIP.cpp
@@ -1,487 +1,458 @@
//===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "HIP.h"
#include "AMDGPU.h"
#include "CommonArgs.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetID.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetParser.h"
using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
#if defined(_WIN32) || defined(_WIN64)
#define NULL_FILE "nul"
#else
#define NULL_FILE "/dev/null"
#endif
namespace {
const unsigned HIPCodeObjectAlign = 4096;
} // namespace
void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const InputInfo &Output,
const llvm::opt::ArgList &Args) const {
// Construct lld command.
// The output from ld.lld is an HSA code object file.
ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared",
"-plugin-opt=-amdgpu-internalize-symbols"};
auto &TC = getToolChain();
auto &D = TC.getDriver();
assert(!Inputs.empty() && "Must have at least one input.");
bool IsThinLTO = D.getLTOMode(/*IsOffload=*/true) == LTOK_Thin;
addLTOOptions(TC, Args, LldArgs, Output, Inputs[0], IsThinLTO);
// Extract all the -m options
std::vector<llvm::StringRef> Features;
amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features);
// Add features, such as cumode, to -mattr.
std::string MAttrString = "-plugin-opt=-mattr=";
for (auto OneFeature : unifyTargetFeatures(Features)) {
MAttrString.append(Args.MakeArgString(OneFeature));
if (OneFeature != Features.back())
MAttrString.append(",");
}
if (!Features.empty())
LldArgs.push_back(Args.MakeArgString(MAttrString));
// ToDo: Remove this option once the AMDGPU backend supports ISA-level linking.
// Since the AMDGPU backend currently does not support ISA-level linking, all
// called functions need to be imported.
if (IsThinLTO)
LldArgs.push_back(Args.MakeArgString("-plugin-opt=-force-import-all"));
for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
LldArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0)));
}
if (C.getDriver().isSaveTempsEnabled())
LldArgs.push_back("-save-temps");
addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
LldArgs.append({"-o", Output.getFilename()});
for (auto Input : Inputs)
LldArgs.push_back(Input.getFilename());
if (Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
false))
llvm::for_each(TC.getHIPDeviceLibs(Args), [&](StringRef BCFile) {
LldArgs.push_back(Args.MakeArgString(BCFile));
});
const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Lld, LldArgs, Inputs, Output));
}
// Construct a clang-offload-bundler command to bundle code objects for
// different GPUs into a HIP fat binary.
void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
StringRef OutputFileName, const InputInfoList &Inputs,
const llvm::opt::ArgList &Args, const Tool& T) {
// Construct a clang-offload-bundler command to bundle object files
// for different GPU archs.
ArgStringList BundlerArgs;
BundlerArgs.push_back(Args.MakeArgString("-type=o"));
BundlerArgs.push_back(
Args.MakeArgString("-bundle-align=" + Twine(HIPCodeObjectAlign)));
// ToDo: Remove the dummy host binary entry which is required by
// clang-offload-bundler.
std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
std::string BundlerInputArg = "-inputs=" NULL_FILE;
// For code object version 2 and 3, the offload kind in bundle ID is 'hip'
// for backward compatibility. For code object version 4 and greater, the
// offload kind in bundle ID is 'hipv4'.
std::string OffloadKind = "hip";
if (getAMDGPUCodeObjectVersion(C.getDriver(), Args) >= 4)
OffloadKind = OffloadKind + "v4";
for (const auto &II : Inputs) {
const auto* A = II.getAction();
BundlerTargetArg = BundlerTargetArg + "," + OffloadKind +
"-amdgcn-amd-amdhsa--" +
StringRef(A->getOffloadingArch()).str();
BundlerInputArg = BundlerInputArg + "," + II.getFilename();
}
BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
std::string Output = std::string(OutputFileName);
auto BundlerOutputArg =
Args.MakeArgString(std::string("-outputs=").append(Output));
BundlerArgs.push_back(BundlerOutputArg);
const char *Bundler = Args.MakeArgString(
T.getToolChain().GetProgramPath("clang-offload-bundler"));
C.addCommand(std::make_unique<Command>(
JA, T, ResponseFileSupport::None(), Bundler, BundlerArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(Output))));
}
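// Illustrative resulting command line (the gfx908 arch and file names are
// hypothetical):
//   clang-offload-bundler -type=o -bundle-align=4096 \
//     -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx908 \
//     -inputs=/dev/null,a-gfx908.o -outputs=a.hipfb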
/// Add the generated HIP object file, which has the device images embedded,
/// to the host link line. Using MC directives, embed the device code and
/// define the symbols required by code generation so that the image can be
/// retrieved at runtime.
void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
Compilation &C, const InputInfo &Output,
const InputInfoList &Inputs, const ArgList &Args,
const JobAction &JA) const {
const ToolChain &TC = getToolChain();
std::string Name =
std::string(llvm::sys::path::stem(Output.getFilename()));
// Create the temporary object-file generator input (.mcin) and the offload
// bundle file (.hipfb).
// Keep them if save-temps is enabled.
const char *McinFile;
const char *BundleFile;
if (C.getDriver().isSaveTempsEnabled()) {
McinFile = C.getArgs().MakeArgString(Name + ".mcin");
BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
} else {
auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin");
McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin));
auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb");
BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb));
}
constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this);
// Create a buffer to write the contents of the temp obj generator.
std::string ObjBuffer;
llvm::raw_string_ostream ObjStream(ObjBuffer);
// Add MC directives to embed the target binaries. The image is aligned to
// HIPCodeObjectAlign bytes. This is not mandatory, but it increases the
// likelihood that the data is aligned with a cache block on common host
// machines.
ObjStream << "# HIP Object Generator\n";
ObjStream << "# *** Automatically generated by Clang ***\n";
ObjStream << " .protected __hip_fatbin\n";
ObjStream << " .type __hip_fatbin,@object\n";
ObjStream << " .section .hip_fatbin,\"a\",@progbits\n";
ObjStream << " .globl __hip_fatbin\n";
ObjStream << " .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign))
<< "\n";
ObjStream << "__hip_fatbin:\n";
ObjStream << " .incbin \"" << BundleFile << "\"\n";
ObjStream.flush();
// Dump the contents of the temporary object-file generator if the user
// requested it. We support this option to enable testing of behavior with -###.
if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
llvm::errs() << ObjBuffer;
// Open script file and write the contents.
std::error_code EC;
llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None);
if (EC) {
C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return;
}
Objf << ObjBuffer;
ArgStringList McArgs{"-o", Output.getFilename(),
McinFile, "--filetype=obj"};
const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc"));
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Mc, McArgs, Inputs, Output));
}
// For amdgcn, the inputs of the linker job are device bitcode and the output
// is an object file. It calls the llvm-link, opt, llc, and lld steps.
void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
if (Inputs.size() > 0 &&
Inputs[0].getType() == types::TY_Image &&
JA.getType() == types::TY_Object)
return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA);
if (JA.getType() == types::TY_HIP_FATBIN)
return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
return constructLldCommand(C, JA, Inputs, Output, Args);
}
HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
const ToolChain &HostTC, const ArgList &Args)
: ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
// Look up binaries in the driver directory; this is used to
// discover the clang-offload-bundler executable.
getProgramPaths().push_back(getDriver().Dir);
}
void HIPToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
assert(DeviceOffloadingKind == Action::OFK_HIP &&
"Only HIP offloading kinds are supported for GPUs.");
CC1Args.push_back("-fcuda-is-device");
if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
options::OPT_fno_cuda_approx_transcendentals, false))
CC1Args.push_back("-fcuda-approx-transcendentals");
if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))
CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
StringRef MaxThreadsPerBlock =
DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
if (!MaxThreadsPerBlock.empty()) {
std::string ArgStr =
std::string("--gpu-max-threads-per-block=") + MaxThreadsPerBlock.str();
CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr));
}
CC1Args.push_back("-fcuda-allow-variadic-functions");
// Default to "hidden" visibility, as object-level linking will not be
// supported for the foreseeable future.
if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat)) {
CC1Args.append({"-fvisibility", "hidden"});
CC1Args.push_back("-fapply-global-visibility-to-externs");
}
llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](StringRef BCFile) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
});
}
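// Illustrative sketch: for a default HIP device compilation (no -fgpu-rdc, no
// explicit -fvisibility), the cc1 job typically receives roughly:
//   -fcuda-is-device -mllvm -amdgpu-internalize-symbols
//   -fcuda-allow-variadic-functions -fvisibility hidden
//   -fapply-global-visibility-to-externs
//   -mlink-builtin-bitcode <device-lib>.bc   (one per device library)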
llvm::opt::DerivedArgList *
HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
DerivedArgList *DAL =
HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
for (Arg *A : Args) {
if (!shouldSkipArgument(A))
DAL->append(A);
}
if (!BoundArch.empty()) {
DAL->eraseArg(options::OPT_mcpu_EQ);
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
checkTargetID(*DAL);
}
return DAL;
}
Tool *HIPToolChain::buildLinker() const {
assert(getTriple().getArch() == llvm::Triple::amdgcn);
return new tools::AMDGCN::Linker(*this);
}
void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
HostTC.addClangWarningOptions(CC1Args);
}
ToolChain::CXXStdlibType
HIPToolChain::GetCXXStdlibType(const ArgList &Args) const {
return HostTC.GetCXXStdlibType(Args);
}
void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
}
void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
ArgStringList &CC1Args) const {
HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
}
void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
ArgStringList &CC1Args) const {
HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
}
void HIPToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
}
SanitizerMask HIPToolChain::getSupportedSanitizers() const {
// The HIPToolChain only supports sanitizers in the sense that it allows
// sanitizer arguments on the command line if they are supported by the host
// toolchain. The HIPToolChain will actually ignore any command line
// arguments for any of these "supported" sanitizers. That means that no
// sanitization of device code is actually supported at this time.
//
// This behavior is necessary because the host and device toolchains
// invocations often share the command line, so the device toolchain must
// tolerate flags meant only for the host toolchain.
return HostTC.getSupportedSanitizers();
}
VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
const ArgList &Args) const {
return HostTC.computeMSVCVersion(D, Args);
}
llvm::SmallVector<std::string, 12>
HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
llvm::SmallVector<std::string, 12> BCLibs;
if (DriverArgs.hasArg(options::OPT_nogpulib))
return {};
ArgStringList LibraryPaths;
// Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
for (auto Path : RocmInstallation.getRocmDeviceLibPathArg())
LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH");
// Maintain compatibility with --hip-device-lib.
auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ);
if (!BCLibArgs.empty()) {
llvm::for_each(BCLibArgs, [&](StringRef BCName) {
StringRef FullName;
for (std::string LibraryPath : LibraryPaths) {
SmallString<128> Path(LibraryPath);
llvm::sys::path::append(Path, BCName);
FullName = Path;
if (llvm::sys::fs::exists(FullName)) {
BCLibs.push_back(FullName.str());
return;
}
}
getDriver().Diag(diag::err_drv_no_such_file) << BCName;
});
} else {
if (!RocmInstallation.hasDeviceLibrary()) {
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
return {};
}
StringRef GpuArch = getGPUArch(DriverArgs);
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
- (void)GpuArch;
- auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
- const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
-
- std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
- if (LibDeviceFile.empty()) {
- getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
- return {};
- }
// If --hip-device-lib is not set, add the default bitcode libraries.
- // TODO: There are way too many flags that change this. Do we need to check
- // them all?
- bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
- options::OPT_fno_gpu_flush_denormals_to_zero,
- getDefaultDenormsAreZeroForTarget(Kind));
- bool FiniteOnly =
- DriverArgs.hasFlag(options::OPT_ffinite_math_only,
- options::OPT_fno_finite_math_only, false);
- bool UnsafeMathOpt =
- DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
- options::OPT_fno_unsafe_math_optimizations, false);
- bool FastRelaxedMath = DriverArgs.hasFlag(
- options::OPT_ffast_math, options::OPT_fno_fast_math, false);
- bool CorrectSqrt = DriverArgs.hasFlag(
- options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
- options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
- bool Wave64 = isWave64(DriverArgs, Kind);
-
if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, false)) {
auto AsanRTL = RocmInstallation.getAsanRTLPath();
if (AsanRTL.empty()) {
unsigned DiagID = getDriver().getDiags().getCustomDiagID(
DiagnosticsEngine::Error,
"AMDGPU address sanitizer runtime library (asanrtl) is not found. "
"Please install ROCm device library which supports address "
"sanitizer");
getDriver().Diag(DiagID);
return {};
} else
BCLibs.push_back(AsanRTL.str());
}
// Add the HIP specific bitcode library.
BCLibs.push_back(RocmInstallation.getHIPPath().str());
- // Add the generic set of libraries.
- BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
- FastRelaxedMath, CorrectSqrt));
+ // Add common device libraries like ocml etc.
+ BCLibs.append(getCommonDeviceLibNames(DriverArgs, GpuArch.str()));
// Add instrument lib.
auto InstLib =
DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ);
if (InstLib.empty())
return BCLibs;
if (llvm::sys::fs::exists(InstLib))
BCLibs.push_back(InstLib.str());
else
getDriver().Diag(diag::err_drv_no_such_file) << InstLib;
}
return BCLibs;
}
void HIPToolChain::checkTargetID(const llvm::opt::ArgList &DriverArgs) const {
auto PTID = getParsedTargetID(DriverArgs);
if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
getDriver().Diag(clang::diag::err_drv_bad_target_id)
<< PTID.OptionalTargetID.getValue();
return;
}
assert(PTID.OptionalFeatures && "Invalid return from getParsedTargetID");
auto &FeatureMap = PTID.OptionalFeatures.getValue();
// Sanitizer is not supported with xnack-.
if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, false)) {
auto Loc = FeatureMap.find("xnack");
if (Loc != FeatureMap.end() && !Loc->second) {
auto &Diags = getDriver().getDiags();
auto DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error,
"'-fgpu-sanitize' is not compatible with offload arch '%0'. "
"Use an offload arch without 'xnack-' instead");
Diags.Report(DiagID) << PTID.OptionalTargetID.getValue();
}
}
}
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp
index e162165b2561..89828fbb6f5f 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp
@@ -1,316 +1,323 @@
//===--- OpenBSD.cpp - OpenBSD ToolChain Implementations --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "OpenBSD.h"
#include "Arch/Mips.h"
#include "Arch/Sparc.h"
#include "CommonArgs.h"
#include "clang/Config/config.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Path.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
claimNoWarnArgs(Args);
ArgStringList CmdArgs;
switch (getToolChain().getArch()) {
case llvm::Triple::x86:
// When building 32-bit code on OpenBSD/amd64, we have to explicitly
// instruct the base system's as(1) to assemble 32-bit code.
CmdArgs.push_back("--32");
break;
case llvm::Triple::ppc:
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-many");
break;
case llvm::Triple::sparcv9: {
CmdArgs.push_back("-64");
std::string CPU = getCPUName(Args, getToolChain().getTriple());
CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
break;
}
case llvm::Triple::mips64:
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
CmdArgs.push_back("-mabi");
CmdArgs.push_back(mips::getGnuCompatibleMipsABIName(ABIName).data());
if (getToolChain().getTriple().isLittleEndian())
CmdArgs.push_back("-EL");
else
CmdArgs.push_back("-EB");
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
break;
}
default:
break;
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs)
CmdArgs.push_back(II.getFilename());
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const toolchains::OpenBSD &ToolChain =
static_cast<const toolchains::OpenBSD &>(getToolChain());
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
// Silence warning for "clang -g foo.o -o foo"
Args.ClaimAllArgs(options::OPT_g_Group);
// and "clang -emit-llvm foo.o -o foo"
Args.ClaimAllArgs(options::OPT_emit_llvm);
// and for "clang -w foo.o -o foo". Other warning options are already
// handled somewhere else.
Args.ClaimAllArgs(options::OPT_w);
if (ToolChain.getArch() == llvm::Triple::mips64)
CmdArgs.push_back("-EB");
else if (ToolChain.getArch() == llvm::Triple::mips64el)
CmdArgs.push_back("-EL");
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) {
CmdArgs.push_back("-e");
CmdArgs.push_back("__start");
}
CmdArgs.push_back("--eh-frame-hdr");
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
CmdArgs.push_back("-Bdynamic");
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-shared");
} else {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back("/usr/libexec/ld.so");
}
}
if (Args.hasArg(options::OPT_pie))
CmdArgs.push_back("-pie");
if (Args.hasArg(options::OPT_nopie) || Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-nopie");
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
const char *crt0 = nullptr;
const char *crtbegin = nullptr;
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
crt0 = "gcrt0.o";
else if (Args.hasArg(options::OPT_static) &&
!Args.hasArg(options::OPT_nopie))
crt0 = "rcrt0.o";
else
crt0 = "crt0.o";
crtbegin = "crtbegin.o";
} else {
crtbegin = "crtbeginS.o";
}
if (crt0)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt0)));
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
ToolChain.AddFilePathLibArgs(Args, CmdArgs);
Args.AddAllArgs(CmdArgs, {options::OPT_T_Group, options::OPT_e,
options::OPT_s, options::OPT_t,
options::OPT_Z_Flag, options::OPT_r});
bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+ // Use the static OpenMP runtime with -static-openmp
+ bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) &&
+ !Args.hasArg(options::OPT_static);
+ addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP);
+
if (D.CCCIsCXX()) {
if (ToolChain.ShouldLinkCXXStdlib(Args))
ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lm_p");
else
CmdArgs.push_back("-lm");
}
if (NeedsSanitizerDeps) {
CmdArgs.push_back(ToolChain.getCompilerRTArgString(Args, "builtins"));
linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
}
if (NeedsXRayDeps) {
CmdArgs.push_back(ToolChain.getCompilerRTArgString(Args, "builtins"));
linkXRayRuntimeDeps(ToolChain, CmdArgs);
}
// FIXME: For some reason GCC passes -lgcc before adding
// the default system libraries. Just mimic this for now.
CmdArgs.push_back("-lcompiler_rt");
if (Args.hasArg(options::OPT_pthread)) {
if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lpthread_p");
else
CmdArgs.push_back("-lpthread");
}
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lc_p");
else
CmdArgs.push_back("-lc");
}
CmdArgs.push_back("-lcompiler_rt");
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
const char *crtend = nullptr;
if (!Args.hasArg(options::OPT_shared))
crtend = "crtend.o";
else
crtend = "crtendS.o";
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
}
+ ToolChain.addProfileRTLibs(Args, CmdArgs);
+
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
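// Illustrative link line for a plain "clang t.c" on OpenBSD, with paths
// abbreviated (exact output depends on the options passed):
//   ld -e __start --eh-frame-hdr -Bdynamic -dynamic-linker /usr/libexec/ld.so \
//     -o a.out crt0.o crtbegin.o t.o -lcompiler_rt -lc -lcompiler_rt crtend.o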
SanitizerMask OpenBSD::getSupportedSanitizers() const {
const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
// For future use, only UBsan at the moment
SanitizerMask Res = ToolChain::getSupportedSanitizers();
if (IsX86 || IsX86_64) {
Res |= SanitizerKind::Vptr;
Res |= SanitizerKind::Fuzzer;
Res |= SanitizerKind::FuzzerNoLink;
}
return Res;
}
/// OpenBSD - OpenBSD tool chain which can call as(1) and ld(1) directly.
OpenBSD::OpenBSD(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
getFilePaths().push_back(getDriver().SysRoot + "/usr/lib");
}
void OpenBSD::AddClangSystemIncludeArgs(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
const Driver &D = getDriver();
if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc))
return;
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
SmallString<128> Dir(D.ResourceDir);
llvm::sys::path::append(Dir, "include");
addSystemInclude(DriverArgs, CC1Args, Dir.str());
}
if (DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
// Check for configure-time C include directories.
StringRef CIncludeDirs(C_INCLUDE_DIRS);
if (CIncludeDirs != "") {
SmallVector<StringRef, 5> dirs;
CIncludeDirs.split(dirs, ":");
for (StringRef dir : dirs) {
StringRef Prefix =
llvm::sys::path::is_absolute(dir) ? StringRef(D.SysRoot) : "";
addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir);
}
return;
}
addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include");
}
void OpenBSD::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/v1");
}
void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
bool Profiling = Args.hasArg(options::OPT_pg);
CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++");
CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi");
CmdArgs.push_back(Profiling ? "-lpthread_p" : "-lpthread");
}
std::string OpenBSD::getCompilerRT(const ArgList &Args,
StringRef Component,
FileType Type) const {
SmallString<128> Path(getDriver().SysRoot);
llvm::sys::path::append(Path, "/usr/lib/libcompiler_rt.a");
return std::string(Path.str());
}
Tool *OpenBSD::buildAssembler() const {
return new tools::openbsd::Assembler(*this);
}
Tool *OpenBSD::buildLinker() const { return new tools::openbsd::Linker(*this); }
bool OpenBSD::HasNativeLLVMSupport() const { return true; }
diff --git a/contrib/llvm-project/clang/lib/Headers/__clang_cuda_device_functions.h b/contrib/llvm-project/clang/lib/Headers/__clang_cuda_device_functions.h
index f801e5426aa4..cc4e1a4dd96a 100644
--- a/contrib/llvm-project/clang/lib/Headers/__clang_cuda_device_functions.h
+++ b/contrib/llvm-project/clang/lib/Headers/__clang_cuda_device_functions.h
@@ -1,1486 +1,1558 @@
/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__
#ifndef __OPENMP_NVPTX__
#if CUDA_VERSION < 9000
#error This file is intended to be used with CUDA-9+ only.
#endif
#endif
// __DEVICE__ is a helper macro with a common set of attributes for the wrappers
// we implement in this file. We need static in order to avoid emitting unused
// functions, and __forceinline__ helps inline these wrappers at -O1.
#pragma push_macro("__DEVICE__")
#ifdef __OPENMP_NVPTX__
#define __DEVICE__ static __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ __forceinline__
#endif
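// Illustrative expansion: in a CUDA build,
//   __DEVICE__ int __clz(int __a);
// becomes
//   static __device__ __forceinline__ int __clz(int __a);
// while under __OPENMP_NVPTX__ it becomes
//   static __attribute__((always_inline, nothrow)) int __clz(int __a);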
__DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }
__DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }
__DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }
__DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }
__DEVICE__ unsigned long long __brevll(unsigned long long __a) {
return __nv_brevll(__a);
}
#if defined(__cplusplus)
-__DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
+__DEVICE__ void __brkpt() { __asm__ __volatile__("brkpt;"); }
__DEVICE__ void __brkpt(int __a) { __brkpt(); }
#else
-__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
+__DEVICE__ void __attribute__((overloadable)) __brkpt(void) {
+ __asm__ __volatile__("brkpt;");
+}
__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
#endif
__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
unsigned int __c) {
return __nv_byte_perm(__a, __b, __c);
}
__DEVICE__ int __clz(int __a) { return __nv_clz(__a); }
__DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); }
__DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); }
__DEVICE__ double __dAtomicAdd(double *__p, double __v) {
return __nvvm_atom_add_gen_d(__p, __v);
}
__DEVICE__ double __dAtomicAdd_block(double *__p, double __v) {
return __nvvm_atom_cta_add_gen_d(__p, __v);
}
__DEVICE__ double __dAtomicAdd_system(double *__p, double __v) {
return __nvvm_atom_sys_add_gen_d(__p, __v);
}
__DEVICE__ double __dadd_rd(double __a, double __b) {
return __nv_dadd_rd(__a, __b);
}
__DEVICE__ double __dadd_rn(double __a, double __b) {
return __nv_dadd_rn(__a, __b);
}
__DEVICE__ double __dadd_ru(double __a, double __b) {
return __nv_dadd_ru(__a, __b);
}
__DEVICE__ double __dadd_rz(double __a, double __b) {
return __nv_dadd_rz(__a, __b);
}
__DEVICE__ double __ddiv_rd(double __a, double __b) {
return __nv_ddiv_rd(__a, __b);
}
__DEVICE__ double __ddiv_rn(double __a, double __b) {
return __nv_ddiv_rn(__a, __b);
}
__DEVICE__ double __ddiv_ru(double __a, double __b) {
return __nv_ddiv_ru(__a, __b);
}
__DEVICE__ double __ddiv_rz(double __a, double __b) {
return __nv_ddiv_rz(__a, __b);
}
__DEVICE__ double __dmul_rd(double __a, double __b) {
return __nv_dmul_rd(__a, __b);
}
__DEVICE__ double __dmul_rn(double __a, double __b) {
return __nv_dmul_rn(__a, __b);
}
__DEVICE__ double __dmul_ru(double __a, double __b) {
return __nv_dmul_ru(__a, __b);
}
__DEVICE__ double __dmul_rz(double __a, double __b) {
return __nv_dmul_rz(__a, __b);
}
__DEVICE__ float __double2float_rd(double __a) {
return __nv_double2float_rd(__a);
}
__DEVICE__ float __double2float_rn(double __a) {
return __nv_double2float_rn(__a);
}
__DEVICE__ float __double2float_ru(double __a) {
return __nv_double2float_ru(__a);
}
__DEVICE__ float __double2float_rz(double __a) {
return __nv_double2float_rz(__a);
}
__DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); }
__DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); }
__DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); }
__DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); }
__DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); }
__DEVICE__ long long __double2ll_rd(double __a) {
return __nv_double2ll_rd(__a);
}
__DEVICE__ long long __double2ll_rn(double __a) {
return __nv_double2ll_rn(__a);
}
__DEVICE__ long long __double2ll_ru(double __a) {
return __nv_double2ll_ru(__a);
}
__DEVICE__ long long __double2ll_rz(double __a) {
return __nv_double2ll_rz(__a);
}
__DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); }
__DEVICE__ unsigned int __double2uint_rd(double __a) {
return __nv_double2uint_rd(__a);
}
__DEVICE__ unsigned int __double2uint_rn(double __a) {
return __nv_double2uint_rn(__a);
}
__DEVICE__ unsigned int __double2uint_ru(double __a) {
return __nv_double2uint_ru(__a);
}
__DEVICE__ unsigned int __double2uint_rz(double __a) {
return __nv_double2uint_rz(__a);
}
__DEVICE__ unsigned long long __double2ull_rd(double __a) {
return __nv_double2ull_rd(__a);
}
__DEVICE__ unsigned long long __double2ull_rn(double __a) {
return __nv_double2ull_rn(__a);
}
__DEVICE__ unsigned long long __double2ull_ru(double __a) {
return __nv_double2ull_ru(__a);
}
__DEVICE__ unsigned long long __double2ull_rz(double __a) {
return __nv_double2ull_rz(__a);
}
__DEVICE__ long long __double_as_longlong(double __a) {
return __nv_double_as_longlong(__a);
}
__DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); }
__DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); }
__DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); }
__DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); }
__DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); }
__DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); }
__DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); }
__DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); }
__DEVICE__ double __dsub_rd(double __a, double __b) {
return __nv_dsub_rd(__a, __b);
}
__DEVICE__ double __dsub_rn(double __a, double __b) {
return __nv_dsub_rn(__a, __b);
}
__DEVICE__ double __dsub_ru(double __a, double __b) {
return __nv_dsub_ru(__a, __b);
}
__DEVICE__ double __dsub_rz(double __a, double __b) {
return __nv_dsub_rz(__a, __b);
}
__DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); }
__DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); }
__DEVICE__ float __fAtomicAdd(float *__p, float __v) {
return __nvvm_atom_add_gen_f(__p, __v);
}
__DEVICE__ float __fAtomicAdd_block(float *__p, float __v) {
return __nvvm_atom_cta_add_gen_f(__p, __v);
}
__DEVICE__ float __fAtomicAdd_system(float *__p, float __v) {
return __nvvm_atom_sys_add_gen_f(__p, __v);
}
__DEVICE__ float __fAtomicExch(float *__p, float __v) {
return __nv_int_as_float(
__nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));
}
__DEVICE__ float __fAtomicExch_block(float *__p, float __v) {
return __nv_int_as_float(
__nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));
}
__DEVICE__ float __fAtomicExch_system(float *__p, float __v) {
return __nv_int_as_float(
__nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));
}
__DEVICE__ float __fadd_rd(float __a, float __b) {
return __nv_fadd_rd(__a, __b);
}
__DEVICE__ float __fadd_rn(float __a, float __b) {
return __nv_fadd_rn(__a, __b);
}
__DEVICE__ float __fadd_ru(float __a, float __b) {
return __nv_fadd_ru(__a, __b);
}
__DEVICE__ float __fadd_rz(float __a, float __b) {
return __nv_fadd_rz(__a, __b);
}
__DEVICE__ float __fdiv_rd(float __a, float __b) {
return __nv_fdiv_rd(__a, __b);
}
__DEVICE__ float __fdiv_rn(float __a, float __b) {
return __nv_fdiv_rn(__a, __b);
}
__DEVICE__ float __fdiv_ru(float __a, float __b) {
return __nv_fdiv_ru(__a, __b);
}
__DEVICE__ float __fdiv_rz(float __a, float __b) {
return __nv_fdiv_rz(__a, __b);
}
__DEVICE__ float __fdividef(float __a, float __b) {
return __nv_fast_fdividef(__a, __b);
}
__DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }
__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
#ifdef _MSC_VER
__DEVICE__ int __finitel(long double __a);
#endif
__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }
__DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); }
__DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); }
__DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); }
__DEVICE__ long long __float2ll_ru(float __a) { return __nv_float2ll_ru(__a); }
__DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); }
__DEVICE__ unsigned int __float2uint_rd(float __a) {
return __nv_float2uint_rd(__a);
}
__DEVICE__ unsigned int __float2uint_rn(float __a) {
return __nv_float2uint_rn(__a);
}
__DEVICE__ unsigned int __float2uint_ru(float __a) {
return __nv_float2uint_ru(__a);
}
__DEVICE__ unsigned int __float2uint_rz(float __a) {
return __nv_float2uint_rz(__a);
}
__DEVICE__ unsigned long long __float2ull_rd(float __a) {
return __nv_float2ull_rd(__a);
}
__DEVICE__ unsigned long long __float2ull_rn(float __a) {
return __nv_float2ull_rn(__a);
}
__DEVICE__ unsigned long long __float2ull_ru(float __a) {
return __nv_float2ull_ru(__a);
}
__DEVICE__ unsigned long long __float2ull_rz(float __a) {
return __nv_float2ull_rz(__a);
}
__DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); }
__DEVICE__ unsigned int __float_as_uint(float __a) {
return __nv_float_as_uint(__a);
}
__DEVICE__ double __fma_rd(double __a, double __b, double __c) {
return __nv_fma_rd(__a, __b, __c);
}
__DEVICE__ double __fma_rn(double __a, double __b, double __c) {
return __nv_fma_rn(__a, __b, __c);
}
__DEVICE__ double __fma_ru(double __a, double __b, double __c) {
return __nv_fma_ru(__a, __b, __c);
}
__DEVICE__ double __fma_rz(double __a, double __b, double __c) {
return __nv_fma_rz(__a, __b, __c);
}
__DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) {
return __nv_fmaf_ieee_rd(__a, __b, __c);
}
__DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) {
return __nv_fmaf_ieee_rn(__a, __b, __c);
}
__DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) {
return __nv_fmaf_ieee_ru(__a, __b, __c);
}
__DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) {
return __nv_fmaf_ieee_rz(__a, __b, __c);
}
__DEVICE__ float __fmaf_rd(float __a, float __b, float __c) {
return __nv_fmaf_rd(__a, __b, __c);
}
__DEVICE__ float __fmaf_rn(float __a, float __b, float __c) {
return __nv_fmaf_rn(__a, __b, __c);
}
__DEVICE__ float __fmaf_ru(float __a, float __b, float __c) {
return __nv_fmaf_ru(__a, __b, __c);
}
__DEVICE__ float __fmaf_rz(float __a, float __b, float __c) {
return __nv_fmaf_rz(__a, __b, __c);
}
__DEVICE__ float __fmul_rd(float __a, float __b) {
return __nv_fmul_rd(__a, __b);
}
__DEVICE__ float __fmul_rn(float __a, float __b) {
return __nv_fmul_rn(__a, __b);
}
__DEVICE__ float __fmul_ru(float __a, float __b) {
return __nv_fmul_ru(__a, __b);
}
__DEVICE__ float __fmul_rz(float __a, float __b) {
return __nv_fmul_rz(__a, __b);
}
__DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); }
__DEVICE__ float __frcp_rn(float __a) { return __nv_frcp_rn(__a); }
__DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); }
__DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); }
__DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); }
__DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); }
__DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); }
__DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); }
__DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); }
__DEVICE__ float __fsub_rd(float __a, float __b) {
return __nv_fsub_rd(__a, __b);
}
__DEVICE__ float __fsub_rn(float __a, float __b) {
return __nv_fsub_rn(__a, __b);
}
__DEVICE__ float __fsub_ru(float __a, float __b) {
return __nv_fsub_ru(__a, __b);
}
__DEVICE__ float __fsub_rz(float __a, float __b) {
return __nv_fsub_rz(__a, __b);
}
__DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); }
__DEVICE__ double __hiloint2double(int __a, int __b) {
return __nv_hiloint2double(__a, __b);
}
__DEVICE__ int __iAtomicAdd(int *__p, int __v) {
return __nvvm_atom_add_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicAdd_block(int *__p, int __v) {
return __nvvm_atom_cta_add_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicAdd_system(int *__p, int __v) {
return __nvvm_atom_sys_add_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicAnd(int *__p, int __v) {
return __nvvm_atom_and_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicAnd_block(int *__p, int __v) {
return __nvvm_atom_cta_and_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicAnd_system(int *__p, int __v) {
return __nvvm_atom_sys_and_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) {
return __nvvm_atom_cas_gen_i(__p, __cmp, __v);
}
__DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) {
return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v);
}
__DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) {
return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v);
}
__DEVICE__ int __iAtomicExch(int *__p, int __v) {
return __nvvm_atom_xchg_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicExch_block(int *__p, int __v) {
return __nvvm_atom_cta_xchg_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicExch_system(int *__p, int __v) {
return __nvvm_atom_sys_xchg_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMax(int *__p, int __v) {
return __nvvm_atom_max_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMax_block(int *__p, int __v) {
return __nvvm_atom_cta_max_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMax_system(int *__p, int __v) {
return __nvvm_atom_sys_max_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMin(int *__p, int __v) {
return __nvvm_atom_min_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMin_block(int *__p, int __v) {
return __nvvm_atom_cta_min_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMin_system(int *__p, int __v) {
return __nvvm_atom_sys_min_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicOr(int *__p, int __v) {
return __nvvm_atom_or_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicOr_block(int *__p, int __v) {
return __nvvm_atom_cta_or_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicOr_system(int *__p, int __v) {
return __nvvm_atom_sys_or_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicXor(int *__p, int __v) {
return __nvvm_atom_xor_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicXor_block(int *__p, int __v) {
return __nvvm_atom_cta_xor_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicXor_system(int *__p, int __v) {
return __nvvm_atom_sys_xor_gen_i(__p, __v);
}
__DEVICE__ long long __illAtomicMax(long long *__p, long long __v) {
return __nvvm_atom_max_gen_ll(__p, __v);
}
__DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v) {
return __nvvm_atom_cta_max_gen_ll(__p, __v);
}
__DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) {
return __nvvm_atom_sys_max_gen_ll(__p, __v);
}
__DEVICE__ long long __illAtomicMin(long long *__p, long long __v) {
return __nvvm_atom_min_gen_ll(__p, __v);
}
__DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) {
return __nvvm_atom_cta_min_gen_ll(__p, __v);
}
__DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) {
return __nvvm_atom_sys_min_gen_ll(__p, __v);
}
__DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); }
__DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); }
__DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); }
__DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); }
__DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); }
__DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }
__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }
__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }
__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }
#ifdef _MSC_VER
__DEVICE__ int __isinfl(long double __a);
#endif
__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }
__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }
#ifdef _MSC_VER
__DEVICE__ int __isnanl(long double __a);
#endif
__DEVICE__ double __ll2double_rd(long long __a) {
return __nv_ll2double_rd(__a);
}
__DEVICE__ double __ll2double_rn(long long __a) {
return __nv_ll2double_rn(__a);
}
__DEVICE__ double __ll2double_ru(long long __a) {
return __nv_ll2double_ru(__a);
}
__DEVICE__ double __ll2double_rz(long long __a) {
return __nv_ll2double_rz(__a);
}
__DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); }
__DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); }
__DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); }
__DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); }
__DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) {
return __nvvm_atom_and_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) {
return __nvvm_atom_cta_and_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) {
return __nvvm_atom_sys_and_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicOr(long long *__p, long long __v) {
return __nvvm_atom_or_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) {
return __nvvm_atom_cta_or_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) {
return __nvvm_atom_sys_or_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicXor(long long *__p, long long __v) {
return __nvvm_atom_xor_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) {
return __nvvm_atom_cta_xor_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) {
return __nvvm_atom_sys_xor_gen_ll(__p, __v);
}
__DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); }
__DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); }
__DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); }
__DEVICE__ double __longlong_as_double(long long __a) {
return __nv_longlong_as_double(__a);
}
__DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); }
__DEVICE__ long long __mul64hi(long long __a, long long __b) {
return __nv_mul64hi(__a, __b);
}
__DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); }
__DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }
__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }
__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }
__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }
__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }
__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }
__DEVICE__ float __powf(float __a, float __b) {
return __nv_fast_powf(__a, __b);
}
// Parameter must have a known integer value.
-#define __prof_trigger(__a) asm __volatile__("pmevent \t%0;" ::"i"(__a))
+#define __prof_trigger(__a) __asm__ __volatile__("pmevent \t%0;" ::"i"(__a))
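// Illustrative use: __prof_trigger(2) emits the PTX instruction "pmevent 2;";
// the argument must be a compile-time integer constant because of the "i"
// asm constraint.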
__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }
__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
return __nv_sad(__a, __b, __c);
}
__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }
__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }
__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }
__DEVICE__ void __sincosf(float __a, float *__s, float *__c) {
return __nv_fast_sincosf(__a, __s, __c);
}
__DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }
__DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }
__DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); }
__DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); }
__DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }
__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }
__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };
__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };
-__DEVICE__ void __trap(void) { asm volatile("trap;"); }
+__DEVICE__ void __trap(void) { __asm__ __volatile__("trap;"); }
__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_add_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_add_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_add_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_and_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_and_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_and_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp,
unsigned int __v) {
return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v);
}
__DEVICE__ unsigned int
__uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) {
return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v);
}
__DEVICE__ unsigned int
__uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) {
return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v);
}
__DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_dec_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_dec_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_dec_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_xchg_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_inc_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_inc_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_inc_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_max_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_max_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_max_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_min_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_min_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_min_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_or_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_cta_or_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_or_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_xor_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_xor_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_xor_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) {
return __nv_uhadd(__a, __b);
}
__DEVICE__ double __uint2double_rn(unsigned int __a) {
return __nv_uint2double_rn(__a);
}
__DEVICE__ float __uint2float_rd(unsigned int __a) {
return __nv_uint2float_rd(__a);
}
__DEVICE__ float __uint2float_rn(unsigned int __a) {
return __nv_uint2float_rn(__a);
}
__DEVICE__ float __uint2float_ru(unsigned int __a) {
return __nv_uint2float_ru(__a);
}
__DEVICE__ float __uint2float_rz(unsigned int __a) {
return __nv_uint2float_rz(__a);
}
__DEVICE__ float __uint_as_float(unsigned int __a) {
return __nv_uint_as_float(__a);
}
__DEVICE__ double __ull2double_rd(unsigned long long __a) {
return __nv_ull2double_rd(__a);
}
__DEVICE__ double __ull2double_rn(unsigned long long __a) {
return __nv_ull2double_rn(__a);
}
__DEVICE__ double __ull2double_ru(unsigned long long __a) {
return __nv_ull2double_ru(__a);
}
__DEVICE__ double __ull2double_rz(unsigned long long __a) {
return __nv_ull2double_rz(__a);
}
__DEVICE__ float __ull2float_rd(unsigned long long __a) {
return __nv_ull2float_rd(__a);
}
__DEVICE__ float __ull2float_rn(unsigned long long __a) {
return __nv_ull2float_rn(__a);
}
__DEVICE__ float __ull2float_ru(unsigned long long __a) {
return __nv_ull2float_ru(__a);
}
__DEVICE__ float __ull2float_rz(unsigned long long __a) {
return __nv_ull2float_rz(__a);
}
__DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_add_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_and_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_and_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p,
unsigned long long __cmp,
unsigned long long __v) {
return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v);
}
__DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p,
unsigned long long __cmp,
unsigned long long __v) {
return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v);
}
__DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p,
unsigned long long __cmp,
unsigned long long __v) {
return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v);
}
__DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_xchg_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_max_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_max_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_max_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_min_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_min_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_min_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_or_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_or_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_xor_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) {
return __nv_umul24(__a, __b);
}
__DEVICE__ unsigned long long __umul64hi(unsigned long long __a,
unsigned long long __b) {
return __nv_umul64hi(__a, __b);
}
__DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) {
return __nv_umulhi(__a, __b);
}
__DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) {
return __nv_urhadd(__a, __b);
}
__DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b,
unsigned int __c) {
return __nv_usad(__a, __b, __c);
}
#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020
__DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); }
__DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); }
__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
return __nv_vabsdiffs2(__a, __b);
}
__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
return __nv_vabsdiffs4(__a, __b);
}
__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
return __nv_vabsdiffu2(__a, __b);
}
__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
return __nv_vabsdiffu4(__a, __b);
}
__DEVICE__ unsigned int __vabsss2(unsigned int __a) {
return __nv_vabsss2(__a);
}
__DEVICE__ unsigned int __vabsss4(unsigned int __a) {
return __nv_vabsss4(__a);
}
__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
return __nv_vadd2(__a, __b);
}
__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
return __nv_vadd4(__a, __b);
}
__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
return __nv_vaddss2(__a, __b);
}
__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
return __nv_vaddss4(__a, __b);
}
__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
return __nv_vaddus2(__a, __b);
}
__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
return __nv_vaddus4(__a, __b);
}
__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
return __nv_vavgs2(__a, __b);
}
__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
return __nv_vavgs4(__a, __b);
}
__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
return __nv_vavgu2(__a, __b);
}
__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
return __nv_vavgu4(__a, __b);
}
__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
return __nv_vcmpeq2(__a, __b);
}
__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
return __nv_vcmpeq4(__a, __b);
}
__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
return __nv_vcmpges2(__a, __b);
}
__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
return __nv_vcmpges4(__a, __b);
}
__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
return __nv_vcmpgeu2(__a, __b);
}
__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
return __nv_vcmpgeu4(__a, __b);
}
__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
return __nv_vcmpgts2(__a, __b);
}
__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
return __nv_vcmpgts4(__a, __b);
}
__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
return __nv_vcmpgtu2(__a, __b);
}
__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
return __nv_vcmpgtu4(__a, __b);
}
__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
return __nv_vcmples2(__a, __b);
}
__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
return __nv_vcmples4(__a, __b);
}
__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
return __nv_vcmpleu2(__a, __b);
}
__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
return __nv_vcmpleu4(__a, __b);
}
__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
return __nv_vcmplts2(__a, __b);
}
__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
return __nv_vcmplts4(__a, __b);
}
__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
return __nv_vcmpltu2(__a, __b);
}
__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
return __nv_vcmpltu4(__a, __b);
}
__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
return __nv_vcmpne2(__a, __b);
}
__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
return __nv_vcmpne4(__a, __b);
}
__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {
return __nv_vhaddu2(__a, __b);
}
__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {
return __nv_vhaddu4(__a, __b);
}
__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
return __nv_vmaxs2(__a, __b);
}
__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
return __nv_vmaxs4(__a, __b);
}
__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
return __nv_vmaxu2(__a, __b);
}
__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
return __nv_vmaxu4(__a, __b);
}
__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
return __nv_vmins2(__a, __b);
}
__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
return __nv_vmins4(__a, __b);
}
__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
return __nv_vminu2(__a, __b);
}
__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
return __nv_vminu4(__a, __b);
}
__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); }
__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); }
__DEVICE__ unsigned int __vnegss2(unsigned int __a) {
return __nv_vnegss2(__a);
}
__DEVICE__ unsigned int __vnegss4(unsigned int __a) {
return __nv_vnegss4(__a);
}
__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
return __nv_vsads2(__a, __b);
}
__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
return __nv_vsads4(__a, __b);
}
__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
return __nv_vsadu2(__a, __b);
}
__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
return __nv_vsadu4(__a, __b);
}
__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
return __nv_vseteq2(__a, __b);
}
__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
return __nv_vseteq4(__a, __b);
}
__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
return __nv_vsetges2(__a, __b);
}
__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
return __nv_vsetges4(__a, __b);
}
__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
return __nv_vsetgeu2(__a, __b);
}
__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
return __nv_vsetgeu4(__a, __b);
}
__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
return __nv_vsetgts2(__a, __b);
}
__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
return __nv_vsetgts4(__a, __b);
}
__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
return __nv_vsetgtu2(__a, __b);
}
__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
return __nv_vsetgtu4(__a, __b);
}
__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
return __nv_vsetles2(__a, __b);
}
__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
return __nv_vsetles4(__a, __b);
}
__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
return __nv_vsetleu2(__a, __b);
}
__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
return __nv_vsetleu4(__a, __b);
}
__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
return __nv_vsetlts2(__a, __b);
}
__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
return __nv_vsetlts4(__a, __b);
}
__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
return __nv_vsetltu2(__a, __b);
}
__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
return __nv_vsetltu4(__a, __b);
}
__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
return __nv_vsetne2(__a, __b);
}
__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
return __nv_vsetne4(__a, __b);
}
__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
return __nv_vsub2(__a, __b);
}
__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
return __nv_vsub4(__a, __b);
}
__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
return __nv_vsubss2(__a, __b);
}
__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
return __nv_vsubss4(__a, __b);
}
__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
return __nv_vsubus2(__a, __b);
}
__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
return __nv_vsubus4(__a, __b);
}
#else // CUDA_VERSION >= 9020
// CUDA no longer provides inline assembly (or bitcode) implementation of these
// functions, so we have to reimplement them. The implementation is naive and is
// not optimized for performance.
// Helper function to convert N-bit boolean subfields into all-0 or all-1.
// E.g. __bool2mask(0x01000100,8) -> 0xff00ff00
// __bool2mask(0x00010000,16) -> 0xffff0000
__DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) {
return (__a << shift) - __a;
}
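// For illustration of the arithmetic above: each N-bit subfield holds either
// 0 or 1, and within a subfield (b << N) - b equals b * (2^N - 1), i.e. 0x00
// or 0xFF for N == 8.  E.g. for __a = 0x01000100 and shift = 8:
//   (__a << 8) - __a == 0x00010000 - 0x01000100 == 0xff00ff00 (mod 2^32).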
__DEVICE__ unsigned int __vabs2(unsigned int __a) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabs4(unsigned int __a) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsss2(unsigned int __a) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsss4(unsigned int __a) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.eq %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vseteq2(__a, __b), 16);
}
__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.eq %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vseteq4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetges2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetges4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgeu2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgeu4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgts2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgts4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgtu2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgtu4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetles2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetles4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetleu2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetleu4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetlts2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetlts4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetltu2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetltu4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.ne %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetne2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.ne %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetne4(__a, __b), 8);
}
// Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086
// (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) =>
// (a + b) / 2 = ((a ^ b) >> 1) + (a & b)
// To operate on multiple sub-elements we need to make sure to mask out bits
// that crossed over into adjacent elements during the shift.
__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {
return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b);
}
__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {
return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b);
}
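// For illustration of the identity used above, with single bytes __a = 200
// (0xc8) and __b = 100 (0x64): __a & __b == 0x40 (64), __a ^ __b == 0xac, and
// 0xac >> 1 == 86, so 64 + 86 == 150 == (200 + 100) / 2.  The ~0x80008000u and
// ~0x80808080u masks clear, in each subfield, the top bit into which the
// neighbouring higher subfield's low bit would otherwise leak during the
// right shift.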
__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
unsigned int r;
if ((__a & 0x8000) && (__b & 0x8000)) {
// Work around a bug in ptxas which produces invalid result if low element
// is negative.
unsigned mask = __vcmpgts2(__a, __b);
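    // mask is all-ones in each halfword where __a > __b (signed), so the
    // blend on the next line selects __a in those halfwords and __b elsewhere.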
r = (__a & mask) | (__b & ~mask);
} else {
- asm("vmax2.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
}
return r;
}
__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }
__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }
__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vnegss2(unsigned int __a) {
return __vsubss2(0, __a);
}
__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vnegss4(unsigned int __a) {
return __vsubss4(0, __a);
}
__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
#endif // CUDA_VERSION >= 9020
// For OpenMP we require the user to include <time.h> as we need to know what
// clock_t is on the system.
#ifndef __OPENMP_NVPTX__
__DEVICE__ /* clock_t= */ int clock() { return __nvvm_read_ptx_sreg_clock(); }
#endif
__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }
// These functions shouldn't be declared when including this header
// for math function resolution purposes.
#ifndef __OPENMP_NVPTX__
__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
return __builtin_memcpy(__a, __b, __c);
}
__DEVICE__ void *memset(void *__a, int __b, size_t __c) {
return __builtin_memset(__a, __b, __c);
}
#endif
#pragma pop_macro("__DEVICE__")
#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__
diff --git a/contrib/llvm-project/clang/lib/Headers/__clang_hip_cmath.h b/contrib/llvm-project/clang/lib/Headers/__clang_hip_cmath.h
index 7342705434e6..d488db0a94d9 100644
--- a/contrib/llvm-project/clang/lib/Headers/__clang_hip_cmath.h
+++ b/contrib/llvm-project/clang/lib/Headers/__clang_hip_cmath.h
@@ -1,810 +1,842 @@
/*===---- __clang_hip_cmath.h - HIP cmath decls -----------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_HIP_CMATH_H__
#define __CLANG_HIP_CMATH_H__
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif
#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <limits>
#include <type_traits>
#include <utility>
#endif
#include <limits.h>
#include <stdint.h>
#endif // !defined(__HIPCC_RTC__)
#pragma push_macro("__DEVICE__")
+#pragma push_macro("__CONSTEXPR__")
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static __attribute__((always_inline, nothrow))
+#define __CONSTEXPR__ constexpr
+#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#define __CONSTEXPR__
+#endif // __OPENMP_AMDGCN__
// Start with functions that cannot be defined by DEF macros below.
#if defined(__cplusplus)
-__DEVICE__ double abs(double __x) { return ::fabs(__x); }
-__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
-__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
-__DEVICE__ long abs(long __n) { return ::labs(__n); }
-__DEVICE__ float fma(float __x, float __y, float __z) {
+#if defined __OPENMP_AMDGCN__
+__DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); }
+__DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); }
+__DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); }
+#endif
+__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); }
+__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); }
+__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); }
+__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); }
+__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) {
return ::fmaf(__x, __y, __z);
}
#if !defined(__HIPCC_RTC__)
// The value returned by fpclassify is platform-dependent; therefore it is not
// supported by hipRTC.
-__DEVICE__ int fpclassify(float __x) {
+__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
FP_ZERO, __x);
}
-__DEVICE__ int fpclassify(double __x) {
+__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
FP_ZERO, __x);
}
#endif // !defined(__HIPCC_RTC__)
-__DEVICE__ float frexp(float __arg, int *__exp) {
+__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) {
return ::frexpf(__arg, __exp);
}
#if defined(__OPENMP_AMDGCN__)
// For OpenMP we work around some old system headers that have non-conforming
// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
// this by providing two versions of these functions, differing only in the
// return type. To avoid conflicting definitions we disable implicit base
// function generation. That means we will end up with two specializations, one
// per type, but only one has a base function defined by the system header.
#pragma omp begin declare variant match( \
implementation = {extension(disable_implicit_base)})
// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
// add a suffix. This means we would clash with the names of the variants
// (note that we do not create implicit base functions here). To avoid
// this clash we add a new trait to some of them that is always true
// (this is LLVM after all ;)). It will only influence the mangled name
// of the variants inside the inner region and avoid the clash.
#pragma omp begin declare variant match(implementation = {vendor(llvm)})
-__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ int isfinite(double __x) { return ::__finite(__x); }
-__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); }
#pragma omp end declare variant
#endif // defined(__OPENMP_AMDGCN__)
-__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); }
-__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); }
#if defined(__OPENMP_AMDGCN__)
#pragma omp end declare variant
#endif // defined(__OPENMP_AMDGCN__)
-__DEVICE__ bool isgreater(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) {
return __builtin_isgreater(__x, __y);
}
-__DEVICE__ bool isgreater(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) {
return __builtin_isgreater(__x, __y);
}
-__DEVICE__ bool isgreaterequal(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) {
return __builtin_isgreaterequal(__x, __y);
}
-__DEVICE__ bool isgreaterequal(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) {
return __builtin_isgreaterequal(__x, __y);
}
-__DEVICE__ bool isless(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) {
return __builtin_isless(__x, __y);
}
-__DEVICE__ bool isless(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) {
return __builtin_isless(__x, __y);
}
-__DEVICE__ bool islessequal(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) {
return __builtin_islessequal(__x, __y);
}
-__DEVICE__ bool islessequal(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) {
return __builtin_islessequal(__x, __y);
}
-__DEVICE__ bool islessgreater(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) {
return __builtin_islessgreater(__x, __y);
}
-__DEVICE__ bool islessgreater(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) {
return __builtin_islessgreater(__x, __y);
}
-__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
-__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
-__DEVICE__ bool isunordered(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isnormal(float __x) {
+ return __builtin_isnormal(__x);
+}
+__DEVICE__ __CONSTEXPR__ bool isnormal(double __x) {
+ return __builtin_isnormal(__x);
+}
+__DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) {
return __builtin_isunordered(__x, __y);
}
-__DEVICE__ bool isunordered(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) {
return __builtin_isunordered(__x, __y);
}
-__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
-__DEVICE__ float pow(float __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) {
+ return ::modff(__x, __iptr);
+}
+__DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) {
return ::powif(__base, __iexp);
}
-__DEVICE__ double pow(double __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) {
return ::powi(__base, __iexp);
}
-__DEVICE__ float remquo(float __x, float __y, int *__quo) {
+__DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) {
return ::remquof(__x, __y, __quo);
}
-__DEVICE__ float scalbln(float __x, long int __n) {
+__DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) {
return ::scalblnf(__x, __n);
}
-__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
-__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); }
+__DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); }
+__DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); }
// Notably missing above is nexttoward. We omit it because
// ocml doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.
// Other functions.
-__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) {
+__DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y,
+ _Float16 __z) {
return __ocml_fma_f16(__x, __y, __z);
}
-__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) {
return __ocml_pown_f16(__base, __iexp);
}
+#ifndef __OPENMP_AMDGCN__
// BEGIN DEF_FUN and HIP_OVERLOAD
// BEGIN DEF_FUN
#pragma push_macro("__DEF_FUN1")
#pragma push_macro("__DEF_FUN2")
#pragma push_macro("__DEF_FUN2_FI")
// Define cmath functions with float argument and returns __retty.
#define __DEF_FUN1(__retty, __func) \
- __DEVICE__ \
- __retty __func(float __x) { return __func##f(__x); }
+ __DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); }
// Define cmath functions with two float arguments and returns __retty.
#define __DEF_FUN2(__retty, __func) \
- __DEVICE__ \
- __retty __func(float __x, float __y) { return __func##f(__x, __y); }
+ __DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) { \
+ return __func##f(__x, __y); \
+ }
// Define cmath functions with a float and an int argument and returns __retty.
#define __DEF_FUN2_FI(__retty, __func) \
- __DEVICE__ \
- __retty __func(float __x, int __y) { return __func##f(__x, __y); }
+ __DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) { \
+ return __func##f(__x, __y); \
+ }
__DEF_FUN1(float, acos)
__DEF_FUN1(float, acosh)
__DEF_FUN1(float, asin)
__DEF_FUN1(float, asinh)
__DEF_FUN1(float, atan)
__DEF_FUN2(float, atan2)
__DEF_FUN1(float, atanh)
__DEF_FUN1(float, cbrt)
__DEF_FUN1(float, ceil)
__DEF_FUN2(float, copysign)
__DEF_FUN1(float, cos)
__DEF_FUN1(float, cosh)
__DEF_FUN1(float, erf)
__DEF_FUN1(float, erfc)
__DEF_FUN1(float, exp)
__DEF_FUN1(float, exp2)
__DEF_FUN1(float, expm1)
__DEF_FUN1(float, fabs)
__DEF_FUN2(float, fdim)
__DEF_FUN1(float, floor)
__DEF_FUN2(float, fmax)
__DEF_FUN2(float, fmin)
__DEF_FUN2(float, fmod)
__DEF_FUN2(float, hypot)
__DEF_FUN1(int, ilogb)
__DEF_FUN2_FI(float, ldexp)
__DEF_FUN1(float, lgamma)
__DEF_FUN1(float, log)
__DEF_FUN1(float, log10)
__DEF_FUN1(float, log1p)
__DEF_FUN1(float, log2)
__DEF_FUN1(float, logb)
__DEF_FUN1(long long, llrint)
__DEF_FUN1(long long, llround)
__DEF_FUN1(long, lrint)
__DEF_FUN1(long, lround)
__DEF_FUN1(float, nearbyint)
__DEF_FUN2(float, nextafter)
__DEF_FUN2(float, pow)
__DEF_FUN2(float, remainder)
__DEF_FUN1(float, rint)
__DEF_FUN1(float, round)
__DEF_FUN2_FI(float, scalbn)
__DEF_FUN1(float, sin)
__DEF_FUN1(float, sinh)
__DEF_FUN1(float, sqrt)
__DEF_FUN1(float, tan)
__DEF_FUN1(float, tanh)
__DEF_FUN1(float, tgamma)
__DEF_FUN1(float, trunc)
#pragma pop_macro("__DEF_FUN1")
#pragma pop_macro("__DEF_FUN2")
#pragma pop_macro("__DEF_FUN2_FI")
// END DEF_FUN
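// For illustration, the __DEF_FUN2_FI(float, ldexp) invocation above produced
// (roughly)
//   __DEVICE__ __CONSTEXPR__ float ldexp(float __x, int __y) {
//     return ldexpf(__x, __y);
//   }
// i.e. each DEF macro forwards the float overload to the corresponding
// float-suffixed C function.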
// BEGIN HIP_OVERLOAD
#pragma push_macro("__HIP_OVERLOAD1")
#pragma push_macro("__HIP_OVERLOAD2")
// __hip_enable_if::type is a type function which returns __T if __B is true.
template <bool __B, class __T = void> struct __hip_enable_if {};
template <class __T> struct __hip_enable_if<true, __T> { typedef __T type; };
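// For illustration: __hip_enable_if<true, double>::type is double, while
// __hip_enable_if<false, double> has no ::type member, so any overload whose
// return type names it drops out of the candidate set (SFINAE).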
namespace __hip {
template <class _Tp> struct is_integral {
enum { value = 0 };
};
template <> struct is_integral<bool> {
enum { value = 1 };
};
template <> struct is_integral<char> {
enum { value = 1 };
};
template <> struct is_integral<signed char> {
enum { value = 1 };
};
template <> struct is_integral<unsigned char> {
enum { value = 1 };
};
template <> struct is_integral<wchar_t> {
enum { value = 1 };
};
template <> struct is_integral<short> {
enum { value = 1 };
};
template <> struct is_integral<unsigned short> {
enum { value = 1 };
};
template <> struct is_integral<int> {
enum { value = 1 };
};
template <> struct is_integral<unsigned int> {
enum { value = 1 };
};
template <> struct is_integral<long> {
enum { value = 1 };
};
template <> struct is_integral<unsigned long> {
enum { value = 1 };
};
template <> struct is_integral<long long> {
enum { value = 1 };
};
template <> struct is_integral<unsigned long long> {
enum { value = 1 };
};
// TODO: specialize is_arithmetic<_Float16>.
template <class _Tp> struct is_arithmetic {
enum { value = 0 };
};
template <> struct is_arithmetic<bool> {
enum { value = 1 };
};
template <> struct is_arithmetic<char> {
enum { value = 1 };
};
template <> struct is_arithmetic<signed char> {
enum { value = 1 };
};
template <> struct is_arithmetic<unsigned char> {
enum { value = 1 };
};
template <> struct is_arithmetic<wchar_t> {
enum { value = 1 };
};
template <> struct is_arithmetic<short> {
enum { value = 1 };
};
template <> struct is_arithmetic<unsigned short> {
enum { value = 1 };
};
template <> struct is_arithmetic<int> {
enum { value = 1 };
};
template <> struct is_arithmetic<unsigned int> {
enum { value = 1 };
};
template <> struct is_arithmetic<long> {
enum { value = 1 };
};
template <> struct is_arithmetic<unsigned long> {
enum { value = 1 };
};
template <> struct is_arithmetic<long long> {
enum { value = 1 };
};
template <> struct is_arithmetic<unsigned long long> {
enum { value = 1 };
};
template <> struct is_arithmetic<float> {
enum { value = 1 };
};
template <> struct is_arithmetic<double> {
enum { value = 1 };
};
struct true_type {
static const __constant__ bool value = true;
};
struct false_type {
static const __constant__ bool value = false;
};
template <typename __T, typename __U> struct is_same : public false_type {};
template <typename __T> struct is_same<__T, __T> : public true_type {};
template <typename __T> struct add_rvalue_reference { typedef __T &&type; };
template <typename __T> typename add_rvalue_reference<__T>::type declval();
// decltype is only available in C++11 and above.
#if __cplusplus >= 201103L
// __hip_promote
template <class _Tp> struct __numeric_type {
static void __test(...);
static _Float16 __test(_Float16);
static float __test(float);
static double __test(char);
static double __test(int);
static double __test(unsigned);
static double __test(long);
static double __test(unsigned long);
static double __test(long long);
static double __test(unsigned long long);
static double __test(double);
// No support for long double, use double instead.
static double __test(long double);
typedef decltype(__test(declval<_Tp>())) type;
static const bool value = !is_same<type, void>::value;
};
template <> struct __numeric_type<void> { static const bool value = true; };
template <class _A1, class _A2 = void, class _A3 = void,
bool = __numeric_type<_A1>::value &&__numeric_type<_A2>::value
&&__numeric_type<_A3>::value>
class __promote_imp {
public:
static const bool value = false;
};
template <class _A1, class _A2, class _A3>
class __promote_imp<_A1, _A2, _A3, true> {
private:
typedef typename __promote_imp<_A1>::type __type1;
typedef typename __promote_imp<_A2>::type __type2;
typedef typename __promote_imp<_A3>::type __type3;
public:
typedef decltype(__type1() + __type2() + __type3()) type;
static const bool value = true;
};
template <class _A1, class _A2> class __promote_imp<_A1, _A2, void, true> {
private:
typedef typename __promote_imp<_A1>::type __type1;
typedef typename __promote_imp<_A2>::type __type2;
public:
typedef decltype(__type1() + __type2()) type;
static const bool value = true;
};
template <class _A1> class __promote_imp<_A1, void, void, true> {
public:
typedef typename __numeric_type<_A1>::type type;
static const bool value = true;
};
template <class _A1, class _A2 = void, class _A3 = void>
class __promote : public __promote_imp<_A1, _A2, _A3> {};
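// For illustration: __numeric_type maps integral types (and long double) to
// double and keeps _Float16, float and double as-is, so
// __promote<float, int>::type is decltype(float() + double()), i.e. double,
// while __promote<float, float>::type stays float.  This is what lets the
// mixed-type overloads below agree on a single common result type.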
#endif //__cplusplus >= 201103L
} // namespace __hip
// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
// avoid compilation error due to ambiguity, e.g. floor(5) is resolved with
// floor(double).
#define __HIP_OVERLOAD1(__retty, __fn) \
template <typename __T> \
- __DEVICE__ \
+ __DEVICE__ __CONSTEXPR__ \
typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type \
__fn(__T __x) { \
return ::__fn((double)__x); \
}
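// For illustration, __HIP_OVERLOAD1(double, floor) below provides (roughly)
//   template <typename __T>
//   __DEVICE__ __CONSTEXPR__
//       typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
//       floor(__T __x) { return ::floor((double)__x); }
// so a call like floor(5) selects this template instead of being ambiguous
// between the float and double overloads.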
// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
// or integer argument to avoid compilation error due to ambiguity, e.g.
// max(5.0f, 6.0) is resolved with max(double, double).
#if __cplusplus >= 201103L
#define __HIP_OVERLOAD2(__retty, __fn) \
template <typename __T1, typename __T2> \
- __DEVICE__ typename __hip_enable_if< \
+ __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< \
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \
typename __hip::__promote<__T1, __T2>::type>::type \
__fn(__T1 __x, __T2 __y) { \
typedef typename __hip::__promote<__T1, __T2>::type __result_type; \
return __fn((__result_type)__x, (__result_type)__y); \
}
#else
#define __HIP_OVERLOAD2(__retty, __fn) \
template <typename __T1, typename __T2> \
- __DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \
- __hip::is_arithmetic<__T2>::value, \
- __retty>::type \
- __fn(__T1 __x, __T2 __y) { \
+ __DEVICE__ __CONSTEXPR__ \
+ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \
+ __hip::is_arithmetic<__T2>::value, \
+ __retty>::type \
+ __fn(__T1 __x, __T2 __y) { \
return __fn((double)__x, (double)__y); \
}
#endif
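// For illustration, with C++11 enabled the __HIP_OVERLOAD2(double, fmax)
// instantiation below accepts any mix of arithmetic argument types: a call
// such as fmax(5.0f, 6) deduces __hip::__promote<float, int>::type == double
// and forwards to fmax(double, double), avoiding an ambiguous overload.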
__HIP_OVERLOAD1(double, acos)
__HIP_OVERLOAD1(double, acosh)
__HIP_OVERLOAD1(double, asin)
__HIP_OVERLOAD1(double, asinh)
__HIP_OVERLOAD1(double, atan)
__HIP_OVERLOAD2(double, atan2)
__HIP_OVERLOAD1(double, atanh)
__HIP_OVERLOAD1(double, cbrt)
__HIP_OVERLOAD1(double, ceil)
__HIP_OVERLOAD2(double, copysign)
__HIP_OVERLOAD1(double, cos)
__HIP_OVERLOAD1(double, cosh)
__HIP_OVERLOAD1(double, erf)
__HIP_OVERLOAD1(double, erfc)
__HIP_OVERLOAD1(double, exp)
__HIP_OVERLOAD1(double, exp2)
__HIP_OVERLOAD1(double, expm1)
__HIP_OVERLOAD1(double, fabs)
__HIP_OVERLOAD2(double, fdim)
__HIP_OVERLOAD1(double, floor)
__HIP_OVERLOAD2(double, fmax)
__HIP_OVERLOAD2(double, fmin)
__HIP_OVERLOAD2(double, fmod)
#if !defined(__HIPCC_RTC__)
__HIP_OVERLOAD1(int, fpclassify)
#endif // !defined(__HIPCC_RTC__)
__HIP_OVERLOAD2(double, hypot)
__HIP_OVERLOAD1(int, ilogb)
__HIP_OVERLOAD1(bool, isfinite)
__HIP_OVERLOAD2(bool, isgreater)
__HIP_OVERLOAD2(bool, isgreaterequal)
__HIP_OVERLOAD1(bool, isinf)
__HIP_OVERLOAD2(bool, isless)
__HIP_OVERLOAD2(bool, islessequal)
__HIP_OVERLOAD2(bool, islessgreater)
__HIP_OVERLOAD1(bool, isnan)
__HIP_OVERLOAD1(bool, isnormal)
__HIP_OVERLOAD2(bool, isunordered)
__HIP_OVERLOAD1(double, lgamma)
__HIP_OVERLOAD1(double, log)
__HIP_OVERLOAD1(double, log10)
__HIP_OVERLOAD1(double, log1p)
__HIP_OVERLOAD1(double, log2)
__HIP_OVERLOAD1(double, logb)
__HIP_OVERLOAD1(long long, llrint)
__HIP_OVERLOAD1(long long, llround)
__HIP_OVERLOAD1(long, lrint)
__HIP_OVERLOAD1(long, lround)
__HIP_OVERLOAD1(double, nearbyint)
__HIP_OVERLOAD2(double, nextafter)
__HIP_OVERLOAD2(double, pow)
__HIP_OVERLOAD2(double, remainder)
__HIP_OVERLOAD1(double, rint)
__HIP_OVERLOAD1(double, round)
__HIP_OVERLOAD1(bool, signbit)
__HIP_OVERLOAD1(double, sin)
__HIP_OVERLOAD1(double, sinh)
__HIP_OVERLOAD1(double, sqrt)
__HIP_OVERLOAD1(double, tan)
__HIP_OVERLOAD1(double, tanh)
__HIP_OVERLOAD1(double, tgamma)
__HIP_OVERLOAD1(double, trunc)
// Overload these but don't add them to std; they are not part of cmath.
__HIP_OVERLOAD2(double, max)
__HIP_OVERLOAD2(double, min)
// Additional Overloads that don't quite match HIP_OVERLOAD.
#if __cplusplus >= 201103L
template <typename __T1, typename __T2, typename __T3>
-__DEVICE__ typename __hip_enable_if<
+__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value &&
__hip::is_arithmetic<__T3>::value,
typename __hip::__promote<__T1, __T2, __T3>::type>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
typedef typename __hip::__promote<__T1, __T2, __T3>::type __result_type;
return ::fma((__result_type)__x, (__result_type)__y, (__result_type)__z);
}
#else
template <typename __T1, typename __T2, typename __T3>
-__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
- __hip::is_arithmetic<__T2>::value &&
- __hip::is_arithmetic<__T3>::value,
- double>::type
-fma(__T1 __x, __T2 __y, __T3 __z) {
+__DEVICE__ __CONSTEXPR__
+ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
+ __hip::is_arithmetic<__T2>::value &&
+ __hip::is_arithmetic<__T3>::value,
+ double>::type
+ fma(__T1 __x, __T2 __y, __T3 __z) {
return ::fma((double)__x, (double)__y, (double)__z);
}
#endif
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
frexp(__T __x, int *__exp) {
return ::frexp((double)__x, __exp);
}
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
ldexp(__T __x, int __exp) {
return ::ldexp((double)__x, __exp);
}
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
modf(__T __x, double *__exp) {
return ::modf((double)__x, __exp);
}
#if __cplusplus >= 201103L
template <typename __T1, typename __T2>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
__hip::is_arithmetic<__T2>::value,
typename __hip::__promote<__T1, __T2>::type>::type
remquo(__T1 __x, __T2 __y, int *__quo) {
typedef typename __hip::__promote<__T1, __T2>::type __result_type;
return ::remquo((__result_type)__x, (__result_type)__y, __quo);
}
#else
template <typename __T1, typename __T2>
-__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
- __hip::is_arithmetic<__T2>::value,
- double>::type
-remquo(__T1 __x, __T2 __y, int *__quo) {
+__DEVICE__ __CONSTEXPR__
+ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
+ __hip::is_arithmetic<__T2>::value,
+ double>::type
+ remquo(__T1 __x, __T2 __y, int *__quo) {
return ::remquo((double)__x, (double)__y, __quo);
}
#endif
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
scalbln(__T __x, long int __exp) {
return ::scalbln((double)__x, __exp);
}
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
scalbn(__T __x, int __exp) {
return ::scalbn((double)__x, __exp);
}
#pragma pop_macro("__HIP_OVERLOAD1")
#pragma pop_macro("__HIP_OVERLOAD2")
// END HIP_OVERLOAD
// END DEF_FUN and HIP_OVERLOAD
+#endif // ifndef __OPENMP_AMDGCN__
#endif // defined(__cplusplus)
+#ifndef __OPENMP_AMDGCN__
// Define these overloads inside the namespace our standard library uses.
#if !defined(__HIPCC_RTC__)
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_NAMESPACE_STD
#else
namespace std {
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif // _LIBCPP_BEGIN_NAMESPACE_STD
// Pull the new overloads we defined above into namespace std.
// using ::abs; - This may be considered for C++.
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnormal;
using ::isunordered;
using ::ldexp;
using ::lgamma;
using ::llrint;
using ::llround;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::modf;
// using ::nan; - This may be considered for C++.
// using ::nanf; - This may be considered for C++.
// using ::nanl; - This is not yet defined.
using ::nearbyint;
using ::nextafter;
// using ::nexttoward; - Omit this since we do not have a definition.
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;
// Well this is fun: We need to pull these symbols in for libc++, but we can't
// pull them in with libstdc++, because its ::isinf and ::isnan are different
// than its std::isinf and std::isnan.
#ifndef __GLIBCXX__
using ::isinf;
using ::isnan;
#endif
// Finally, pull the "foobarf" functions that HIP defines into std.
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
// using ::nexttowardf; - Omit this since we do not have a definition.
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
using ::scalblnf;
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;
#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION
} // namespace std
#endif // _LIBCPP_END_NAMESPACE_STD
#endif // !defined(__HIPCC_RTC__)
// Define device-side math functions from <ymath.h> on MSVC.
#if !defined(__HIPCC_RTC__)
#if defined(_MSC_VER)
// Before VS2019, `<ymath.h>` is also included in `<limits>` and other headers.
// But, from VS2019, it's only included in `<complex>`. Need to include
// `<ymath.h>` here to ensure C functions declared there won't be marked as
// `__host__` and `__device__` through `<complex>` wrapper.
#include <ymath.h>
#if defined(__cplusplus)
extern "C" {
#endif // defined(__cplusplus)
-__DEVICE__ __attribute__((overloadable)) double _Cosh(double x, double y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x,
+ double y) {
return cosh(x) * y;
}
-__DEVICE__ __attribute__((overloadable)) float _FCosh(float x, float y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x,
+ float y) {
return coshf(x) * y;
}
-__DEVICE__ __attribute__((overloadable)) short _Dtest(double *p) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) {
return fpclassify(*p);
}
-__DEVICE__ __attribute__((overloadable)) short _FDtest(float *p) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) {
return fpclassify(*p);
}
-__DEVICE__ __attribute__((overloadable)) double _Sinh(double x, double y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x,
+ double y) {
return sinh(x) * y;
}
-__DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x,
+ float y) {
return sinhf(x) * y;
}
#if defined(__cplusplus)
}
#endif // defined(__cplusplus)
#endif // defined(_MSC_VER)
#endif // !defined(__HIPCC_RTC__)
+#endif // ifndef __OPENMP_AMDGCN__
#pragma pop_macro("__DEVICE__")
+#pragma pop_macro("__CONSTEXPR__")
#endif // __CLANG_HIP_CMATH_H__
diff --git a/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h b/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h
index 1f0982d92eff..ef7e087b832c 100644
--- a/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h
+++ b/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h
@@ -1,1279 +1,1321 @@
/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_HIP_MATH_H__
#define __CLANG_HIP_MATH_H__
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif
#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <algorithm>
#endif
#include <limits.h>
#include <stdint.h>
-#endif // __HIPCC_RTC__
+#ifdef __OPENMP_AMDGCN__
+#include <omp.h>
+#endif
+#endif // !defined(__HIPCC_RTC__)
#pragma push_macro("__DEVICE__")
+
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
+#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#endif
// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
+#ifdef __OPENMP_AMDGCN__
+#define __RETURN_TYPE int
+#else
#if defined(__cplusplus)
#define __RETURN_TYPE bool
#else
#define __RETURN_TYPE int
#endif
+#endif // __OPENMP_AMDGCN__
#if defined (__cplusplus) && __cplusplus < 201103L
// emulate static_assert on type sizes
template<bool>
struct __compare_result{};
template<>
struct __compare_result<true> {
static const __device__ bool valid;
};
__DEVICE__
void __suppress_unused_warning(bool b){};
template <unsigned int S, unsigned int T>
__DEVICE__ void __static_assert_equal_size() {
__suppress_unused_warning(__compare_result<S == T>::valid);
}
#define __static_assert_type_size_equal(A, B) \
__static_assert_equal_size<A,B>()
#else
#define __static_assert_type_size_equal(A,B) \
static_assert((A) == (B), "")
#endif
__DEVICE__
uint64_t __make_mantissa_base8(const char *__tagp) {
uint64_t __r = 0;
while (__tagp) {
char __tmp = *__tagp;
if (__tmp >= '0' && __tmp <= '7')
__r = (__r * 8u) + __tmp - '0';
else
return 0;
++__tagp;
}
return __r;
}
__DEVICE__
uint64_t __make_mantissa_base10(const char *__tagp) {
uint64_t __r = 0;
while (__tagp) {
char __tmp = *__tagp;
if (__tmp >= '0' && __tmp <= '9')
__r = (__r * 10u) + __tmp - '0';
else
return 0;
++__tagp;
}
return __r;
}
__DEVICE__
uint64_t __make_mantissa_base16(const char *__tagp) {
uint64_t __r = 0;
while (__tagp) {
char __tmp = *__tagp;
if (__tmp >= '0' && __tmp <= '9')
__r = (__r * 16u) + __tmp - '0';
else if (__tmp >= 'a' && __tmp <= 'f')
__r = (__r * 16u) + __tmp - 'a' + 10;
else if (__tmp >= 'A' && __tmp <= 'F')
__r = (__r * 16u) + __tmp - 'A' + 10;
else
return 0;
++__tagp;
}
return __r;
}
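// __make_mantissa picks the base from the tag prefix, following the usual
// nan(tagp) convention: "0x"/"0X" selects base 16, a bare leading '0' selects
// base 8, and anything else is parsed as base 10.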
__DEVICE__
uint64_t __make_mantissa(const char *__tagp) {
if (!__tagp)
return 0u;
if (*__tagp == '0') {
++__tagp;
if (*__tagp == 'x' || *__tagp == 'X')
return __make_mantissa_base16(__tagp);
else
return __make_mantissa_base8(__tagp);
}
return __make_mantissa_base10(__tagp);
}
// BEGIN FLOAT
#if defined(__cplusplus)
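// abs/labs/llabs below use a branchless idiom: __sgn is 0 for non-negative
// inputs and all ones for negative ones (arithmetic shift of the sign bit),
// so (__x ^ __sgn) - __sgn yields __x or -__x respectively.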
__DEVICE__
int abs(int __x) {
int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long labs(long __x) {
long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long long llabs(long long __x) {
long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
}
#endif
__DEVICE__
float acosf(float __x) { return __ocml_acos_f32(__x); }
__DEVICE__
float acoshf(float __x) { return __ocml_acosh_f32(__x); }
__DEVICE__
float asinf(float __x) { return __ocml_asin_f32(__x); }
__DEVICE__
float asinhf(float __x) { return __ocml_asinh_f32(__x); }
__DEVICE__
float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
__DEVICE__
float atanf(float __x) { return __ocml_atan_f32(__x); }
__DEVICE__
float atanhf(float __x) { return __ocml_atanh_f32(__x); }
__DEVICE__
float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
__DEVICE__
float ceilf(float __x) { return __ocml_ceil_f32(__x); }
__DEVICE__
float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }
__DEVICE__
float cosf(float __x) { return __ocml_cos_f32(__x); }
__DEVICE__
float coshf(float __x) { return __ocml_cosh_f32(__x); }
__DEVICE__
float cospif(float __x) { return __ocml_cospi_f32(__x); }
__DEVICE__
float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
__DEVICE__
float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
__DEVICE__
float erfcf(float __x) { return __ocml_erfc_f32(__x); }
__DEVICE__
float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
__DEVICE__
float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
__DEVICE__
float erff(float __x) { return __ocml_erf_f32(__x); }
__DEVICE__
float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
__DEVICE__
float exp10f(float __x) { return __ocml_exp10_f32(__x); }
__DEVICE__
float exp2f(float __x) { return __ocml_exp2_f32(__x); }
__DEVICE__
float expf(float __x) { return __ocml_exp_f32(__x); }
__DEVICE__
float expm1f(float __x) { return __ocml_expm1_f32(__x); }
__DEVICE__
float fabsf(float __x) { return __ocml_fabs_f32(__x); }
__DEVICE__
float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
__DEVICE__
float fdividef(float __x, float __y) { return __x / __y; }
__DEVICE__
float floorf(float __x) { return __ocml_floor_f32(__x); }
__DEVICE__
float fmaf(float __x, float __y, float __z) {
return __ocml_fma_f32(__x, __y, __z);
}
__DEVICE__
float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
__DEVICE__
float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
__DEVICE__
float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
__DEVICE__
float frexpf(float __x, int *__nptr) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
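// The allocate pragma keeps __tmp in thread-private memory under OpenMP, so
// the address_space(5) (private) cast below remains valid on amdgcn.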
float __r =
__ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
*__nptr = __tmp;
return __r;
}
__DEVICE__
float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
__DEVICE__
int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
__DEVICE__
__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }
__DEVICE__
__RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); }
__DEVICE__
__RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); }
__DEVICE__
float j0f(float __x) { return __ocml_j0_f32(__x); }
__DEVICE__
float j1f(float __x) { return __ocml_j1_f32(__x); }
__DEVICE__
float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
// and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if
// it'd be beneficial in this case.
if (__n == 0)
return j0f(__x);
if (__n == 1)
return j1f(__x);
float __x0 = j0f(__x);
float __x1 = j1f(__x);
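// Forward recurrence: J_{n+1}(x) = (2n/x) * J_n(x) - J_{n-1}(x).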
for (int __i = 1; __i < __n; ++__i) {
float __x2 = (2 * __i) / __x * __x1 - __x0;
__x0 = __x1;
__x1 = __x2;
}
return __x1;
}
__DEVICE__
float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
__DEVICE__
float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
__DEVICE__
long long int llrintf(float __x) { return __ocml_rint_f32(__x); }
__DEVICE__
long long int llroundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__
float log10f(float __x) { return __ocml_log10_f32(__x); }
__DEVICE__
float log1pf(float __x) { return __ocml_log1p_f32(__x); }
__DEVICE__
float log2f(float __x) { return __ocml_log2_f32(__x); }
__DEVICE__
float logbf(float __x) { return __ocml_logb_f32(__x); }
__DEVICE__
float logf(float __x) { return __ocml_log_f32(__x); }
__DEVICE__
long int lrintf(float __x) { return __ocml_rint_f32(__x); }
__DEVICE__
long int lroundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__
float modff(float __x, float *__iptr) {
float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
float __r =
__ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__iptr = __tmp;
return __r;
}
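// nanf builds a quiet NaN by hand: all-ones exponent plus the quiet bit, with
// the low mantissa bits taken from __make_mantissa(__tagp).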
__DEVICE__
float nanf(const char *__tagp) {
union {
float val;
struct ieee_float {
unsigned int mantissa : 22;
unsigned int quiet : 1;
unsigned int exponent : 8;
unsigned int sign : 1;
} bits;
} __tmp;
__static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
__tmp.bits.sign = 0u;
__tmp.bits.exponent = ~0u;
__tmp.bits.quiet = 1u;
__tmp.bits.mantissa = __make_mantissa(__tagp);
return __tmp.val;
}
__DEVICE__
float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
__DEVICE__
float nextafterf(float __x, float __y) {
return __ocml_nextafter_f32(__x, __y);
}
__DEVICE__
float norm3df(float __x, float __y, float __z) {
return __ocml_len3_f32(__x, __y, __z);
}
__DEVICE__
float norm4df(float __x, float __y, float __z, float __w) {
return __ocml_len4_f32(__x, __y, __z, __w);
}
__DEVICE__
float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
__DEVICE__
float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
__DEVICE__
float normf(int __dim,
const float *__a) { // TODO: placeholder until OCML adds support.
float __r = 0;
while (__dim--) {
__r += __a[0] * __a[0];
++__a;
}
return __ocml_sqrt_f32(__r);
}
__DEVICE__
float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
__DEVICE__
float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
__DEVICE__
float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
__DEVICE__
float remainderf(float __x, float __y) {
return __ocml_remainder_f32(__x, __y);
}
__DEVICE__
float remquof(float __x, float __y, int *__quo) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
float __r = __ocml_remquo_f32(
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
*__quo = __tmp;
return __r;
}
__DEVICE__
float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
__DEVICE__
float rintf(float __x) { return __ocml_rint_f32(__x); }
__DEVICE__
float rnorm3df(float __x, float __y, float __z) {
return __ocml_rlen3_f32(__x, __y, __z);
}
__DEVICE__
float rnorm4df(float __x, float __y, float __z, float __w) {
return __ocml_rlen4_f32(__x, __y, __z, __w);
}
__DEVICE__
float rnormf(int __dim,
const float *__a) { // TODO: placeholder until OCML adds support.
float __r = 0;
while (__dim--) {
__r += __a[0] * __a[0];
++__a;
}
return __ocml_rsqrt_f32(__r);
}
__DEVICE__
float roundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__
float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
__DEVICE__
float scalblnf(float __x, long int __n) {
return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
: __ocml_scalb_f32(__x, __n);
}
__DEVICE__
float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
__DEVICE__
__RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
__DEVICE__
void sincosf(float __x, float *__sinptr, float *__cosptr) {
float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr =
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
}
__DEVICE__
void sincospif(float __x, float *__sinptr, float *__cosptr) {
float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr = __ocml_sincospi_f32(
__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
}
__DEVICE__
float sinf(float __x) { return __ocml_sin_f32(__x); }
__DEVICE__
float sinhf(float __x) { return __ocml_sinh_f32(__x); }
__DEVICE__
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
__DEVICE__
float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
__DEVICE__
float tanf(float __x) { return __ocml_tan_f32(__x); }
__DEVICE__
float tanhf(float __x) { return __ocml_tanh_f32(__x); }
__DEVICE__
float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
__DEVICE__
float truncf(float __x) { return __ocml_trunc_f32(__x); }
__DEVICE__
float y0f(float __x) { return __ocml_y0_f32(__x); }
__DEVICE__
float y1f(float __x) { return __ocml_y1_f32(__x); }
__DEVICE__
float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
// and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if
// it'd be beneficial in this case. Placeholder until OCML adds
// support.
if (__n == 0)
return y0f(__x);
if (__n == 1)
return y1f(__x);
float __x0 = y0f(__x);
float __x1 = y1f(__x);
for (int __i = 1; __i < __n; ++__i) {
float __x2 = (2 * __i) / __x * __x1 - __x0;
__x0 = __x1;
__x1 = __x2;
}
return __x1;
}
// BEGIN INTRINSICS
__DEVICE__
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
__DEVICE__
float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
__DEVICE__
float __expf(float __x) { return __ocml_native_exp_f32(__x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
__DEVICE__
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
__DEVICE__
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
__DEVICE__
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fadd_rn(float __x, float __y) { return __x + __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
__DEVICE__
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
__DEVICE__
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __x / __y; }
#endif
__DEVICE__
float __fdividef(float __x, float __y) { return __x / __y; }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmaf_rd(float __x, float __y, float __z) {
return __ocml_fma_rtn_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __ocml_fma_rte_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_ru(float __x, float __y, float __z) {
return __ocml_fma_rtp_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rz(float __x, float __y, float __z) {
return __ocml_fma_rtz_f32(__x, __y, __z);
}
#else
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __ocml_fma_f32(__x, __y, __z);
}
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
__DEVICE__
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
__DEVICE__
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
__DEVICE__
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fmul_rn(float __x, float __y) { return __x * __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
__DEVICE__
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
__DEVICE__
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
__DEVICE__
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
#else
__DEVICE__
float __frcp_rn(float __x) { return 1.0f / __x; }
#endif
__DEVICE__
float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
__DEVICE__
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
__DEVICE__
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
#else
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
__DEVICE__
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
__DEVICE__
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
__DEVICE__
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fsub_rn(float __x, float __y) { return __x - __y; }
#endif
__DEVICE__
float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
__DEVICE__
float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
__DEVICE__
float __logf(float __x) { return __ocml_native_log_f32(__x); }
__DEVICE__
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
__DEVICE__
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
__DEVICE__
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
*__sinptr = __ocml_native_sin_f32(__x);
*__cosptr = __ocml_native_cos_f32(__x);
}
__DEVICE__
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
__DEVICE__
float __tanf(float __x) { return __ocml_tan_f32(__x); }
// END INTRINSICS
// END FLOAT
// BEGIN DOUBLE
__DEVICE__
double acos(double __x) { return __ocml_acos_f64(__x); }
__DEVICE__
double acosh(double __x) { return __ocml_acosh_f64(__x); }
__DEVICE__
double asin(double __x) { return __ocml_asin_f64(__x); }
__DEVICE__
double asinh(double __x) { return __ocml_asinh_f64(__x); }
__DEVICE__
double atan(double __x) { return __ocml_atan_f64(__x); }
__DEVICE__
double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
__DEVICE__
double atanh(double __x) { return __ocml_atanh_f64(__x); }
__DEVICE__
double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
__DEVICE__
double ceil(double __x) { return __ocml_ceil_f64(__x); }
__DEVICE__
double copysign(double __x, double __y) {
return __ocml_copysign_f64(__x, __y);
}
__DEVICE__
double cos(double __x) { return __ocml_cos_f64(__x); }
__DEVICE__
double cosh(double __x) { return __ocml_cosh_f64(__x); }
__DEVICE__
double cospi(double __x) { return __ocml_cospi_f64(__x); }
__DEVICE__
double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
__DEVICE__
double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
__DEVICE__
double erf(double __x) { return __ocml_erf_f64(__x); }
__DEVICE__
double erfc(double __x) { return __ocml_erfc_f64(__x); }
__DEVICE__
double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
__DEVICE__
double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
__DEVICE__
double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
__DEVICE__
double exp(double __x) { return __ocml_exp_f64(__x); }
__DEVICE__
double exp10(double __x) { return __ocml_exp10_f64(__x); }
__DEVICE__
double exp2(double __x) { return __ocml_exp2_f64(__x); }
__DEVICE__
double expm1(double __x) { return __ocml_expm1_f64(__x); }
__DEVICE__
double fabs(double __x) { return __ocml_fabs_f64(__x); }
__DEVICE__
double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
__DEVICE__
double floor(double __x) { return __ocml_floor_f64(__x); }
__DEVICE__
double fma(double __x, double __y, double __z) {
return __ocml_fma_f64(__x, __y, __z);
}
__DEVICE__
double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
__DEVICE__
double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
__DEVICE__
double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
__DEVICE__
double frexp(double __x, int *__nptr) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
double __r =
__ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
*__nptr = __tmp;
return __r;
}
__DEVICE__
double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
__DEVICE__
int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
__DEVICE__
__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }
__DEVICE__
__RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); }
__DEVICE__
__RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); }
__DEVICE__
double j0(double __x) { return __ocml_j0_f64(__x); }
__DEVICE__
double j1(double __x) { return __ocml_j1_f64(__x); }
__DEVICE__
double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
// and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if
// it'd be beneficial in this case. Placeholder until OCML adds
// support.
if (__n == 0)
return j0(__x);
if (__n == 1)
return j1(__x);
double __x0 = j0(__x);
double __x1 = j1(__x);
for (int __i = 1; __i < __n; ++__i) {
double __x2 = (2 * __i) / __x * __x1 - __x0;
__x0 = __x1;
__x1 = __x2;
}
return __x1;
}
__DEVICE__
double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
__DEVICE__
double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
__DEVICE__
long long int llrint(double __x) { return __ocml_rint_f64(__x); }
__DEVICE__
long long int llround(double __x) { return __ocml_round_f64(__x); }
__DEVICE__
double log(double __x) { return __ocml_log_f64(__x); }
__DEVICE__
double log10(double __x) { return __ocml_log10_f64(__x); }
__DEVICE__
double log1p(double __x) { return __ocml_log1p_f64(__x); }
__DEVICE__
double log2(double __x) { return __ocml_log2_f64(__x); }
__DEVICE__
double logb(double __x) { return __ocml_logb_f64(__x); }
__DEVICE__
long int lrint(double __x) { return __ocml_rint_f64(__x); }
__DEVICE__
long int lround(double __x) { return __ocml_round_f64(__x); }
__DEVICE__
double modf(double __x, double *__iptr) {
double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
double __r =
__ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
*__iptr = __tmp;
return __r;
}
__DEVICE__
double nan(const char *__tagp) {
#if !_WIN32
union {
double val;
struct ieee_double {
uint64_t mantissa : 51;
uint32_t quiet : 1;
uint32_t exponent : 11;
uint32_t sign : 1;
} bits;
} __tmp;
__static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
__tmp.bits.sign = 0u;
__tmp.bits.exponent = ~0u;
__tmp.bits.quiet = 1u;
__tmp.bits.mantissa = __make_mantissa(__tagp);
return __tmp.val;
#else
__static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
uint64_t __val = __make_mantissa(__tagp);
__val |= 0xFFF << 51;
return *reinterpret_cast<double *>(&__val);
#endif
}
__DEVICE__
double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
__DEVICE__
double nextafter(double __x, double __y) {
return __ocml_nextafter_f64(__x, __y);
}
__DEVICE__
double norm(int __dim,
const double *__a) { // TODO: placeholder until OCML adds support.
double __r = 0;
while (__dim--) {
__r += __a[0] * __a[0];
++__a;
}
return __ocml_sqrt_f64(__r);
}
__DEVICE__
double norm3d(double __x, double __y, double __z) {
return __ocml_len3_f64(__x, __y, __z);
}
__DEVICE__
double norm4d(double __x, double __y, double __z, double __w) {
return __ocml_len4_f64(__x, __y, __z, __w);
}
__DEVICE__
double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
__DEVICE__
double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
__DEVICE__
double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
__DEVICE__
double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
__DEVICE__
double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
__DEVICE__
double remainder(double __x, double __y) {
return __ocml_remainder_f64(__x, __y);
}
__DEVICE__
double remquo(double __x, double __y, int *__quo) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
double __r = __ocml_remquo_f64(
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
*__quo = __tmp;
return __r;
}
__DEVICE__
double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
__DEVICE__
double rint(double __x) { return __ocml_rint_f64(__x); }
__DEVICE__
double rnorm(int __dim,
const double *__a) { // TODO: placeholder until OCML adds support.
double __r = 0;
while (__dim--) {
__r += __a[0] * __a[0];
++__a;
}
return __ocml_rsqrt_f64(__r);
}
__DEVICE__
double rnorm3d(double __x, double __y, double __z) {
return __ocml_rlen3_f64(__x, __y, __z);
}
__DEVICE__
double rnorm4d(double __x, double __y, double __z, double __w) {
return __ocml_rlen4_f64(__x, __y, __z, __w);
}
__DEVICE__
double round(double __x) { return __ocml_round_f64(__x); }
__DEVICE__
double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
__DEVICE__
double scalbln(double __x, long int __n) {
return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
: __ocml_scalb_f64(__x, __n);
}
__DEVICE__
double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }
__DEVICE__
__RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); }
__DEVICE__
double sin(double __x) { return __ocml_sin_f64(__x); }
__DEVICE__
void sincos(double __x, double *__sinptr, double *__cosptr) {
double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr = __ocml_sincos_f64(
__x, (__attribute__((address_space(5))) double *)&__tmp);
*__cosptr = __tmp;
}
__DEVICE__
void sincospi(double __x, double *__sinptr, double *__cosptr) {
double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr = __ocml_sincospi_f64(
__x, (__attribute__((address_space(5))) double *)&__tmp);
*__cosptr = __tmp;
}
__DEVICE__
double sinh(double __x) { return __ocml_sinh_f64(__x); }
__DEVICE__
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
__DEVICE__
double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
__DEVICE__
double tan(double __x) { return __ocml_tan_f64(__x); }
__DEVICE__
double tanh(double __x) { return __ocml_tanh_f64(__x); }
__DEVICE__
double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
__DEVICE__
double trunc(double __x) { return __ocml_trunc_f64(__x); }
__DEVICE__
double y0(double __x) { return __ocml_y0_f64(__x); }
__DEVICE__
double y1(double __x) { return __ocml_y1_f64(__x); }
__DEVICE__
double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
// and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if
// it'd be beneficial in this case. Placeholder until OCML adds
// support.
if (__n == 0)
return y0(__x);
if (__n == 1)
return y1(__x);
double __x0 = y0(__x);
double __x1 = y1(__x);
for (int __i = 1; __i < __n; ++__i) {
double __x2 = (2 * __i) / __x * __x1 - __x0;
__x0 = __x1;
__x1 = __x2;
}
return __x1;
}
// BEGIN INTRINSICS
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dadd_rd(double __x, double __y) {
return __ocml_add_rtn_f64(__x, __y);
}
__DEVICE__
double __dadd_rn(double __x, double __y) {
return __ocml_add_rte_f64(__x, __y);
}
__DEVICE__
double __dadd_ru(double __x, double __y) {
return __ocml_add_rtp_f64(__x, __y);
}
__DEVICE__
double __dadd_rz(double __x, double __y) {
return __ocml_add_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dadd_rn(double __x, double __y) { return __x + __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __ddiv_rd(double __x, double __y) {
return __ocml_div_rtn_f64(__x, __y);
}
__DEVICE__
double __ddiv_rn(double __x, double __y) {
return __ocml_div_rte_f64(__x, __y);
}
__DEVICE__
double __ddiv_ru(double __x, double __y) {
return __ocml_div_rtp_f64(__x, __y);
}
__DEVICE__
double __ddiv_rz(double __x, double __y) {
return __ocml_div_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __ddiv_rn(double __x, double __y) { return __x / __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dmul_rd(double __x, double __y) {
return __ocml_mul_rtn_f64(__x, __y);
}
__DEVICE__
double __dmul_rn(double __x, double __y) {
return __ocml_mul_rte_f64(__x, __y);
}
__DEVICE__
double __dmul_ru(double __x, double __y) {
return __ocml_mul_rtp_f64(__x, __y);
}
__DEVICE__
double __dmul_rz(double __x, double __y) {
return __ocml_mul_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dmul_rn(double __x, double __y) { return __x * __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
__DEVICE__
double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
__DEVICE__
double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
__DEVICE__
double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
#else
__DEVICE__
double __drcp_rn(double __x) { return 1.0 / __x; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
__DEVICE__
double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
__DEVICE__
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
#else
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dsub_rd(double __x, double __y) {
return __ocml_sub_rtn_f64(__x, __y);
}
__DEVICE__
double __dsub_rn(double __x, double __y) {
return __ocml_sub_rte_f64(__x, __y);
}
__DEVICE__
double __dsub_ru(double __x, double __y) {
return __ocml_sub_rtp_f64(__x, __y);
}
__DEVICE__
double __dsub_rz(double __x, double __y) {
return __ocml_sub_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dsub_rn(double __x, double __y) { return __x - __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __fma_rd(double __x, double __y, double __z) {
return __ocml_fma_rtn_f64(__x, __y, __z);
}
__DEVICE__
double __fma_rn(double __x, double __y, double __z) {
return __ocml_fma_rte_f64(__x, __y, __z);
}
__DEVICE__
double __fma_ru(double __x, double __y, double __z) {
return __ocml_fma_rtp_f64(__x, __y, __z);
}
__DEVICE__
double __fma_rz(double __x, double __y, double __z) {
return __ocml_fma_rtz_f64(__x, __y, __z);
}
#else
__DEVICE__
double __fma_rn(double __x, double __y, double __z) {
return __ocml_fma_f64(__x, __y, __z);
}
#endif
// END INTRINSICS
// END DOUBLE
// C only macros
#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
#define signbit(__x) \
_Generic((__x), float : __signbitf, double : __signbit)(__x)
#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
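// For illustration, in C the _Generic macros above dispatch on argument type:
// isnan(1.0f) expands to __isnanf(1.0f), while isnan(1.0) expands to
// __isnan(1.0).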
#if defined(__cplusplus)
template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
return (__arg1 < __arg2) ? __arg1 : __arg2;
}
template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
return (__arg1 > __arg2) ? __arg1 : __arg2;
}
__DEVICE__ int min(int __arg1, int __arg2) {
return (__arg1 < __arg2) ? __arg1 : __arg2;
}
__DEVICE__ int max(int __arg1, int __arg2) {
return (__arg1 > __arg2) ? __arg1 : __arg2;
}
__DEVICE__
float max(float __x, float __y) { return fmaxf(__x, __y); }
__DEVICE__
double max(double __x, double __y) { return fmax(__x, __y); }
__DEVICE__
float min(float __x, float __y) { return fminf(__x, __y); }
__DEVICE__
double min(double __x, double __y) { return fmin(__x, __y); }
-#if !defined(__HIPCC_RTC__)
+#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
return std::min(__arg1, __arg2);
}
__host__ inline static int max(int __arg1, int __arg2) {
return std::max(__arg1, __arg2);
}
-#endif // __HIPCC_RTC__
+#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif
#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__RETURN_TYPE")
#endif // __CLANG_HIP_MATH_H__
diff --git a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
index 953857badfc4..279fb26fbaf7 100644
--- a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -1,84 +1,106 @@
/*===- __clang_openmp_device_functions.h - OpenMP device function declares -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_OPENMP_DEVICE_FUNCTIONS_H__
#define __CLANG_OPENMP_DEVICE_FUNCTIONS_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
-#pragma omp begin declare variant match( \
- device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
-
#ifdef __cplusplus
extern "C" {
#endif
+#pragma omp begin declare variant match( \
+ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+
#define __CUDA__
#define __OPENMP_NVPTX__
/// Include declarations for libdevice functions.
#include <__clang_cuda_libdevice_declares.h>
/// Provide definitions for these functions.
#include <__clang_cuda_device_functions.h>
#undef __OPENMP_NVPTX__
#undef __CUDA__
-#ifdef __cplusplus
-} // extern "C"
+#pragma omp end declare variant
+
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+// Import types which will be used by __clang_hip_libdevice_declares.h
+#ifndef __cplusplus
+#include <stdbool.h>
+#include <stdint.h>
#endif
+#define __OPENMP_AMDGCN__
+#pragma push_macro("__device__")
+#define __device__
+
+/// Include declarations for libdevice functions.
+#include <__clang_hip_libdevice_declares.h>
+
+#pragma pop_macro("__device__")
+#undef __OPENMP_AMDGCN__
+
#pragma omp end declare variant
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
// need to `include <new>` in C++ mode.
#ifdef __cplusplus
// We require malloc/free.
#include <cstdlib>
#pragma push_macro("OPENMP_NOEXCEPT")
#if __cplusplus >= 201103L
#define OPENMP_NOEXCEPT noexcept
#else
#define OPENMP_NOEXCEPT
#endif
// Device overrides for non-placement new and delete.
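// operator new must return a distinct, non-null pointer even for zero-byte
// requests, hence the bump of size to 1 below.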
inline void *operator new(__SIZE_TYPE__ size) {
if (size == 0)
size = 1;
return ::malloc(size);
}
inline void *operator new[](__SIZE_TYPE__ size) { return ::operator new(size); }
inline void operator delete(void *ptr)OPENMP_NOEXCEPT { ::free(ptr); }
inline void operator delete[](void *ptr) OPENMP_NOEXCEPT {
::operator delete(ptr);
}
// Sized delete, C++14 only.
#if __cplusplus >= 201402L
inline void operator delete(void *ptr, __SIZE_TYPE__ size)OPENMP_NOEXCEPT {
::operator delete(ptr);
}
inline void operator delete[](void *ptr, __SIZE_TYPE__ size) OPENMP_NOEXCEPT {
::operator delete(ptr);
}
#endif
#pragma pop_macro("OPENMP_NOEXCEPT")
#endif
#endif
diff --git a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/cmath b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/cmath
index 1aff66af7d52..22a720aca956 100644
--- a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/cmath
+++ b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/cmath
@@ -1,78 +1,132 @@
/*===-- __clang_openmp_device_functions.h - OpenMP math declares ------ c++ -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_OPENMP_CMATH_H__
#define __CLANG_OPENMP_CMATH_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
#include_next <cmath>
// Make sure we include our math.h overlay; it probably happened already but we
// need to be sure.
#include <math.h>
// We (might) need cstdlib because __clang_cuda_cmath.h below declares `abs`
// which might live in cstdlib.
#include <cstdlib>
// We need limits because __clang_cuda_cmath.h below uses `std::numeric_limits`.
#include <limits>
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any, allow_templates)})
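// Declarations between this pragma and the matching "end declare variant"
// are only considered when the device selector matches, i.e. when compiling
// for an nvptx target here.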
#define __CUDA__
#define __OPENMP_NVPTX__
#include <__clang_cuda_cmath.h>
#undef __OPENMP_NVPTX__
#undef __CUDA__
// Overloads not provided by the CUDA wrappers but by the CUDA system headers.
// Since we do not include the latter we define them ourselves.
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
__DEVICE__ float erf(float __x) { return ::erff(__x); }
__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
__DEVICE__ long long int llrint(float __x) { return ::llrintf(__x); }
__DEVICE__ long long int llround(float __x) { return ::llroundf(__x); }
__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
__DEVICE__ float log2(float __x) { return ::log2f(__x); }
__DEVICE__ float logb(float __x) { return ::logbf(__x); }
__DEVICE__ long int lrint(float __x) { return ::lrintf(__x); }
__DEVICE__ long int lround(float __x) { return ::lroundf(__x); }
__DEVICE__ float nextafter(float __x, float __y) {
return ::nextafterf(__x, __y);
}
__DEVICE__ float remainder(float __x, float __y) {
return ::remainderf(__x, __y);
}
__DEVICE__ float scalbln(float __x, long int __y) {
return ::scalblnf(__x, __y);
}
__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
#undef __DEVICE__
#pragma omp end declare variant
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#pragma push_macro("__constant__")
+#define __constant__ __attribute__((constant))
+#define __OPENMP_AMDGCN__
+
+#include <__clang_hip_cmath.h>
+
+#pragma pop_macro("__constant__")
+#undef __OPENMP_AMDGCN__
+
+// Define overloads that are otherwise absent.
+#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
+
+__DEVICE__ float acos(float __x) { return ::acosf(__x); }
+__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
+__DEVICE__ float asin(float __x) { return ::asinf(__x); }
+__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
+__DEVICE__ float atan(float __x) { return ::atanf(__x); }
+__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
+__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
+__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
+__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
+__DEVICE__ float erf(float __x) { return ::erff(__x); }
+__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
+__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
+__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
+__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
+__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
+__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
+__DEVICE__ float ldexp(float __arg, int __exp) {
+ return ::ldexpf(__arg, __exp);
+}
+__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
+__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
+__DEVICE__ float logb(float __x) { return ::logbf(__x); }
+__DEVICE__ float nextafter(float __x, float __y) {
+ return ::nextafterf(__x, __y);
+}
+__DEVICE__ float remainder(float __x, float __y) {
+ return ::remainderf(__x, __y);
+}
+__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
+__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
+__DEVICE__ float tan(float __x) { return ::tanf(__x); }
+__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
+__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
+
+#undef __DEVICE__
+
+#pragma omp end declare variant
+#endif // __AMDGCN__
+
#endif
diff --git a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/math.h b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/math.h
index c64af8b13ece..1e3c07cfdb8c 100644
--- a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/math.h
+++ b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/math.h
@@ -1,51 +1,61 @@
/*===---- openmp_wrapper/math.h -------- OpenMP math.h intercept ------ c++ -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
// If we are in C++ mode and include <math.h> (not <cmath>) first, we still need
// to make sure <cmath> is read first. The problem otherwise is that we haven't
// seen the declarations of the math.h functions when the system math.h includes
// our cmath overlay. However, our cmath overlay, or better the underlying
// overlay, e.g. CUDA, uses the math.h functions. Since we haven't declared them
// yet we get errors. CUDA avoids this by eagerly declaring all math functions
// (in the __device__ space) but we cannot do this. Instead we break the
// dependence by forcing cmath to go first. While our cmath will in turn include
// this file, the cmath guards will prevent recursion.
#ifdef __cplusplus
#include <cmath>
#endif
#ifndef __CLANG_OPENMP_MATH_H__
#define __CLANG_OPENMP_MATH_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
#include_next <math.h>
// We need limits.h for __clang_cuda_math.h below and, because it should not hurt,
// we include it eagerly here.
#include <limits.h>
// We need stdlib.h because (for now) __clang_cuda_math.h below declares `abs`
// which should live in stdlib.h.
#include <stdlib.h>
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
#define __CUDA__
#define __OPENMP_NVPTX__
#include <__clang_cuda_math.h>
#undef __OPENMP_NVPTX__
#undef __CUDA__
#pragma omp end declare variant
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#define __OPENMP_AMDGCN__
+#include <__clang_hip_math.h>
+#undef __OPENMP_AMDGCN__
+
+#pragma omp end declare variant
+#endif
+
#endif
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index be4c51930789..25f134868758 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1,6367 +1,6367 @@
//===--- SemaTemplateInstantiateDecl.cpp - C++ Template Decl Instantiation ===/
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===/
//
// This file implements C++ template instantiation for declarations.
//
//===----------------------------------------------------------------------===/
#include "TreeTransform.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/DependentDiagnostic.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/PrettyDeclStackTrace.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "clang/Sema/TemplateInstCallback.h"
#include "llvm/Support/TimeProfiler.h"
using namespace clang;
static bool isDeclWithinFunction(const Decl *D) {
const DeclContext *DC = D->getDeclContext();
if (DC->isFunctionOrMethod())
return true;
if (DC->isRecord())
return cast<CXXRecordDecl>(DC)->isLocalClass();
return false;
}
template<typename DeclT>
static bool SubstQualifier(Sema &SemaRef, const DeclT *OldDecl, DeclT *NewDecl,
const MultiLevelTemplateArgumentList &TemplateArgs) {
if (!OldDecl->getQualifierLoc())
return false;
assert((NewDecl->getFriendObjectKind() ||
!OldDecl->getLexicalDeclContext()->isDependentContext()) &&
"non-friend with qualified name defined in dependent context");
Sema::ContextRAII SavedContext(
SemaRef,
const_cast<DeclContext *>(NewDecl->getFriendObjectKind()
? NewDecl->getLexicalDeclContext()
: OldDecl->getLexicalDeclContext()));
NestedNameSpecifierLoc NewQualifierLoc
= SemaRef.SubstNestedNameSpecifierLoc(OldDecl->getQualifierLoc(),
TemplateArgs);
if (!NewQualifierLoc)
return true;
NewDecl->setQualifierInfo(NewQualifierLoc);
return false;
}
bool TemplateDeclInstantiator::SubstQualifier(const DeclaratorDecl *OldDecl,
DeclaratorDecl *NewDecl) {
return ::SubstQualifier(SemaRef, OldDecl, NewDecl, TemplateArgs);
}
bool TemplateDeclInstantiator::SubstQualifier(const TagDecl *OldDecl,
TagDecl *NewDecl) {
return ::SubstQualifier(SemaRef, OldDecl, NewDecl, TemplateArgs);
}
// Include attribute instantiation code.
#include "clang/Sema/AttrTemplateInstantiate.inc"
static void instantiateDependentAlignedAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AlignedAttr *Aligned, Decl *New, bool IsPackExpansion) {
if (Aligned->isAlignmentExpr()) {
// The alignment expression is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(Aligned->getAlignmentExpr(), TemplateArgs);
if (!Result.isInvalid())
S.AddAlignedAttr(New, *Aligned, Result.getAs<Expr>(), IsPackExpansion);
} else {
TypeSourceInfo *Result = S.SubstType(Aligned->getAlignmentType(),
TemplateArgs, Aligned->getLocation(),
DeclarationName());
if (Result)
S.AddAlignedAttr(New, *Aligned, Result, IsPackExpansion);
}
}
static void instantiateDependentAlignedAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AlignedAttr *Aligned, Decl *New) {
if (!Aligned->isPackExpansion()) {
instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, false);
return;
}
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
if (Aligned->isAlignmentExpr())
S.collectUnexpandedParameterPacks(Aligned->getAlignmentExpr(),
Unexpanded);
else
S.collectUnexpandedParameterPacks(Aligned->getAlignmentType()->getTypeLoc(),
Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether we can expand this attribute pack yet.
bool Expand = true, RetainExpansion = false;
Optional<unsigned> NumExpansions;
// FIXME: Use the actual location of the ellipsis.
SourceLocation EllipsisLoc = Aligned->getLocation();
if (S.CheckParameterPacksForExpansion(EllipsisLoc, Aligned->getRange(),
Unexpanded, TemplateArgs, Expand,
RetainExpansion, NumExpansions))
return;
if (!Expand) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, -1);
instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, true);
} else {
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, I);
instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, false);
}
}
}
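// Illustrative sketch (not part of this change): the kind of dependent
// 'alignas' use the two overloads above service; the type names below are
// hypothetical user code.
//
//   template <unsigned N> struct Padded {
//     alignas(N) char buf[N];                     // dependent alignment expr
//   };
//   template <typename... Ts> struct Overaligned {
//     alignas(Ts...) unsigned char storage[128];  // alignment pack expansion
//   };
//   Padded<16> p;                 // substitution yields alignas(16)
//   Overaligned<int, double> o;   // expands to alignas(int) alignas(double)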
static void instantiateDependentAssumeAlignedAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AssumeAlignedAttr *Aligned, Decl *New) {
// The alignment expression is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
Expr *E, *OE = nullptr;
ExprResult Result = S.SubstExpr(Aligned->getAlignment(), TemplateArgs);
if (Result.isInvalid())
return;
E = Result.getAs<Expr>();
if (Aligned->getOffset()) {
Result = S.SubstExpr(Aligned->getOffset(), TemplateArgs);
if (Result.isInvalid())
return;
OE = Result.getAs<Expr>();
}
S.AddAssumeAlignedAttr(New, *Aligned, E, OE);
}
static void instantiateDependentAlignValueAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AlignValueAttr *Aligned, Decl *New) {
// The alignment expression is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(Aligned->getAlignment(), TemplateArgs);
if (!Result.isInvalid())
S.AddAlignValueAttr(New, *Aligned, Result.getAs<Expr>());
}
static void instantiateDependentAllocAlignAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AllocAlignAttr *Align, Decl *New) {
Expr *Param = IntegerLiteral::Create(
S.getASTContext(),
llvm::APInt(64, Align->getParamIndex().getSourceIndex()),
S.getASTContext().UnsignedLongLongTy, Align->getLocation());
S.AddAllocAlignAttr(New, *Align, Param);
}
static void instantiateDependentAnnotationAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AnnotateAttr *Attr, Decl *New) {
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
SmallVector<Expr *, 4> Args;
Args.reserve(Attr->args_size());
for (auto *E : Attr->args()) {
ExprResult Result = S.SubstExpr(E, TemplateArgs);
if (!Result.isUsable())
return;
Args.push_back(Result.get());
}
S.AddAnnotationAttr(New, *Attr, Attr->getAnnotation(), Args);
}
static Expr *instantiateDependentFunctionAttrCondition(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const Attr *A, Expr *OldCond, const Decl *Tmpl, FunctionDecl *New) {
Expr *Cond = nullptr;
{
Sema::ContextRAII SwitchContext(S, New);
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(OldCond, TemplateArgs);
if (Result.isInvalid())
return nullptr;
Cond = Result.getAs<Expr>();
}
if (!Cond->isTypeDependent()) {
ExprResult Converted = S.PerformContextuallyConvertToBool(Cond);
if (Converted.isInvalid())
return nullptr;
Cond = Converted.get();
}
SmallVector<PartialDiagnosticAt, 8> Diags;
if (OldCond->isValueDependent() && !Cond->isValueDependent() &&
!Expr::isPotentialConstantExprUnevaluated(Cond, New, Diags)) {
S.Diag(A->getLocation(), diag::err_attr_cond_never_constant_expr) << A;
for (const auto &P : Diags)
S.Diag(P.first, P.second);
return nullptr;
}
return Cond;
}
static void instantiateDependentEnableIfAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const EnableIfAttr *EIA, const Decl *Tmpl, FunctionDecl *New) {
Expr *Cond = instantiateDependentFunctionAttrCondition(
S, TemplateArgs, EIA, EIA->getCond(), Tmpl, New);
if (Cond)
New->addAttr(new (S.getASTContext()) EnableIfAttr(S.getASTContext(), *EIA,
Cond, EIA->getMessage()));
}
static void instantiateDependentDiagnoseIfAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const DiagnoseIfAttr *DIA, const Decl *Tmpl, FunctionDecl *New) {
Expr *Cond = instantiateDependentFunctionAttrCondition(
S, TemplateArgs, DIA, DIA->getCond(), Tmpl, New);
if (Cond)
New->addAttr(new (S.getASTContext()) DiagnoseIfAttr(
S.getASTContext(), *DIA, Cond, DIA->getMessage(),
DIA->getDiagnosticType(), DIA->getArgDependent(), New));
}
// Constructs and adds to New a new instance of CUDALaunchBoundsAttr using
// template A as the base and arguments from TemplateArgs.
static void instantiateDependentCUDALaunchBoundsAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const CUDALaunchBoundsAttr &Attr, Decl *New) {
// The launch-bounds expressions are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(Attr.getMaxThreads(), TemplateArgs);
if (Result.isInvalid())
return;
Expr *MaxThreads = Result.getAs<Expr>();
Expr *MinBlocks = nullptr;
if (Attr.getMinBlocks()) {
Result = S.SubstExpr(Attr.getMinBlocks(), TemplateArgs);
if (Result.isInvalid())
return;
MinBlocks = Result.getAs<Expr>();
}
S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks);
}
static void
instantiateDependentModeAttr(Sema &S,
const MultiLevelTemplateArgumentList &TemplateArgs,
const ModeAttr &Attr, Decl *New) {
S.AddModeAttr(New, Attr, Attr.getMode(),
/*InInstantiation=*/true);
}
/// Instantiation of 'declare simd' attribute and its arguments.
static void instantiateOMPDeclareSimdDeclAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const OMPDeclareSimdDeclAttr &Attr, Decl *New) {
// Allow 'this' in clauses with varlists.
if (auto *FTD = dyn_cast<FunctionTemplateDecl>(New))
New = FTD->getTemplatedDecl();
auto *FD = cast<FunctionDecl>(New);
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(FD->getDeclContext());
SmallVector<Expr *, 4> Uniforms, Aligneds, Alignments, Linears, Steps;
SmallVector<unsigned, 4> LinModifiers;
auto SubstExpr = [&](Expr *E) -> ExprResult {
if (auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()))
if (auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
Sema::ContextRAII SavedContext(S, FD);
LocalInstantiationScope Local(S);
if (FD->getNumParams() > PVD->getFunctionScopeIndex())
Local.InstantiatedLocal(
PVD, FD->getParamDecl(PVD->getFunctionScopeIndex()));
return S.SubstExpr(E, TemplateArgs);
}
Sema::CXXThisScopeRAII ThisScope(S, ThisContext, Qualifiers(),
FD->isCXXInstanceMember());
return S.SubstExpr(E, TemplateArgs);
};
// Substitute a single OpenMP clause, which is a potentially-evaluated
// full-expression.
auto Subst = [&](Expr *E) -> ExprResult {
EnterExpressionEvaluationContext Evaluated(
S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
ExprResult Res = SubstExpr(E);
if (Res.isInvalid())
return Res;
return S.ActOnFinishFullExpr(Res.get(), false);
};
ExprResult Simdlen;
if (auto *E = Attr.getSimdlen())
Simdlen = Subst(E);
if (Attr.uniforms_size() > 0) {
for(auto *E : Attr.uniforms()) {
ExprResult Inst = Subst(E);
if (Inst.isInvalid())
continue;
Uniforms.push_back(Inst.get());
}
}
auto AI = Attr.alignments_begin();
for (auto *E : Attr.aligneds()) {
ExprResult Inst = Subst(E);
if (Inst.isInvalid())
continue;
Aligneds.push_back(Inst.get());
Inst = ExprEmpty();
if (*AI)
Inst = S.SubstExpr(*AI, TemplateArgs);
Alignments.push_back(Inst.get());
++AI;
}
auto SI = Attr.steps_begin();
for (auto *E : Attr.linears()) {
ExprResult Inst = Subst(E);
if (Inst.isInvalid())
continue;
Linears.push_back(Inst.get());
Inst = ExprEmpty();
if (*SI)
Inst = S.SubstExpr(*SI, TemplateArgs);
Steps.push_back(Inst.get());
++SI;
}
LinModifiers.append(Attr.modifiers_begin(), Attr.modifiers_end());
(void)S.ActOnOpenMPDeclareSimdDirective(
S.ConvertDeclToDeclGroup(New), Attr.getBranchState(), Simdlen.get(),
Uniforms, Aligneds, Alignments, Linears, LinModifiers, Steps,
Attr.getRange());
}
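// Illustrative sketch (not part of this change): a 'declare simd' whose
// clauses reference the function's parameters and a dependent constant, so
// they must be re-substituted per instantiation; names are hypothetical.
//
//   template <typename T> struct Vec {
//   #pragma omp declare simd uniform(stride) linear(i : 1) aligned(p : sizeof(T))
//     void gather(const T *p, int i, int stride);
//   };
//   template struct Vec<double>;   // clause expressions are remapped to the
//                                  // instantiated parameters by SubstExpr above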
/// Instantiation of 'declare variant' attribute and its arguments.
static void instantiateOMPDeclareVariantAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const OMPDeclareVariantAttr &Attr, Decl *New) {
// Allow 'this' in clauses with varlists.
if (auto *FTD = dyn_cast<FunctionTemplateDecl>(New))
New = FTD->getTemplatedDecl();
auto *FD = cast<FunctionDecl>(New);
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(FD->getDeclContext());
auto &&SubstExpr = [FD, ThisContext, &S, &TemplateArgs](Expr *E) {
if (auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()))
if (auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
Sema::ContextRAII SavedContext(S, FD);
LocalInstantiationScope Local(S);
if (FD->getNumParams() > PVD->getFunctionScopeIndex())
Local.InstantiatedLocal(
PVD, FD->getParamDecl(PVD->getFunctionScopeIndex()));
return S.SubstExpr(E, TemplateArgs);
}
Sema::CXXThisScopeRAII ThisScope(S, ThisContext, Qualifiers(),
FD->isCXXInstanceMember());
return S.SubstExpr(E, TemplateArgs);
};
// Substitute a single OpenMP clause, which is a potentially-evaluated
// full-expression.
auto &&Subst = [&SubstExpr, &S](Expr *E) {
EnterExpressionEvaluationContext Evaluated(
S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
ExprResult Res = SubstExpr(E);
if (Res.isInvalid())
return Res;
return S.ActOnFinishFullExpr(Res.get(), false);
};
ExprResult VariantFuncRef;
if (Expr *E = Attr.getVariantFuncRef()) {
// Do not mark the function as used, to prevent its emission if this is the
// only place where it is used.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
VariantFuncRef = Subst(E);
}
// Copy the template version of the OMPTraitInfo and run substitution on all
// score and condition expressions.
OMPTraitInfo &TI = S.getASTContext().getNewOMPTraitInfo();
TI = *Attr.getTraitInfos();
// Try to substitute template parameters in score and condition expressions.
auto SubstScoreOrConditionExpr = [&S, Subst](Expr *&E, bool) {
if (E) {
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult ER = Subst(E);
if (ER.isUsable())
E = ER.get();
else
return true;
}
return false;
};
if (TI.anyScoreOrCondition(SubstScoreOrConditionExpr))
return;
Expr *E = VariantFuncRef.get();
// Check function/variant ref for `omp declare variant` but not for `omp
// begin declare variant` (which uses implicit attributes).
Optional<std::pair<FunctionDecl *, Expr *>> DeclVarData =
S.checkOpenMPDeclareVariantFunction(S.ConvertDeclToDeclGroup(New),
VariantFuncRef.get(), TI,
Attr.getRange());
if (!DeclVarData)
return;
E = DeclVarData.getValue().second;
FD = DeclVarData.getValue().first;
if (auto *VariantDRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
if (auto *VariantFD = dyn_cast<FunctionDecl>(VariantDRE->getDecl())) {
if (auto *VariantFTD = VariantFD->getDescribedFunctionTemplate()) {
if (!VariantFTD->isThisDeclarationADefinition())
return;
Sema::TentativeAnalysisScope Trap(S);
const TemplateArgumentList *TAL = TemplateArgumentList::CreateCopy(
S.Context, TemplateArgs.getInnermost());
auto *SubstFD = S.InstantiateFunctionDeclaration(VariantFTD, TAL,
New->getLocation());
if (!SubstFD)
return;
QualType NewType = S.Context.mergeFunctionTypes(
SubstFD->getType(), FD->getType(),
/* OfBlockPointer */ false,
/* Unqualified */ false, /* AllowCXX */ true);
if (NewType.isNull())
return;
S.InstantiateFunctionDefinition(
New->getLocation(), SubstFD, /* Recursive */ true,
/* DefinitionRequired */ false, /* AtEndOfTU */ false);
SubstFD->setInstantiationIsPending(!SubstFD->isDefined());
E = DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(),
SourceLocation(), SubstFD,
/* RefersToEnclosingVariableOrCapture */ false,
/* NameLoc */ SubstFD->getLocation(),
SubstFD->getType(), ExprValueKind::VK_PRValue);
}
}
}
S.ActOnOpenMPDeclareVariantDirective(FD, E, TI, Attr.getRange());
}
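// Illustrative sketch (not part of this change): a 'declare variant' attached
// to a member of a class template. The placement and selector are illustrative
// only; the names are hypothetical.
//
//   template <typename T> struct Ops {
//     static T fast(T x);
//   #pragma omp declare variant(fast) match(device = {kind(gpu)})
//     static T generic(T x);
//   };
//   template struct Ops<float>;   // the variant function reference and any
//                                 // score/condition expressions in the trait
//                                 // info are substituted by the code above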
static void instantiateDependentAMDGPUFlatWorkGroupSizeAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AMDGPUFlatWorkGroupSizeAttr &Attr, Decl *New) {
// Both the min and max expressions are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs);
if (Result.isInvalid())
return;
Expr *MinExpr = Result.getAs<Expr>();
Result = S.SubstExpr(Attr.getMax(), TemplateArgs);
if (Result.isInvalid())
return;
Expr *MaxExpr = Result.getAs<Expr>();
S.addAMDGPUFlatWorkGroupSizeAttr(New, Attr, MinExpr, MaxExpr);
}
static ExplicitSpecifier
instantiateExplicitSpecifier(Sema &S,
const MultiLevelTemplateArgumentList &TemplateArgs,
ExplicitSpecifier ES, FunctionDecl *New) {
if (!ES.getExpr())
return ES;
Expr *OldCond = ES.getExpr();
Expr *Cond = nullptr;
{
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult SubstResult = S.SubstExpr(OldCond, TemplateArgs);
if (SubstResult.isInvalid()) {
return ExplicitSpecifier::Invalid();
}
Cond = SubstResult.get();
}
ExplicitSpecifier Result(Cond, ES.getKind());
if (!Cond->isTypeDependent())
S.tryResolveExplicitSpecifier(Result);
return Result;
}
static void instantiateDependentAMDGPUWavesPerEUAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AMDGPUWavesPerEUAttr &Attr, Decl *New) {
// Both the min and max expressions are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs);
if (Result.isInvalid())
return;
Expr *MinExpr = Result.getAs<Expr>();
Expr *MaxExpr = nullptr;
if (auto Max = Attr.getMax()) {
Result = S.SubstExpr(Max, TemplateArgs);
if (Result.isInvalid())
return;
MaxExpr = Result.getAs<Expr>();
}
S.addAMDGPUWavesPerEUAttr(New, Attr, MinExpr, MaxExpr);
}
// This doesn't take any template parameters, but we have a custom action that
// needs to happen when the kernel itself is instantiated. We need to run the
// ItaniumMangler to mark the names required to name this kernel.
static void instantiateDependentSYCLKernelAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const SYCLKernelAttr &Attr, Decl *New) {
// Functions cannot be partially specialized, so if we are being instantiated,
// we are obviously a complete specialization. Since this attribute is only
// valid on function template declarations, we know that this is a full
// instantiation of a kernel.
S.AddSYCLKernelLambda(cast<FunctionDecl>(New));
// Evaluate whether this would change any of the already evaluated
// __builtin_sycl_unique_stable_name values.
for (auto &Itr : S.Context.SYCLUniqueStableNameEvaluatedValues) {
const std::string &CurName = Itr.first->ComputeName(S.Context);
if (Itr.second != CurName) {
S.Diag(New->getLocation(),
diag::err_kernel_invalidates_sycl_unique_stable_name);
S.Diag(Itr.first->getLocation(),
diag::note_sycl_unique_stable_name_evaluated_here);
// Update this so future diagnostics work correctly.
Itr.second = CurName;
}
}
New->addAttr(Attr.clone(S.getASTContext()));
}
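// Illustrative sketch (not part of this change) of a SYCL-style kernel entry
// point; the names are hypothetical:
//
//   template <typename KernelName, typename Func>
//   __attribute__((sycl_kernel)) void kernel_single_task(const Func &f) {
//     f();
//   }
//
// Each instantiation of kernel_single_task<...> reaches the code above, which
// runs the mangler so the kernel name is available and re-checks any cached
// __builtin_sycl_unique_stable_name results against it.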
/// Determine whether the attribute A might be relevant to the declaration D.
/// If not, we can skip instantiating it. The attribute may or may not have
/// been instantiated yet.
static bool isRelevantAttr(Sema &S, const Decl *D, const Attr *A) {
// 'preferred_name' is only relevant to the matching specialization of the
// template.
if (const auto *PNA = dyn_cast<PreferredNameAttr>(A)) {
QualType T = PNA->getTypedefType();
const auto *RD = cast<CXXRecordDecl>(D);
if (!T->isDependentType() && !RD->isDependentContext() &&
!declaresSameEntity(T->getAsCXXRecordDecl(), RD))
return false;
for (const auto *ExistingPNA : D->specific_attrs<PreferredNameAttr>())
if (S.Context.hasSameType(ExistingPNA->getTypedefType(),
PNA->getTypedefType()))
return false;
return true;
}
return true;
}
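// Illustrative sketch (not part of this change) of why the filtering above is
// needed, following the usual libc++-style pattern with hypothetical names:
//
//   template <class CharT> class basic_string;
//   using string  = basic_string<char>;
//   using wstring = basic_string<wchar_t>;
//   template <class CharT>
//   class __attribute__((preferred_name(string),
//                        preferred_name(wstring))) basic_string { /*...*/ };
//
// When basic_string<char> is instantiated, only preferred_name(string) matches
// that specialization; the wstring attribute is considered irrelevant here.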
void Sema::InstantiateAttrsForDecl(
const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl,
Decl *New, LateInstantiatedAttrVec *LateAttrs,
LocalInstantiationScope *OuterMostScope) {
if (NamedDecl *ND = dyn_cast<NamedDecl>(New)) {
// FIXME: This function is called multiple times for the same template
// specialization. We should only instantiate attributes that were added
// since the previous instantiation.
for (const auto *TmplAttr : Tmpl->attrs()) {
if (!isRelevantAttr(*this, New, TmplAttr))
continue;
// FIXME: If any of the special case versions from InstantiateAttrs become
// applicable to template declaration, we'll need to add them here.
CXXThisScopeRAII ThisScope(
*this, dyn_cast_or_null<CXXRecordDecl>(ND->getDeclContext()),
Qualifiers(), ND->isCXXInstanceMember());
Attr *NewAttr = sema::instantiateTemplateAttributeForDecl(
TmplAttr, Context, *this, TemplateArgs);
if (NewAttr && isRelevantAttr(*this, New, NewAttr))
New->addAttr(NewAttr);
}
}
}
static Sema::RetainOwnershipKind
attrToRetainOwnershipKind(const Attr *A) {
switch (A->getKind()) {
case clang::attr::CFConsumed:
return Sema::RetainOwnershipKind::CF;
case clang::attr::OSConsumed:
return Sema::RetainOwnershipKind::OS;
case clang::attr::NSConsumed:
return Sema::RetainOwnershipKind::NS;
default:
llvm_unreachable("Wrong argument supplied");
}
}
void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
const Decl *Tmpl, Decl *New,
LateInstantiatedAttrVec *LateAttrs,
LocalInstantiationScope *OuterMostScope) {
for (const auto *TmplAttr : Tmpl->attrs()) {
if (!isRelevantAttr(*this, New, TmplAttr))
continue;
// FIXME: This should be generalized to more than just the AlignedAttr.
const AlignedAttr *Aligned = dyn_cast<AlignedAttr>(TmplAttr);
if (Aligned && Aligned->isAlignmentDependent()) {
instantiateDependentAlignedAttr(*this, TemplateArgs, Aligned, New);
continue;
}
if (const auto *AssumeAligned = dyn_cast<AssumeAlignedAttr>(TmplAttr)) {
instantiateDependentAssumeAlignedAttr(*this, TemplateArgs, AssumeAligned, New);
continue;
}
if (const auto *AlignValue = dyn_cast<AlignValueAttr>(TmplAttr)) {
instantiateDependentAlignValueAttr(*this, TemplateArgs, AlignValue, New);
continue;
}
if (const auto *AllocAlign = dyn_cast<AllocAlignAttr>(TmplAttr)) {
instantiateDependentAllocAlignAttr(*this, TemplateArgs, AllocAlign, New);
continue;
}
if (const auto *Annotate = dyn_cast<AnnotateAttr>(TmplAttr)) {
instantiateDependentAnnotationAttr(*this, TemplateArgs, Annotate, New);
continue;
}
if (const auto *EnableIf = dyn_cast<EnableIfAttr>(TmplAttr)) {
instantiateDependentEnableIfAttr(*this, TemplateArgs, EnableIf, Tmpl,
cast<FunctionDecl>(New));
continue;
}
if (const auto *DiagnoseIf = dyn_cast<DiagnoseIfAttr>(TmplAttr)) {
instantiateDependentDiagnoseIfAttr(*this, TemplateArgs, DiagnoseIf, Tmpl,
cast<FunctionDecl>(New));
continue;
}
if (const auto *CUDALaunchBounds =
dyn_cast<CUDALaunchBoundsAttr>(TmplAttr)) {
instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs,
*CUDALaunchBounds, New);
continue;
}
if (const auto *Mode = dyn_cast<ModeAttr>(TmplAttr)) {
instantiateDependentModeAttr(*this, TemplateArgs, *Mode, New);
continue;
}
if (const auto *OMPAttr = dyn_cast<OMPDeclareSimdDeclAttr>(TmplAttr)) {
instantiateOMPDeclareSimdDeclAttr(*this, TemplateArgs, *OMPAttr, New);
continue;
}
if (const auto *OMPAttr = dyn_cast<OMPDeclareVariantAttr>(TmplAttr)) {
instantiateOMPDeclareVariantAttr(*this, TemplateArgs, *OMPAttr, New);
continue;
}
if (const auto *AMDGPUFlatWorkGroupSize =
dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(TmplAttr)) {
instantiateDependentAMDGPUFlatWorkGroupSizeAttr(
*this, TemplateArgs, *AMDGPUFlatWorkGroupSize, New);
}
if (const auto *AMDGPUFlatWorkGroupSize =
dyn_cast<AMDGPUWavesPerEUAttr>(TmplAttr)) {
instantiateDependentAMDGPUWavesPerEUAttr(*this, TemplateArgs,
*AMDGPUFlatWorkGroupSize, New);
}
// Existing DLL attribute on the instantiation takes precedence.
if (TmplAttr->getKind() == attr::DLLExport ||
TmplAttr->getKind() == attr::DLLImport) {
if (New->hasAttr<DLLExportAttr>() || New->hasAttr<DLLImportAttr>()) {
continue;
}
}
if (const auto *ABIAttr = dyn_cast<ParameterABIAttr>(TmplAttr)) {
AddParameterABIAttr(New, *ABIAttr, ABIAttr->getABI());
continue;
}
if (isa<NSConsumedAttr>(TmplAttr) || isa<OSConsumedAttr>(TmplAttr) ||
isa<CFConsumedAttr>(TmplAttr)) {
AddXConsumedAttr(New, *TmplAttr, attrToRetainOwnershipKind(TmplAttr),
/*template instantiation=*/true);
continue;
}
if (auto *A = dyn_cast<PointerAttr>(TmplAttr)) {
if (!New->hasAttr<PointerAttr>())
New->addAttr(A->clone(Context));
continue;
}
if (auto *A = dyn_cast<OwnerAttr>(TmplAttr)) {
if (!New->hasAttr<OwnerAttr>())
New->addAttr(A->clone(Context));
continue;
}
if (auto *A = dyn_cast<SYCLKernelAttr>(TmplAttr)) {
instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New);
continue;
}
assert(!TmplAttr->isPackExpansion());
if (TmplAttr->isLateParsed() && LateAttrs) {
// Late parsed attributes must be instantiated and attached after the
// enclosing class has been instantiated. See Sema::InstantiateClass.
LocalInstantiationScope *Saved = nullptr;
if (CurrentInstantiationScope)
Saved = CurrentInstantiationScope->cloneScopes(OuterMostScope);
LateAttrs->push_back(LateInstantiatedAttribute(TmplAttr, Saved, New));
} else {
// Allow 'this' within late-parsed attributes.
auto *ND = cast<NamedDecl>(New);
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(ND->getDeclContext());
CXXThisScopeRAII ThisScope(*this, ThisContext, Qualifiers(),
ND->isCXXInstanceMember());
Attr *NewAttr = sema::instantiateTemplateAttribute(TmplAttr, Context,
*this, TemplateArgs);
if (NewAttr && isRelevantAttr(*this, New, TmplAttr))
New->addAttr(NewAttr);
}
}
}
/// In the MS ABI, we need to instantiate default arguments of dllexported
/// default constructors along with the constructor definition. This allows IR
/// gen to emit a constructor closure which calls the default constructor with
/// its default arguments.
void Sema::InstantiateDefaultCtorDefaultArgs(CXXConstructorDecl *Ctor) {
assert(Context.getTargetInfo().getCXXABI().isMicrosoft() &&
Ctor->isDefaultConstructor());
unsigned NumParams = Ctor->getNumParams();
if (NumParams == 0)
return;
DLLExportAttr *Attr = Ctor->getAttr<DLLExportAttr>();
if (!Attr)
return;
for (unsigned I = 0; I != NumParams; ++I) {
(void)CheckCXXDefaultArgExpr(Attr->getLocation(), Ctor,
Ctor->getParamDecl(I));
DiscardCleanupsInEvaluationContext();
}
}
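// Illustrative sketch (not part of this change), assuming the MS ABI; the
// type name is hypothetical:
//
//   struct __declspec(dllexport) Widget {
//     Widget(int n = 42);   // default constructor for ABI purposes, since it
//   };                      // is callable with no arguments
//
// Exporting Widget makes IR gen emit a constructor closure that calls the
// constructor with its default argument, so that argument is checked here.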
/// Get the previous declaration of a declaration for the purposes of template
/// instantiation. If this finds a previous declaration, then the previous
/// declaration of the instantiation of D should be an instantiation of the
/// result of this function.
template<typename DeclT>
static DeclT *getPreviousDeclForInstantiation(DeclT *D) {
DeclT *Result = D->getPreviousDecl();
// If the declaration is within a class, and the previous declaration was
// merged from a different definition of that class, then we don't have a
// previous declaration for the purpose of template instantiation.
if (Result && isa<CXXRecordDecl>(D->getDeclContext()) &&
D->getLexicalDeclContext() != Result->getLexicalDeclContext())
return nullptr;
return Result;
}
Decl *
TemplateDeclInstantiator::VisitTranslationUnitDecl(TranslationUnitDecl *D) {
llvm_unreachable("Translation units cannot be instantiated");
}
Decl *
TemplateDeclInstantiator::VisitPragmaCommentDecl(PragmaCommentDecl *D) {
llvm_unreachable("pragma comment cannot be instantiated");
}
Decl *TemplateDeclInstantiator::VisitPragmaDetectMismatchDecl(
PragmaDetectMismatchDecl *D) {
llvm_unreachable("pragma comment cannot be instantiated");
}
Decl *
TemplateDeclInstantiator::VisitExternCContextDecl(ExternCContextDecl *D) {
llvm_unreachable("extern \"C\" context cannot be instantiated");
}
Decl *TemplateDeclInstantiator::VisitMSGuidDecl(MSGuidDecl *D) {
llvm_unreachable("GUID declaration cannot be instantiated");
}
Decl *TemplateDeclInstantiator::VisitTemplateParamObjectDecl(
TemplateParamObjectDecl *D) {
llvm_unreachable("template parameter objects cannot be instantiated");
}
Decl *
TemplateDeclInstantiator::VisitLabelDecl(LabelDecl *D) {
LabelDecl *Inst = LabelDecl::Create(SemaRef.Context, Owner, D->getLocation(),
D->getIdentifier());
Owner->addDecl(Inst);
return Inst;
}
Decl *
TemplateDeclInstantiator::VisitNamespaceDecl(NamespaceDecl *D) {
llvm_unreachable("Namespaces cannot be instantiated");
}
Decl *
TemplateDeclInstantiator::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) {
NamespaceAliasDecl *Inst
= NamespaceAliasDecl::Create(SemaRef.Context, Owner,
D->getNamespaceLoc(),
D->getAliasLoc(),
D->getIdentifier(),
D->getQualifierLoc(),
D->getTargetNameLoc(),
D->getNamespace());
Owner->addDecl(Inst);
return Inst;
}
Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D,
bool IsTypeAlias) {
bool Invalid = false;
TypeSourceInfo *DI = D->getTypeSourceInfo();
if (DI->getType()->isInstantiationDependentType() ||
DI->getType()->isVariablyModifiedType()) {
DI = SemaRef.SubstType(DI, TemplateArgs,
D->getLocation(), D->getDeclName());
if (!DI) {
Invalid = true;
DI = SemaRef.Context.getTrivialTypeSourceInfo(SemaRef.Context.IntTy);
}
} else {
SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType());
}
// HACK: 2012-10-23 g++ has a bug where it gets the value kind of ?: wrong.
// libstdc++ relies upon this bug in its implementation of common_type. If we
// happen to be processing that implementation, fake up the g++ ?:
// semantics. See LWG issue 2141 for more information on the bug. The bugs
// are fixed in g++ and libstdc++ 4.9.0 (2014-04-22).
const DecltypeType *DT = DI->getType()->getAs<DecltypeType>();
CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D->getDeclContext());
if (DT && RD && isa<ConditionalOperator>(DT->getUnderlyingExpr()) &&
DT->isReferenceType() &&
RD->getEnclosingNamespaceContext() == SemaRef.getStdNamespace() &&
RD->getIdentifier() && RD->getIdentifier()->isStr("common_type") &&
D->getIdentifier() && D->getIdentifier()->isStr("type") &&
SemaRef.getSourceManager().isInSystemHeader(D->getBeginLoc()))
// Fold it to the (non-reference) type which g++ would have produced.
DI = SemaRef.Context.getTrivialTypeSourceInfo(
DI->getType().getNonReferenceType());
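// Illustrative sketch (not part of this change) of the libstdc++ pattern the
// workaround above targets (simplified):
//
//   template <class T, class U> struct common_type<T, U> {
//     typedef decltype(true ? std::declval<T>() : std::declval<U>()) type;
//   };
//
// Old g++ treated the ?: result as a prvalue, so 'type' came out non-reference;
// folding away the reference reproduces that result when instantiating the
// libstdc++ <type_traits> header as a system header.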
// Create the new typedef
TypedefNameDecl *Typedef;
if (IsTypeAlias)
Typedef = TypeAliasDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(),
D->getLocation(), D->getIdentifier(), DI);
else
Typedef = TypedefDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(),
D->getLocation(), D->getIdentifier(), DI);
if (Invalid)
Typedef->setInvalidDecl();
// If the old typedef was the name for linkage purposes of an anonymous
// tag decl, re-establish that relationship for the new typedef.
if (const TagType *oldTagType = D->getUnderlyingType()->getAs<TagType>()) {
TagDecl *oldTag = oldTagType->getDecl();
if (oldTag->getTypedefNameForAnonDecl() == D && !Invalid) {
TagDecl *newTag = DI->getType()->castAs<TagType>()->getDecl();
assert(!newTag->hasNameForLinkage());
newTag->setTypedefNameForAnonDecl(Typedef);
}
}
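// Illustrative sketch (not part of this change): a typedef that is the name
// for linkage purposes of an unnamed struct, with hypothetical names:
//
//   template <typename T> struct Outer {
//     typedef struct { T value; } Item;   // the unnamed struct uses 'Item' as
//   };                                    // its name for linkage purposes
//   Outer<int>::Item i;                   // the instantiated unnamed struct is
//                                         // re-linked to the new 'Item' above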
if (TypedefNameDecl *Prev = getPreviousDeclForInstantiation(D)) {
NamedDecl *InstPrev = SemaRef.FindInstantiatedDecl(D->getLocation(), Prev,
TemplateArgs);
if (!InstPrev)
return nullptr;
TypedefNameDecl *InstPrevTypedef = cast<TypedefNameDecl>(InstPrev);
// If the typedef types are not identical, reject them.
SemaRef.isIncompatibleTypedef(InstPrevTypedef, Typedef);
Typedef->setPreviousDecl(InstPrevTypedef);
}
SemaRef.InstantiateAttrs(TemplateArgs, D, Typedef);
if (D->getUnderlyingType()->getAs<DependentNameType>())
SemaRef.inferGslPointerAttribute(Typedef);
Typedef->setAccess(D->getAccess());
return Typedef;
}
Decl *TemplateDeclInstantiator::VisitTypedefDecl(TypedefDecl *D) {
Decl *Typedef = InstantiateTypedefNameDecl(D, /*IsTypeAlias=*/false);
if (Typedef)
Owner->addDecl(Typedef);
return Typedef;
}
Decl *TemplateDeclInstantiator::VisitTypeAliasDecl(TypeAliasDecl *D) {
Decl *Typedef = InstantiateTypedefNameDecl(D, /*IsTypeAlias=*/true);
if (Typedef)
Owner->addDecl(Typedef);
return Typedef;
}
Decl *
TemplateDeclInstantiator::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
// Create a local instantiation scope for this type alias template, which
// will contain the instantiations of the template parameters.
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *TempParams = D->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
TypeAliasDecl *Pattern = D->getTemplatedDecl();
TypeAliasTemplateDecl *PrevAliasTemplate = nullptr;
if (getPreviousDeclForInstantiation<TypedefNameDecl>(Pattern)) {
DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName());
if (!Found.empty()) {
PrevAliasTemplate = dyn_cast<TypeAliasTemplateDecl>(Found.front());
}
}
TypeAliasDecl *AliasInst = cast_or_null<TypeAliasDecl>(
InstantiateTypedefNameDecl(Pattern, /*IsTypeAlias=*/true));
if (!AliasInst)
return nullptr;
TypeAliasTemplateDecl *Inst
= TypeAliasTemplateDecl::Create(SemaRef.Context, Owner, D->getLocation(),
D->getDeclName(), InstParams, AliasInst);
AliasInst->setDescribedAliasTemplate(Inst);
if (PrevAliasTemplate)
Inst->setPreviousDecl(PrevAliasTemplate);
Inst->setAccess(D->getAccess());
if (!PrevAliasTemplate)
Inst->setInstantiatedFromMemberTemplate(D);
Owner->addDecl(Inst);
return Inst;
}
Decl *TemplateDeclInstantiator::VisitBindingDecl(BindingDecl *D) {
auto *NewBD = BindingDecl::Create(SemaRef.Context, Owner, D->getLocation(),
D->getIdentifier());
NewBD->setReferenced(D->isReferenced());
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewBD);
return NewBD;
}
Decl *TemplateDeclInstantiator::VisitDecompositionDecl(DecompositionDecl *D) {
// Transform the bindings first.
SmallVector<BindingDecl*, 16> NewBindings;
for (auto *OldBD : D->bindings())
NewBindings.push_back(cast<BindingDecl>(VisitBindingDecl(OldBD)));
ArrayRef<BindingDecl*> NewBindingArray = NewBindings;
auto *NewDD = cast_or_null<DecompositionDecl>(
VisitVarDecl(D, /*InstantiatingVarTemplate=*/false, &NewBindingArray));
if (!NewDD || NewDD->isInvalidDecl())
for (auto *NewBD : NewBindings)
NewBD->setInvalidDecl();
return NewDD;
}
Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D) {
return VisitVarDecl(D, /*InstantiatingVarTemplate=*/false);
}
Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D,
bool InstantiatingVarTemplate,
ArrayRef<BindingDecl*> *Bindings) {
// Do substitution on the type of the declaration
TypeSourceInfo *DI = SemaRef.SubstType(
D->getTypeSourceInfo(), TemplateArgs, D->getTypeSpecStartLoc(),
D->getDeclName(), /*AllowDeducedTST*/true);
if (!DI)
return nullptr;
if (DI->getType()->isFunctionType()) {
SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function)
<< D->isStaticDataMember() << DI->getType();
return nullptr;
}
DeclContext *DC = Owner;
if (D->isLocalExternDecl())
SemaRef.adjustContextForLocalExternDecl(DC);
// Build the instantiated declaration.
VarDecl *Var;
if (Bindings)
Var = DecompositionDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(),
D->getLocation(), DI->getType(), DI,
D->getStorageClass(), *Bindings);
else
Var = VarDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(),
D->getLocation(), D->getIdentifier(), DI->getType(),
DI, D->getStorageClass());
// In ARC, infer 'retaining' for variables of retainable type.
if (SemaRef.getLangOpts().ObjCAutoRefCount &&
SemaRef.inferObjCARCLifetime(Var))
Var->setInvalidDecl();
if (SemaRef.getLangOpts().OpenCL)
SemaRef.deduceOpenCLAddressSpace(Var);
// Substitute the nested name specifier, if any.
if (SubstQualifier(D, Var))
return nullptr;
SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner,
StartingScope, InstantiatingVarTemplate);
- if (D->isNRVOVariable()) {
+ if (D->isNRVOVariable() && !Var->isInvalidDecl()) {
QualType RT;
if (auto *F = dyn_cast<FunctionDecl>(DC))
RT = F->getReturnType();
else if (isa<BlockDecl>(DC))
RT = cast<FunctionType>(SemaRef.getCurBlock()->FunctionType)
->getReturnType();
else
llvm_unreachable("Unknown context type");
// This is the last chance we have of checking copy elision eligibility
// for functions in dependent contexts. The sema actions for building
// the return statement during template instantiation will have no effect
// regarding copy elision, since NRVO propagation runs on the scope exit
// actions, and these are not run on instantiation.
// This might run through some VarDecls which were returned from non-taken
// 'if constexpr' branches, and these will end up being constructed on the
// return slot even if they will never be returned, as a sort of accidental
// 'optimization'. Notably, functions with 'auto' return types won't have their
// return type deduced by this point. Coupled with the limitation described
// previously, this makes it very hard to support copy elision for these.
Sema::NamedReturnInfo Info = SemaRef.getNamedReturnInfo(Var);
bool NRVO = SemaRef.getCopyElisionCandidate(Info, RT) != nullptr;
Var->setNRVOVariable(NRVO);
}
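// Illustrative sketch (not part of this change) of the dependent-context NRVO
// decision made above:
//
//   template <typename T> T make() {
//     T result{};
//     return result;   // whether 'result' is an NRVO candidate depends on T,
//   }                  // so it is re-decided when make<T> is instantiated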
Var->setImplicit(D->isImplicit());
if (Var->isStaticLocal())
SemaRef.CheckStaticLocalForDllExport(Var);
return Var;
}
Decl *TemplateDeclInstantiator::VisitAccessSpecDecl(AccessSpecDecl *D) {
AccessSpecDecl* AD
= AccessSpecDecl::Create(SemaRef.Context, D->getAccess(), Owner,
D->getAccessSpecifierLoc(), D->getColonLoc());
Owner->addHiddenDecl(AD);
return AD;
}
Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) {
bool Invalid = false;
TypeSourceInfo *DI = D->getTypeSourceInfo();
if (DI->getType()->isInstantiationDependentType() ||
DI->getType()->isVariablyModifiedType()) {
DI = SemaRef.SubstType(DI, TemplateArgs,
D->getLocation(), D->getDeclName());
if (!DI) {
DI = D->getTypeSourceInfo();
Invalid = true;
} else if (DI->getType()->isFunctionType()) {
// C++ [temp.arg.type]p3:
// If a declaration acquires a function type through a type
// dependent on a template-parameter and this causes a
// declaration that does not use the syntactic form of a
// function declarator to have function type, the program is
// ill-formed.
SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function)
<< DI->getType();
Invalid = true;
}
} else {
SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType());
}
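// Illustrative sketch (not part of this change) of the ill-formed case that
// diagnostic covers, with hypothetical names:
//
//   template <typename T> struct Holder {
//     T member;             // fine for object types...
//   };
//   Holder<int(int)> h;     // ...but T = int(int) gives 'member' a function
//                           // type without function-declarator syntax, which
//                           // [temp.arg.type]p3 makes ill-formed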
Expr *BitWidth = D->getBitWidth();
if (Invalid)
BitWidth = nullptr;
else if (BitWidth) {
// The bit-width expression is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult InstantiatedBitWidth
= SemaRef.SubstExpr(BitWidth, TemplateArgs);
if (InstantiatedBitWidth.isInvalid()) {
Invalid = true;
BitWidth = nullptr;
} else
BitWidth = InstantiatedBitWidth.getAs<Expr>();
}
FieldDecl *Field = SemaRef.CheckFieldDecl(D->getDeclName(),
DI->getType(), DI,
cast<RecordDecl>(Owner),
D->getLocation(),
D->isMutable(),
BitWidth,
D->getInClassInitStyle(),
D->getInnerLocStart(),
D->getAccess(),
nullptr);
if (!Field) {
cast<Decl>(Owner)->setInvalidDecl();
return nullptr;
}
SemaRef.InstantiateAttrs(TemplateArgs, D, Field, LateAttrs, StartingScope);
if (Field->hasAttrs())
SemaRef.CheckAlignasUnderalignment(Field);
if (Invalid)
Field->setInvalidDecl();
if (!Field->getDeclName()) {
// Keep track of where this decl came from.
SemaRef.Context.setInstantiatedFromUnnamedFieldDecl(Field, D);
}
if (CXXRecordDecl *Parent= dyn_cast<CXXRecordDecl>(Field->getDeclContext())) {
if (Parent->isAnonymousStructOrUnion() &&
Parent->getRedeclContext()->isFunctionOrMethod())
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Field);
}
Field->setImplicit(D->isImplicit());
Field->setAccess(D->getAccess());
Owner->addDecl(Field);
return Field;
}
Decl *TemplateDeclInstantiator::VisitMSPropertyDecl(MSPropertyDecl *D) {
bool Invalid = false;
TypeSourceInfo *DI = D->getTypeSourceInfo();
if (DI->getType()->isVariablyModifiedType()) {
SemaRef.Diag(D->getLocation(), diag::err_property_is_variably_modified)
<< D;
Invalid = true;
} else if (DI->getType()->isInstantiationDependentType()) {
DI = SemaRef.SubstType(DI, TemplateArgs,
D->getLocation(), D->getDeclName());
if (!DI) {
DI = D->getTypeSourceInfo();
Invalid = true;
} else if (DI->getType()->isFunctionType()) {
// C++ [temp.arg.type]p3:
// If a declaration acquires a function type through a type
// dependent on a template-parameter and this causes a
// declaration that does not use the syntactic form of a
// function declarator to have function type, the program is
// ill-formed.
SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function)
<< DI->getType();
Invalid = true;
}
} else {
SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType());
}
MSPropertyDecl *Property = MSPropertyDecl::Create(
SemaRef.Context, Owner, D->getLocation(), D->getDeclName(), DI->getType(),
DI, D->getBeginLoc(), D->getGetterId(), D->getSetterId());
SemaRef.InstantiateAttrs(TemplateArgs, D, Property, LateAttrs,
StartingScope);
if (Invalid)
Property->setInvalidDecl();
Property->setAccess(D->getAccess());
Owner->addDecl(Property);
return Property;
}
Decl *TemplateDeclInstantiator::VisitIndirectFieldDecl(IndirectFieldDecl *D) {
NamedDecl **NamedChain =
new (SemaRef.Context)NamedDecl*[D->getChainingSize()];
int i = 0;
for (auto *PI : D->chain()) {
NamedDecl *Next = SemaRef.FindInstantiatedDecl(D->getLocation(), PI,
TemplateArgs);
if (!Next)
return nullptr;
NamedChain[i++] = Next;
}
QualType T = cast<FieldDecl>(NamedChain[i-1])->getType();
IndirectFieldDecl *IndirectField = IndirectFieldDecl::Create(
SemaRef.Context, Owner, D->getLocation(), D->getIdentifier(), T,
{NamedChain, D->getChainingSize()});
for (const auto *Attr : D->attrs())
IndirectField->addAttr(Attr->clone(SemaRef.Context));
IndirectField->setImplicit(D->isImplicit());
IndirectField->setAccess(D->getAccess());
Owner->addDecl(IndirectField);
return IndirectField;
}
Decl *TemplateDeclInstantiator::VisitFriendDecl(FriendDecl *D) {
// Handle friend type expressions by simply substituting template
// parameters into the pattern type and checking the result.
if (TypeSourceInfo *Ty = D->getFriendType()) {
TypeSourceInfo *InstTy;
// If this is an unsupported friend, don't bother substituting template
// arguments into it. The actual type referred to won't be used by any
// parts of Clang, and may not be valid for instantiating. Just use the
// same info for the instantiated friend.
if (D->isUnsupportedFriend()) {
InstTy = Ty;
} else {
InstTy = SemaRef.SubstType(Ty, TemplateArgs,
D->getLocation(), DeclarationName());
}
if (!InstTy)
return nullptr;
FriendDecl *FD = SemaRef.CheckFriendTypeDecl(D->getBeginLoc(),
D->getFriendLoc(), InstTy);
if (!FD)
return nullptr;
FD->setAccess(AS_public);
FD->setUnsupportedFriend(D->isUnsupportedFriend());
Owner->addDecl(FD);
return FD;
}
NamedDecl *ND = D->getFriendDecl();
assert(ND && "friend decl must be a decl or a type!");
// All of the Visit implementations for the various potential friend
// declarations have to be carefully written to work for friend
// objects, with the most important detail being that the target
// decl should almost certainly not be placed in Owner.
Decl *NewND = Visit(ND);
if (!NewND) return nullptr;
FriendDecl *FD =
FriendDecl::Create(SemaRef.Context, Owner, D->getLocation(),
cast<NamedDecl>(NewND), D->getFriendLoc());
FD->setAccess(AS_public);
FD->setUnsupportedFriend(D->isUnsupportedFriend());
Owner->addDecl(FD);
return FD;
}
Decl *TemplateDeclInstantiator::VisitStaticAssertDecl(StaticAssertDecl *D) {
Expr *AssertExpr = D->getAssertExpr();
// The expression in a static assertion is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult InstantiatedAssertExpr
= SemaRef.SubstExpr(AssertExpr, TemplateArgs);
if (InstantiatedAssertExpr.isInvalid())
return nullptr;
return SemaRef.BuildStaticAssertDeclaration(D->getLocation(),
InstantiatedAssertExpr.get(),
D->getMessage(),
D->getRParenLoc(),
D->isFailed());
}
Decl *TemplateDeclInstantiator::VisitEnumDecl(EnumDecl *D) {
EnumDecl *PrevDecl = nullptr;
if (EnumDecl *PatternPrev = getPreviousDeclForInstantiation(D)) {
NamedDecl *Prev = SemaRef.FindInstantiatedDecl(D->getLocation(),
PatternPrev,
TemplateArgs);
if (!Prev) return nullptr;
PrevDecl = cast<EnumDecl>(Prev);
}
EnumDecl *Enum =
EnumDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(),
D->getLocation(), D->getIdentifier(), PrevDecl,
D->isScoped(), D->isScopedUsingClassTag(), D->isFixed());
if (D->isFixed()) {
if (TypeSourceInfo *TI = D->getIntegerTypeSourceInfo()) {
// If we have type source information for the underlying type, it means it
// has been explicitly set by the user. Perform substitution on it before
// moving on.
SourceLocation UnderlyingLoc = TI->getTypeLoc().getBeginLoc();
TypeSourceInfo *NewTI = SemaRef.SubstType(TI, TemplateArgs, UnderlyingLoc,
DeclarationName());
if (!NewTI || SemaRef.CheckEnumUnderlyingType(NewTI))
Enum->setIntegerType(SemaRef.Context.IntTy);
else
Enum->setIntegerTypeSourceInfo(NewTI);
} else {
assert(!D->getIntegerType()->isDependentType()
&& "Dependent type without type source info");
Enum->setIntegerType(D->getIntegerType());
}
}
SemaRef.InstantiateAttrs(TemplateArgs, D, Enum);
Enum->setInstantiationOfMemberEnum(D, TSK_ImplicitInstantiation);
Enum->setAccess(D->getAccess());
// Forward the mangling number from the template to the instantiated decl.
SemaRef.Context.setManglingNumber(Enum, SemaRef.Context.getManglingNumber(D));
// See if the old tag was defined along with a declarator.
// If it did, mark the new tag as being associated with that declarator.
if (DeclaratorDecl *DD = SemaRef.Context.getDeclaratorForUnnamedTagDecl(D))
SemaRef.Context.addDeclaratorForUnnamedTagDecl(Enum, DD);
// See if the old tag was defined along with a typedef.
// If it did, mark the new tag as being associated with that typedef.
if (TypedefNameDecl *TND = SemaRef.Context.getTypedefNameForUnnamedTagDecl(D))
SemaRef.Context.addTypedefNameForUnnamedTagDecl(Enum, TND);
if (SubstQualifier(D, Enum)) return nullptr;
Owner->addDecl(Enum);
EnumDecl *Def = D->getDefinition();
if (Def && Def != D) {
// If this is an out-of-line definition of an enum member template, check
// that the underlying types match in the instantiation of both
// declarations.
if (TypeSourceInfo *TI = Def->getIntegerTypeSourceInfo()) {
SourceLocation UnderlyingLoc = TI->getTypeLoc().getBeginLoc();
QualType DefnUnderlying =
SemaRef.SubstType(TI->getType(), TemplateArgs,
UnderlyingLoc, DeclarationName());
SemaRef.CheckEnumRedeclaration(Def->getLocation(), Def->isScoped(),
DefnUnderlying, /*IsFixed=*/true, Enum);
}
}
// C++11 [temp.inst]p1: The implicit instantiation of a class template
// specialization causes the implicit instantiation of the declarations, but
// not the definitions of scoped member enumerations.
//
// DR1484 clarifies that enumeration definitions inside of a template
// declaration aren't considered entities that can be separately instantiated
// from the rest of the entity they are declared inside of.
if (isDeclWithinFunction(D) ? D == Def : Def && !Enum->isScoped()) {
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Enum);
InstantiateEnumDefinition(Enum, Def);
}
return Enum;
}
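// Illustrative sketch (not part of this change): a scoped member enumeration
// is only declared when its enclosing specialization is implicitly
// instantiated; hypothetical names:
//
//   template <typename T> struct Traits {
//     enum class Kind : int { A = sizeof(T), B };   // definition deferred
//   };
//   Traits<long>::Kind k = Traits<long>::Kind::A;   // use forces the definition
//
// An unscoped member enum (or an enum local to a function being instantiated)
// is instead defined eagerly by the InstantiateEnumDefinition call above.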
void TemplateDeclInstantiator::InstantiateEnumDefinition(
EnumDecl *Enum, EnumDecl *Pattern) {
Enum->startDefinition();
// Update the location to refer to the definition.
Enum->setLocation(Pattern->getLocation());
SmallVector<Decl*, 4> Enumerators;
EnumConstantDecl *LastEnumConst = nullptr;
for (auto *EC : Pattern->enumerators()) {
// The specified value for the enumerator.
ExprResult Value((Expr *)nullptr);
if (Expr *UninstValue = EC->getInitExpr()) {
// The enumerator's value expression is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
Value = SemaRef.SubstExpr(UninstValue, TemplateArgs);
}
// Drop the initial value and continue.
bool isInvalid = false;
if (Value.isInvalid()) {
Value = nullptr;
isInvalid = true;
}
EnumConstantDecl *EnumConst
= SemaRef.CheckEnumConstant(Enum, LastEnumConst,
EC->getLocation(), EC->getIdentifier(),
Value.get());
if (isInvalid) {
if (EnumConst)
EnumConst->setInvalidDecl();
Enum->setInvalidDecl();
}
if (EnumConst) {
SemaRef.InstantiateAttrs(TemplateArgs, EC, EnumConst);
EnumConst->setAccess(Enum->getAccess());
Enum->addDecl(EnumConst);
Enumerators.push_back(EnumConst);
LastEnumConst = EnumConst;
if (Pattern->getDeclContext()->isFunctionOrMethod() &&
!Enum->isScoped()) {
// If the enumeration is within a function or method, record the enum
// constant as a local.
SemaRef.CurrentInstantiationScope->InstantiatedLocal(EC, EnumConst);
}
}
}
SemaRef.ActOnEnumBody(Enum->getLocation(), Enum->getBraceRange(), Enum,
Enumerators, nullptr, ParsedAttributesView());
}
Decl *TemplateDeclInstantiator::VisitEnumConstantDecl(EnumConstantDecl *D) {
llvm_unreachable("EnumConstantDecls can only occur within EnumDecls.");
}
Decl *
TemplateDeclInstantiator::VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D) {
llvm_unreachable("BuiltinTemplateDecls cannot be instantiated.");
}
Decl *TemplateDeclInstantiator::VisitClassTemplateDecl(ClassTemplateDecl *D) {
bool isFriend = (D->getFriendObjectKind() != Decl::FOK_None);
// Create a local instantiation scope for this class template, which
// will contain the instantiations of the template parameters.
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *TempParams = D->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
CXXRecordDecl *Pattern = D->getTemplatedDecl();
// Instantiate the qualifier. We have to do this first in case
// we're a friend declaration, because if we are then we need to put
// the new declaration in the appropriate context.
NestedNameSpecifierLoc QualifierLoc = Pattern->getQualifierLoc();
if (QualifierLoc) {
QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc,
TemplateArgs);
if (!QualifierLoc)
return nullptr;
}
CXXRecordDecl *PrevDecl = nullptr;
ClassTemplateDecl *PrevClassTemplate = nullptr;
if (!isFriend && getPreviousDeclForInstantiation(Pattern)) {
DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName());
if (!Found.empty()) {
PrevClassTemplate = dyn_cast<ClassTemplateDecl>(Found.front());
if (PrevClassTemplate)
PrevDecl = PrevClassTemplate->getTemplatedDecl();
}
}
// If this isn't a friend, then it's a member template, in which
// case we just want to build the instantiation in the
// specialization. If it is a friend, we want to build it in
// the appropriate context.
DeclContext *DC = Owner;
if (isFriend) {
if (QualifierLoc) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
DC = SemaRef.computeDeclContext(SS);
if (!DC) return nullptr;
} else {
DC = SemaRef.FindInstantiatedContext(Pattern->getLocation(),
Pattern->getDeclContext(),
TemplateArgs);
}
// Look for a previous declaration of the template in the owning
// context.
LookupResult R(SemaRef, Pattern->getDeclName(), Pattern->getLocation(),
Sema::LookupOrdinaryName,
SemaRef.forRedeclarationInCurContext());
SemaRef.LookupQualifiedName(R, DC);
if (R.isSingleResult()) {
PrevClassTemplate = R.getAsSingle<ClassTemplateDecl>();
if (PrevClassTemplate)
PrevDecl = PrevClassTemplate->getTemplatedDecl();
}
if (!PrevClassTemplate && QualifierLoc) {
SemaRef.Diag(Pattern->getLocation(), diag::err_not_tag_in_scope)
<< D->getTemplatedDecl()->getTagKind() << Pattern->getDeclName() << DC
<< QualifierLoc.getSourceRange();
return nullptr;
}
if (PrevClassTemplate) {
TemplateParameterList *PrevParams
= PrevClassTemplate->getMostRecentDecl()->getTemplateParameters();
// Make sure the parameter lists match.
if (!SemaRef.TemplateParameterListsAreEqual(InstParams, PrevParams, true,
Sema::TPL_TemplateMatch))
return nullptr;
// Do some additional validation, then merge default arguments
// from the existing declarations.
if (SemaRef.CheckTemplateParameterList(InstParams, PrevParams,
Sema::TPC_ClassTemplate))
return nullptr;
}
}
CXXRecordDecl *RecordInst = CXXRecordDecl::Create(
SemaRef.Context, Pattern->getTagKind(), DC, Pattern->getBeginLoc(),
Pattern->getLocation(), Pattern->getIdentifier(), PrevDecl,
/*DelayTypeCreation=*/true);
if (QualifierLoc)
RecordInst->setQualifierInfo(QualifierLoc);
SemaRef.InstantiateAttrsForDecl(TemplateArgs, Pattern, RecordInst, LateAttrs,
StartingScope);
ClassTemplateDecl *Inst
= ClassTemplateDecl::Create(SemaRef.Context, DC, D->getLocation(),
D->getIdentifier(), InstParams, RecordInst);
assert(!(isFriend && Owner->isDependentContext()));
Inst->setPreviousDecl(PrevClassTemplate);
RecordInst->setDescribedClassTemplate(Inst);
if (isFriend) {
if (PrevClassTemplate)
Inst->setAccess(PrevClassTemplate->getAccess());
else
Inst->setAccess(D->getAccess());
Inst->setObjectOfFriendDecl();
// TODO: do we want to track the instantiation progeny of this
// friend target decl?
} else {
Inst->setAccess(D->getAccess());
if (!PrevClassTemplate)
Inst->setInstantiatedFromMemberTemplate(D);
}
// Trigger creation of the type for the instantiation.
SemaRef.Context.getInjectedClassNameType(RecordInst,
Inst->getInjectedClassNameSpecialization());
// Finish handling of friends.
if (isFriend) {
DC->makeDeclVisibleInContext(Inst);
Inst->setLexicalDeclContext(Owner);
RecordInst->setLexicalDeclContext(Owner);
return Inst;
}
if (D->isOutOfLine()) {
Inst->setLexicalDeclContext(D->getLexicalDeclContext());
RecordInst->setLexicalDeclContext(D->getLexicalDeclContext());
}
Owner->addDecl(Inst);
if (!PrevClassTemplate) {
// Queue up any out-of-line partial specializations of this member
// class template; the client will force their instantiation once
// the enclosing class has been instantiated.
SmallVector<ClassTemplatePartialSpecializationDecl *, 4> PartialSpecs;
D->getPartialSpecializations(PartialSpecs);
for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I)
if (PartialSpecs[I]->getFirstDecl()->isOutOfLine())
OutOfLinePartialSpecs.push_back(std::make_pair(Inst, PartialSpecs[I]));
}
return Inst;
}
Decl *
TemplateDeclInstantiator::VisitClassTemplatePartialSpecializationDecl(
ClassTemplatePartialSpecializationDecl *D) {
ClassTemplateDecl *ClassTemplate = D->getSpecializedTemplate();
// Lookup the already-instantiated declaration in the instantiation
// of the class template and return that.
DeclContext::lookup_result Found
= Owner->lookup(ClassTemplate->getDeclName());
if (Found.empty())
return nullptr;
ClassTemplateDecl *InstClassTemplate
= dyn_cast<ClassTemplateDecl>(Found.front());
if (!InstClassTemplate)
return nullptr;
if (ClassTemplatePartialSpecializationDecl *Result
= InstClassTemplate->findPartialSpecInstantiatedFromMember(D))
return Result;
return InstantiateClassTemplatePartialSpecialization(InstClassTemplate, D);
}
Decl *TemplateDeclInstantiator::VisitVarTemplateDecl(VarTemplateDecl *D) {
assert(D->getTemplatedDecl()->isStaticDataMember() &&
"Only static data member templates are allowed.");
// Create a local instantiation scope for this variable template, which
// will contain the instantiations of the template parameters.
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *TempParams = D->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
VarDecl *Pattern = D->getTemplatedDecl();
VarTemplateDecl *PrevVarTemplate = nullptr;
if (getPreviousDeclForInstantiation(Pattern)) {
DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName());
if (!Found.empty())
PrevVarTemplate = dyn_cast<VarTemplateDecl>(Found.front());
}
VarDecl *VarInst =
cast_or_null<VarDecl>(VisitVarDecl(Pattern,
/*InstantiatingVarTemplate=*/true));
if (!VarInst) return nullptr;
DeclContext *DC = Owner;
VarTemplateDecl *Inst = VarTemplateDecl::Create(
SemaRef.Context, DC, D->getLocation(), D->getIdentifier(), InstParams,
VarInst);
VarInst->setDescribedVarTemplate(Inst);
Inst->setPreviousDecl(PrevVarTemplate);
Inst->setAccess(D->getAccess());
if (!PrevVarTemplate)
Inst->setInstantiatedFromMemberTemplate(D);
if (D->isOutOfLine()) {
Inst->setLexicalDeclContext(D->getLexicalDeclContext());
VarInst->setLexicalDeclContext(D->getLexicalDeclContext());
}
Owner->addDecl(Inst);
if (!PrevVarTemplate) {
// Queue up any out-of-line partial specializations of this member
// variable template; the client will force their instantiation once
// the enclosing class has been instantiated.
SmallVector<VarTemplatePartialSpecializationDecl *, 4> PartialSpecs;
D->getPartialSpecializations(PartialSpecs);
for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I)
if (PartialSpecs[I]->getFirstDecl()->isOutOfLine())
OutOfLineVarPartialSpecs.push_back(
std::make_pair(Inst, PartialSpecs[I]));
}
return Inst;
}
Decl *TemplateDeclInstantiator::VisitVarTemplatePartialSpecializationDecl(
VarTemplatePartialSpecializationDecl *D) {
assert(D->isStaticDataMember() &&
"Only static data member templates are allowed.");
VarTemplateDecl *VarTemplate = D->getSpecializedTemplate();
// Lookup the already-instantiated declaration and return that.
DeclContext::lookup_result Found = Owner->lookup(VarTemplate->getDeclName());
assert(!Found.empty() && "Instantiation found nothing?");
VarTemplateDecl *InstVarTemplate = dyn_cast<VarTemplateDecl>(Found.front());
assert(InstVarTemplate && "Instantiation did not find a variable template?");
if (VarTemplatePartialSpecializationDecl *Result =
InstVarTemplate->findPartialSpecInstantiatedFromMember(D))
return Result;
return InstantiateVarTemplatePartialSpecialization(InstVarTemplate, D);
}
Decl *
TemplateDeclInstantiator::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
// Create a local instantiation scope for this function template, which
// will contain the instantiations of the template parameters and then get
// merged with the local instantiation scope for the function template
// itself.
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *TempParams = D->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
FunctionDecl *Instantiated = nullptr;
if (CXXMethodDecl *DMethod = dyn_cast<CXXMethodDecl>(D->getTemplatedDecl()))
Instantiated = cast_or_null<FunctionDecl>(VisitCXXMethodDecl(DMethod,
InstParams));
else
Instantiated = cast_or_null<FunctionDecl>(VisitFunctionDecl(
D->getTemplatedDecl(),
InstParams));
if (!Instantiated)
return nullptr;
// Link the instantiated function template declaration to the function
// template from which it was instantiated.
FunctionTemplateDecl *InstTemplate
= Instantiated->getDescribedFunctionTemplate();
InstTemplate->setAccess(D->getAccess());
assert(InstTemplate &&
"VisitFunctionDecl/CXXMethodDecl didn't create a template!");
bool isFriend = (InstTemplate->getFriendObjectKind() != Decl::FOK_None);
// Link the instantiation back to the pattern *unless* this is a
// non-definition friend declaration.
if (!InstTemplate->getInstantiatedFromMemberTemplate() &&
!(isFriend && !D->getTemplatedDecl()->isThisDeclarationADefinition()))
InstTemplate->setInstantiatedFromMemberTemplate(D);
// Make declarations visible in the appropriate context.
if (!isFriend) {
Owner->addDecl(InstTemplate);
} else if (InstTemplate->getDeclContext()->isRecord() &&
!getPreviousDeclForInstantiation(D)) {
SemaRef.CheckFriendAccess(InstTemplate);
}
return InstTemplate;
}
Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) {
CXXRecordDecl *PrevDecl = nullptr;
if (D->isInjectedClassName())
PrevDecl = cast<CXXRecordDecl>(Owner);
else if (CXXRecordDecl *PatternPrev = getPreviousDeclForInstantiation(D)) {
NamedDecl *Prev = SemaRef.FindInstantiatedDecl(D->getLocation(),
PatternPrev,
TemplateArgs);
if (!Prev) return nullptr;
PrevDecl = cast<CXXRecordDecl>(Prev);
}
CXXRecordDecl *Record = nullptr;
if (D->isLambda())
Record = CXXRecordDecl::CreateLambda(
SemaRef.Context, Owner, D->getLambdaTypeInfo(), D->getLocation(),
D->isDependentLambda(), D->isGenericLambda(),
D->getLambdaCaptureDefault());
else
Record = CXXRecordDecl::Create(SemaRef.Context, D->getTagKind(), Owner,
D->getBeginLoc(), D->getLocation(),
D->getIdentifier(), PrevDecl);
// Substitute the nested name specifier, if any.
if (SubstQualifier(D, Record))
return nullptr;
SemaRef.InstantiateAttrsForDecl(TemplateArgs, D, Record, LateAttrs,
StartingScope);
Record->setImplicit(D->isImplicit());
// FIXME: Checking against AS_none is an ugly hack to work around the issue
// that the tag decls introduced by friend class declarations don't have an
// access specifier. Remove once this area of the code gets sorted out.
if (D->getAccess() != AS_none)
Record->setAccess(D->getAccess());
if (!D->isInjectedClassName())
Record->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation);
// If the original function was part of a friend declaration,
// inherit its namespace state.
if (D->getFriendObjectKind())
Record->setObjectOfFriendDecl();
// Make sure that anonymous structs and unions are recorded.
if (D->isAnonymousStructOrUnion())
Record->setAnonymousStructOrUnion(true);
if (D->isLocalClass())
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Record);
// Forward the mangling number from the template to the instantiated decl.
SemaRef.Context.setManglingNumber(Record,
SemaRef.Context.getManglingNumber(D));
// See if the old tag was defined along with a declarator.
// If so, mark the new tag as being associated with that declarator.
if (DeclaratorDecl *DD = SemaRef.Context.getDeclaratorForUnnamedTagDecl(D))
SemaRef.Context.addDeclaratorForUnnamedTagDecl(Record, DD);
// See if the old tag was defined along with a typedef.
// If so, mark the new tag as being associated with that typedef.
if (TypedefNameDecl *TND = SemaRef.Context.getTypedefNameForUnnamedTagDecl(D))
SemaRef.Context.addTypedefNameForUnnamedTagDecl(Record, TND);
Owner->addDecl(Record);
// DR1484 clarifies that the members of a local class are instantiated as part
// of the instantiation of their enclosing entity.
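// For example (an illustrative sketch, not taken from the original sources):
//
//   template<typename T> void g() {
//     struct Local { T get() const { return T(); } };
//     Local().get();   // Local and Local::get are instantiated with g<T>
//   }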
if (D->isCompleteDefinition() && D->isLocalClass()) {
Sema::LocalEagerInstantiationScope LocalInstantiations(SemaRef);
SemaRef.InstantiateClass(D->getLocation(), Record, D, TemplateArgs,
TSK_ImplicitInstantiation,
/*Complain=*/true);
// For nested local classes, we will instantiate the members when we
// reach the end of the outermost (non-nested) local class.
if (!D->isCXXClassMember())
SemaRef.InstantiateClassMembers(D->getLocation(), Record, TemplateArgs,
TSK_ImplicitInstantiation);
// This class may have local implicit instantiations that need to be
// performed within this scope.
LocalInstantiations.perform();
}
SemaRef.DiagnoseUnusedNestedTypedefs(Record);
return Record;
}
/// Adjust the given function type for an instantiation of the
/// given declaration, to cope with modifications to the function's type that
/// aren't reflected in the type-source information.
///
/// \param D The declaration we're instantiating.
/// \param TInfo The already-instantiated type.
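///
/// In other words (a hedged summary): when only the ExtInfo bits of the
/// prototype (e.g. calling convention, noreturn, regparm) differ between the
/// declaration's type and the substituted type-source information, rebuild
/// the function type from the substituted prototype while preserving the
/// original ExtInfo.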
static QualType adjustFunctionTypeForInstantiation(ASTContext &Context,
FunctionDecl *D,
TypeSourceInfo *TInfo) {
const FunctionProtoType *OrigFunc
= D->getType()->castAs<FunctionProtoType>();
const FunctionProtoType *NewFunc
= TInfo->getType()->castAs<FunctionProtoType>();
if (OrigFunc->getExtInfo() == NewFunc->getExtInfo())
return TInfo->getType();
FunctionProtoType::ExtProtoInfo NewEPI = NewFunc->getExtProtoInfo();
NewEPI.ExtInfo = OrigFunc->getExtInfo();
return Context.getFunctionType(NewFunc->getReturnType(),
NewFunc->getParamTypes(), NewEPI);
}
/// Normal class members are of more specific types and therefore
/// don't make it here. This function serves three purposes:
/// 1) instantiating function templates
/// 2) substituting friend declarations
/// 3) substituting deduction guide declarations for nested class templates
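///
/// For example (an illustrative sketch; the declarations are hypothetical):
/// a deduction guide written for a nested class template reaches this
/// function via case (3) when the enclosing class template is instantiated:
///
///   template<typename T> struct Outer {
///     template<typename U> struct Inner { Inner(U, T); };
///     template<typename U> Inner(U, T) -> Inner<U>;
///   };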
Decl *TemplateDeclInstantiator::VisitFunctionDecl(
FunctionDecl *D, TemplateParameterList *TemplateParams,
RewriteKind FunctionRewriteKind) {
// Check whether there is already a function template specialization for
// this declaration.
FunctionTemplateDecl *FunctionTemplate = D->getDescribedFunctionTemplate();
if (FunctionTemplate && !TemplateParams) {
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
void *InsertPos = nullptr;
FunctionDecl *SpecFunc
= FunctionTemplate->findSpecialization(Innermost, InsertPos);
// If we already have a function template specialization, return it.
if (SpecFunc)
return SpecFunc;
}
bool isFriend;
if (FunctionTemplate)
isFriend = (FunctionTemplate->getFriendObjectKind() != Decl::FOK_None);
else
isFriend = (D->getFriendObjectKind() != Decl::FOK_None);
bool MergeWithParentScope = (TemplateParams != nullptr) ||
Owner->isFunctionOrMethod() ||
!(isa<Decl>(Owner) &&
cast<Decl>(Owner)->isDefinedOutsideFunctionOrMethod());
LocalInstantiationScope Scope(SemaRef, MergeWithParentScope);
ExplicitSpecifier InstantiatedExplicitSpecifier;
if (auto *DGuide = dyn_cast<CXXDeductionGuideDecl>(D)) {
InstantiatedExplicitSpecifier = instantiateExplicitSpecifier(
SemaRef, TemplateArgs, DGuide->getExplicitSpecifier(), DGuide);
if (InstantiatedExplicitSpecifier.isInvalid())
return nullptr;
}
SmallVector<ParmVarDecl *, 4> Params;
TypeSourceInfo *TInfo = SubstFunctionType(D, Params);
if (!TInfo)
return nullptr;
QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo);
if (TemplateParams && TemplateParams->size()) {
auto *LastParam =
dyn_cast<TemplateTypeParmDecl>(TemplateParams->asArray().back());
if (LastParam && LastParam->isImplicit() &&
LastParam->hasTypeConstraint()) {
// In abbreviated templates, the type-constraints of invented template
// type parameters are instantiated with the function type, invalidating
// the TemplateParameterList which relied on the template type parameter
// not having a type constraint. Recreate the TemplateParameterList with
// the updated parameter list.
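// For example (an illustrative sketch; 'C' stands for a placeholder
// concept):
//
//   template<typename T> struct S {
//     friend void f(C auto x, T y);   // instantiating S<T> instantiates the
//   };                                // constraint on f's invented parameter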
TemplateParams = TemplateParameterList::Create(
SemaRef.Context, TemplateParams->getTemplateLoc(),
TemplateParams->getLAngleLoc(), TemplateParams->asArray(),
TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause());
}
}
NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc();
if (QualifierLoc) {
QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc,
TemplateArgs);
if (!QualifierLoc)
return nullptr;
}
// FIXME: Concepts: Do not substitute into constraint expressions
Expr *TrailingRequiresClause = D->getTrailingRequiresClause();
if (TrailingRequiresClause) {
EnterExpressionEvaluationContext ConstantEvaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause,
TemplateArgs);
if (SubstRC.isInvalid())
return nullptr;
TrailingRequiresClause = SubstRC.get();
if (!SemaRef.CheckConstraintExpression(TrailingRequiresClause))
return nullptr;
}
// If we're instantiating a local function declaration, put the result
// in the enclosing namespace; otherwise we need to find the instantiated
// context.
DeclContext *DC;
if (D->isLocalExternDecl()) {
DC = Owner;
SemaRef.adjustContextForLocalExternDecl(DC);
} else if (isFriend && QualifierLoc) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
DC = SemaRef.computeDeclContext(SS);
if (!DC) return nullptr;
} else {
DC = SemaRef.FindInstantiatedContext(D->getLocation(), D->getDeclContext(),
TemplateArgs);
}
DeclarationNameInfo NameInfo
= SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs);
if (FunctionRewriteKind != RewriteKind::None)
adjustForRewrite(FunctionRewriteKind, D, T, TInfo, NameInfo);
FunctionDecl *Function;
if (auto *DGuide = dyn_cast<CXXDeductionGuideDecl>(D)) {
Function = CXXDeductionGuideDecl::Create(
SemaRef.Context, DC, D->getInnerLocStart(),
InstantiatedExplicitSpecifier, NameInfo, T, TInfo,
D->getSourceRange().getEnd());
if (DGuide->isCopyDeductionCandidate())
cast<CXXDeductionGuideDecl>(Function)->setIsCopyDeductionCandidate();
Function->setAccess(D->getAccess());
} else {
Function = FunctionDecl::Create(
SemaRef.Context, DC, D->getInnerLocStart(), NameInfo, T, TInfo,
D->getCanonicalDecl()->getStorageClass(), D->isInlineSpecified(),
D->hasWrittenPrototype(), D->getConstexprKind(),
TrailingRequiresClause);
Function->setRangeEnd(D->getSourceRange().getEnd());
}
if (D->isInlined())
Function->setImplicitlyInline();
if (QualifierLoc)
Function->setQualifierInfo(QualifierLoc);
if (D->isLocalExternDecl())
Function->setLocalExternDecl();
DeclContext *LexicalDC = Owner;
if (!isFriend && D->isOutOfLine() && !D->isLocalExternDecl()) {
assert(D->getDeclContext()->isFileContext());
LexicalDC = D->getDeclContext();
}
Function->setLexicalDeclContext(LexicalDC);
// Attach the parameters
for (unsigned P = 0; P < Params.size(); ++P)
if (Params[P])
Params[P]->setOwningFunction(Function);
Function->setParams(Params);
if (TrailingRequiresClause)
Function->setTrailingRequiresClause(TrailingRequiresClause);
if (TemplateParams) {
// Our resulting instantiation is actually a function template, since we
// are substituting only the outer template parameters. For example, given
//
// template<typename T>
// struct X {
// template<typename U> friend void f(T, U);
// };
//
// X<int> x;
//
// We are instantiating the friend function template "f" within X<int>,
// which means substituting int for T, but leaving "f" as a friend function
// template.
// Build the function template itself.
FunctionTemplate = FunctionTemplateDecl::Create(SemaRef.Context, DC,
Function->getLocation(),
Function->getDeclName(),
TemplateParams, Function);
Function->setDescribedFunctionTemplate(FunctionTemplate);
FunctionTemplate->setLexicalDeclContext(LexicalDC);
if (isFriend && D->isThisDeclarationADefinition()) {
FunctionTemplate->setInstantiatedFromMemberTemplate(
D->getDescribedFunctionTemplate());
}
} else if (FunctionTemplate) {
// Record this function template specialization.
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
Function->setFunctionTemplateSpecialization(FunctionTemplate,
TemplateArgumentList::CreateCopy(SemaRef.Context,
Innermost),
/*InsertPos=*/nullptr);
} else if (isFriend && D->isThisDeclarationADefinition()) {
// Do not connect the friend to the template unless it's actually a
// definition. We don't want non-template functions to be marked as being
// template instantiations.
Function->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation);
}
if (isFriend) {
Function->setObjectOfFriendDecl();
if (FunctionTemplateDecl *FT = Function->getDescribedFunctionTemplate())
FT->setObjectOfFriendDecl();
}
if (InitFunctionInstantiation(Function, D))
Function->setInvalidDecl();
bool IsExplicitSpecialization = false;
LookupResult Previous(
SemaRef, Function->getDeclName(), SourceLocation(),
D->isLocalExternDecl() ? Sema::LookupRedeclarationWithLinkage
: Sema::LookupOrdinaryName,
D->isLocalExternDecl() ? Sema::ForExternalRedeclaration
: SemaRef.forRedeclarationInCurContext());
if (DependentFunctionTemplateSpecializationInfo *Info
= D->getDependentSpecializationInfo()) {
assert(isFriend && "non-friend has dependent specialization info?");
// Instantiate the explicit template arguments.
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
ExplicitArgs, TemplateArgs))
return nullptr;
// Map the candidate templates to their instantiations.
for (unsigned I = 0, E = Info->getNumTemplates(); I != E; ++I) {
Decl *Temp = SemaRef.FindInstantiatedDecl(D->getLocation(),
Info->getTemplate(I),
TemplateArgs);
if (!Temp) return nullptr;
Previous.addDecl(cast<FunctionTemplateDecl>(Temp));
}
if (SemaRef.CheckFunctionTemplateSpecialization(Function,
&ExplicitArgs,
Previous))
Function->setInvalidDecl();
IsExplicitSpecialization = true;
} else if (const ASTTemplateArgumentListInfo *Info =
D->getTemplateSpecializationArgsAsWritten()) {
// The name of this function was written as a template-id.
SemaRef.LookupQualifiedName(Previous, DC);
// Instantiate the explicit template arguments.
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
ExplicitArgs, TemplateArgs))
return nullptr;
if (SemaRef.CheckFunctionTemplateSpecialization(Function,
&ExplicitArgs,
Previous))
Function->setInvalidDecl();
IsExplicitSpecialization = true;
} else if (TemplateParams || !FunctionTemplate) {
// Look only into the namespace where the friend would be declared to
// find a previous declaration. This is the innermost enclosing namespace,
// as described in ActOnFriendFunctionDecl.
SemaRef.LookupQualifiedName(Previous, DC->getRedeclContext());
// In C++, the previous declaration we find might be a tag type
// (class or enum). In this case, the new declaration will hide the
// tag type. Note that this does not apply if we're declaring a
// typedef (C++ [dcl.typedef]p4).
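// For example (illustrative): if the enclosing namespace already contains
//
//   struct f {};
//
// an instantiated friend 'void f(int)' hides that tag name rather than
// conflicting with it.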
if (Previous.isSingleTagDecl())
Previous.clear();
// Filter out previous declarations that don't match the scope. The only
// effect this has is to remove declarations found in inline namespaces
// for friend declarations with unqualified names.
SemaRef.FilterLookupForScope(Previous, DC, /*Scope*/ nullptr,
/*ConsiderLinkage*/ true,
QualifierLoc.hasQualifier());
}
SemaRef.CheckFunctionDeclaration(/*Scope*/ nullptr, Function, Previous,
IsExplicitSpecialization);
// Check the template parameter list against the previous declaration. The
// goal here is to pick up default arguments added since the friend was
// declared; we know the template parameter lists match, since otherwise
// we would not have picked this template as the previous declaration.
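// For example (an illustrative sketch):
//
//   template<typename T> struct A {
//     template<typename U> friend void g(T, U);
//   };
//   template<typename U = int> void g(long, U);   // default added later
//   A<long> a;   // the instantiated friend picks up U's default argument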
if (isFriend && TemplateParams && FunctionTemplate->getPreviousDecl()) {
SemaRef.CheckTemplateParameterList(
TemplateParams,
FunctionTemplate->getPreviousDecl()->getTemplateParameters(),
Function->isThisDeclarationADefinition()
? Sema::TPC_FriendFunctionTemplateDefinition
: Sema::TPC_FriendFunctionTemplate);
}
// If we're introducing a friend definition after the first use, trigger
// instantiation.
// FIXME: If this is a friend function template definition, we should check
// to see if any specializations have been used.
if (isFriend && D->isThisDeclarationADefinition() && Function->isUsed(false)) {
if (MemberSpecializationInfo *MSInfo =
Function->getMemberSpecializationInfo()) {
if (MSInfo->getPointOfInstantiation().isInvalid()) {
SourceLocation Loc = D->getLocation(); // FIXME
MSInfo->setPointOfInstantiation(Loc);
SemaRef.PendingLocalImplicitInstantiations.push_back(
std::make_pair(Function, Loc));
}
}
}
if (D->isExplicitlyDefaulted()) {
if (SubstDefaultedFunction(Function, D))
return nullptr;
}
if (D->isDeleted())
SemaRef.SetDeclDeleted(Function, D->getLocation());
NamedDecl *PrincipalDecl =
(TemplateParams ? cast<NamedDecl>(FunctionTemplate) : Function);
// If this declaration lives in a different context from its lexical context,
// add it to the corresponding lookup table.
if (isFriend ||
(Function->isLocalExternDecl() && !Function->getPreviousDecl()))
DC->makeDeclVisibleInContext(PrincipalDecl);
if (Function->isOverloadedOperator() && !DC->isRecord() &&
PrincipalDecl->isInIdentifierNamespace(Decl::IDNS_Ordinary))
PrincipalDecl->setNonMemberOperator();
return Function;
}
Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
CXXMethodDecl *D, TemplateParameterList *TemplateParams,
Optional<const ASTTemplateArgumentListInfo *> ClassScopeSpecializationArgs,
RewriteKind FunctionRewriteKind) {
FunctionTemplateDecl *FunctionTemplate = D->getDescribedFunctionTemplate();
if (FunctionTemplate && !TemplateParams) {
// We are creating a function template specialization from a function
// template. Check whether there is already a function template
// specialization for this particular set of template arguments.
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
void *InsertPos = nullptr;
FunctionDecl *SpecFunc
= FunctionTemplate->findSpecialization(Innermost, InsertPos);
// If we already have a function template specialization, return it.
if (SpecFunc)
return SpecFunc;
}
bool isFriend;
if (FunctionTemplate)
isFriend = (FunctionTemplate->getFriendObjectKind() != Decl::FOK_None);
else
isFriend = (D->getFriendObjectKind() != Decl::FOK_None);
bool MergeWithParentScope = (TemplateParams != nullptr) ||
!(isa<Decl>(Owner) &&
cast<Decl>(Owner)->isDefinedOutsideFunctionOrMethod());
LocalInstantiationScope Scope(SemaRef, MergeWithParentScope);
// Instantiate enclosing template arguments for friends.
SmallVector<TemplateParameterList *, 4> TempParamLists;
unsigned NumTempParamLists = 0;
if (isFriend && (NumTempParamLists = D->getNumTemplateParameterLists())) {
TempParamLists.resize(NumTempParamLists);
for (unsigned I = 0; I != NumTempParamLists; ++I) {
TemplateParameterList *TempParams = D->getTemplateParameterList(I);
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
TempParamLists[I] = InstParams;
}
}
ExplicitSpecifier InstantiatedExplicitSpecifier =
instantiateExplicitSpecifier(SemaRef, TemplateArgs,
ExplicitSpecifier::getFromDecl(D), D);
if (InstantiatedExplicitSpecifier.isInvalid())
return nullptr;
// Implicit destructors/constructors created for local classes in
// DeclareImplicit* (see SemaDeclCXX.cpp) might not have an associated TSI.
// Unfortunately there isn't enough context in those functions to
// conditionally populate the TSI without breaking non-template related use
// cases. Populate TSIs prior to calling SubstFunctionType to make sure we get
// a proper transformation.
if (cast<CXXRecordDecl>(D->getParent())->isLambda() &&
!D->getTypeSourceInfo() &&
isa<CXXConstructorDecl, CXXDestructorDecl>(D)) {
TypeSourceInfo *TSI =
SemaRef.Context.getTrivialTypeSourceInfo(D->getType());
D->setTypeSourceInfo(TSI);
}
SmallVector<ParmVarDecl *, 4> Params;
TypeSourceInfo *TInfo = SubstFunctionType(D, Params);
if (!TInfo)
return nullptr;
QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo);
if (TemplateParams && TemplateParams->size()) {
auto *LastParam =
dyn_cast<TemplateTypeParmDecl>(TemplateParams->asArray().back());
if (LastParam && LastParam->isImplicit() &&
LastParam->hasTypeConstraint()) {
// In abbreviated templates, the type-constraints of invented template
// type parameters are instantiated with the function type, invalidating
// the TemplateParameterList which relied on the template type parameter
// not having a type constraint. Recreate the TemplateParameterList with
// the updated parameter list.
TemplateParams = TemplateParameterList::Create(
SemaRef.Context, TemplateParams->getTemplateLoc(),
TemplateParams->getLAngleLoc(), TemplateParams->asArray(),
TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause());
}
}
NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc();
if (QualifierLoc) {
QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc,
TemplateArgs);
if (!QualifierLoc)
return nullptr;
}
// FIXME: Concepts: Do not substitute into constraint expressions
Expr *TrailingRequiresClause = D->getTrailingRequiresClause();
if (TrailingRequiresClause) {
EnterExpressionEvaluationContext ConstantEvaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(Owner);
Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext,
D->getMethodQualifiers(), ThisContext);
ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause,
TemplateArgs);
if (SubstRC.isInvalid())
return nullptr;
TrailingRequiresClause = SubstRC.get();
if (!SemaRef.CheckConstraintExpression(TrailingRequiresClause))
return nullptr;
}
DeclContext *DC = Owner;
if (isFriend) {
if (QualifierLoc) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
DC = SemaRef.computeDeclContext(SS);
if (DC && SemaRef.RequireCompleteDeclContext(SS, DC))
return nullptr;
} else {
DC = SemaRef.FindInstantiatedContext(D->getLocation(),
D->getDeclContext(),
TemplateArgs);
}
if (!DC) return nullptr;
}
DeclarationNameInfo NameInfo
= SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs);
if (FunctionRewriteKind != RewriteKind::None)
adjustForRewrite(FunctionRewriteKind, D, T, TInfo, NameInfo);
// Build the instantiated method declaration.
CXXRecordDecl *Record = cast<CXXRecordDecl>(DC);
CXXMethodDecl *Method = nullptr;
SourceLocation StartLoc = D->getInnerLocStart();
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(D)) {
Method = CXXConstructorDecl::Create(
SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo,
InstantiatedExplicitSpecifier, Constructor->isInlineSpecified(), false,
Constructor->getConstexprKind(), InheritedConstructor(),
TrailingRequiresClause);
Method->setRangeEnd(Constructor->getEndLoc());
} else if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(D)) {
Method = CXXDestructorDecl::Create(
SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo,
Destructor->isInlineSpecified(), false, Destructor->getConstexprKind(),
TrailingRequiresClause);
Method->setRangeEnd(Destructor->getEndLoc());
Method->setDeclName(SemaRef.Context.DeclarationNames.getCXXDestructorName(
SemaRef.Context.getCanonicalType(
SemaRef.Context.getTypeDeclType(Record))));
} else if (CXXConversionDecl *Conversion = dyn_cast<CXXConversionDecl>(D)) {
Method = CXXConversionDecl::Create(
SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo,
Conversion->isInlineSpecified(), InstantiatedExplicitSpecifier,
Conversion->getConstexprKind(), Conversion->getEndLoc(),
TrailingRequiresClause);
} else {
StorageClass SC = D->isStatic() ? SC_Static : SC_None;
Method = CXXMethodDecl::Create(SemaRef.Context, Record, StartLoc, NameInfo,
T, TInfo, SC, D->isInlineSpecified(),
D->getConstexprKind(), D->getEndLoc(),
TrailingRequiresClause);
}
if (D->isInlined())
Method->setImplicitlyInline();
if (QualifierLoc)
Method->setQualifierInfo(QualifierLoc);
if (TemplateParams) {
// Our resulting instantiation is actually a function template, since we
// are substituting only the outer template parameters. For example, given
//
// template<typename T>
// struct X {
// template<typename U> void f(T, U);
// };
//
// X<int> x;
//
// We are instantiating the member template "f" within X<int>, which means
// substituting int for T, but leaving "f" as a member function template.
// Build the function template itself.
FunctionTemplate = FunctionTemplateDecl::Create(SemaRef.Context, Record,
Method->getLocation(),
Method->getDeclName(),
TemplateParams, Method);
if (isFriend) {
FunctionTemplate->setLexicalDeclContext(Owner);
FunctionTemplate->setObjectOfFriendDecl();
} else if (D->isOutOfLine())
FunctionTemplate->setLexicalDeclContext(D->getLexicalDeclContext());
Method->setDescribedFunctionTemplate(FunctionTemplate);
} else if (FunctionTemplate) {
// Record this function template specialization.
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
Method->setFunctionTemplateSpecialization(FunctionTemplate,
TemplateArgumentList::CreateCopy(SemaRef.Context,
Innermost),
/*InsertPos=*/nullptr);
} else if (!isFriend) {
// Record that this is an instantiation of a member function.
Method->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation);
}
// If we are instantiating a member function defined
// out-of-line, the instantiation will have the same lexical
// context (which will be a namespace scope) as the template.
if (isFriend) {
if (NumTempParamLists)
Method->setTemplateParameterListsInfo(
SemaRef.Context,
llvm::makeArrayRef(TempParamLists.data(), NumTempParamLists));
Method->setLexicalDeclContext(Owner);
Method->setObjectOfFriendDecl();
} else if (D->isOutOfLine())
Method->setLexicalDeclContext(D->getLexicalDeclContext());
// Attach the parameters
for (unsigned P = 0; P < Params.size(); ++P)
Params[P]->setOwningFunction(Method);
Method->setParams(Params);
if (InitMethodInstantiation(Method, D))
Method->setInvalidDecl();
LookupResult Previous(SemaRef, NameInfo, Sema::LookupOrdinaryName,
Sema::ForExternalRedeclaration);
bool IsExplicitSpecialization = false;
// If the name of this function was written as a template-id, instantiate
// the explicit template arguments.
if (DependentFunctionTemplateSpecializationInfo *Info
= D->getDependentSpecializationInfo()) {
assert(isFriend && "non-friend has dependent specialization info?");
// Instantiate the explicit template arguments.
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
ExplicitArgs, TemplateArgs))
return nullptr;
// Map the candidate templates to their instantiations.
for (unsigned I = 0, E = Info->getNumTemplates(); I != E; ++I) {
Decl *Temp = SemaRef.FindInstantiatedDecl(D->getLocation(),
Info->getTemplate(I),
TemplateArgs);
if (!Temp) return nullptr;
Previous.addDecl(cast<FunctionTemplateDecl>(Temp));
}
if (SemaRef.CheckFunctionTemplateSpecialization(Method,
&ExplicitArgs,
Previous))
Method->setInvalidDecl();
IsExplicitSpecialization = true;
} else if (const ASTTemplateArgumentListInfo *Info =
ClassScopeSpecializationArgs.getValueOr(
D->getTemplateSpecializationArgsAsWritten())) {
SemaRef.LookupQualifiedName(Previous, DC);
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
ExplicitArgs, TemplateArgs))
return nullptr;
if (SemaRef.CheckFunctionTemplateSpecialization(Method,
&ExplicitArgs,
Previous))
Method->setInvalidDecl();
IsExplicitSpecialization = true;
} else if (ClassScopeSpecializationArgs) {
// Class-scope explicit specialization written without explicit template
// arguments.
SemaRef.LookupQualifiedName(Previous, DC);
if (SemaRef.CheckFunctionTemplateSpecialization(Method, nullptr, Previous))
Method->setInvalidDecl();
IsExplicitSpecialization = true;
} else if (!FunctionTemplate || TemplateParams || isFriend) {
SemaRef.LookupQualifiedName(Previous, Record);
// In C++, the previous declaration we find might be a tag type
// (class or enum). In this case, the new declaration will hide the
// tag type. Note that this does not apply if we're declaring a
// typedef (C++ [dcl.typedef]p4).
if (Previous.isSingleTagDecl())
Previous.clear();
}
SemaRef.CheckFunctionDeclaration(nullptr, Method, Previous,
IsExplicitSpecialization);
if (D->isPure())
SemaRef.CheckPureMethod(Method, SourceRange());
// Propagate access. For a non-friend declaration, the access is
// whatever we're propagating from. For a friend, it should be the
// previous declaration we just found.
if (isFriend && Method->getPreviousDecl())
Method->setAccess(Method->getPreviousDecl()->getAccess());
else
Method->setAccess(D->getAccess());
if (FunctionTemplate)
FunctionTemplate->setAccess(Method->getAccess());
SemaRef.CheckOverrideControl(Method);
// If a function is defined as defaulted or deleted, mark it as such now.
if (D->isExplicitlyDefaulted()) {
if (SubstDefaultedFunction(Method, D))
return nullptr;
}
if (D->isDeletedAsWritten())
SemaRef.SetDeclDeleted(Method, Method->getLocation());
// If this is an explicit specialization, mark the implicitly-instantiated
// template specialization as being an explicit specialization too.
// FIXME: Is this necessary?
if (IsExplicitSpecialization && !isFriend)
SemaRef.CompleteMemberSpecialization(Method, Previous);
// If there's a function template, let our caller handle it.
if (FunctionTemplate) {
// do nothing
// Don't hide a (potentially) valid declaration with an invalid one.
} else if (Method->isInvalidDecl() && !Previous.empty()) {
// do nothing
// Otherwise, check access to friends and make them visible.
} else if (isFriend) {
// We only need to re-check access for methods which we didn't
// manage to match during parsing.
if (!D->getPreviousDecl())
SemaRef.CheckFriendAccess(Method);
Record->makeDeclVisibleInContext(Method);
// Otherwise, add the declaration. We don't need to do this for
// class-scope specializations because we'll have matched them with
// the appropriate template.
} else {
Owner->addDecl(Method);
}
// PR17480: Honor the used attribute to instantiate member function
// definitions
if (Method->hasAttr<UsedAttr>()) {
if (const auto *A = dyn_cast<CXXRecordDecl>(Owner)) {
SourceLocation Loc;
if (const MemberSpecializationInfo *MSInfo =
A->getMemberSpecializationInfo())
Loc = MSInfo->getPointOfInstantiation();
else if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(A))
Loc = Spec->getPointOfInstantiation();
SemaRef.MarkFunctionReferenced(Loc, Method);
}
}
return Method;
}
Decl *TemplateDeclInstantiator::VisitCXXConstructorDecl(CXXConstructorDecl *D) {
return VisitCXXMethodDecl(D);
}
Decl *TemplateDeclInstantiator::VisitCXXDestructorDecl(CXXDestructorDecl *D) {
return VisitCXXMethodDecl(D);
}
Decl *TemplateDeclInstantiator::VisitCXXConversionDecl(CXXConversionDecl *D) {
return VisitCXXMethodDecl(D);
}
Decl *TemplateDeclInstantiator::VisitParmVarDecl(ParmVarDecl *D) {
return SemaRef.SubstParmVarDecl(D, TemplateArgs, /*indexAdjustment*/ 0, None,
/*ExpectParameterPack=*/ false);
}
Decl *TemplateDeclInstantiator::VisitTemplateTypeParmDecl(
TemplateTypeParmDecl *D) {
assert(D->getTypeForDecl()->isTemplateTypeParmType());
Optional<unsigned> NumExpanded;
if (const TypeConstraint *TC = D->getTypeConstraint()) {
if (D->isPackExpansion() && !D->isExpandedParameterPack()) {
assert(TC->getTemplateArgsAsWritten() &&
"type parameter can only be an expansion when explicit arguments "
"are specified");
// The template type parameter pack's type is a pack expansion of types.
// Determine whether we need to expand this parameter pack into separate
// types.
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
for (auto &ArgLoc : TC->getTemplateArgsAsWritten()->arguments())
SemaRef.collectUnexpandedParameterPacks(ArgLoc, Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
if (SemaRef.CheckParameterPacksForExpansion(
cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
->getEllipsisLoc(),
SourceRange(TC->getConceptNameLoc(),
TC->hasExplicitTemplateArgs() ?
TC->getTemplateArgsAsWritten()->getRAngleLoc() :
TC->getConceptNameInfo().getEndLoc()),
Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpanded))
return nullptr;
}
}
TemplateTypeParmDecl *Inst = TemplateTypeParmDecl::Create(
SemaRef.Context, Owner, D->getBeginLoc(), D->getLocation(),
D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getIndex(),
D->getIdentifier(), D->wasDeclaredWithTypename(), D->isParameterPack(),
D->hasTypeConstraint(), NumExpanded);
Inst->setAccess(AS_public);
Inst->setImplicit(D->isImplicit());
if (auto *TC = D->getTypeConstraint()) {
if (!D->isImplicit()) {
// Invented template parameter type constraints will be instantiated with
// the corresponding auto-typed parameter as it might reference other
// parameters.
// TODO: Concepts: do not instantiate the constraint (delayed constraint
// substitution)
const ASTTemplateArgumentListInfo *TemplArgInfo
= TC->getTemplateArgsAsWritten();
TemplateArgumentListInfo InstArgs;
if (TemplArgInfo) {
InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
TemplArgInfo->NumTemplateArgs,
InstArgs, TemplateArgs))
return nullptr;
}
if (SemaRef.AttachTypeConstraint(
TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
TC->getNamedConcept(), &InstArgs, Inst,
D->isParameterPack()
? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
->getEllipsisLoc()
: SourceLocation()))
return nullptr;
}
}
if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
TypeSourceInfo *InstantiatedDefaultArg =
SemaRef.SubstType(D->getDefaultArgumentInfo(), TemplateArgs,
D->getDefaultArgumentLoc(), D->getDeclName());
if (InstantiatedDefaultArg)
Inst->setDefaultArgument(InstantiatedDefaultArg);
}
// Introduce this template parameter's instantiation into the instantiation
// scope.
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Inst);
return Inst;
}
Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
NonTypeTemplateParmDecl *D) {
// Substitute into the type of the non-type template parameter.
TypeLoc TL = D->getTypeSourceInfo()->getTypeLoc();
SmallVector<TypeSourceInfo *, 4> ExpandedParameterPackTypesAsWritten;
SmallVector<QualType, 4> ExpandedParameterPackTypes;
bool IsExpandedParameterPack = false;
TypeSourceInfo *DI;
QualType T;
bool Invalid = false;
if (D->isExpandedParameterPack()) {
// The non-type template parameter pack is an already-expanded pack
// expansion of types. Substitute into each of the expanded types.
ExpandedParameterPackTypes.reserve(D->getNumExpansionTypes());
ExpandedParameterPackTypesAsWritten.reserve(D->getNumExpansionTypes());
for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) {
TypeSourceInfo *NewDI =
SemaRef.SubstType(D->getExpansionTypeSourceInfo(I), TemplateArgs,
D->getLocation(), D->getDeclName());
if (!NewDI)
return nullptr;
QualType NewT =
SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation());
if (NewT.isNull())
return nullptr;
ExpandedParameterPackTypesAsWritten.push_back(NewDI);
ExpandedParameterPackTypes.push_back(NewT);
}
IsExpandedParameterPack = true;
DI = D->getTypeSourceInfo();
T = DI->getType();
} else if (D->isPackExpansion()) {
// The non-type template parameter pack's type is a pack expansion of types.
// Determine whether we need to expand this parameter pack into separate
// types.
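// For example (an illustrative sketch):
//
//   template<typename ...Ts> struct X {
//     template<Ts ...Vals> struct Y {};   // the type of Vals is the pack
//   };                                    // expansion 'Ts...'
//   X<int, char>::Y<1, 'a'> y;            // Vals expands to (int, char)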
PackExpansionTypeLoc Expansion = TL.castAs<PackExpansionTypeLoc>();
TypeLoc Pattern = Expansion.getPatternLoc();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(Pattern, Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions
= Expansion.getTypePtr()->getNumExpansions();
Optional<unsigned> NumExpansions = OrigNumExpansions;
if (SemaRef.CheckParameterPacksForExpansion(Expansion.getEllipsisLoc(),
Pattern.getSourceRange(),
Unexpanded,
TemplateArgs,
Expand, RetainExpansion,
NumExpansions))
return nullptr;
if (Expand) {
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I);
TypeSourceInfo *NewDI = SemaRef.SubstType(Pattern, TemplateArgs,
D->getLocation(),
D->getDeclName());
if (!NewDI)
return nullptr;
QualType NewT =
SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation());
if (NewT.isNull())
return nullptr;
ExpandedParameterPackTypesAsWritten.push_back(NewDI);
ExpandedParameterPackTypes.push_back(NewT);
}
// Note that we have an expanded parameter pack. The "type" of this
// expanded parameter pack is the original expansion type, but callers
// will end up using the expanded parameter pack types for type-checking.
IsExpandedParameterPack = true;
DI = D->getTypeSourceInfo();
T = DI->getType();
} else {
// We cannot fully expand the pack expansion now, so substitute into the
// pattern and create a new pack expansion type.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1);
TypeSourceInfo *NewPattern = SemaRef.SubstType(Pattern, TemplateArgs,
D->getLocation(),
D->getDeclName());
if (!NewPattern)
return nullptr;
SemaRef.CheckNonTypeTemplateParameterType(NewPattern, D->getLocation());
DI = SemaRef.CheckPackExpansion(NewPattern, Expansion.getEllipsisLoc(),
NumExpansions);
if (!DI)
return nullptr;
T = DI->getType();
}
} else {
// Simple case: substitution into a parameter that is not a parameter pack.
DI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs,
D->getLocation(), D->getDeclName());
if (!DI)
return nullptr;
// Check that this type is acceptable for a non-type template parameter.
T = SemaRef.CheckNonTypeTemplateParameterType(DI, D->getLocation());
if (T.isNull()) {
T = SemaRef.Context.IntTy;
Invalid = true;
}
}
NonTypeTemplateParmDecl *Param;
if (IsExpandedParameterPack)
Param = NonTypeTemplateParmDecl::Create(
SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
D->getDepth() - TemplateArgs.getNumSubstitutedLevels(),
D->getPosition(), D->getIdentifier(), T, DI, ExpandedParameterPackTypes,
ExpandedParameterPackTypesAsWritten);
else
Param = NonTypeTemplateParmDecl::Create(
SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
D->getDepth() - TemplateArgs.getNumSubstitutedLevels(),
D->getPosition(), D->getIdentifier(), T, D->isParameterPack(), DI);
if (AutoTypeLoc AutoLoc = DI->getTypeLoc().getContainedAutoTypeLoc())
if (AutoLoc.isConstrained())
if (SemaRef.AttachTypeConstraint(
AutoLoc, Param,
IsExpandedParameterPack
? DI->getTypeLoc().getAs<PackExpansionTypeLoc>()
.getEllipsisLoc()
: SourceLocation()))
Invalid = true;
Param->setAccess(AS_public);
Param->setImplicit(D->isImplicit());
if (Invalid)
Param->setInvalidDecl();
if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
EnterExpressionEvaluationContext ConstantEvaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Value = SemaRef.SubstExpr(D->getDefaultArgument(), TemplateArgs);
if (!Value.isInvalid())
Param->setDefaultArgument(Value.get());
}
// Introduce this template parameter's instantiation into the instantiation
// scope.
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Param);
return Param;
}
static void collectUnexpandedParameterPacks(
Sema &S,
TemplateParameterList *Params,
SmallVectorImpl<UnexpandedParameterPack> &Unexpanded) {
for (const auto &P : *Params) {
if (P->isTemplateParameterPack())
continue;
if (NonTypeTemplateParmDecl *NTTP = dyn_cast<NonTypeTemplateParmDecl>(P))
S.collectUnexpandedParameterPacks(NTTP->getTypeSourceInfo()->getTypeLoc(),
Unexpanded);
if (TemplateTemplateParmDecl *TTP = dyn_cast<TemplateTemplateParmDecl>(P))
collectUnexpandedParameterPacks(S, TTP->getTemplateParameters(),
Unexpanded);
}
}
Decl *
TemplateDeclInstantiator::VisitTemplateTemplateParmDecl(
TemplateTemplateParmDecl *D) {
// Instantiate the template parameter list of the template template parameter.
TemplateParameterList *TempParams = D->getTemplateParameters();
TemplateParameterList *InstParams;
SmallVector<TemplateParameterList*, 8> ExpandedParams;
bool IsExpandedParameterPack = false;
if (D->isExpandedParameterPack()) {
// The template template parameter pack is an already-expanded pack
// expansion of template parameters. Substitute into each of the expanded
// parameters.
ExpandedParams.reserve(D->getNumExpansionTemplateParameters());
for (unsigned I = 0, N = D->getNumExpansionTemplateParameters();
I != N; ++I) {
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *Expansion =
SubstTemplateParams(D->getExpansionTemplateParameters(I));
if (!Expansion)
return nullptr;
ExpandedParams.push_back(Expansion);
}
IsExpandedParameterPack = true;
InstParams = TempParams;
} else if (D->isPackExpansion()) {
// The template template parameter pack expands to a pack of template
// template parameters. Determine whether we need to expand this parameter
// pack into separate parameters.
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
collectUnexpandedParameterPacks(SemaRef, D->getTemplateParameters(),
Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions;
if (SemaRef.CheckParameterPacksForExpansion(D->getLocation(),
TempParams->getSourceRange(),
Unexpanded,
TemplateArgs,
Expand, RetainExpansion,
NumExpansions))
return nullptr;
if (Expand) {
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I);
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *Expansion = SubstTemplateParams(TempParams);
if (!Expansion)
return nullptr;
ExpandedParams.push_back(Expansion);
}
// Note that we have an expanded parameter pack. The "type" of this
// expanded parameter pack is the original expansion type, but callers
// will end up using the expanded parameter pack types for type-checking.
IsExpandedParameterPack = true;
InstParams = TempParams;
} else {
// We cannot fully expand the pack expansion now, so just substitute
// into the pattern.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1);
LocalInstantiationScope Scope(SemaRef);
InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
}
} else {
// Perform the actual substitution of template parameters within a new,
// local instantiation scope.
LocalInstantiationScope Scope(SemaRef);
InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
}
// Build the template template parameter.
TemplateTemplateParmDecl *Param;
if (IsExpandedParameterPack)
Param = TemplateTemplateParmDecl::Create(
SemaRef.Context, Owner, D->getLocation(),
D->getDepth() - TemplateArgs.getNumSubstitutedLevels(),
D->getPosition(), D->getIdentifier(), InstParams, ExpandedParams);
else
Param = TemplateTemplateParmDecl::Create(
SemaRef.Context, Owner, D->getLocation(),
D->getDepth() - TemplateArgs.getNumSubstitutedLevels(),
D->getPosition(), D->isParameterPack(), D->getIdentifier(), InstParams);
if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
NestedNameSpecifierLoc QualifierLoc =
D->getDefaultArgument().getTemplateQualifierLoc();
QualifierLoc =
SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, TemplateArgs);
TemplateName TName = SemaRef.SubstTemplateName(
QualifierLoc, D->getDefaultArgument().getArgument().getAsTemplate(),
D->getDefaultArgument().getTemplateNameLoc(), TemplateArgs);
if (!TName.isNull())
Param->setDefaultArgument(
SemaRef.Context,
TemplateArgumentLoc(SemaRef.Context, TemplateArgument(TName),
D->getDefaultArgument().getTemplateQualifierLoc(),
D->getDefaultArgument().getTemplateNameLoc()));
}
Param->setAccess(AS_public);
Param->setImplicit(D->isImplicit());
// Introduce this template parameter's instantiation into the instantiation
// scope.
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Param);
return Param;
}
Decl *TemplateDeclInstantiator::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) {
// Using directives are never dependent (and never contain any types or
// expressions), so they require no explicit instantiation work.
UsingDirectiveDecl *Inst
= UsingDirectiveDecl::Create(SemaRef.Context, Owner, D->getLocation(),
D->getNamespaceKeyLocation(),
D->getQualifierLoc(),
D->getIdentLocation(),
D->getNominatedNamespace(),
D->getCommonAncestor());
// Add the using directive to its declaration context
// only if this is not a function or method.
if (!Owner->isFunctionOrMethod())
Owner->addDecl(Inst);
return Inst;
}
Decl *TemplateDeclInstantiator::VisitBaseUsingDecls(BaseUsingDecl *D,
BaseUsingDecl *Inst,
LookupResult *Lookup) {
bool isFunctionScope = Owner->isFunctionOrMethod();
for (auto *Shadow : D->shadows()) {
// FIXME: UsingShadowDecl doesn't preserve its immediate target, so
// reconstruct it in the case where it matters. Hm, can we extract it from
// the DeclSpec when parsing and save it in the UsingDecl itself?
NamedDecl *OldTarget = Shadow->getTargetDecl();
if (auto *CUSD = dyn_cast<ConstructorUsingShadowDecl>(Shadow))
if (auto *BaseShadow = CUSD->getNominatedBaseClassShadowDecl())
OldTarget = BaseShadow;
NamedDecl *InstTarget = nullptr;
if (auto *EmptyD =
dyn_cast<UnresolvedUsingIfExistsDecl>(Shadow->getTargetDecl())) {
InstTarget = UnresolvedUsingIfExistsDecl::Create(
SemaRef.Context, Owner, EmptyD->getLocation(), EmptyD->getDeclName());
} else {
InstTarget = cast_or_null<NamedDecl>(SemaRef.FindInstantiatedDecl(
Shadow->getLocation(), OldTarget, TemplateArgs));
}
if (!InstTarget)
return nullptr;
UsingShadowDecl *PrevDecl = nullptr;
if (Lookup &&
SemaRef.CheckUsingShadowDecl(Inst, InstTarget, *Lookup, PrevDecl))
continue;
if (UsingShadowDecl *OldPrev = getPreviousDeclForInstantiation(Shadow))
PrevDecl = cast_or_null<UsingShadowDecl>(SemaRef.FindInstantiatedDecl(
Shadow->getLocation(), OldPrev, TemplateArgs));
UsingShadowDecl *InstShadow = SemaRef.BuildUsingShadowDecl(
/*Scope*/ nullptr, Inst, InstTarget, PrevDecl);
SemaRef.Context.setInstantiatedFromUsingShadowDecl(InstShadow, Shadow);
if (isFunctionScope)
SemaRef.CurrentInstantiationScope->InstantiatedLocal(Shadow, InstShadow);
}
return Inst;
}
Decl *TemplateDeclInstantiator::VisitUsingDecl(UsingDecl *D) {
// The nested name specifier may be dependent, for example
// template <typename T> struct t {
// struct s1 { T f1(); };
// struct s2 : s1 { using s1::f1; };
// };
// template struct t<int>;
// Here, in using s1::f1, s1 refers to t<T>::s1;
// we need to substitute for t<int>::s1.
NestedNameSpecifierLoc QualifierLoc
= SemaRef.SubstNestedNameSpecifierLoc(D->getQualifierLoc(),
TemplateArgs);
if (!QualifierLoc)
return nullptr;
// For an inheriting constructor declaration, the name of the using
// declaration is the name of a constructor in this class, not in the
// base class.
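// For example (an illustrative sketch):
//
//   struct B { B(int); };
//   template<typename T> struct D : B {
//     using B::B;   // the instantiated using-declaration names D<T>'s
//   };              // constructors, not B's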
DeclarationNameInfo NameInfo = D->getNameInfo();
if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName)
if (auto *RD = dyn_cast<CXXRecordDecl>(SemaRef.CurContext))
NameInfo.setName(SemaRef.Context.DeclarationNames.getCXXConstructorName(
SemaRef.Context.getCanonicalType(SemaRef.Context.getRecordType(RD))));
// We only need to do redeclaration lookups if we're in a class scope (in
// fact, it's not really even possible in non-class scopes).
bool CheckRedeclaration = Owner->isRecord();
LookupResult Prev(SemaRef, NameInfo, Sema::LookupUsingDeclName,
Sema::ForVisibleRedeclaration);
UsingDecl *NewUD = UsingDecl::Create(SemaRef.Context, Owner,
D->getUsingLoc(),
QualifierLoc,
NameInfo,
D->hasTypename());
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
if (CheckRedeclaration) {
Prev.setHideTags(false);
SemaRef.LookupQualifiedName(Prev, Owner);
// Check for invalid redeclarations.
if (SemaRef.CheckUsingDeclRedeclaration(D->getUsingLoc(),
D->hasTypename(), SS,
D->getLocation(), Prev))
NewUD->setInvalidDecl();
}
if (!NewUD->isInvalidDecl() &&
SemaRef.CheckUsingDeclQualifier(D->getUsingLoc(), D->hasTypename(), SS,
NameInfo, D->getLocation(), nullptr, D))
NewUD->setInvalidDecl();
SemaRef.Context.setInstantiatedFromUsingDecl(NewUD, D);
NewUD->setAccess(D->getAccess());
Owner->addDecl(NewUD);
// Don't process the shadow decls for an invalid decl.
if (NewUD->isInvalidDecl())
return NewUD;
// If the using scope was dependent, or we had dependent bases, we need to
// recheck the inheritance
if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName)
SemaRef.CheckInheritingConstructorUsingDecl(NewUD);
return VisitBaseUsingDecls(D, NewUD, CheckRedeclaration ? &Prev : nullptr);
}
Decl *TemplateDeclInstantiator::VisitUsingEnumDecl(UsingEnumDecl *D) {
// Cannot be a dependent type, but still could be an instantiation
EnumDecl *EnumD = cast_or_null<EnumDecl>(SemaRef.FindInstantiatedDecl(
D->getLocation(), D->getEnumDecl(), TemplateArgs));
if (SemaRef.RequireCompleteEnumDecl(EnumD, EnumD->getLocation()))
return nullptr;
UsingEnumDecl *NewUD =
UsingEnumDecl::Create(SemaRef.Context, Owner, D->getUsingLoc(),
D->getEnumLoc(), D->getLocation(), EnumD);
SemaRef.Context.setInstantiatedFromUsingEnumDecl(NewUD, D);
NewUD->setAccess(D->getAccess());
Owner->addDecl(NewUD);
// Don't process the shadow decls for an invalid decl.
if (NewUD->isInvalidDecl())
return NewUD;
// We don't have to recheck for duplication of the UsingEnumDecl itself, as it
// cannot be dependent, and will therefore have been checked during template
// definition.
return VisitBaseUsingDecls(D, NewUD, nullptr);
}
Decl *TemplateDeclInstantiator::VisitUsingShadowDecl(UsingShadowDecl *D) {
// Ignore these; we handle them in bulk when processing the UsingDecl.
return nullptr;
}
Decl *TemplateDeclInstantiator::VisitConstructorUsingShadowDecl(
ConstructorUsingShadowDecl *D) {
// Ignore these; we handle them in bulk when processing the UsingDecl.
return nullptr;
}
template <typename T>
Decl *TemplateDeclInstantiator::instantiateUnresolvedUsingDecl(
T *D, bool InstantiatingPackElement) {
// If this is a pack expansion, expand it now.
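// For example (an illustrative sketch):
//
//   template<typename ...Bases> struct D : Bases... {
//     using Bases::operator()...;   // expands into one using-declaration
//   };                              // per base class (C++17)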
if (D->isPackExpansion() && !InstantiatingPackElement) {
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(D->getQualifierLoc(), Unexpanded);
SemaRef.collectUnexpandedParameterPacks(D->getNameInfo(), Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions;
if (SemaRef.CheckParameterPacksForExpansion(
D->getEllipsisLoc(), D->getSourceRange(), Unexpanded, TemplateArgs,
Expand, RetainExpansion, NumExpansions))
return nullptr;
// This declaration cannot appear within a function template signature,
// so we can't have a partial argument list for a parameter pack.
assert(!RetainExpansion &&
"should never need to retain an expansion for UsingPackDecl");
if (!Expand) {
// We cannot fully expand the pack expansion now, so substitute into the
// pattern and create a new pack expansion.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1);
return instantiateUnresolvedUsingDecl(D, true);
}
// Within a function, we don't have any normal way to check for conflicts
// between shadow declarations from different using declarations in the
// same pack expansion, but this is always ill-formed because all expansions
// must produce (conflicting) enumerators.
//
// Sadly we can't just reject this in the template definition because it
// could be valid if the pack is empty or has exactly one expansion.
if (D->getDeclContext()->isFunctionOrMethod() && *NumExpansions > 1) {
SemaRef.Diag(D->getEllipsisLoc(),
diag::err_using_decl_redeclaration_expansion);
return nullptr;
}
// Instantiate the slices of this pack and build a UsingPackDecl.
SmallVector<NamedDecl*, 8> Expansions;
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I);
Decl *Slice = instantiateUnresolvedUsingDecl(D, true);
if (!Slice)
return nullptr;
// Note that we can still get unresolved using declarations here, if we
// had arguments for all packs but the pattern also contained other
// template arguments (this only happens during partial substitution, eg
// into the body of a generic lambda in a function template).
Expansions.push_back(cast<NamedDecl>(Slice));
}
auto *NewD = SemaRef.BuildUsingPackDecl(D, Expansions);
if (isDeclWithinFunction(D))
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewD);
return NewD;
}
UnresolvedUsingTypenameDecl *TD = dyn_cast<UnresolvedUsingTypenameDecl>(D);
SourceLocation TypenameLoc = TD ? TD->getTypenameLoc() : SourceLocation();
NestedNameSpecifierLoc QualifierLoc
= SemaRef.SubstNestedNameSpecifierLoc(D->getQualifierLoc(),
TemplateArgs);
if (!QualifierLoc)
return nullptr;
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
DeclarationNameInfo NameInfo
= SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs);
// Produce a pack expansion only if we're not instantiating a particular
// slice of a pack expansion.
bool InstantiatingSlice = D->getEllipsisLoc().isValid() &&
SemaRef.ArgumentPackSubstitutionIndex != -1;
SourceLocation EllipsisLoc =
InstantiatingSlice ? SourceLocation() : D->getEllipsisLoc();
bool IsUsingIfExists = D->template hasAttr<UsingIfExistsAttr>();
NamedDecl *UD = SemaRef.BuildUsingDeclaration(
/*Scope*/ nullptr, D->getAccess(), D->getUsingLoc(),
/*HasTypename*/ TD, TypenameLoc, SS, NameInfo, EllipsisLoc,
ParsedAttributesView(),
/*IsInstantiation*/ true, IsUsingIfExists);
if (UD) {
SemaRef.InstantiateAttrs(TemplateArgs, D, UD);
SemaRef.Context.setInstantiatedFromUsingDecl(UD, D);
}
return UD;
}
Decl *TemplateDeclInstantiator::VisitUnresolvedUsingTypenameDecl(
UnresolvedUsingTypenameDecl *D) {
return instantiateUnresolvedUsingDecl(D);
}
Decl *TemplateDeclInstantiator::VisitUnresolvedUsingValueDecl(
UnresolvedUsingValueDecl *D) {
return instantiateUnresolvedUsingDecl(D);
}
Decl *TemplateDeclInstantiator::VisitUnresolvedUsingIfExistsDecl(
UnresolvedUsingIfExistsDecl *D) {
llvm_unreachable("referring to unresolved decl out of UsingShadowDecl");
}
Decl *TemplateDeclInstantiator::VisitUsingPackDecl(UsingPackDecl *D) {
SmallVector<NamedDecl*, 8> Expansions;
for (auto *UD : D->expansions()) {
if (NamedDecl *NewUD =
SemaRef.FindInstantiatedDecl(D->getLocation(), UD, TemplateArgs))
Expansions.push_back(NewUD);
else
return nullptr;
}
auto *NewD = SemaRef.BuildUsingPackDecl(D, Expansions);
if (isDeclWithinFunction(D))
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewD);
return NewD;
}
Decl *TemplateDeclInstantiator::VisitClassScopeFunctionSpecializationDecl(
ClassScopeFunctionSpecializationDecl *Decl) {
CXXMethodDecl *OldFD = Decl->getSpecialization();
return cast_or_null<CXXMethodDecl>(
VisitCXXMethodDecl(OldFD, nullptr, Decl->getTemplateArgsAsWritten()));
}
Decl *TemplateDeclInstantiator::VisitOMPThreadPrivateDecl(
OMPThreadPrivateDecl *D) {
SmallVector<Expr *, 5> Vars;
for (auto *I : D->varlists()) {
Expr *Var = SemaRef.SubstExpr(I, TemplateArgs).get();
assert(isa<DeclRefExpr>(Var) && "threadprivate arg is not a DeclRefExpr");
Vars.push_back(Var);
}
OMPThreadPrivateDecl *TD =
SemaRef.CheckOMPThreadPrivateDecl(D->getLocation(), Vars);
TD->setAccess(AS_public);
Owner->addDecl(TD);
return TD;
}
Decl *TemplateDeclInstantiator::VisitOMPAllocateDecl(OMPAllocateDecl *D) {
SmallVector<Expr *, 5> Vars;
for (auto *I : D->varlists()) {
Expr *Var = SemaRef.SubstExpr(I, TemplateArgs).get();
assert(isa<DeclRefExpr>(Var) && "allocate arg is not a DeclRefExpr");
Vars.push_back(Var);
}
SmallVector<OMPClause *, 4> Clauses;
// Substitute the allocator clauses from the original directive.
for (OMPClause *C : D->clauselists()) {
auto *AC = cast<OMPAllocatorClause>(C);
ExprResult NewE = SemaRef.SubstExpr(AC->getAllocator(), TemplateArgs);
if (!NewE.isUsable())
continue;
OMPClause *IC = SemaRef.ActOnOpenMPAllocatorClause(
NewE.get(), AC->getBeginLoc(), AC->getLParenLoc(), AC->getEndLoc());
Clauses.push_back(IC);
}
Sema::DeclGroupPtrTy Res = SemaRef.ActOnOpenMPAllocateDirective(
D->getLocation(), Vars, Clauses, Owner);
if (Res.get().isNull())
return nullptr;
return Res.get().getSingleDecl();
}
Decl *TemplateDeclInstantiator::VisitOMPRequiresDecl(OMPRequiresDecl *D) {
llvm_unreachable(
"Requires directive cannot be instantiated within a dependent context");
}
Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl(
OMPDeclareReductionDecl *D) {
// Instantiate type and check if it is allowed.
const bool RequiresInstantiation =
D->getType()->isDependentType() ||
D->getType()->isInstantiationDependentType() ||
D->getType()->containsUnexpandedParameterPack();
QualType SubstReductionType;
if (RequiresInstantiation) {
SubstReductionType = SemaRef.ActOnOpenMPDeclareReductionType(
D->getLocation(),
ParsedType::make(SemaRef.SubstType(
D->getType(), TemplateArgs, D->getLocation(), DeclarationName())));
} else {
SubstReductionType = D->getType();
}
if (SubstReductionType.isNull())
return nullptr;
Expr *Combiner = D->getCombiner();
Expr *Init = D->getInitializer();
bool IsCorrect = true;
// Create instantiated copy.
std::pair<QualType, SourceLocation> ReductionTypes[] = {
std::make_pair(SubstReductionType, D->getLocation())};
auto *PrevDeclInScope = D->getPrevDeclInScope();
if (PrevDeclInScope && !PrevDeclInScope->isInvalidDecl()) {
PrevDeclInScope = cast<OMPDeclareReductionDecl>(
SemaRef.CurrentInstantiationScope->findInstantiationOf(PrevDeclInScope)
->get<Decl *>());
}
auto DRD = SemaRef.ActOnOpenMPDeclareReductionDirectiveStart(
/*S=*/nullptr, Owner, D->getDeclName(), ReductionTypes, D->getAccess(),
PrevDeclInScope);
auto *NewDRD = cast<OMPDeclareReductionDecl>(DRD.get().getSingleDecl());
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDRD);
Expr *SubstCombiner = nullptr;
Expr *SubstInitializer = nullptr;
// Combiners instantiation sequence.
if (Combiner) {
SemaRef.ActOnOpenMPDeclareReductionCombinerStart(
/*S=*/nullptr, NewDRD);
SemaRef.CurrentInstantiationScope->InstantiatedLocal(
cast<DeclRefExpr>(D->getCombinerIn())->getDecl(),
cast<DeclRefExpr>(NewDRD->getCombinerIn())->getDecl());
SemaRef.CurrentInstantiationScope->InstantiatedLocal(
cast<DeclRefExpr>(D->getCombinerOut())->getDecl(),
cast<DeclRefExpr>(NewDRD->getCombinerOut())->getDecl());
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(Owner);
Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(),
ThisContext);
SubstCombiner = SemaRef.SubstExpr(Combiner, TemplateArgs).get();
SemaRef.ActOnOpenMPDeclareReductionCombinerEnd(NewDRD, SubstCombiner);
}
// Initializers instantiation sequence.
if (Init) {
VarDecl *OmpPrivParm = SemaRef.ActOnOpenMPDeclareReductionInitializerStart(
/*S=*/nullptr, NewDRD);
SemaRef.CurrentInstantiationScope->InstantiatedLocal(
cast<DeclRefExpr>(D->getInitOrig())->getDecl(),
cast<DeclRefExpr>(NewDRD->getInitOrig())->getDecl());
SemaRef.CurrentInstantiationScope->InstantiatedLocal(
cast<DeclRefExpr>(D->getInitPriv())->getDecl(),
cast<DeclRefExpr>(NewDRD->getInitPriv())->getDecl());
if (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit) {
SubstInitializer = SemaRef.SubstExpr(Init, TemplateArgs).get();
} else {
auto *OldPrivParm =
cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl());
IsCorrect = IsCorrect && OldPrivParm->hasInit();
if (IsCorrect)
SemaRef.InstantiateVariableInitializer(OmpPrivParm, OldPrivParm,
TemplateArgs);
}
SemaRef.ActOnOpenMPDeclareReductionInitializerEnd(NewDRD, SubstInitializer,
OmpPrivParm);
}
IsCorrect = IsCorrect && SubstCombiner &&
(!Init ||
(D->getInitializerKind() == OMPDeclareReductionDecl::CallInit &&
SubstInitializer) ||
(D->getInitializerKind() != OMPDeclareReductionDecl::CallInit &&
!SubstInitializer));
(void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd(
/*S=*/nullptr, DRD, IsCorrect && !D->isInvalidDecl());
return NewDRD;
}
Decl *
TemplateDeclInstantiator::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) {
// Instantiate type and check if it is allowed.
const bool RequiresInstantiation =
D->getType()->isDependentType() ||
D->getType()->isInstantiationDependentType() ||
D->getType()->containsUnexpandedParameterPack();
QualType SubstMapperTy;
DeclarationName VN = D->getVarName();
if (RequiresInstantiation) {
SubstMapperTy = SemaRef.ActOnOpenMPDeclareMapperType(
D->getLocation(),
ParsedType::make(SemaRef.SubstType(D->getType(), TemplateArgs,
D->getLocation(), VN)));
} else {
SubstMapperTy = D->getType();
}
if (SubstMapperTy.isNull())
return nullptr;
// Create an instantiated copy of mapper.
auto *PrevDeclInScope = D->getPrevDeclInScope();
if (PrevDeclInScope && !PrevDeclInScope->isInvalidDecl()) {
PrevDeclInScope = cast<OMPDeclareMapperDecl>(
SemaRef.CurrentInstantiationScope->findInstantiationOf(PrevDeclInScope)
->get<Decl *>());
}
bool IsCorrect = true;
SmallVector<OMPClause *, 6> Clauses;
// Instantiate the mapper variable.
DeclarationNameInfo DirName;
SemaRef.StartOpenMPDSABlock(llvm::omp::OMPD_declare_mapper, DirName,
/*S=*/nullptr,
(*D->clauselist_begin())->getBeginLoc());
ExprResult MapperVarRef = SemaRef.ActOnOpenMPDeclareMapperDirectiveVarDecl(
/*S=*/nullptr, SubstMapperTy, D->getLocation(), VN);
SemaRef.CurrentInstantiationScope->InstantiatedLocal(
cast<DeclRefExpr>(D->getMapperVarRef())->getDecl(),
cast<DeclRefExpr>(MapperVarRef.get())->getDecl());
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(Owner);
Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(),
ThisContext);
// Instantiate map clauses.
for (OMPClause *C : D->clauselists()) {
auto *OldC = cast<OMPMapClause>(C);
SmallVector<Expr *, 4> NewVars;
for (Expr *OE : OldC->varlists()) {
Expr *NE = SemaRef.SubstExpr(OE, TemplateArgs).get();
if (!NE) {
IsCorrect = false;
break;
}
NewVars.push_back(NE);
}
if (!IsCorrect)
break;
NestedNameSpecifierLoc NewQualifierLoc =
SemaRef.SubstNestedNameSpecifierLoc(OldC->getMapperQualifierLoc(),
TemplateArgs);
CXXScopeSpec SS;
SS.Adopt(NewQualifierLoc);
DeclarationNameInfo NewNameInfo =
SemaRef.SubstDeclarationNameInfo(OldC->getMapperIdInfo(), TemplateArgs);
OMPVarListLocTy Locs(OldC->getBeginLoc(), OldC->getLParenLoc(),
OldC->getEndLoc());
OMPClause *NewC = SemaRef.ActOnOpenMPMapClause(
OldC->getMapTypeModifiers(), OldC->getMapTypeModifiersLoc(), SS,
NewNameInfo, OldC->getMapType(), OldC->isImplicitMapType(),
OldC->getMapLoc(), OldC->getColonLoc(), NewVars, Locs);
Clauses.push_back(NewC);
}
SemaRef.EndOpenMPDSABlock(nullptr);
if (!IsCorrect)
return nullptr;
Sema::DeclGroupPtrTy DG = SemaRef.ActOnOpenMPDeclareMapperDirective(
/*S=*/nullptr, Owner, D->getDeclName(), SubstMapperTy, D->getLocation(),
VN, D->getAccess(), MapperVarRef.get(), Clauses, PrevDeclInScope);
Decl *NewDMD = DG.get().getSingleDecl();
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDMD);
return NewDMD;
}
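// Illustrative example for VisitOMPDeclareMapperDecl above (editorial sketch,
// not part of the original source; names are invented): a dependent
// 'declare mapper' whose mapper variable and map clauses are substituted on
// instantiation, e.g.
//
//   template <class T> struct dat {
//     T d;
//   #pragma omp declare mapper(id : dat<T> x) map(x.d)
//   };
//
// Instantiating dat<double> substitutes the mapper type dat<double>, the
// mapper variable 'x', and the map clause 'map(x.d)'.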
Decl *TemplateDeclInstantiator::VisitOMPCapturedExprDecl(
OMPCapturedExprDecl * /*D*/) {
llvm_unreachable("Should not be met in templates");
}
Decl *TemplateDeclInstantiator::VisitFunctionDecl(FunctionDecl *D) {
return VisitFunctionDecl(D, nullptr);
}
Decl *
TemplateDeclInstantiator::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) {
Decl *Inst = VisitFunctionDecl(D, nullptr);
if (Inst && !D->getDescribedFunctionTemplate())
Owner->addDecl(Inst);
return Inst;
}
Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(CXXMethodDecl *D) {
return VisitCXXMethodDecl(D, nullptr);
}
Decl *TemplateDeclInstantiator::VisitRecordDecl(RecordDecl *D) {
llvm_unreachable("There are only CXXRecordDecls in C++");
}
Decl *
TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl(
ClassTemplateSpecializationDecl *D) {
// As a MS extension, we permit class-scope explicit specialization
// of member class templates.
ClassTemplateDecl *ClassTemplate = D->getSpecializedTemplate();
assert(ClassTemplate->getDeclContext()->isRecord() &&
D->getTemplateSpecializationKind() == TSK_ExplicitSpecialization &&
"can only instantiate an explicit specialization "
"for a member class template");
// Lookup the already-instantiated declaration in the instantiation
// of the class template.
ClassTemplateDecl *InstClassTemplate =
cast_or_null<ClassTemplateDecl>(SemaRef.FindInstantiatedDecl(
D->getLocation(), ClassTemplate, TemplateArgs));
if (!InstClassTemplate)
return nullptr;
// Substitute into the template arguments of the class template explicit
// specialization.
TemplateSpecializationTypeLoc Loc = D->getTypeAsWritten()->getTypeLoc().
castAs<TemplateSpecializationTypeLoc>();
TemplateArgumentListInfo InstTemplateArgs(Loc.getLAngleLoc(),
Loc.getRAngleLoc());
SmallVector<TemplateArgumentLoc, 4> ArgLocs;
for (unsigned I = 0; I != Loc.getNumArgs(); ++I)
ArgLocs.push_back(Loc.getArgLoc(I));
if (SemaRef.Subst(ArgLocs.data(), ArgLocs.size(),
InstTemplateArgs, TemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this
// class template.
SmallVector<TemplateArgument, 4> Converted;
if (SemaRef.CheckTemplateArgumentList(InstClassTemplate,
D->getLocation(),
InstTemplateArgs,
false,
Converted,
/*UpdateArgsWithConversion=*/true))
return nullptr;
// Figure out where to insert this class template explicit specialization
// in the member template's set of class template explicit specializations.
void *InsertPos = nullptr;
ClassTemplateSpecializationDecl *PrevDecl =
InstClassTemplate->findSpecialization(Converted, InsertPos);
// Check whether we've already seen a conflicting instantiation of this
// declaration (for instance, if there was a prior implicit instantiation).
bool Ignored;
if (PrevDecl &&
SemaRef.CheckSpecializationInstantiationRedecl(D->getLocation(),
D->getSpecializationKind(),
PrevDecl,
PrevDecl->getSpecializationKind(),
PrevDecl->getPointOfInstantiation(),
Ignored))
return nullptr;
// If PrevDecl was a definition and D is also a definition, diagnose.
// This happens in cases like:
//
// template<typename T, typename U>
// struct Outer {
// template<typename X> struct Inner;
// template<> struct Inner<T> {};
// template<> struct Inner<U> {};
// };
//
// Outer<int, int> outer; // error: the explicit specializations of Inner
// // have the same signature.
if (PrevDecl && PrevDecl->getDefinition() &&
D->isThisDeclarationADefinition()) {
SemaRef.Diag(D->getLocation(), diag::err_redefinition) << PrevDecl;
SemaRef.Diag(PrevDecl->getDefinition()->getLocation(),
diag::note_previous_definition);
return nullptr;
}
// Create the class template explicit specialization declaration.
ClassTemplateSpecializationDecl *InstD =
ClassTemplateSpecializationDecl::Create(
SemaRef.Context, D->getTagKind(), Owner, D->getBeginLoc(),
D->getLocation(), InstClassTemplate, Converted, PrevDecl);
// Add this explicit specialization to the set of class template
// specializations.
if (!PrevDecl)
InstClassTemplate->AddSpecialization(InstD, InsertPos);
// Substitute the nested name specifier, if any.
if (SubstQualifier(D, InstD))
return nullptr;
// Build the canonical type that describes the converted template
// arguments of the class template explicit specialization.
QualType CanonType = SemaRef.Context.getTemplateSpecializationType(
TemplateName(InstClassTemplate), Converted,
SemaRef.Context.getRecordType(InstD));
// Build the fully-sugared type for this class template
// specialization as the user wrote in the specialization
// itself. This means that we'll pretty-print the type retrieved
// from the specialization's declaration the way that the user
// actually wrote the specialization, rather than formatting the
// name based on the "canonical" representation used to store the
// template arguments in the specialization.
TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo(
TemplateName(InstClassTemplate), D->getLocation(), InstTemplateArgs,
CanonType);
InstD->setAccess(D->getAccess());
InstD->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation);
InstD->setSpecializationKind(D->getSpecializationKind());
InstD->setTypeAsWritten(WrittenTy);
InstD->setExternLoc(D->getExternLoc());
InstD->setTemplateKeywordLoc(D->getTemplateKeywordLoc());
Owner->addDecl(InstD);
// Instantiate the members of the class-scope explicit specialization eagerly.
// We don't have support for lazy instantiation of an explicit specialization
// yet, and MSVC eagerly instantiates in this case.
// FIXME: This is wrong in standard C++.
if (D->isThisDeclarationADefinition() &&
SemaRef.InstantiateClass(D->getLocation(), InstD, D, TemplateArgs,
TSK_ImplicitInstantiation,
/*Complain=*/true))
return nullptr;
return InstD;
}
Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
VarTemplateSpecializationDecl *D) {
TemplateArgumentListInfo VarTemplateArgsInfo;
VarTemplateDecl *VarTemplate = D->getSpecializedTemplate();
assert(VarTemplate &&
"A template specialization without specialized template?");
VarTemplateDecl *InstVarTemplate =
cast_or_null<VarTemplateDecl>(SemaRef.FindInstantiatedDecl(
D->getLocation(), VarTemplate, TemplateArgs));
if (!InstVarTemplate)
return nullptr;
// Substitute the current template arguments.
const TemplateArgumentListInfo &TemplateArgsInfo = D->getTemplateArgsInfo();
VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo.getLAngleLoc());
VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo.getRAngleLoc());
if (SemaRef.Subst(TemplateArgsInfo.getArgumentArray(),
TemplateArgsInfo.size(), VarTemplateArgsInfo, TemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this template.
SmallVector<TemplateArgument, 4> Converted;
if (SemaRef.CheckTemplateArgumentList(InstVarTemplate, D->getLocation(),
VarTemplateArgsInfo, false, Converted,
/*UpdateArgsWithConversion=*/true))
return nullptr;
// Check whether we've already seen a declaration of this specialization.
void *InsertPos = nullptr;
VarTemplateSpecializationDecl *PrevDecl =
InstVarTemplate->findSpecialization(Converted, InsertPos);
// Check whether we've already seen a conflicting instantiation of this
// declaration (for instance, if there was a prior implicit instantiation).
bool Ignored;
if (PrevDecl && SemaRef.CheckSpecializationInstantiationRedecl(
D->getLocation(), D->getSpecializationKind(), PrevDecl,
PrevDecl->getSpecializationKind(),
PrevDecl->getPointOfInstantiation(), Ignored))
return nullptr;
return VisitVarTemplateSpecializationDecl(
InstVarTemplate, D, VarTemplateArgsInfo, Converted, PrevDecl);
}
Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
VarTemplateDecl *VarTemplate, VarDecl *D,
const TemplateArgumentListInfo &TemplateArgsInfo,
ArrayRef<TemplateArgument> Converted,
VarTemplateSpecializationDecl *PrevDecl) {
// Do substitution on the type of the declaration
TypeSourceInfo *DI =
SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs,
D->getTypeSpecStartLoc(), D->getDeclName());
if (!DI)
return nullptr;
if (DI->getType()->isFunctionType()) {
SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function)
<< D->isStaticDataMember() << DI->getType();
return nullptr;
}
// Build the instantiated declaration
VarTemplateSpecializationDecl *Var = VarTemplateSpecializationDecl::Create(
SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
VarTemplate, DI->getType(), DI, D->getStorageClass(), Converted);
Var->setTemplateArgsInfo(TemplateArgsInfo);
if (!PrevDecl) {
void *InsertPos = nullptr;
VarTemplate->findSpecialization(Converted, InsertPos);
VarTemplate->AddSpecialization(Var, InsertPos);
}
if (SemaRef.getLangOpts().OpenCL)
SemaRef.deduceOpenCLAddressSpace(Var);
// Substitute the nested name specifier, if any.
if (SubstQualifier(D, Var))
return nullptr;
SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner,
StartingScope, false, PrevDecl);
return Var;
}
Decl *TemplateDeclInstantiator::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D) {
llvm_unreachable("@defs is not supported in Objective-C++");
}
Decl *TemplateDeclInstantiator::VisitFriendTemplateDecl(FriendTemplateDecl *D) {
// FIXME: We need to be able to instantiate FriendTemplateDecls.
unsigned DiagID = SemaRef.getDiagnostics().getCustomDiagID(
DiagnosticsEngine::Error,
"cannot instantiate %0 yet");
SemaRef.Diag(D->getLocation(), DiagID)
<< D->getDeclKindName();
return nullptr;
}
Decl *TemplateDeclInstantiator::VisitConceptDecl(ConceptDecl *D) {
llvm_unreachable("Concept definitions cannot reside inside a template");
}
Decl *
TemplateDeclInstantiator::VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D) {
return RequiresExprBodyDecl::Create(SemaRef.Context, D->getDeclContext(),
D->getBeginLoc());
}
Decl *TemplateDeclInstantiator::VisitDecl(Decl *D) {
llvm_unreachable("Unexpected decl");
}
Decl *Sema::SubstDecl(Decl *D, DeclContext *Owner,
const MultiLevelTemplateArgumentList &TemplateArgs) {
TemplateDeclInstantiator Instantiator(*this, Owner, TemplateArgs);
if (D->isInvalidDecl())
return nullptr;
Decl *SubstD;
runWithSufficientStackSpace(D->getLocation(), [&] {
SubstD = Instantiator.Visit(D);
});
return SubstD;
}
void TemplateDeclInstantiator::adjustForRewrite(RewriteKind RK,
FunctionDecl *Orig, QualType &T,
TypeSourceInfo *&TInfo,
DeclarationNameInfo &NameInfo) {
assert(RK == RewriteKind::RewriteSpaceshipAsEqualEqual);
// C++2a [class.compare.default]p3:
// the return type is replaced with bool
auto *FPT = T->castAs<FunctionProtoType>();
T = SemaRef.Context.getFunctionType(
SemaRef.Context.BoolTy, FPT->getParamTypes(), FPT->getExtProtoInfo());
// Update the return type in the source info too. The most straightforward
// way is to create new TypeSourceInfo for the new type. Use the location of
// the '= default' as the location of the new type.
//
// FIXME: Set the correct return type when we initially transform the type,
// rather than delaying it to now.
TypeSourceInfo *NewTInfo =
SemaRef.Context.getTrivialTypeSourceInfo(T, Orig->getEndLoc());
auto OldLoc = TInfo->getTypeLoc().getAsAdjusted<FunctionProtoTypeLoc>();
assert(OldLoc && "type of function is not a function type?");
auto NewLoc = NewTInfo->getTypeLoc().castAs<FunctionProtoTypeLoc>();
for (unsigned I = 0, N = OldLoc.getNumParams(); I != N; ++I)
NewLoc.setParam(I, OldLoc.getParam(I));
TInfo = NewTInfo;
// and the declarator-id is replaced with operator==
NameInfo.setName(
SemaRef.Context.DeclarationNames.getCXXOperatorName(OO_EqualEqual));
}
FunctionDecl *Sema::SubstSpaceshipAsEqualEqual(CXXRecordDecl *RD,
FunctionDecl *Spaceship) {
if (Spaceship->isInvalidDecl())
return nullptr;
// C++2a [class.compare.default]p3:
// an == operator function is declared implicitly [...] with the same
// access and function-definition and in the same class scope as the
// three-way comparison operator function
MultiLevelTemplateArgumentList NoTemplateArgs;
NoTemplateArgs.setKind(TemplateSubstitutionKind::Rewrite);
NoTemplateArgs.addOuterRetainedLevels(RD->getTemplateDepth());
TemplateDeclInstantiator Instantiator(*this, RD, NoTemplateArgs);
Decl *R;
if (auto *MD = dyn_cast<CXXMethodDecl>(Spaceship)) {
R = Instantiator.VisitCXXMethodDecl(
MD, nullptr, None,
TemplateDeclInstantiator::RewriteKind::RewriteSpaceshipAsEqualEqual);
} else {
assert(Spaceship->getFriendObjectKind() &&
"defaulted spaceship is neither a member nor a friend");
R = Instantiator.VisitFunctionDecl(
Spaceship, nullptr,
TemplateDeclInstantiator::RewriteKind::RewriteSpaceshipAsEqualEqual);
if (!R)
return nullptr;
FriendDecl *FD =
FriendDecl::Create(Context, RD, Spaceship->getLocation(),
cast<NamedDecl>(R), Spaceship->getBeginLoc());
FD->setAccess(AS_public);
RD->addDecl(FD);
}
return cast_or_null<FunctionDecl>(R);
}
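// Illustrative example for SubstSpaceshipAsEqualEqual above (editorial
// sketch, not part of the original source): per C++2a [class.compare.default],
// a defaulted three-way comparison implicitly declares a matching '=='.
//
//   struct P {
//     int x, y;
//     auto operator<=>(const P &) const = default;
//   };
//
// The rewrite reuses the spaceship declaration as the pattern, replacing the
// return type with bool and the declarator-id with operator==, yielding
// 'bool operator==(const P &) const' with the same access.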
/// Instantiates a nested template parameter list in the current
/// instantiation context.
///
/// \param L The parameter list to instantiate
///
/// \returns NULL if there was an error
TemplateParameterList *
TemplateDeclInstantiator::SubstTemplateParams(TemplateParameterList *L) {
// Get errors for all the parameters before bailing out.
bool Invalid = false;
unsigned N = L->size();
typedef SmallVector<NamedDecl *, 8> ParamVector;
ParamVector Params;
Params.reserve(N);
for (auto &P : *L) {
NamedDecl *D = cast_or_null<NamedDecl>(Visit(P));
Params.push_back(D);
Invalid = Invalid || !D || D->isInvalidDecl();
}
// Clean up if we had an error.
if (Invalid)
return nullptr;
// FIXME: Concepts: Substitution into requires clause should only happen when
// checking satisfaction.
Expr *InstRequiresClause = nullptr;
if (Expr *E = L->getRequiresClause()) {
EnterExpressionEvaluationContext ConstantEvaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
ExprResult Res = SemaRef.SubstExpr(E, TemplateArgs);
if (Res.isInvalid() || !Res.isUsable()) {
return nullptr;
}
InstRequiresClause = Res.get();
}
TemplateParameterList *InstL
= TemplateParameterList::Create(SemaRef.Context, L->getTemplateLoc(),
L->getLAngleLoc(), Params,
L->getRAngleLoc(), InstRequiresClause);
return InstL;
}
TemplateParameterList *
Sema::SubstTemplateParams(TemplateParameterList *Params, DeclContext *Owner,
const MultiLevelTemplateArgumentList &TemplateArgs) {
TemplateDeclInstantiator Instantiator(*this, Owner, TemplateArgs);
return Instantiator.SubstTemplateParams(Params);
}
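// Illustrative example for SubstTemplateParams above (editorial sketch, not
// part of the original source; names are invented): the nested parameter list
// of a member template, including any requires-clause, is rebuilt when the
// enclosing template is instantiated, e.g.
//
//   template <typename T> struct Outer {
//     template <typename U> requires (sizeof(U) >= sizeof(T))
//     void f(U);
//   };
//
// Instantiating Outer<int> produces an inner list equivalent to
// 'template <typename U> requires (sizeof(U) >= sizeof(int))'.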
/// Instantiate the declaration of a class template partial
/// specialization.
///
/// \param ClassTemplate the (instantiated) class template that is partially
/// specialized by the instantiation of \p PartialSpec.
///
/// \param PartialSpec the (uninstantiated) class template partial
/// specialization that we are instantiating.
///
/// \returns The instantiated partial specialization, if successful; otherwise,
/// NULL to indicate an error.
ClassTemplatePartialSpecializationDecl *
TemplateDeclInstantiator::InstantiateClassTemplatePartialSpecialization(
ClassTemplateDecl *ClassTemplate,
ClassTemplatePartialSpecializationDecl *PartialSpec) {
// Create a local instantiation scope for this class template partial
// specialization, which will contain the instantiations of the template
// parameters.
LocalInstantiationScope Scope(SemaRef);
// Substitute into the template parameters of the class template partial
// specialization.
TemplateParameterList *TempParams = PartialSpec->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
// Substitute into the template arguments of the class template partial
// specialization.
const ASTTemplateArgumentListInfo *TemplArgInfo
= PartialSpec->getTemplateArgsAsWritten();
TemplateArgumentListInfo InstTemplateArgs(TemplArgInfo->LAngleLoc,
TemplArgInfo->RAngleLoc);
if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
TemplArgInfo->NumTemplateArgs,
InstTemplateArgs, TemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this
// class template.
SmallVector<TemplateArgument, 4> Converted;
if (SemaRef.CheckTemplateArgumentList(ClassTemplate,
PartialSpec->getLocation(),
InstTemplateArgs,
false,
Converted))
return nullptr;
// Check these arguments are valid for a template partial specialization.
if (SemaRef.CheckTemplatePartialSpecializationArgs(
PartialSpec->getLocation(), ClassTemplate, InstTemplateArgs.size(),
Converted))
return nullptr;
// Figure out where to insert this class template partial specialization
// in the member template's set of class template partial specializations.
void *InsertPos = nullptr;
ClassTemplateSpecializationDecl *PrevDecl
= ClassTemplate->findPartialSpecialization(Converted, InstParams,
InsertPos);
// Build the canonical type that describes the converted template
// arguments of the class template partial specialization.
QualType CanonType
= SemaRef.Context.getTemplateSpecializationType(TemplateName(ClassTemplate),
Converted);
// Build the fully-sugared type for this class template
// specialization as the user wrote in the specialization
// itself. This means that we'll pretty-print the type retrieved
// from the specialization's declaration the way that the user
// actually wrote the specialization, rather than formatting the
// name based on the "canonical" representation used to store the
// template arguments in the specialization.
TypeSourceInfo *WrittenTy
= SemaRef.Context.getTemplateSpecializationTypeInfo(
TemplateName(ClassTemplate),
PartialSpec->getLocation(),
InstTemplateArgs,
CanonType);
if (PrevDecl) {
// We've already seen a partial specialization with the same template
// parameters and template arguments. This can happen, for example, when
// substituting the outer template arguments ends up causing two
// class template partial specializations of a member class template
// to have identical forms, e.g.,
//
// template<typename T, typename U>
// struct Outer {
// template<typename X, typename Y> struct Inner;
// template<typename Y> struct Inner<T, Y>;
// template<typename Y> struct Inner<U, Y>;
// };
//
// Outer<int, int> outer; // error: the partial specializations of Inner
// // have the same signature.
SemaRef.Diag(PartialSpec->getLocation(), diag::err_partial_spec_redeclared)
<< WrittenTy->getType();
SemaRef.Diag(PrevDecl->getLocation(), diag::note_prev_partial_spec_here)
<< SemaRef.Context.getTypeDeclType(PrevDecl);
return nullptr;
}
// Create the class template partial specialization declaration.
ClassTemplatePartialSpecializationDecl *InstPartialSpec =
ClassTemplatePartialSpecializationDecl::Create(
SemaRef.Context, PartialSpec->getTagKind(), Owner,
PartialSpec->getBeginLoc(), PartialSpec->getLocation(), InstParams,
ClassTemplate, Converted, InstTemplateArgs, CanonType, nullptr);
// Substitute the nested name specifier, if any.
if (SubstQualifier(PartialSpec, InstPartialSpec))
return nullptr;
InstPartialSpec->setInstantiatedFromMember(PartialSpec);
InstPartialSpec->setTypeAsWritten(WrittenTy);
// Check the completed partial specialization.
SemaRef.CheckTemplatePartialSpecialization(InstPartialSpec);
// Add this partial specialization to the set of class template partial
// specializations.
ClassTemplate->AddPartialSpecialization(InstPartialSpec,
/*InsertPos=*/nullptr);
return InstPartialSpec;
}
/// Instantiate the declaration of a variable template partial
/// specialization.
///
/// \param VarTemplate the (instantiated) variable template that is partially
/// specialized by the instantiation of \p PartialSpec.
///
/// \param PartialSpec the (uninstantiated) variable template partial
/// specialization that we are instantiating.
///
/// \returns The instantiated partial specialization, if successful; otherwise,
/// NULL to indicate an error.
VarTemplatePartialSpecializationDecl *
TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization(
VarTemplateDecl *VarTemplate,
VarTemplatePartialSpecializationDecl *PartialSpec) {
// Create a local instantiation scope for this variable template partial
// specialization, which will contain the instantiations of the template
// parameters.
LocalInstantiationScope Scope(SemaRef);
// Substitute into the template parameters of the variable template partial
// specialization.
TemplateParameterList *TempParams = PartialSpec->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
// Substitute into the template arguments of the variable template partial
// specialization.
const ASTTemplateArgumentListInfo *TemplArgInfo
= PartialSpec->getTemplateArgsAsWritten();
TemplateArgumentListInfo InstTemplateArgs(TemplArgInfo->LAngleLoc,
TemplArgInfo->RAngleLoc);
if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
TemplArgInfo->NumTemplateArgs,
InstTemplateArgs, TemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this
// variable template.
SmallVector<TemplateArgument, 4> Converted;
if (SemaRef.CheckTemplateArgumentList(VarTemplate, PartialSpec->getLocation(),
InstTemplateArgs, false, Converted))
return nullptr;
// Check these arguments are valid for a template partial specialization.
if (SemaRef.CheckTemplatePartialSpecializationArgs(
PartialSpec->getLocation(), VarTemplate, InstTemplateArgs.size(),
Converted))
return nullptr;
// Figure out where to insert this variable template partial specialization
// in the member template's set of variable template partial specializations.
void *InsertPos = nullptr;
VarTemplateSpecializationDecl *PrevDecl =
VarTemplate->findPartialSpecialization(Converted, InstParams, InsertPos);
// Build the canonical type that describes the converted template
// arguments of the variable template partial specialization.
QualType CanonType = SemaRef.Context.getTemplateSpecializationType(
TemplateName(VarTemplate), Converted);
// Build the fully-sugared type for this variable template
// specialization as the user wrote in the specialization
// itself. This means that we'll pretty-print the type retrieved
// from the specialization's declaration the way that the user
// actually wrote the specialization, rather than formatting the
// name based on the "canonical" representation used to store the
// template arguments in the specialization.
TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo(
TemplateName(VarTemplate), PartialSpec->getLocation(), InstTemplateArgs,
CanonType);
if (PrevDecl) {
// We've already seen a partial specialization with the same template
// parameters and template arguments. This can happen, for example, when
// substituting the outer template arguments ends up causing two
// variable template partial specializations of a member variable template
// to have identical forms, e.g.,
//
// template<typename T, typename U>
// struct Outer {
// template<typename X, typename Y> pair<X,Y> p;
// template<typename Y> pair<T, Y> p;
// template<typename Y> pair<U, Y> p;
// };
//
// Outer<int, int> outer; // error: the partial specializations of p
// // have the same signature.
SemaRef.Diag(PartialSpec->getLocation(),
diag::err_var_partial_spec_redeclared)
<< WrittenTy->getType();
SemaRef.Diag(PrevDecl->getLocation(),
diag::note_var_prev_partial_spec_here);
return nullptr;
}
// Do substitution on the type of the declaration
TypeSourceInfo *DI = SemaRef.SubstType(
PartialSpec->getTypeSourceInfo(), TemplateArgs,
PartialSpec->getTypeSpecStartLoc(), PartialSpec->getDeclName());
if (!DI)
return nullptr;
if (DI->getType()->isFunctionType()) {
SemaRef.Diag(PartialSpec->getLocation(),
diag::err_variable_instantiates_to_function)
<< PartialSpec->isStaticDataMember() << DI->getType();
return nullptr;
}
// Create the variable template partial specialization declaration.
VarTemplatePartialSpecializationDecl *InstPartialSpec =
VarTemplatePartialSpecializationDecl::Create(
SemaRef.Context, Owner, PartialSpec->getInnerLocStart(),
PartialSpec->getLocation(), InstParams, VarTemplate, DI->getType(),
DI, PartialSpec->getStorageClass(), Converted, InstTemplateArgs);
// Substitute the nested name specifier, if any.
if (SubstQualifier(PartialSpec, InstPartialSpec))
return nullptr;
InstPartialSpec->setInstantiatedFromMember(PartialSpec);
InstPartialSpec->setTypeAsWritten(WrittenTy);
// Check the completed partial specialization.
SemaRef.CheckTemplatePartialSpecialization(InstPartialSpec);
// Add this partial specialization to the set of variable template partial
// specializations. The instantiation of the initializer is not necessary.
VarTemplate->AddPartialSpecialization(InstPartialSpec, /*InsertPos=*/nullptr);
SemaRef.BuildVariableInstantiation(InstPartialSpec, PartialSpec, TemplateArgs,
LateAttrs, Owner, StartingScope);
return InstPartialSpec;
}
TypeSourceInfo*
TemplateDeclInstantiator::SubstFunctionType(FunctionDecl *D,
SmallVectorImpl<ParmVarDecl *> &Params) {
TypeSourceInfo *OldTInfo = D->getTypeSourceInfo();
assert(OldTInfo && "substituting function without type source info");
assert(Params.empty() && "parameter vector is non-empty at start");
CXXRecordDecl *ThisContext = nullptr;
Qualifiers ThisTypeQuals;
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
ThisContext = cast<CXXRecordDecl>(Owner);
ThisTypeQuals = Method->getMethodQualifiers();
}
TypeSourceInfo *NewTInfo
= SemaRef.SubstFunctionDeclType(OldTInfo, TemplateArgs,
D->getTypeSpecStartLoc(),
D->getDeclName(),
ThisContext, ThisTypeQuals);
if (!NewTInfo)
return nullptr;
TypeLoc OldTL = OldTInfo->getTypeLoc().IgnoreParens();
if (FunctionProtoTypeLoc OldProtoLoc = OldTL.getAs<FunctionProtoTypeLoc>()) {
if (NewTInfo != OldTInfo) {
// Get parameters from the new type info.
TypeLoc NewTL = NewTInfo->getTypeLoc().IgnoreParens();
FunctionProtoTypeLoc NewProtoLoc = NewTL.castAs<FunctionProtoTypeLoc>();
unsigned NewIdx = 0;
for (unsigned OldIdx = 0, NumOldParams = OldProtoLoc.getNumParams();
OldIdx != NumOldParams; ++OldIdx) {
ParmVarDecl *OldParam = OldProtoLoc.getParam(OldIdx);
if (!OldParam)
return nullptr;
LocalInstantiationScope *Scope = SemaRef.CurrentInstantiationScope;
Optional<unsigned> NumArgumentsInExpansion;
if (OldParam->isParameterPack())
NumArgumentsInExpansion =
SemaRef.getNumArgumentsInExpansion(OldParam->getType(),
TemplateArgs);
if (!NumArgumentsInExpansion) {
// Simple case: normal parameter, or a parameter pack that's
// instantiated to a (still-dependent) parameter pack.
ParmVarDecl *NewParam = NewProtoLoc.getParam(NewIdx++);
Params.push_back(NewParam);
Scope->InstantiatedLocal(OldParam, NewParam);
} else {
// Parameter pack expansion: make the instantiation an argument pack.
Scope->MakeInstantiatedLocalArgPack(OldParam);
for (unsigned I = 0; I != *NumArgumentsInExpansion; ++I) {
ParmVarDecl *NewParam = NewProtoLoc.getParam(NewIdx++);
Params.push_back(NewParam);
Scope->InstantiatedLocalPackArg(OldParam, NewParam);
}
}
}
} else {
// The function type itself was not dependent and therefore no
// substitution occurred. However, we still need to instantiate
// the function parameters themselves.
const FunctionProtoType *OldProto =
cast<FunctionProtoType>(OldProtoLoc.getType());
for (unsigned i = 0, i_end = OldProtoLoc.getNumParams(); i != i_end;
++i) {
ParmVarDecl *OldParam = OldProtoLoc.getParam(i);
if (!OldParam) {
Params.push_back(SemaRef.BuildParmVarDeclForTypedef(
D, D->getLocation(), OldProto->getParamType(i)));
continue;
}
ParmVarDecl *Parm =
cast_or_null<ParmVarDecl>(VisitParmVarDecl(OldParam));
if (!Parm)
return nullptr;
Params.push_back(Parm);
}
}
} else {
// If the type of this function, after ignoring parentheses, is not
// *directly* a function type, then we're instantiating a function that
// was declared via a typedef or with attributes, e.g.,
//
// typedef int functype(int, int);
// functype func;
// int __cdecl meth(int, int);
//
// In this case, we'll just go instantiate the ParmVarDecls that we
// synthesized in the method declaration.
SmallVector<QualType, 4> ParamTypes;
Sema::ExtParameterInfoBuilder ExtParamInfos;
if (SemaRef.SubstParmTypes(D->getLocation(), D->parameters(), nullptr,
TemplateArgs, ParamTypes, &Params,
ExtParamInfos))
return nullptr;
}
return NewTInfo;
}
/// Introduce the instantiated function parameters into the local
/// instantiation scope, and set the parameter names to those used
/// in the template.
static bool addInstantiatedParametersToScope(Sema &S, FunctionDecl *Function,
const FunctionDecl *PatternDecl,
LocalInstantiationScope &Scope,
const MultiLevelTemplateArgumentList &TemplateArgs) {
unsigned FParamIdx = 0;
for (unsigned I = 0, N = PatternDecl->getNumParams(); I != N; ++I) {
const ParmVarDecl *PatternParam = PatternDecl->getParamDecl(I);
if (!PatternParam->isParameterPack()) {
// Simple case: not a parameter pack.
assert(FParamIdx < Function->getNumParams());
ParmVarDecl *FunctionParam = Function->getParamDecl(FParamIdx);
FunctionParam->setDeclName(PatternParam->getDeclName());
// If the parameter's type is not dependent, update it to match the type
// in the pattern. They can differ in top-level cv-qualifiers, and we want
// the pattern's type here. If the type is dependent, they can't differ,
// per core issue 1668. Substitute into the type from the pattern, in case
// it's instantiation-dependent.
// FIXME: Updating the type to work around this is at best fragile.
if (!PatternDecl->getType()->isDependentType()) {
QualType T = S.SubstType(PatternParam->getType(), TemplateArgs,
FunctionParam->getLocation(),
FunctionParam->getDeclName());
if (T.isNull())
return true;
FunctionParam->setType(T);
}
Scope.InstantiatedLocal(PatternParam, FunctionParam);
++FParamIdx;
continue;
}
// Expand the parameter pack.
Scope.MakeInstantiatedLocalArgPack(PatternParam);
Optional<unsigned> NumArgumentsInExpansion
= S.getNumArgumentsInExpansion(PatternParam->getType(), TemplateArgs);
if (NumArgumentsInExpansion) {
QualType PatternType =
PatternParam->getType()->castAs<PackExpansionType>()->getPattern();
for (unsigned Arg = 0; Arg < *NumArgumentsInExpansion; ++Arg) {
ParmVarDecl *FunctionParam = Function->getParamDecl(FParamIdx);
FunctionParam->setDeclName(PatternParam->getDeclName());
if (!PatternDecl->getType()->isDependentType()) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, Arg);
QualType T = S.SubstType(PatternType, TemplateArgs,
FunctionParam->getLocation(),
FunctionParam->getDeclName());
if (T.isNull())
return true;
FunctionParam->setType(T);
}
Scope.InstantiatedLocalPackArg(PatternParam, FunctionParam);
++FParamIdx;
}
}
}
return false;
}
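// Illustrative example for addInstantiatedParametersToScope above (editorial
// sketch, not part of the original source; names are invented): a pattern
// parameter pack expands to several instantiated parameters, e.g.
//
//   template <typename... Ts> void f(Ts... vs);
//
// For f<int, char>, the single pattern parameter 'vs' becomes an argument
// pack covering two instantiated ParmVarDecls, each registered through
// InstantiatedLocalPackArg, while a non-pack parameter maps one-to-one via
// InstantiatedLocal.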
bool Sema::InstantiateDefaultArgument(SourceLocation CallLoc, FunctionDecl *FD,
ParmVarDecl *Param) {
assert(Param->hasUninstantiatedDefaultArg());
Expr *UninstExpr = Param->getUninstantiatedDefaultArg();
EnterExpressionEvaluationContext EvalContext(
*this, ExpressionEvaluationContext::PotentiallyEvaluated, Param);
// Instantiate the expression.
//
// FIXME: Pass in a correct Pattern argument, otherwise
// getTemplateInstantiationArgs uses the lexical context of FD, e.g.
//
// template<typename T>
// struct A {
// static int FooImpl();
//
// template<typename Tp>
// // bug: default argument A<T>::FooImpl() is evaluated with 2-level
// // template argument list [[T], [Tp]], should be [[Tp]].
// friend A<Tp> Foo(int a);
// };
//
// template<typename T>
// A<T> Foo(int a = A<T>::FooImpl());
MultiLevelTemplateArgumentList TemplateArgs
= getTemplateInstantiationArgs(FD, nullptr, /*RelativeToPrimary=*/true);
InstantiatingTemplate Inst(*this, CallLoc, Param,
TemplateArgs.getInnermost());
if (Inst.isInvalid())
return true;
if (Inst.isAlreadyInstantiating()) {
Diag(Param->getBeginLoc(), diag::err_recursive_default_argument) << FD;
Param->setInvalidDecl();
return true;
}
ExprResult Result;
{
// C++ [dcl.fct.default]p5:
// The names in the [default argument] expression are bound, and
// the semantic constraints are checked, at the point where the
// default argument expression appears.
ContextRAII SavedContext(*this, FD);
LocalInstantiationScope Local(*this);
FunctionDecl *Pattern = FD->getTemplateInstantiationPattern(
/*ForDefinition*/ false);
if (addInstantiatedParametersToScope(*this, FD, Pattern, Local,
TemplateArgs))
return true;
runWithSufficientStackSpace(CallLoc, [&] {
Result = SubstInitializer(UninstExpr, TemplateArgs,
/*DirectInit*/false);
});
}
if (Result.isInvalid())
return true;
// Check the expression as an initializer for the parameter.
InitializedEntity Entity
= InitializedEntity::InitializeParameter(Context, Param);
InitializationKind Kind = InitializationKind::CreateCopy(
Param->getLocation(),
/*FIXME:EqualLoc*/ UninstExpr->getBeginLoc());
Expr *ResultE = Result.getAs<Expr>();
InitializationSequence InitSeq(*this, Entity, Kind, ResultE);
Result = InitSeq.Perform(*this, Entity, Kind, ResultE);
if (Result.isInvalid())
return true;
Result =
ActOnFinishFullExpr(Result.getAs<Expr>(), Param->getOuterLocStart(),
/*DiscardedValue*/ false);
if (Result.isInvalid())
return true;
// Remember the instantiated default argument.
Param->setDefaultArg(Result.getAs<Expr>());
if (ASTMutationListener *L = getASTMutationListener())
L->DefaultArgumentInstantiated(Param);
return false;
}
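// Illustrative example for InstantiateDefaultArgument above (editorial
// sketch, not part of the original source; names are invented): default
// arguments of a templated function are instantiated lazily, at the call that
// actually uses them, e.g.
//
//   template <typename T> T make(T seed = T(42));
//   int  i = make<int>();    // default argument 'int(42)' instantiated here
//   long j = make<long>(7);  // default argument never instantiated
//
// The substituted expression is then checked as an initializer for the
// parameter and cached with setDefaultArg.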
void Sema::InstantiateExceptionSpec(SourceLocation PointOfInstantiation,
FunctionDecl *Decl) {
const FunctionProtoType *Proto = Decl->getType()->castAs<FunctionProtoType>();
if (Proto->getExceptionSpecType() != EST_Uninstantiated)
return;
InstantiatingTemplate Inst(*this, PointOfInstantiation, Decl,
InstantiatingTemplate::ExceptionSpecification());
if (Inst.isInvalid()) {
// We hit the instantiation depth limit. Clear the exception specification
// so that our callers don't have to cope with EST_Uninstantiated.
UpdateExceptionSpec(Decl, EST_None);
return;
}
if (Inst.isAlreadyInstantiating()) {
// This exception specification indirectly depends on itself. Reject.
// FIXME: Corresponding rule in the standard?
Diag(PointOfInstantiation, diag::err_exception_spec_cycle) << Decl;
UpdateExceptionSpec(Decl, EST_None);
return;
}
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
Sema::ContextRAII savedContext(*this, Decl);
LocalInstantiationScope Scope(*this);
MultiLevelTemplateArgumentList TemplateArgs =
getTemplateInstantiationArgs(Decl, nullptr, /*RelativeToPrimary*/true);
// FIXME: We can't use getTemplateInstantiationPattern(false) in general
// here, because for a non-defining friend declaration in a class template,
// we don't store enough information to map back to the friend declaration in
// the template.
FunctionDecl *Template = Proto->getExceptionSpecTemplate();
if (addInstantiatedParametersToScope(*this, Decl, Template, Scope,
TemplateArgs)) {
UpdateExceptionSpec(Decl, EST_None);
return;
}
SubstExceptionSpec(Decl, Template->getType()->castAs<FunctionProtoType>(),
TemplateArgs);
}
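// Illustrative example for InstantiateExceptionSpec above (editorial sketch,
// not part of the original source; names are invented): per DR1330, a
// non-trivial exception specification is deferred and only instantiated when
// it is needed, e.g.
//
//   template <typename T> void f(T) noexcept(noexcept(T()));
//
// f<Widget> is created with EST_Uninstantiated; calling it, taking its
// address, or otherwise needing the exception specification triggers the
// substitution of 'noexcept(T())' at that point.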
bool Sema::CheckInstantiatedFunctionTemplateConstraints(
SourceLocation PointOfInstantiation, FunctionDecl *Decl,
ArrayRef<TemplateArgument> TemplateArgs,
ConstraintSatisfaction &Satisfaction) {
// In most cases we're not going to have constraints, so check for that first.
FunctionTemplateDecl *Template = Decl->getPrimaryTemplate();
// Note - code synthesis context for the constraints check is created
// inside CheckConstraintsSatisfaction.
SmallVector<const Expr *, 3> TemplateAC;
Template->getAssociatedConstraints(TemplateAC);
if (TemplateAC.empty()) {
Satisfaction.IsSatisfied = true;
return false;
}
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
Sema::ContextRAII savedContext(*this, Decl);
LocalInstantiationScope Scope(*this);
// If this is not an explicit specialization - we need to get the instantiated
// version of the template arguments and add them to scope for the
// substitution.
if (Decl->isTemplateInstantiation()) {
InstantiatingTemplate Inst(*this, Decl->getPointOfInstantiation(),
InstantiatingTemplate::ConstraintsCheck{}, Decl->getPrimaryTemplate(),
TemplateArgs, SourceRange());
if (Inst.isInvalid())
return true;
MultiLevelTemplateArgumentList MLTAL(
*Decl->getTemplateSpecializationArgs());
if (addInstantiatedParametersToScope(
*this, Decl, Decl->getPrimaryTemplate()->getTemplatedDecl(),
Scope, MLTAL))
return true;
}
Qualifiers ThisQuals;
CXXRecordDecl *Record = nullptr;
if (auto *Method = dyn_cast<CXXMethodDecl>(Decl)) {
ThisQuals = Method->getMethodQualifiers();
Record = Method->getParent();
}
CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr);
return CheckConstraintSatisfaction(Template, TemplateAC, TemplateArgs,
PointOfInstantiation, Satisfaction);
}
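// Illustrative example for CheckInstantiatedFunctionTemplateConstraints above
// (editorial sketch, not part of the original source; names are invented):
//
//   template <typename T> requires std::integral<T>
//   void g(T);
//
// For g<int> the associated constraint std::integral<int> is checked with the
// instantiated parameters in scope and is satisfied; for g<double> the
// constraint is not satisfied and Satisfaction records why.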
/// Initializes the common fields of an instantiated function
/// declaration (New) from the corresponding fields of its template (Tmpl).
///
/// \returns true if there was an error
bool
TemplateDeclInstantiator::InitFunctionInstantiation(FunctionDecl *New,
FunctionDecl *Tmpl) {
New->setImplicit(Tmpl->isImplicit());
// Forward the mangling number from the template to the instantiated decl.
SemaRef.Context.setManglingNumber(New,
SemaRef.Context.getManglingNumber(Tmpl));
// If we are substituting explicitly-specified template arguments
// or deduced template arguments into a function template and we reach this
// point, we are now past the point where SFINAE applies and have committed
// to keeping the new function template specialization. We therefore
// convert the active template instantiation for the function template
// into a template instantiation for this specific function template
// specialization, which is not a SFINAE context, so that we diagnose any
// further errors in the declaration itself.
//
// FIXME: This is a hack.
typedef Sema::CodeSynthesisContext ActiveInstType;
ActiveInstType &ActiveInst = SemaRef.CodeSynthesisContexts.back();
if (ActiveInst.Kind == ActiveInstType::ExplicitTemplateArgumentSubstitution ||
ActiveInst.Kind == ActiveInstType::DeducedTemplateArgumentSubstitution) {
if (FunctionTemplateDecl *FunTmpl
= dyn_cast<FunctionTemplateDecl>(ActiveInst.Entity)) {
assert(FunTmpl->getTemplatedDecl() == Tmpl &&
"Deduction from the wrong function template?");
(void) FunTmpl;
SemaRef.InstantiatingSpecializations.erase(
{ActiveInst.Entity->getCanonicalDecl(), ActiveInst.Kind});
atTemplateEnd(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst);
ActiveInst.Kind = ActiveInstType::TemplateInstantiation;
ActiveInst.Entity = New;
atTemplateBegin(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst);
}
}
const FunctionProtoType *Proto = Tmpl->getType()->getAs<FunctionProtoType>();
assert(Proto && "Function template without prototype?");
if (Proto->hasExceptionSpec() || Proto->getNoReturnAttr()) {
FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo();
// DR1330: In C++11, defer instantiation of a non-trivial
// exception specification.
// DR1484: Local classes and their members are instantiated along with the
// containing function.
if (SemaRef.getLangOpts().CPlusPlus11 &&
EPI.ExceptionSpec.Type != EST_None &&
EPI.ExceptionSpec.Type != EST_DynamicNone &&
EPI.ExceptionSpec.Type != EST_BasicNoexcept &&
!Tmpl->isInLocalScopeForInstantiation()) {
FunctionDecl *ExceptionSpecTemplate = Tmpl;
if (EPI.ExceptionSpec.Type == EST_Uninstantiated)
ExceptionSpecTemplate = EPI.ExceptionSpec.SourceTemplate;
ExceptionSpecificationType NewEST = EST_Uninstantiated;
if (EPI.ExceptionSpec.Type == EST_Unevaluated)
NewEST = EST_Unevaluated;
// Mark the function as having an uninstantiated exception specification.
const FunctionProtoType *NewProto
= New->getType()->getAs<FunctionProtoType>();
assert(NewProto && "Template instantiation without function prototype?");
EPI = NewProto->getExtProtoInfo();
EPI.ExceptionSpec.Type = NewEST;
EPI.ExceptionSpec.SourceDecl = New;
EPI.ExceptionSpec.SourceTemplate = ExceptionSpecTemplate;
New->setType(SemaRef.Context.getFunctionType(
NewProto->getReturnType(), NewProto->getParamTypes(), EPI));
} else {
Sema::ContextRAII SwitchContext(SemaRef, New);
SemaRef.SubstExceptionSpec(New, Proto, TemplateArgs);
}
}
// Get the definition. Leaves the variable unchanged if undefined.
const FunctionDecl *Definition = Tmpl;
Tmpl->isDefined(Definition);
SemaRef.InstantiateAttrs(TemplateArgs, Definition, New,
LateAttrs, StartingScope);
return false;
}
/// Initializes common fields of an instantiated method
/// declaration (New) from the corresponding fields of its template
/// (Tmpl).
///
/// \returns true if there was an error
bool
TemplateDeclInstantiator::InitMethodInstantiation(CXXMethodDecl *New,
CXXMethodDecl *Tmpl) {
if (InitFunctionInstantiation(New, Tmpl))
return true;
if (isa<CXXDestructorDecl>(New) && SemaRef.getLangOpts().CPlusPlus11)
SemaRef.AdjustDestructorExceptionSpec(cast<CXXDestructorDecl>(New));
New->setAccess(Tmpl->getAccess());
if (Tmpl->isVirtualAsWritten())
New->setVirtualAsWritten(true);
// FIXME: New needs a pointer to Tmpl
return false;
}
bool TemplateDeclInstantiator::SubstDefaultedFunction(FunctionDecl *New,
FunctionDecl *Tmpl) {
// Transfer across any unqualified lookups.
if (auto *DFI = Tmpl->getDefaultedFunctionInfo()) {
SmallVector<DeclAccessPair, 32> Lookups;
Lookups.reserve(DFI->getUnqualifiedLookups().size());
bool AnyChanged = false;
for (DeclAccessPair DA : DFI->getUnqualifiedLookups()) {
NamedDecl *D = SemaRef.FindInstantiatedDecl(New->getLocation(),
DA.getDecl(), TemplateArgs);
if (!D)
return true;
AnyChanged |= (D != DA.getDecl());
Lookups.push_back(DeclAccessPair::make(D, DA.getAccess()));
}
// It's unlikely that substitution will change any declarations. Don't
// store an unnecessary copy in that case.
New->setDefaultedFunctionInfo(
AnyChanged ? FunctionDecl::DefaultedFunctionInfo::Create(
SemaRef.Context, Lookups)
: DFI);
}
SemaRef.SetDeclDefaulted(New, Tmpl->getLocation());
return false;
}
/// Instantiate (or find existing instantiation of) a function template with a
/// given set of template arguments.
///
/// Usually this should not be used, and template argument deduction should be
/// used in its place.
FunctionDecl *
Sema::InstantiateFunctionDeclaration(FunctionTemplateDecl *FTD,
const TemplateArgumentList *Args,
SourceLocation Loc) {
FunctionDecl *FD = FTD->getTemplatedDecl();
sema::TemplateDeductionInfo Info(Loc);
InstantiatingTemplate Inst(
*this, Loc, FTD, Args->asArray(),
CodeSynthesisContext::ExplicitTemplateArgumentSubstitution, Info);
if (Inst.isInvalid())
return nullptr;
ContextRAII SavedContext(*this, FD);
MultiLevelTemplateArgumentList MArgs(*Args);
return cast_or_null<FunctionDecl>(SubstDecl(FD, FD->getParent(), MArgs));
}
/// Instantiate the definition of the given function from its
/// template.
///
/// \param PointOfInstantiation the point at which the instantiation was
/// required. Note that this is not precisely a "point of instantiation"
/// for the function, but it's close.
///
/// \param Function the already-instantiated declaration of a
/// function template specialization or member function of a class template
/// specialization.
///
/// \param Recursive if true, recursively instantiates any functions that
/// are required by this instantiation.
///
/// \param DefinitionRequired if true, then we are performing an explicit
/// instantiation where the body of the function is required. Complain if
/// there is no such body.
void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
FunctionDecl *Function,
bool Recursive,
bool DefinitionRequired,
bool AtEndOfTU) {
if (Function->isInvalidDecl() || isa<CXXDeductionGuideDecl>(Function))
return;
// Never instantiate an explicit specialization except if it is a class scope
// explicit specialization.
TemplateSpecializationKind TSK =
Function->getTemplateSpecializationKindForInstantiation();
if (TSK == TSK_ExplicitSpecialization)
return;
// Don't instantiate a definition if we already have one.
const FunctionDecl *ExistingDefn = nullptr;
if (Function->isDefined(ExistingDefn,
/*CheckForPendingFriendDefinition=*/true)) {
if (ExistingDefn->isThisDeclarationADefinition())
return;
// If we're asked to instantiate a function whose body comes from an
// instantiated friend declaration, attach the instantiated body to the
// corresponding declaration of the function.
assert(ExistingDefn->isThisDeclarationInstantiatedFromAFriendDefinition());
Function = const_cast<FunctionDecl*>(ExistingDefn);
}
// Find the function body that we'll be substituting.
const FunctionDecl *PatternDecl = Function->getTemplateInstantiationPattern();
assert(PatternDecl && "instantiating a non-template");
const FunctionDecl *PatternDef = PatternDecl->getDefinition();
Stmt *Pattern = nullptr;
if (PatternDef) {
Pattern = PatternDef->getBody(PatternDef);
PatternDecl = PatternDef;
if (PatternDef->willHaveBody())
PatternDef = nullptr;
}
// FIXME: We need to track the instantiation stack in order to know which
// definitions should be visible within this instantiation.
if (DiagnoseUninstantiableTemplate(PointOfInstantiation, Function,
Function->getInstantiatedFromMemberFunction(),
PatternDecl, PatternDef, TSK,
/*Complain*/DefinitionRequired)) {
if (DefinitionRequired)
Function->setInvalidDecl();
else if (TSK == TSK_ExplicitInstantiationDefinition) {
// Try again at the end of the translation unit (at which point a
// definition will be required).
assert(!Recursive);
Function->setInstantiationIsPending(true);
PendingInstantiations.push_back(
std::make_pair(Function, PointOfInstantiation));
} else if (TSK == TSK_ImplicitInstantiation) {
if (AtEndOfTU && !getDiagnostics().hasErrorOccurred() &&
!getSourceManager().isInSystemHeader(PatternDecl->getBeginLoc())) {
Diag(PointOfInstantiation, diag::warn_func_template_missing)
<< Function;
Diag(PatternDecl->getLocation(), diag::note_forward_template_decl);
if (getLangOpts().CPlusPlus11)
Diag(PointOfInstantiation, diag::note_inst_declaration_hint)
<< Function;
}
}
return;
}
// Postpone late parsed template instantiations.
if (PatternDecl->isLateTemplateParsed() &&
!LateTemplateParser) {
Function->setInstantiationIsPending(true);
LateParsedInstantiations.push_back(
std::make_pair(Function, PointOfInstantiation));
return;
}
llvm::TimeTraceScope TimeScope("InstantiateFunction", [&]() {
std::string Name;
llvm::raw_string_ostream OS(Name);
Function->getNameForDiagnostic(OS, getPrintingPolicy(),
/*Qualified=*/true);
return Name;
});
// If we're performing recursive template instantiation, create our own
// queue of pending implicit instantiations that we will instantiate later,
// while we're still within our own instantiation context.
// This has to happen before LateTemplateParser below is called, so that
// it marks vtables used in late parsed templates as used.
GlobalEagerInstantiationScope GlobalInstantiations(*this,
/*Enabled=*/Recursive);
LocalEagerInstantiationScope LocalInstantiations(*this);
// Call the LateTemplateParser callback if there is a need to late parse
// a templated function definition.
if (!Pattern && PatternDecl->isLateTemplateParsed() &&
LateTemplateParser) {
// FIXME: Optimize to allow individual templates to be deserialized.
if (PatternDecl->isFromASTFile())
ExternalSource->ReadLateParsedTemplates(LateParsedTemplateMap);
auto LPTIter = LateParsedTemplateMap.find(PatternDecl);
assert(LPTIter != LateParsedTemplateMap.end() &&
"missing LateParsedTemplate");
LateTemplateParser(OpaqueParser, *LPTIter->second);
Pattern = PatternDecl->getBody(PatternDecl);
}
// Note, we should never try to instantiate a deleted function template.
assert((Pattern || PatternDecl->isDefaulted() ||
PatternDecl->hasSkippedBody()) &&
"unexpected kind of function template definition");
// C++1y [temp.explicit]p10:
// Except for inline functions, declarations with types deduced from their
// initializer or return value, and class template specializations, other
// explicit instantiation declarations have the effect of suppressing the
// implicit instantiation of the entity to which they refer.
if (TSK == TSK_ExplicitInstantiationDeclaration &&
!PatternDecl->isInlined() &&
!PatternDecl->getReturnType()->getContainedAutoType())
return;
if (PatternDecl->isInlined()) {
// Function, and all later redeclarations of it (from imported modules,
// for instance), are now implicitly inline.
for (auto *D = Function->getMostRecentDecl(); /**/;
D = D->getPreviousDecl()) {
D->setImplicitlyInline();
if (D == Function)
break;
}
}
InstantiatingTemplate Inst(*this, PointOfInstantiation, Function);
if (Inst.isInvalid() || Inst.isAlreadyInstantiating())
return;
PrettyDeclStackTraceEntry CrashInfo(Context, Function, SourceLocation(),
"instantiating function definition");
// The instantiation is visible here, even if it was first declared in an
// unimported module.
Function->setVisibleDespiteOwningModule();
// Copy the inner loc start from the pattern.
Function->setInnerLocStart(PatternDecl->getInnerLocStart());
EnterExpressionEvaluationContext EvalContext(
*this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
// Introduce a new scope where local variable instantiations will be
// recorded, unless we're actually a member function within a local
// class, in which case we need to merge our results with the parent
// scope (of the enclosing function). The exception is instantiating
// a function template specialization, since the template to be
// instantiated already has references to locals properly substituted.
bool MergeWithParentScope = false;
if (CXXRecordDecl *Rec = dyn_cast<CXXRecordDecl>(Function->getDeclContext()))
MergeWithParentScope =
Rec->isLocalClass() && !Function->isFunctionTemplateSpecialization();
LocalInstantiationScope Scope(*this, MergeWithParentScope);
auto RebuildTypeSourceInfoForDefaultSpecialMembers = [&]() {
// Special members might get their TypeSourceInfo set up w.r.t. the
// PatternDecl context, in which case parameters could still be pointing
// back to the original class; make sure arguments are bound to the
// instantiated record instead.
assert(PatternDecl->isDefaulted() &&
"Special member needs to be defaulted");
auto PatternSM = getDefaultedFunctionKind(PatternDecl).asSpecialMember();
if (!(PatternSM == Sema::CXXCopyConstructor ||
PatternSM == Sema::CXXCopyAssignment ||
PatternSM == Sema::CXXMoveConstructor ||
PatternSM == Sema::CXXMoveAssignment))
return;
auto *NewRec = dyn_cast<CXXRecordDecl>(Function->getDeclContext());
const auto *PatternRec =
dyn_cast<CXXRecordDecl>(PatternDecl->getDeclContext());
if (!NewRec || !PatternRec)
return;
if (!PatternRec->isLambda())
return;
struct SpecialMemberTypeInfoRebuilder
: TreeTransform<SpecialMemberTypeInfoRebuilder> {
using Base = TreeTransform<SpecialMemberTypeInfoRebuilder>;
const CXXRecordDecl *OldDecl;
CXXRecordDecl *NewDecl;
SpecialMemberTypeInfoRebuilder(Sema &SemaRef, const CXXRecordDecl *O,
CXXRecordDecl *N)
: TreeTransform(SemaRef), OldDecl(O), NewDecl(N) {}
bool TransformExceptionSpec(SourceLocation Loc,
FunctionProtoType::ExceptionSpecInfo &ESI,
SmallVectorImpl<QualType> &Exceptions,
bool &Changed) {
return false;
}
QualType TransformRecordType(TypeLocBuilder &TLB, RecordTypeLoc TL) {
const RecordType *T = TL.getTypePtr();
RecordDecl *Record = cast_or_null<RecordDecl>(
getDerived().TransformDecl(TL.getNameLoc(), T->getDecl()));
if (Record != OldDecl)
return Base::TransformRecordType(TLB, TL);
QualType Result = getDerived().RebuildRecordType(NewDecl);
if (Result.isNull())
return QualType();
RecordTypeLoc NewTL = TLB.push<RecordTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
} IR{*this, PatternRec, NewRec};
TypeSourceInfo *NewSI = IR.TransformType(Function->getTypeSourceInfo());
Function->setType(NewSI->getType());
Function->setTypeSourceInfo(NewSI);
ParmVarDecl *Parm = Function->getParamDecl(0);
TypeSourceInfo *NewParmSI = IR.TransformType(Parm->getTypeSourceInfo());
Parm->setType(NewParmSI->getType());
Parm->setTypeSourceInfo(NewParmSI);
};
if (PatternDecl->isDefaulted()) {
RebuildTypeSourceInfoForDefaultSpecialMembers();
SetDeclDefaulted(Function, PatternDecl->getLocation());
} else {
MultiLevelTemplateArgumentList TemplateArgs =
getTemplateInstantiationArgs(Function, nullptr, false, PatternDecl);
// Substitute into the qualifier; we can get a substitution failure here
// through evil use of alias templates.
// FIXME: Is CurContext correct for this? Should we go to the (instantiation
// of the) lexical context of the pattern?
SubstQualifier(*this, PatternDecl, Function, TemplateArgs);
ActOnStartOfFunctionDef(nullptr, Function);
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
Sema::ContextRAII savedContext(*this, Function);
if (addInstantiatedParametersToScope(*this, Function, PatternDecl, Scope,
TemplateArgs))
return;
StmtResult Body;
if (PatternDecl->hasSkippedBody()) {
ActOnSkippedFunctionBody(Function);
Body = nullptr;
} else {
if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Function)) {
// If this is a constructor, instantiate the member initializers.
InstantiateMemInitializers(Ctor, cast<CXXConstructorDecl>(PatternDecl),
TemplateArgs);
// If this is an MS ABI dllexport default constructor, instantiate any
// default arguments.
if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
Ctor->isDefaultConstructor()) {
InstantiateDefaultCtorDefaultArgs(Ctor);
}
}
// Instantiate the function body.
Body = SubstStmt(Pattern, TemplateArgs);
if (Body.isInvalid())
Function->setInvalidDecl();
}
// FIXME: finishing the function body while in an expression evaluation
// context seems wrong. Investigate more.
ActOnFinishFunctionBody(Function, Body.get(), /*IsInstantiation=*/true);
PerformDependentDiagnostics(PatternDecl, TemplateArgs);
if (auto *Listener = getASTMutationListener())
Listener->FunctionDefinitionInstantiated(Function);
savedContext.pop();
}
DeclGroupRef DG(Function);
Consumer.HandleTopLevelDecl(DG);
// This function may have local implicit instantiations that need to be
// instantiated within this scope.
LocalInstantiations.perform();
Scope.Exit();
GlobalInstantiations.perform();
}
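// Illustrative example for InstantiateFunctionDefinition above (editorial
// sketch, not part of the original source; names are invented):
//
//   template <typename T> T twice(T v) { return v + v; }
//   int n = twice(21);               // implicit instantiation of twice<int>
//   template long twice<long>(long); // explicit instantiation definition
//
// Both paths end up here: the pattern body is substituted, pending local and
// global instantiations queued during that substitution are performed, and
// the completed definition is handed to the ASTConsumer.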
VarTemplateSpecializationDecl *Sema::BuildVarTemplateInstantiation(
VarTemplateDecl *VarTemplate, VarDecl *FromVar,
const TemplateArgumentList &TemplateArgList,
const TemplateArgumentListInfo &TemplateArgsInfo,
SmallVectorImpl<TemplateArgument> &Converted,
SourceLocation PointOfInstantiation,
LateInstantiatedAttrVec *LateAttrs,
LocalInstantiationScope *StartingScope) {
if (FromVar->isInvalidDecl())
return nullptr;
InstantiatingTemplate Inst(*this, PointOfInstantiation, FromVar);
if (Inst.isInvalid())
return nullptr;
MultiLevelTemplateArgumentList TemplateArgLists;
TemplateArgLists.addOuterTemplateArguments(&TemplateArgList);
// Instantiate the first declaration of the variable template: for a partial
// specialization of a static data member template, the first declaration may
// or may not be the declaration in the class; if it's in the class, we want
// to instantiate a member in the class (a declaration), and if it's outside,
// we want to instantiate a definition.
//
// If we're instantiating an explicitly-specialized member template or member
// partial specialization, don't do this. The member specialization completely
// replaces the original declaration in this case.
bool IsMemberSpec = false;
if (VarTemplatePartialSpecializationDecl *PartialSpec =
dyn_cast<VarTemplatePartialSpecializationDecl>(FromVar))
IsMemberSpec = PartialSpec->isMemberSpecialization();
else if (VarTemplateDecl *FromTemplate = FromVar->getDescribedVarTemplate())
IsMemberSpec = FromTemplate->isMemberSpecialization();
if (!IsMemberSpec)
FromVar = FromVar->getFirstDecl();
MultiLevelTemplateArgumentList MultiLevelList(TemplateArgList);
TemplateDeclInstantiator Instantiator(*this, FromVar->getDeclContext(),
MultiLevelList);
// TODO: Set LateAttrs and StartingScope ...
return cast_or_null<VarTemplateSpecializationDecl>(
Instantiator.VisitVarTemplateSpecializationDecl(
VarTemplate, FromVar, TemplateArgsInfo, Converted));
}
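// For illustration, a minimal sketch of user code that reaches this path
// (the names 'pi' and 'd' are hypothetical, chosen only for the example):
//
//   template<typename T> constexpr T pi = T(3.1415926535897932385L);
//   double d = pi<double>; // requires a VarTemplateSpecializationDecl
//
// Building the specialization declaration for pi<double> happens here; its
// definition (and initializer) is instantiated separately, see
// InstantiateVariableDefinition below.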
/// Instantiates a variable template specialization by completing it
/// with appropriate type information and initializer.
VarTemplateSpecializationDecl *Sema::CompleteVarTemplateSpecializationDecl(
VarTemplateSpecializationDecl *VarSpec, VarDecl *PatternDecl,
const MultiLevelTemplateArgumentList &TemplateArgs) {
assert(PatternDecl->isThisDeclarationADefinition() &&
"don't have a definition to instantiate from");
// Do substitution on the type of the declaration
TypeSourceInfo *DI =
SubstType(PatternDecl->getTypeSourceInfo(), TemplateArgs,
PatternDecl->getTypeSpecStartLoc(), PatternDecl->getDeclName());
if (!DI)
return nullptr;
// Update the type of this variable template specialization.
VarSpec->setType(DI->getType());
// Convert the declaration into a definition now.
VarSpec->setCompleteDefinition();
// Instantiate the initializer.
InstantiateVariableInitializer(VarSpec, PatternDecl, TemplateArgs);
if (getLangOpts().OpenCL)
deduceOpenCLAddressSpace(VarSpec);
return VarSpec;
}
/// BuildVariableInstantiation - Used after a new variable has been created.
/// Sets basic variable data and decides whether to postpone the
/// variable instantiation.
void Sema::BuildVariableInstantiation(
VarDecl *NewVar, VarDecl *OldVar,
const MultiLevelTemplateArgumentList &TemplateArgs,
LateInstantiatedAttrVec *LateAttrs, DeclContext *Owner,
LocalInstantiationScope *StartingScope,
bool InstantiatingVarTemplate,
VarTemplateSpecializationDecl *PrevDeclForVarTemplateSpecialization) {
// Instantiating a partial specialization to produce a partial
// specialization.
bool InstantiatingVarTemplatePartialSpec =
isa<VarTemplatePartialSpecializationDecl>(OldVar) &&
isa<VarTemplatePartialSpecializationDecl>(NewVar);
// Instantiating from a variable template (or partial specialization) to
// produce a variable template specialization.
bool InstantiatingSpecFromTemplate =
isa<VarTemplateSpecializationDecl>(NewVar) &&
(OldVar->getDescribedVarTemplate() ||
isa<VarTemplatePartialSpecializationDecl>(OldVar));
// If we are instantiating a local extern declaration, the
// instantiation belongs lexically to the containing function.
// If we are instantiating a static data member defined
// out-of-line, the instantiation will have the same lexical
// context (which will be a namespace scope) as the template.
if (OldVar->isLocalExternDecl()) {
NewVar->setLocalExternDecl();
NewVar->setLexicalDeclContext(Owner);
} else if (OldVar->isOutOfLine())
NewVar->setLexicalDeclContext(OldVar->getLexicalDeclContext());
NewVar->setTSCSpec(OldVar->getTSCSpec());
NewVar->setInitStyle(OldVar->getInitStyle());
NewVar->setCXXForRangeDecl(OldVar->isCXXForRangeDecl());
NewVar->setObjCForDecl(OldVar->isObjCForDecl());
NewVar->setConstexpr(OldVar->isConstexpr());
NewVar->setInitCapture(OldVar->isInitCapture());
NewVar->setPreviousDeclInSameBlockScope(
OldVar->isPreviousDeclInSameBlockScope());
NewVar->setAccess(OldVar->getAccess());
if (!OldVar->isStaticDataMember()) {
if (OldVar->isUsed(false))
NewVar->setIsUsed();
NewVar->setReferenced(OldVar->isReferenced());
}
InstantiateAttrs(TemplateArgs, OldVar, NewVar, LateAttrs, StartingScope);
LookupResult Previous(
*this, NewVar->getDeclName(), NewVar->getLocation(),
NewVar->isLocalExternDecl() ? Sema::LookupRedeclarationWithLinkage
: Sema::LookupOrdinaryName,
NewVar->isLocalExternDecl() ? Sema::ForExternalRedeclaration
: forRedeclarationInCurContext());
if (NewVar->isLocalExternDecl() && OldVar->getPreviousDecl() &&
(!OldVar->getPreviousDecl()->getDeclContext()->isDependentContext() ||
OldVar->getPreviousDecl()->getDeclContext() == OldVar->getDeclContext())) {
// We have a previous declaration. Use that one, so we merge with the
// right type.
if (NamedDecl *NewPrev = FindInstantiatedDecl(
NewVar->getLocation(), OldVar->getPreviousDecl(), TemplateArgs))
Previous.addDecl(NewPrev);
} else if (!isa<VarTemplateSpecializationDecl>(NewVar) &&
OldVar->hasLinkage()) {
LookupQualifiedName(Previous, NewVar->getDeclContext(), false);
} else if (PrevDeclForVarTemplateSpecialization) {
Previous.addDecl(PrevDeclForVarTemplateSpecialization);
}
CheckVariableDeclaration(NewVar, Previous);
if (!InstantiatingVarTemplate) {
NewVar->getLexicalDeclContext()->addHiddenDecl(NewVar);
if (!NewVar->isLocalExternDecl() || !NewVar->getPreviousDecl())
NewVar->getDeclContext()->makeDeclVisibleInContext(NewVar);
}
if (!OldVar->isOutOfLine()) {
if (NewVar->getDeclContext()->isFunctionOrMethod())
CurrentInstantiationScope->InstantiatedLocal(OldVar, NewVar);
}
// Link instantiations of static data members back to the template from
// which they were instantiated.
//
// Don't do this when instantiating a template (we link the template itself
// back in that case) nor when instantiating a static data member template
// (that's not a member specialization).
if (NewVar->isStaticDataMember() && !InstantiatingVarTemplate &&
!InstantiatingSpecFromTemplate)
NewVar->setInstantiationOfStaticDataMember(OldVar,
TSK_ImplicitInstantiation);
// If the pattern is an (in-class) explicit specialization, then the result
// is also an explicit specialization.
if (VarTemplateSpecializationDecl *OldVTSD =
dyn_cast<VarTemplateSpecializationDecl>(OldVar)) {
if (OldVTSD->getSpecializationKind() == TSK_ExplicitSpecialization &&
!isa<VarTemplatePartialSpecializationDecl>(OldVTSD))
cast<VarTemplateSpecializationDecl>(NewVar)->setSpecializationKind(
TSK_ExplicitSpecialization);
}
// Forward the mangling number from the template to the instantiated decl.
Context.setManglingNumber(NewVar, Context.getManglingNumber(OldVar));
Context.setStaticLocalNumber(NewVar, Context.getStaticLocalNumber(OldVar));
// Figure out whether to eagerly instantiate the initializer.
if (InstantiatingVarTemplate || InstantiatingVarTemplatePartialSpec) {
// We're producing a template. Don't instantiate the initializer yet.
} else if (NewVar->getType()->isUndeducedType()) {
// We need the type to complete the declaration of the variable.
InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs);
} else if (InstantiatingSpecFromTemplate ||
(OldVar->isInline() && OldVar->isThisDeclarationADefinition() &&
!NewVar->isThisDeclarationADefinition())) {
// Delay instantiation of the initializer for variable template
// specializations or inline static data members until a definition of the
// variable is needed.
} else {
InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs);
}
// Diagnose unused local variables with dependent types, where the diagnostic
// will have been deferred.
if (!NewVar->isInvalidDecl() &&
NewVar->getDeclContext()->isFunctionOrMethod() &&
OldVar->getType()->isDependentType())
DiagnoseUnusedDecl(NewVar);
}
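// For illustration, a sketch of the local-extern case handled above
// (the names 'counter' and 'f' are hypothetical):
//
//   extern int counter;
//   template<typename T> void f() {
//     extern int counter; // local extern declaration
//     ++counter;
//   }
//
// The instantiated local extern declaration stays lexically inside f<T> but
// is merged, via redeclaration lookup, with the prior declaration of
// 'counter' so that both refer to the same variable.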
/// Instantiate the initializer of a variable.
void Sema::InstantiateVariableInitializer(
VarDecl *Var, VarDecl *OldVar,
const MultiLevelTemplateArgumentList &TemplateArgs) {
if (ASTMutationListener *L = getASTContext().getASTMutationListener())
L->VariableDefinitionInstantiated(Var);
// We propagate the 'inline' flag with the initializer, because it
// would otherwise imply that the variable is a definition for a
// non-static data member.
if (OldVar->isInlineSpecified())
Var->setInlineSpecified();
else if (OldVar->isInline())
Var->setImplicitlyInline();
if (OldVar->getInit()) {
EnterExpressionEvaluationContext Evaluated(
*this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, Var);
// Instantiate the initializer.
ExprResult Init;
{
ContextRAII SwitchContext(*this, Var->getDeclContext());
Init = SubstInitializer(OldVar->getInit(), TemplateArgs,
OldVar->getInitStyle() == VarDecl::CallInit);
}
if (!Init.isInvalid()) {
Expr *InitExpr = Init.get();
if (Var->hasAttr<DLLImportAttr>() &&
(!InitExpr ||
!InitExpr->isConstantInitializer(getASTContext(), false))) {
// Do not dynamically initialize dllimport variables.
} else if (InitExpr) {
bool DirectInit = OldVar->isDirectInit();
AddInitializerToDecl(Var, InitExpr, DirectInit);
} else
ActOnUninitializedDecl(Var);
} else {
// FIXME: Not too happy about invalidating the declaration
// because of a bogus initializer.
Var->setInvalidDecl();
}
} else {
// `inline` variables are a definition and declaration all in one; we won't
// pick up an initializer from anywhere else.
if (Var->isStaticDataMember() && !Var->isInline()) {
if (!Var->isOutOfLine())
return;
// If the declaration inside the class had an initializer, don't add
// another one to the out-of-line definition.
if (OldVar->getFirstDecl()->hasInit())
return;
}
// We'll add an initializer to a for-range declaration later.
if (Var->isCXXForRangeDecl() || Var->isObjCForDecl())
return;
ActOnUninitializedDecl(Var);
}
if (getLangOpts().CUDA)
checkAllowedCUDAInitializer(Var);
}
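// For illustration, a sketch of the static-data-member case skipped above
// (the names 'S' and 'value' are hypothetical):
//
//   template<typename T> struct S {
//     static const int value = sizeof(T); // in-class initializer
//   };
//   template<typename T> const int S<T>::value; // out-of-line definition
//
// When the out-of-line definition of S<int>::value is instantiated, the
// initializer already comes from the in-class declaration, so no second
// initializer is attached to the definition.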
/// Instantiate the definition of the given variable from its
/// template.
///
/// \param PointOfInstantiation the point at which the instantiation was
/// required. Note that this is not precisely a "point of instantiation"
/// for the variable, but it's close.
///
/// \param Var the already-instantiated declaration of a templated variable.
///
/// \param Recursive if true, recursively instantiates any functions that
/// are required by this instantiation.
///
/// \param DefinitionRequired if true, then we are performing an explicit
/// instantiation where a definition of the variable is required. Complain
/// if there is no such definition.
void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
VarDecl *Var, bool Recursive,
bool DefinitionRequired, bool AtEndOfTU) {
if (Var->isInvalidDecl())
return;
// Never instantiate an explicitly-specialized entity.
TemplateSpecializationKind TSK =
Var->getTemplateSpecializationKindForInstantiation();
if (TSK == TSK_ExplicitSpecialization)
return;
// Find the pattern and the arguments to substitute into it.
VarDecl *PatternDecl = Var->getTemplateInstantiationPattern();
assert(PatternDecl && "no pattern for templated variable");
MultiLevelTemplateArgumentList TemplateArgs =
getTemplateInstantiationArgs(Var);
VarTemplateSpecializationDecl *VarSpec =
dyn_cast<VarTemplateSpecializationDecl>(Var);
if (VarSpec) {
// If this is a static data member template, there might be an
// uninstantiated initializer on the declaration. If so, instantiate
// it now.
//
// FIXME: This largely duplicates what we would do below. The difference
// is that along this path we may instantiate an initializer from an
// in-class declaration of the template and instantiate the definition
// from a separate out-of-class definition.
if (PatternDecl->isStaticDataMember() &&
(PatternDecl = PatternDecl->getFirstDecl())->hasInit() &&
!Var->hasInit()) {
// FIXME: Factor out the duplicated instantiation context setup/tear down
// code here.
InstantiatingTemplate Inst(*this, PointOfInstantiation, Var);
if (Inst.isInvalid() || Inst.isAlreadyInstantiating())
return;
PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(),
"instantiating variable initializer");
// The instantiation is visible here, even if it was first declared in an
// unimported module.
Var->setVisibleDespiteOwningModule();
// If we're performing recursive template instantiation, create our own
// queue of pending implicit instantiations that we will instantiate
// later, while we're still within our own instantiation context.
GlobalEagerInstantiationScope GlobalInstantiations(*this,
/*Enabled=*/Recursive);
LocalInstantiationScope Local(*this);
LocalEagerInstantiationScope LocalInstantiations(*this);
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
ContextRAII PreviousContext(*this, Var->getDeclContext());
InstantiateVariableInitializer(Var, PatternDecl, TemplateArgs);
PreviousContext.pop();
// This variable may have local implicit instantiations that need to be
// instantiated within this scope.
LocalInstantiations.perform();
Local.Exit();
GlobalInstantiations.perform();
}
} else {
assert(Var->isStaticDataMember() && PatternDecl->isStaticDataMember() &&
"not a static data member?");
}
VarDecl *Def = PatternDecl->getDefinition(getASTContext());
// If we don't have a definition of the variable template, we won't perform
// any instantiation. Rather, we rely on the user to instantiate this
// definition (or provide a specialization for it) in another translation
// unit.
if (!Def && !DefinitionRequired) {
if (TSK == TSK_ExplicitInstantiationDefinition) {
PendingInstantiations.push_back(
std::make_pair(Var, PointOfInstantiation));
} else if (TSK == TSK_ImplicitInstantiation) {
// Warn about missing definition at the end of translation unit.
if (AtEndOfTU && !getDiagnostics().hasErrorOccurred() &&
!getSourceManager().isInSystemHeader(PatternDecl->getBeginLoc())) {
Diag(PointOfInstantiation, diag::warn_var_template_missing)
<< Var;
Diag(PatternDecl->getLocation(), diag::note_forward_template_decl);
if (getLangOpts().CPlusPlus11)
Diag(PointOfInstantiation, diag::note_inst_declaration_hint) << Var;
}
return;
}
}
// FIXME: We need to track the instantiation stack in order to know which
// definitions should be visible within this instantiation.
// FIXME: Produce diagnostics when Var->getInstantiatedFromStaticDataMember().
if (DiagnoseUninstantiableTemplate(PointOfInstantiation, Var,
/*InstantiatedFromMember*/false,
PatternDecl, Def, TSK,
/*Complain*/DefinitionRequired))
return;
// C++11 [temp.explicit]p10:
// Except for inline functions, const variables of literal types, variables
// of reference types, [...] explicit instantiation declarations
// have the effect of suppressing the implicit instantiation of the entity
// to which they refer.
//
// FIXME: That's not exactly the same as "might be usable in constant
// expressions", which only allows constexpr variables and const integral
// types, not arbitrary const literal types.
if (TSK == TSK_ExplicitInstantiationDeclaration &&
!Var->mightBeUsableInConstantExpressions(getASTContext()))
return;
// Make sure to pass the instantiated variable to the consumer at the end.
struct PassToConsumerRAII {
ASTConsumer &Consumer;
VarDecl *Var;
PassToConsumerRAII(ASTConsumer &Consumer, VarDecl *Var)
: Consumer(Consumer), Var(Var) { }
~PassToConsumerRAII() {
Consumer.HandleCXXStaticMemberVarInstantiation(Var);
}
} PassToConsumerRAII(Consumer, Var);
// If we already have a definition, we're done.
if (VarDecl *Def = Var->getDefinition()) {
// We may be explicitly instantiating something we've already implicitly
// instantiated.
Def->setTemplateSpecializationKind(Var->getTemplateSpecializationKind(),
PointOfInstantiation);
return;
}
InstantiatingTemplate Inst(*this, PointOfInstantiation, Var);
if (Inst.isInvalid() || Inst.isAlreadyInstantiating())
return;
PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(),
"instantiating variable definition");
// If we're performing recursive template instantiation, create our own
// queue of pending implicit instantiations that we will instantiate later,
// while we're still within our own instantiation context.
GlobalEagerInstantiationScope GlobalInstantiations(*this,
/*Enabled=*/Recursive);
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
ContextRAII PreviousContext(*this, Var->getDeclContext());
LocalInstantiationScope Local(*this);
LocalEagerInstantiationScope LocalInstantiations(*this);
VarDecl *OldVar = Var;
if (Def->isStaticDataMember() && !Def->isOutOfLine()) {
// We're instantiating an inline static data member whose definition was
// provided inside the class.
InstantiateVariableInitializer(Var, Def, TemplateArgs);
} else if (!VarSpec) {
Var = cast_or_null<VarDecl>(SubstDecl(Def, Var->getDeclContext(),
TemplateArgs));
} else if (Var->isStaticDataMember() &&
Var->getLexicalDeclContext()->isRecord()) {
// We need to instantiate the definition of a static data member template,
// and all we have is the in-class declaration of it. Instantiate a separate
// declaration of the definition.
TemplateDeclInstantiator Instantiator(*this, Var->getDeclContext(),
TemplateArgs);
Var = cast_or_null<VarDecl>(Instantiator.VisitVarTemplateSpecializationDecl(
VarSpec->getSpecializedTemplate(), Def, VarSpec->getTemplateArgsInfo(),
VarSpec->getTemplateArgs().asArray(), VarSpec));
if (Var) {
llvm::PointerUnion<VarTemplateDecl *,
VarTemplatePartialSpecializationDecl *> PatternPtr =
VarSpec->getSpecializedTemplateOrPartial();
if (VarTemplatePartialSpecializationDecl *Partial =
PatternPtr.dyn_cast<VarTemplatePartialSpecializationDecl *>())
cast<VarTemplateSpecializationDecl>(Var)->setInstantiationOf(
Partial, &VarSpec->getTemplateInstantiationArgs());
// Attach the initializer.
InstantiateVariableInitializer(Var, Def, TemplateArgs);
}
} else
// Complete the existing variable's definition with an appropriately
// substituted type and initializer.
Var = CompleteVarTemplateSpecializationDecl(VarSpec, Def, TemplateArgs);
PreviousContext.pop();
if (Var) {
PassToConsumerRAII.Var = Var;
Var->setTemplateSpecializationKind(OldVar->getTemplateSpecializationKind(),
OldVar->getPointOfInstantiation());
}
// This variable may have local implicit instantiations that need to be
// instantiated within this scope.
LocalInstantiations.perform();
Local.Exit();
GlobalInstantiations.perform();
}
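// For illustration, a sketch of the [temp.explicit]p10 case checked above
// (the names 'global' and 'p' are hypothetical):
//
//   template<typename T> T global = T();
//   extern template int global<int>; // explicit instantiation declaration
//   int *p = &global<int>;           // does not instantiate the definition
//
// Since global<int> cannot be used in a constant expression, the explicit
// instantiation declaration suppresses instantiation of its definition; an
// explicit instantiation definition in some other translation unit must
// provide it.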
void
Sema::InstantiateMemInitializers(CXXConstructorDecl *New,
const CXXConstructorDecl *Tmpl,
const MultiLevelTemplateArgumentList &TemplateArgs) {
SmallVector<CXXCtorInitializer*, 4> NewInits;
bool AnyErrors = Tmpl->isInvalidDecl();
// Instantiate all the initializers.
for (const auto *Init : Tmpl->inits()) {
// Only instantiate written initializers, let Sema re-construct implicit
// ones.
if (!Init->isWritten())
continue;
SourceLocation EllipsisLoc;
if (Init->isPackExpansion()) {
// This is a pack expansion. We should expand it now.
TypeLoc BaseTL = Init->getTypeSourceInfo()->getTypeLoc();
SmallVector<UnexpandedParameterPack, 4> Unexpanded;
collectUnexpandedParameterPacks(BaseTL, Unexpanded);
collectUnexpandedParameterPacks(Init->getInit(), Unexpanded);
bool ShouldExpand = false;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions;
if (CheckParameterPacksForExpansion(Init->getEllipsisLoc(),
BaseTL.getSourceRange(),
Unexpanded,
TemplateArgs, ShouldExpand,
RetainExpansion,
NumExpansions)) {
AnyErrors = true;
New->setInvalidDecl();
continue;
}
assert(ShouldExpand && "Partial instantiation of base initializer?");
// Loop over all of the arguments in the argument pack(s),
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(*this, I);
// Instantiate the initializer.
ExprResult TempInit = SubstInitializer(Init->getInit(), TemplateArgs,
/*CXXDirectInit=*/true);
if (TempInit.isInvalid()) {
AnyErrors = true;
break;
}
// Instantiate the base type.
TypeSourceInfo *BaseTInfo = SubstType(Init->getTypeSourceInfo(),
TemplateArgs,
Init->getSourceLocation(),
New->getDeclName());
if (!BaseTInfo) {
AnyErrors = true;
break;
}
// Build the initializer.
MemInitResult NewInit = BuildBaseInitializer(BaseTInfo->getType(),
BaseTInfo, TempInit.get(),
New->getParent(),
SourceLocation());
if (NewInit.isInvalid()) {
AnyErrors = true;
break;
}
NewInits.push_back(NewInit.get());
}
continue;
}
// Instantiate the initializer.
ExprResult TempInit = SubstInitializer(Init->getInit(), TemplateArgs,
/*CXXDirectInit=*/true);
if (TempInit.isInvalid()) {
AnyErrors = true;
continue;
}
MemInitResult NewInit;
if (Init->isDelegatingInitializer() || Init->isBaseInitializer()) {
TypeSourceInfo *TInfo = SubstType(Init->getTypeSourceInfo(),
TemplateArgs,
Init->getSourceLocation(),
New->getDeclName());
if (!TInfo) {
AnyErrors = true;
New->setInvalidDecl();
continue;
}
if (Init->isBaseInitializer())
NewInit = BuildBaseInitializer(TInfo->getType(), TInfo, TempInit.get(),
New->getParent(), EllipsisLoc);
else
NewInit = BuildDelegatingInitializer(TInfo, TempInit.get(),
cast<CXXRecordDecl>(CurContext->getParent()));
} else if (Init->isMemberInitializer()) {
FieldDecl *Member = cast_or_null<FieldDecl>(FindInstantiatedDecl(
Init->getMemberLocation(),
Init->getMember(),
TemplateArgs));
if (!Member) {
AnyErrors = true;
New->setInvalidDecl();
continue;
}
NewInit = BuildMemberInitializer(Member, TempInit.get(),
Init->getSourceLocation());
} else if (Init->isIndirectMemberInitializer()) {
IndirectFieldDecl *IndirectMember =
cast_or_null<IndirectFieldDecl>(FindInstantiatedDecl(
Init->getMemberLocation(),
Init->getIndirectMember(), TemplateArgs));
if (!IndirectMember) {
AnyErrors = true;
New->setInvalidDecl();
continue;
}
NewInit = BuildMemberInitializer(IndirectMember, TempInit.get(),
Init->getSourceLocation());
}
if (NewInit.isInvalid()) {
AnyErrors = true;
New->setInvalidDecl();
} else {
NewInits.push_back(NewInit.get());
}
}
// Assign all the initializers to the new constructor.
ActOnMemInitializers(New,
/*FIXME: ColonLoc */
SourceLocation(),
NewInits,
AnyErrors);
}
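// For illustration, a sketch of the pack-expansion path above (the names
// 'D' and 'bases' are hypothetical):
//
//   template<typename ...Bases> struct D : Bases... {
//     D(const Bases &...bases) : Bases(bases)... {}
//   };
//
// Instantiating D<A, B>::D expands the single written initializer
// 'Bases(bases)...' into one base initializer per element of the pack.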
// TODO: this could be templated if the various decl types used the
// same method name.
static bool isInstantiationOf(ClassTemplateDecl *Pattern,
ClassTemplateDecl *Instance) {
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromMemberTemplate();
} while (Instance);
return false;
}
static bool isInstantiationOf(FunctionTemplateDecl *Pattern,
FunctionTemplateDecl *Instance) {
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromMemberTemplate();
} while (Instance);
return false;
}
static bool
isInstantiationOf(ClassTemplatePartialSpecializationDecl *Pattern,
ClassTemplatePartialSpecializationDecl *Instance) {
Pattern
= cast<ClassTemplatePartialSpecializationDecl>(Pattern->getCanonicalDecl());
do {
Instance = cast<ClassTemplatePartialSpecializationDecl>(
Instance->getCanonicalDecl());
if (Pattern == Instance)
return true;
Instance = Instance->getInstantiatedFromMember();
} while (Instance);
return false;
}
static bool isInstantiationOf(CXXRecordDecl *Pattern,
CXXRecordDecl *Instance) {
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromMemberClass();
} while (Instance);
return false;
}
static bool isInstantiationOf(FunctionDecl *Pattern,
FunctionDecl *Instance) {
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromMemberFunction();
} while (Instance);
return false;
}
static bool isInstantiationOf(EnumDecl *Pattern,
EnumDecl *Instance) {
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromMemberEnum();
} while (Instance);
return false;
}
static bool isInstantiationOf(UsingShadowDecl *Pattern,
UsingShadowDecl *Instance,
ASTContext &C) {
return declaresSameEntity(C.getInstantiatedFromUsingShadowDecl(Instance),
Pattern);
}
static bool isInstantiationOf(UsingDecl *Pattern, UsingDecl *Instance,
ASTContext &C) {
return declaresSameEntity(C.getInstantiatedFromUsingDecl(Instance), Pattern);
}
template<typename T>
static bool isInstantiationOfUnresolvedUsingDecl(T *Pattern, Decl *Other,
ASTContext &Ctx) {
// An unresolved using declaration can instantiate to an unresolved using
// declaration, or to a using declaration or a using declaration pack.
//
// Multiple declarations can claim to be instantiated from an unresolved
// using declaration if it's a pack expansion. We want the UsingPackDecl
// in that case, not the individual UsingDecls within the pack.
bool OtherIsPackExpansion;
NamedDecl *OtherFrom;
if (auto *OtherUUD = dyn_cast<T>(Other)) {
OtherIsPackExpansion = OtherUUD->isPackExpansion();
OtherFrom = Ctx.getInstantiatedFromUsingDecl(OtherUUD);
} else if (auto *OtherUPD = dyn_cast<UsingPackDecl>(Other)) {
OtherIsPackExpansion = true;
OtherFrom = OtherUPD->getInstantiatedFromUsingDecl();
} else if (auto *OtherUD = dyn_cast<UsingDecl>(Other)) {
OtherIsPackExpansion = false;
OtherFrom = Ctx.getInstantiatedFromUsingDecl(OtherUD);
} else {
return false;
}
return Pattern->isPackExpansion() == OtherIsPackExpansion &&
declaresSameEntity(OtherFrom, Pattern);
}
static bool isInstantiationOfStaticDataMember(VarDecl *Pattern,
VarDecl *Instance) {
assert(Instance->isStaticDataMember());
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromStaticDataMember();
} while (Instance);
return false;
}
// Other is the prospective instantiation
// D is the prospective pattern
static bool isInstantiationOf(ASTContext &Ctx, NamedDecl *D, Decl *Other) {
if (auto *UUD = dyn_cast<UnresolvedUsingTypenameDecl>(D))
return isInstantiationOfUnresolvedUsingDecl(UUD, Other, Ctx);
if (auto *UUD = dyn_cast<UnresolvedUsingValueDecl>(D))
return isInstantiationOfUnresolvedUsingDecl(UUD, Other, Ctx);
if (D->getKind() != Other->getKind())
return false;
if (auto *Record = dyn_cast<CXXRecordDecl>(Other))
return isInstantiationOf(cast<CXXRecordDecl>(D), Record);
if (auto *Function = dyn_cast<FunctionDecl>(Other))
return isInstantiationOf(cast<FunctionDecl>(D), Function);
if (auto *Enum = dyn_cast<EnumDecl>(Other))
return isInstantiationOf(cast<EnumDecl>(D), Enum);
if (auto *Var = dyn_cast<VarDecl>(Other))
if (Var->isStaticDataMember())
return isInstantiationOfStaticDataMember(cast<VarDecl>(D), Var);
if (auto *Temp = dyn_cast<ClassTemplateDecl>(Other))
return isInstantiationOf(cast<ClassTemplateDecl>(D), Temp);
if (auto *Temp = dyn_cast<FunctionTemplateDecl>(Other))
return isInstantiationOf(cast<FunctionTemplateDecl>(D), Temp);
if (auto *PartialSpec =
dyn_cast<ClassTemplatePartialSpecializationDecl>(Other))
return isInstantiationOf(cast<ClassTemplatePartialSpecializationDecl>(D),
PartialSpec);
if (auto *Field = dyn_cast<FieldDecl>(Other)) {
if (!Field->getDeclName()) {
// This is an unnamed field.
return declaresSameEntity(Ctx.getInstantiatedFromUnnamedFieldDecl(Field),
cast<FieldDecl>(D));
}
}
if (auto *Using = dyn_cast<UsingDecl>(Other))
return isInstantiationOf(cast<UsingDecl>(D), Using, Ctx);
if (auto *Shadow = dyn_cast<UsingShadowDecl>(Other))
return isInstantiationOf(cast<UsingShadowDecl>(D), Shadow, Ctx);
return D->getDeclName() &&
D->getDeclName() == cast<NamedDecl>(Other)->getDeclName();
}
template<typename ForwardIterator>
static NamedDecl *findInstantiationOf(ASTContext &Ctx,
NamedDecl *D,
ForwardIterator first,
ForwardIterator last) {
for (; first != last; ++first)
if (isInstantiationOf(Ctx, D, *first))
return cast<NamedDecl>(*first);
return nullptr;
}
/// Finds the instantiation of the given declaration context
/// within the current instantiation.
///
/// \returns NULL if there was an error
DeclContext *Sema::FindInstantiatedContext(SourceLocation Loc, DeclContext* DC,
const MultiLevelTemplateArgumentList &TemplateArgs) {
if (NamedDecl *D = dyn_cast<NamedDecl>(DC)) {
Decl* ID = FindInstantiatedDecl(Loc, D, TemplateArgs, true);
return cast_or_null<DeclContext>(ID);
} else return DC;
}
/// Determine whether the given context is dependent on template parameters at
/// level \p Level or below.
///
/// Sometimes we only substitute an inner set of template arguments and leave
/// the outer templates alone. In such cases, contexts dependent only on the
/// outer levels are not effectively dependent.
static bool isDependentContextAtLevel(DeclContext *DC, unsigned Level) {
if (!DC->isDependentContext())
return false;
if (!Level)
return true;
return cast<Decl>(DC)->getTemplateDepth() > Level;
}
/// Find the instantiation of the given declaration within the
/// current instantiation.
///
/// This routine is intended to be used when \p D is a declaration
/// referenced from within a template that needs to be mapped into the
/// corresponding declaration within an instantiation. For example,
/// given:
///
/// \code
/// template<typename T>
/// struct X {
/// enum Kind {
/// KnownValue = sizeof(T)
/// };
///
/// bool getKind() const { return KnownValue; }
/// };
///
/// template struct X<int>;
/// \endcode
///
/// In the instantiation of X<int>::getKind(), we need to map the \p
/// EnumConstantDecl for \p KnownValue (which refers to
/// X<T>::<Kind>::KnownValue) to its instantiation (X<int>::<Kind>::KnownValue).
/// \p FindInstantiatedDecl performs this mapping from within the instantiation
/// of X<int>.
NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
const MultiLevelTemplateArgumentList &TemplateArgs,
bool FindingInstantiatedContext) {
DeclContext *ParentDC = D->getDeclContext();
// Determine whether our parent context depends on any of the template
// arguments we're currently substituting.
bool ParentDependsOnArgs = isDependentContextAtLevel(
ParentDC, TemplateArgs.getNumRetainedOuterLevels());
// FIXME: Parameters of pointers to functions (y below) that are themselves
// parameters (p below) can have their ParentDC set to the translation unit
// - thus we cannot consistently check whether the ParentDC of such a
// parameter is dependent and/or a FunctionOrMethod.
// For example, in the code below, template argument deduction tries to
// find an instantiated decl for (T y) when the ParentDC for y is
// the translation unit.
// e.g. template <class T> void Foo(auto (*p)(T y) -> decltype(y())) {}
// float baz(float(*)()) { return 0.0; }
// Foo(baz);
// The better fix here is perhaps to ensure that a ParmVarDecl, by the time
// it gets here, always has a FunctionOrMethod as its ParentDC??
// For now:
// - as long as we have a ParmVarDecl whose parent is non-dependent and
// whose type is not instantiation dependent, do nothing to the decl
// - otherwise find its instantiated decl.
if (isa<ParmVarDecl>(D) && !ParentDependsOnArgs &&
!cast<ParmVarDecl>(D)->getType()->isInstantiationDependentType())
return D;
if (isa<ParmVarDecl>(D) || isa<NonTypeTemplateParmDecl>(D) ||
isa<TemplateTypeParmDecl>(D) || isa<TemplateTemplateParmDecl>(D) ||
(ParentDependsOnArgs && (ParentDC->isFunctionOrMethod() ||
isa<OMPDeclareReductionDecl>(ParentDC) ||
isa<OMPDeclareMapperDecl>(ParentDC))) ||
(isa<CXXRecordDecl>(D) && cast<CXXRecordDecl>(D)->isLambda())) {
// D is a local of some kind. Look into the map of local
// declarations to their instantiations.
if (CurrentInstantiationScope) {
if (auto Found = CurrentInstantiationScope->findInstantiationOf(D)) {
if (Decl *FD = Found->dyn_cast<Decl *>())
return cast<NamedDecl>(FD);
int PackIdx = ArgumentPackSubstitutionIndex;
assert(PackIdx != -1 &&
"found declaration pack but not pack expanding");
typedef LocalInstantiationScope::DeclArgumentPack DeclArgumentPack;
return cast<NamedDecl>((*Found->get<DeclArgumentPack *>())[PackIdx]);
}
}
// If we're performing a partial substitution during template argument
// deduction, we may not have values for template parameters yet. They
// just map to themselves.
if (isa<NonTypeTemplateParmDecl>(D) || isa<TemplateTypeParmDecl>(D) ||
isa<TemplateTemplateParmDecl>(D))
return D;
if (D->isInvalidDecl())
return nullptr;
// Normally this function only searches for already-instantiated declarations;
// however, we have to make an exception for local types used before their
// definition, as in the code:
//
// template<typename T> void f1() {
// void g1(struct x1);
// struct x1 {};
// }
//
// In this case instantiation of the type of 'g1' requires definition of
// 'x1', which is defined later. Error recovery may produce an enum used
// before definition. In these cases we need to instantiate relevant
// declarations here.
bool NeedInstantiate = false;
if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D))
NeedInstantiate = RD->isLocalClass();
else if (isa<TypedefNameDecl>(D) &&
isa<CXXDeductionGuideDecl>(D->getDeclContext()))
NeedInstantiate = true;
else
NeedInstantiate = isa<EnumDecl>(D);
if (NeedInstantiate) {
Decl *Inst = SubstDecl(D, CurContext, TemplateArgs);
CurrentInstantiationScope->InstantiatedLocal(D, Inst);
return cast<TypeDecl>(Inst);
}
// If we didn't find the decl, then we must have a label decl that hasn't
// been found yet. Lazily instantiate it and return it now.
assert(isa<LabelDecl>(D));
Decl *Inst = SubstDecl(D, CurContext, TemplateArgs);
assert(Inst && "Failed to instantiate label??");
CurrentInstantiationScope->InstantiatedLocal(D, Inst);
return cast<LabelDecl>(Inst);
}
if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(D)) {
if (!Record->isDependentContext())
return D;
// Determine whether this record is the "templated" declaration describing
// a class template or class template partial specialization.
ClassTemplateDecl *ClassTemplate = Record->getDescribedClassTemplate();
if (ClassTemplate)
ClassTemplate = ClassTemplate->getCanonicalDecl();
else if (ClassTemplatePartialSpecializationDecl *PartialSpec
= dyn_cast<ClassTemplatePartialSpecializationDecl>(Record))
ClassTemplate = PartialSpec->getSpecializedTemplate()->getCanonicalDecl();
// Walk the current context to find either the record or an instantiation of
// it.
DeclContext *DC = CurContext;
while (!DC->isFileContext()) {
// If we're performing substitution while we're inside the template
// definition, we'll find our own context. We're done.
if (DC->Equals(Record))
return Record;
if (CXXRecordDecl *InstRecord = dyn_cast<CXXRecordDecl>(DC)) {
// Check whether we're in the process of instantiating a class template
// specialization of the template we're mapping.
if (ClassTemplateSpecializationDecl *InstSpec
= dyn_cast<ClassTemplateSpecializationDecl>(InstRecord)){
ClassTemplateDecl *SpecTemplate = InstSpec->getSpecializedTemplate();
if (ClassTemplate && isInstantiationOf(ClassTemplate, SpecTemplate))
return InstRecord;
}
// Check whether we're in the process of instantiating a member class.
if (isInstantiationOf(Record, InstRecord))
return InstRecord;
}
// Move to the outer template scope.
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(DC)) {
if (FD->getFriendObjectKind() && FD->getDeclContext()->isFileContext()){
DC = FD->getLexicalDeclContext();
continue;
}
// An implicit deduction guide acts as if it's within the class template
// specialization described by its name and first N template params.
auto *Guide = dyn_cast<CXXDeductionGuideDecl>(FD);
if (Guide && Guide->isImplicit()) {
TemplateDecl *TD = Guide->getDeducedTemplate();
// Convert the arguments to an "as-written" list.
TemplateArgumentListInfo Args(Loc, Loc);
for (TemplateArgument Arg : TemplateArgs.getInnermost().take_front(
TD->getTemplateParameters()->size())) {
ArrayRef<TemplateArgument> Unpacked(Arg);
if (Arg.getKind() == TemplateArgument::Pack)
Unpacked = Arg.pack_elements();
for (TemplateArgument UnpackedArg : Unpacked)
Args.addArgument(
getTrivialTemplateArgumentLoc(UnpackedArg, QualType(), Loc));
}
QualType T = CheckTemplateIdType(TemplateName(TD), Loc, Args);
if (T.isNull())
return nullptr;
auto *SubstRecord = T->getAsCXXRecordDecl();
assert(SubstRecord && "class template id not a class type?");
// Check that this template-id names the primary template and not a
// partial or explicit specialization. (In the latter cases, it's
// meaningless to attempt to find an instantiation of D within the
// specialization.)
// FIXME: The standard doesn't say what should happen here.
if (FindingInstantiatedContext &&
usesPartialOrExplicitSpecialization(
Loc, cast<ClassTemplateSpecializationDecl>(SubstRecord))) {
Diag(Loc, diag::err_specialization_not_primary_template)
<< T << (SubstRecord->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization);
return nullptr;
}
DC = SubstRecord;
continue;
}
}
DC = DC->getParent();
}
// Fall through to deal with other dependent record types (e.g.,
// anonymous unions in class templates).
}
if (!ParentDependsOnArgs)
return D;
ParentDC = FindInstantiatedContext(Loc, ParentDC, TemplateArgs);
if (!ParentDC)
return nullptr;
if (ParentDC != D->getDeclContext()) {
// We performed some kind of instantiation in the parent context,
// so now we need to look into the instantiated parent context to
// find the instantiation of the declaration D.
// If our context used to be dependent, we may need to instantiate
// it before performing lookup into that context.
bool IsBeingInstantiated = false;
if (CXXRecordDecl *Spec = dyn_cast<CXXRecordDecl>(ParentDC)) {
if (!Spec->isDependentContext()) {
QualType T = Context.getTypeDeclType(Spec);
const RecordType *Tag = T->getAs<RecordType>();
assert(Tag && "type of non-dependent record is not a RecordType");
if (Tag->isBeingDefined())
IsBeingInstantiated = true;
if (!Tag->isBeingDefined() &&
RequireCompleteType(Loc, T, diag::err_incomplete_type))
return nullptr;
ParentDC = Tag->getDecl();
}
}
NamedDecl *Result = nullptr;
// FIXME: If the name is a dependent name, this lookup won't necessarily
// find it. Does that ever matter?
if (auto Name = D->getDeclName()) {
DeclarationNameInfo NameInfo(Name, D->getLocation());
DeclarationNameInfo NewNameInfo =
SubstDeclarationNameInfo(NameInfo, TemplateArgs);
Name = NewNameInfo.getName();
if (!Name)
return nullptr;
DeclContext::lookup_result Found = ParentDC->lookup(Name);
Result = findInstantiationOf(Context, D, Found.begin(), Found.end());
} else {
// Since we don't have a name for the entity we're looking for,
// our only option is to walk through all of the declarations to
// find that name. This will occur in a few cases:
//
// - anonymous struct/union within a template
// - unnamed class/struct/union/enum within a template
//
// FIXME: Find a better way to find these instantiations!
Result = findInstantiationOf(Context, D,
ParentDC->decls_begin(),
ParentDC->decls_end());
}
if (!Result) {
if (isa<UsingShadowDecl>(D)) {
// UsingShadowDecls can instantiate to nothing because of using hiding.
} else if (hasUncompilableErrorOccurred()) {
// We've already complained about some ill-formed code, so most likely
// this declaration failed to instantiate. There's no point in
// complaining further, since this is normal in invalid code.
// FIXME: Use more fine-grained 'invalid' tracking for this.
} else if (IsBeingInstantiated) {
// The class in which this member exists is currently being
// instantiated, and we haven't gotten around to instantiating this
// member yet. This can happen when the code uses forward declarations
// of member classes, and introduces ordering dependencies via
// template instantiation.
Diag(Loc, diag::err_member_not_yet_instantiated)
<< D->getDeclName()
<< Context.getTypeDeclType(cast<CXXRecordDecl>(ParentDC));
Diag(D->getLocation(), diag::note_non_instantiated_member_here);
} else if (EnumConstantDecl *ED = dyn_cast<EnumConstantDecl>(D)) {
// This enumeration constant was found when the template was defined,
// but can't be found in the instantiation. This can happen if an
// unscoped enumeration member is explicitly specialized.
EnumDecl *Enum = cast<EnumDecl>(ED->getLexicalDeclContext());
EnumDecl *Spec = cast<EnumDecl>(FindInstantiatedDecl(Loc, Enum,
TemplateArgs));
assert(Spec->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization);
Diag(Loc, diag::err_enumerator_does_not_exist)
<< D->getDeclName()
<< Context.getTypeDeclType(cast<TypeDecl>(Spec->getDeclContext()));
Diag(Spec->getLocation(), diag::note_enum_specialized_here)
<< Context.getTypeDeclType(Spec);
} else {
// We should have found something, but didn't.
llvm_unreachable("Unable to find instantiation of declaration!");
}
}
D = Result;
}
return D;
}
/// Performs template instantiation for all implicit template
/// instantiations we have seen until this point.
void Sema::PerformPendingInstantiations(bool LocalOnly) {
std::deque<PendingImplicitInstantiation> delayedPCHInstantiations;
while (!PendingLocalImplicitInstantiations.empty() ||
(!LocalOnly && !PendingInstantiations.empty())) {
PendingImplicitInstantiation Inst;
if (PendingLocalImplicitInstantiations.empty()) {
Inst = PendingInstantiations.front();
PendingInstantiations.pop_front();
} else {
Inst = PendingLocalImplicitInstantiations.front();
PendingLocalImplicitInstantiations.pop_front();
}
// Instantiate function definitions
if (FunctionDecl *Function = dyn_cast<FunctionDecl>(Inst.first)) {
bool DefinitionRequired = Function->getTemplateSpecializationKind() ==
TSK_ExplicitInstantiationDefinition;
if (Function->isMultiVersion()) {
getASTContext().forEachMultiversionedFunctionVersion(
Function, [this, Inst, DefinitionRequired](FunctionDecl *CurFD) {
InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true,
DefinitionRequired, true);
if (CurFD->isDefined())
CurFD->setInstantiationIsPending(false);
});
} else {
InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, Function, true,
DefinitionRequired, true);
if (Function->isDefined())
Function->setInstantiationIsPending(false);
}
// Definition of a PCH-ed template declaration may be available only in the TU.
if (!LocalOnly && LangOpts.PCHInstantiateTemplates &&
TUKind == TU_Prefix && Function->instantiationIsPending())
delayedPCHInstantiations.push_back(Inst);
continue;
}
// Instantiate variable definitions
VarDecl *Var = cast<VarDecl>(Inst.first);
assert((Var->isStaticDataMember() ||
isa<VarTemplateSpecializationDecl>(Var)) &&
"Not a static data member, nor a variable template"
" specialization?");
// Don't try to instantiate declarations if the most recent redeclaration
// is invalid.
if (Var->getMostRecentDecl()->isInvalidDecl())
continue;
// Check if the most recent declaration has changed the specialization kind
// and removed the need for implicit instantiation.
switch (Var->getMostRecentDecl()
->getTemplateSpecializationKindForInstantiation()) {
case TSK_Undeclared:
llvm_unreachable("Cannot instantitiate an undeclared specialization.");
case TSK_ExplicitInstantiationDeclaration:
case TSK_ExplicitSpecialization:
continue; // No longer need to instantiate this type.
case TSK_ExplicitInstantiationDefinition:
// We only need an instantiation if the pending instantiation *is* the
// explicit instantiation.
if (Var != Var->getMostRecentDecl())
continue;
break;
case TSK_ImplicitInstantiation:
break;
}
PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(),
"instantiating variable definition");
bool DefinitionRequired = Var->getTemplateSpecializationKind() ==
TSK_ExplicitInstantiationDefinition;
// Instantiate static data member definitions or variable template
// specializations.
InstantiateVariableDefinition(/*FIXME:*/ Inst.second, Var, true,
DefinitionRequired, true);
}
if (!LocalOnly && LangOpts.PCHInstantiateTemplates)
PendingInstantiations.swap(delayedPCHInstantiations);
}
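// For illustration, a sketch of what typically ends up in this queue (the
// names 'twice' and 'n' are hypothetical):
//
//   template<typename T> T twice(T v) { return v + v; }
//   int n = twice(21); // implicit instantiation of twice<int>
//
// The definition of twice<int> is added to PendingInstantiations at the point
// of use and instantiated here, usually at the end of the translation unit.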
void Sema::PerformDependentDiagnostics(const DeclContext *Pattern,
const MultiLevelTemplateArgumentList &TemplateArgs) {
for (auto DD : Pattern->ddiags()) {
switch (DD->getKind()) {
case DependentDiagnostic::Access:
HandleDependentAccessCheck(*DD, TemplateArgs);
break;
}
}
}
diff --git a/contrib/llvm-project/clang/lib/Sema/TreeTransform.h b/contrib/llvm-project/clang/lib/Sema/TreeTransform.h
index 70ba631dbfc6..d8a5b6ad4f94 100644
--- a/contrib/llvm-project/clang/lib/Sema/TreeTransform.h
+++ b/contrib/llvm-project/clang/lib/Sema/TreeTransform.h
@@ -1,14760 +1,14760 @@
//===------- TreeTransform.h - Semantic Tree Transformation -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
//
// This file implements a semantic tree transformation that takes a given
// AST and rebuilds it, possibly transforming some nodes in the process.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_SEMA_TREETRANSFORM_H
#define LLVM_CLANG_LIB_SEMA_TREETRANSFORM_H
#include "CoroutineStmtBuilder.h"
#include "TypeLocBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprConcepts.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ExprOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtObjC.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/DiagnosticParse.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Sema/Designator.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Ownership.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
using namespace llvm::omp;
namespace clang {
using namespace sema;
/// A semantic tree transformation that allows one to transform one
/// abstract syntax tree into another.
///
/// A new tree transformation is defined by creating a new subclass \c X of
/// \c TreeTransform<X> and then overriding certain operations to provide
/// behavior specific to that transformation. For example, template
/// instantiation is implemented as a tree transformation where the
/// transformation of TemplateTypeParmType nodes involves substituting the
/// template arguments for their corresponding template parameters; a similar
/// transformation is performed for non-type template parameters and
/// template template parameters.
///
/// This tree-transformation template uses static polymorphism to allow
/// subclasses to customize any of its operations. Thus, a subclass can
/// override any of the transformation or rebuild operators by providing an
/// operation with the same signature as the default implementation. The
/// overriding function should not be virtual.
///
/// Semantic tree transformations are split into two stages, either of which
/// can be replaced by a subclass. The "transform" step transforms an AST node
/// or the parts of an AST node using the various transformation functions,
/// then passes the pieces on to the "rebuild" step, which constructs a new AST
/// node of the appropriate kind from the pieces. The default transformation
/// routines recursively transform the operands to composite AST nodes (e.g.,
/// the pointee type of a PointerType node) and, if any of those operand nodes
/// were changed by the transformation, invokes the rebuild operation to create
/// a new AST node.
///
/// Subclasses can customize the transformation at various levels. The
/// most coarse-grained transformations involve replacing TransformType(),
/// TransformExpr(), TransformDecl(), TransformNestedNameSpecifierLoc(),
/// TransformTemplateName(), or TransformTemplateArgument() with entirely
/// new implementations.
///
/// For more fine-grained transformations, subclasses can replace any of the
/// \c TransformXXX functions (where XXX is the name of an AST node, e.g.,
/// PointerType, StmtExpr) to alter the transformation. As mentioned previously,
/// replacing TransformTemplateTypeParmType() allows template instantiation
/// to substitute template arguments for their corresponding template
/// parameters. Additionally, subclasses can override the \c RebuildXXX
/// functions to control how AST nodes are rebuilt when their operands change.
/// By default, \c TreeTransform will invoke semantic analysis to rebuild
/// AST nodes. However, certain other tree transformations (e.g., cloning) may
/// be able to use more efficient rebuild steps.
///
/// There are a handful of other functions that can be overridden, allowing one
/// to avoid traversing nodes that don't need any transformation
/// (\c AlreadyTransformed()), force rebuilding AST nodes even when their
/// operands have not changed (\c AlwaysRebuild()), and customize the
/// default locations and entity names used for type-checking
/// (\c getBaseLocation(), \c getBaseEntity()).
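///
/// For illustration only, a minimal subclass might look like the following
/// (the name IdentityTransform and its behavior are hypothetical):
///
/// \code
/// class IdentityTransform : public TreeTransform<IdentityTransform> {
/// public:
///   IdentityTransform(Sema &S) : TreeTransform<IdentityTransform>(S) {}
///
///   // Force every node to be rebuilt, even if none of its operands changed.
///   bool AlwaysRebuild() { return true; }
/// };
/// \endcode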
template<typename Derived>
class TreeTransform {
/// Private RAII object that helps us forget and then re-remember
/// the template argument corresponding to a partially-substituted parameter
/// pack.
class ForgetPartiallySubstitutedPackRAII {
Derived &Self;
TemplateArgument Old;
public:
ForgetPartiallySubstitutedPackRAII(Derived &Self) : Self(Self) {
Old = Self.ForgetPartiallySubstitutedPack();
}
~ForgetPartiallySubstitutedPackRAII() {
Self.RememberPartiallySubstitutedPack(Old);
}
};
protected:
Sema &SemaRef;
/// The set of local declarations that have been transformed, for
/// cases where we are forced to build new declarations within the transformer
/// rather than in the subclass (e.g., lambda closure types).
llvm::DenseMap<Decl *, Decl *> TransformedLocalDecls;
public:
/// Initializes a new tree transformer.
TreeTransform(Sema &SemaRef) : SemaRef(SemaRef) { }
/// Retrieves a reference to the derived class.
Derived &getDerived() { return static_cast<Derived&>(*this); }
/// Retrieves a reference to the derived class.
const Derived &getDerived() const {
return static_cast<const Derived&>(*this);
}
static inline ExprResult Owned(Expr *E) { return E; }
static inline StmtResult Owned(Stmt *S) { return S; }
/// Retrieves a reference to the semantic analysis object used for
/// this tree transform.
Sema &getSema() const { return SemaRef; }
/// Whether the transformation should always rebuild AST nodes, even
/// if none of the children have changed.
///
/// Subclasses may override this function to specify when the transformation
/// should rebuild all AST nodes.
///
/// We must always rebuild all AST nodes when performing variadic template
/// pack expansion, in order to avoid violating the AST invariant that each
/// statement node appears at most once in its containing declaration.
bool AlwaysRebuild() { return SemaRef.ArgumentPackSubstitutionIndex != -1; }
/// Whether the transformation is forming an expression or statement that
/// replaces the original. In this case, we'll reuse mangling numbers from
/// existing lambdas.
bool ReplacingOriginal() { return false; }
/// Whether CXXConstructExprs can be skipped when they are implicit.
/// They will be reconstructed when used if needed.
/// This is useful when the user that causes rebuilding of the
/// CXXConstructExpr is outside of the expression at which the TreeTransform
/// started.
bool AllowSkippingCXXConstructExpr() { return true; }
/// Returns the location of the entity being transformed, if that
/// information was not available elsewhere in the AST.
///
/// By default, returns no source-location information. Subclasses can
/// provide an alternative implementation that provides better location
/// information.
SourceLocation getBaseLocation() { return SourceLocation(); }
/// Returns the name of the entity being transformed, if that
/// information was not available elsewhere in the AST.
///
/// By default, returns an empty name. Subclasses can provide an alternative
/// implementation with a more precise name.
DeclarationName getBaseEntity() { return DeclarationName(); }
/// Sets the "base" location and entity when that
/// information is known based on another transformation.
///
/// By default, the source location and entity are ignored. Subclasses can
/// override this function to provide a customized implementation.
void setBase(SourceLocation Loc, DeclarationName Entity) { }
/// RAII object that temporarily sets the base location and entity
/// used for reporting diagnostics in types.
class TemporaryBase {
TreeTransform &Self;
SourceLocation OldLocation;
DeclarationName OldEntity;
public:
TemporaryBase(TreeTransform &Self, SourceLocation Location,
DeclarationName Entity) : Self(Self) {
OldLocation = Self.getDerived().getBaseLocation();
OldEntity = Self.getDerived().getBaseEntity();
if (Location.isValid())
Self.getDerived().setBase(Location, Entity);
}
~TemporaryBase() {
Self.getDerived().setBase(OldLocation, OldEntity);
}
};
/// Determine whether the given type \p T has already been
/// transformed.
///
/// Subclasses can provide an alternative implementation of this routine
/// to short-circuit evaluation when it is known that a given type will
/// not change. For example, template instantiation need not traverse
/// non-dependent types.
bool AlreadyTransformed(QualType T) {
return T.isNull();
}
/// Transform a template parameter depth level.
///
/// During a transformation that transforms template parameters, this maps
/// an old template parameter depth to a new depth.
unsigned TransformTemplateDepth(unsigned Depth) {
return Depth;
}
/// Determine whether the given call argument should be dropped, e.g.,
/// because it is a default argument.
///
/// Subclasses can provide an alternative implementation of this routine to
/// determine which kinds of call arguments get dropped. By default,
/// CXXDefaultArgument nodes are dropped (prior to transformation).
bool DropCallArgument(Expr *E) {
return E->isDefaultArgument();
}
/// Determine whether we should expand a pack expansion with the
/// given set of parameter packs into separate arguments by repeatedly
/// transforming the pattern.
///
/// By default, the transformer never tries to expand pack expansions.
/// Subclasses can override this routine to provide different behavior.
///
/// \param EllipsisLoc The location of the ellipsis that identifies the
/// pack expansion.
///
/// \param PatternRange The source range that covers the entire pattern of
/// the pack expansion.
///
/// \param Unexpanded The set of unexpanded parameter packs within the
/// pattern.
///
/// \param ShouldExpand Will be set to \c true if the transformer should
/// expand the corresponding pack expansions into separate arguments. When
/// set, \c NumExpansions must also be set.
///
/// \param RetainExpansion Whether the caller should add an unexpanded
/// pack expansion after all of the expanded arguments. This is used
/// when extending explicitly-specified template argument packs per
/// C++0x [temp.arg.explicit]p9.
///
/// \param NumExpansions The number of separate arguments that will be in
/// the expanded form of the corresponding pack expansion. This is both an
/// input and an output parameter, which can be set by the caller if the
/// number of expansions is known a priori (e.g., due to a prior substitution)
/// and will be set by the callee when the number of expansions is known.
/// The callee must set this value when \c ShouldExpand is \c true; it may
/// set this value in other cases.
///
/// \returns true if an error occurred (e.g., because the parameter packs
/// are to be instantiated with arguments of different lengths), false
/// otherwise. If false, \c ShouldExpand (and possibly \c NumExpansions)
/// must be set.
bool TryExpandParameterPacks(SourceLocation EllipsisLoc,
SourceRange PatternRange,
ArrayRef<UnexpandedParameterPack> Unexpanded,
bool &ShouldExpand,
bool &RetainExpansion,
Optional<unsigned> &NumExpansions) {
ShouldExpand = false;
return false;
}
/// "Forget" about the partially-substituted pack template argument,
/// when performing an instantiation that must preserve the parameter pack
/// use.
///
/// This routine is meant to be overridden by the template instantiator.
TemplateArgument ForgetPartiallySubstitutedPack() {
return TemplateArgument();
}
/// "Remember" the partially-substituted pack template argument
/// after performing an instantiation that must preserve the parameter pack
/// use.
///
/// This routine is meant to be overridden by the template instantiator.
void RememberPartiallySubstitutedPack(TemplateArgument Arg) { }
/// Note to the derived class when a function parameter pack is
/// being expanded.
void ExpandingFunctionParameterPack(ParmVarDecl *Pack) { }
/// Transforms the given type into another type.
///
/// By default, this routine transforms a type by creating a
/// TypeSourceInfo for it and delegating to the appropriate
/// function. This is expensive, but we don't mind, because
/// this method is deprecated anyway; all users should be
/// switched to storing TypeSourceInfos.
///
/// \returns the transformed type.
QualType TransformType(QualType T);
/// Transforms the given type-with-location into a new
/// type-with-location.
///
/// By default, this routine transforms a type by delegating to the
/// appropriate TransformXXXType to build a new type. Subclasses
/// may override this function (to take over all type
/// transformations) or some set of the TransformXXXType functions
/// to alter the transformation.
TypeSourceInfo *TransformType(TypeSourceInfo *DI);
/// Transform the given type-with-location into a new
/// type, collecting location information in the given builder
/// as necessary.
///
QualType TransformType(TypeLocBuilder &TLB, TypeLoc TL);
/// Transform a type that is permitted to produce a
/// DeducedTemplateSpecializationType.
///
/// This is used in the (relatively rare) contexts where it is acceptable
/// for transformation to produce a class template type with deduced
/// template arguments.
/// @{
QualType TransformTypeWithDeducedTST(QualType T);
TypeSourceInfo *TransformTypeWithDeducedTST(TypeSourceInfo *DI);
/// @}
/// The reason why the value of a statement is not discarded, if any.
enum StmtDiscardKind {
SDK_Discarded,
SDK_NotDiscarded,
SDK_StmtExprResult,
};
/// Transform the given statement.
///
/// By default, this routine transforms a statement by delegating to the
/// appropriate TransformXXXStmt function to transform a specific kind of
/// statement or the TransformExpr() function to transform an expression.
/// Subclasses may override this function to transform statements using some
/// other mechanism.
///
/// \returns the transformed statement.
StmtResult TransformStmt(Stmt *S, StmtDiscardKind SDK = SDK_Discarded);
/// Transform the given OpenMP clause.
///
/// By default, this routine transforms a clause by delegating to the
/// appropriate TransformOMPXXXClause function to transform a specific kind
/// of clause. Subclasses may override this function to transform clauses
/// using some other mechanism.
///
/// \returns the transformed OpenMP clause.
OMPClause *TransformOMPClause(OMPClause *S);
/// Transform the given attribute.
///
/// By default, this routine transforms an attribute by delegating to the
/// appropriate TransformXXXAttr function to transform a specific kind
/// of attribute. Subclasses may override this function to transform
/// attributed statements using some other mechanism.
///
/// \returns the transformed attribute.
const Attr *TransformAttr(const Attr *S);
/// Transform the specified attribute.
///
/// Subclasses should override the transformation of attributes with a pragma
/// spelling to transform expressions stored within the attribute.
///
/// \returns the transformed attribute.
#define ATTR(X)
#define PRAGMA_SPELLING_ATTR(X) \
const X##Attr *Transform##X##Attr(const X##Attr *R) { return R; }
#include "clang/Basic/AttrList.inc"
/// Transform the given expression.
///
/// By default, this routine transforms an expression by delegating to the
/// appropriate TransformXXXExpr function to build a new expression.
/// Subclasses may override this function to transform expressions using some
/// other mechanism.
///
/// \returns the transformed expression.
ExprResult TransformExpr(Expr *E);
/// Transform the given initializer.
///
/// By default, this routine transforms an initializer by stripping off the
/// semantic nodes added by initialization, then passing the result to
/// TransformExpr or TransformExprs.
///
/// \returns the transformed initializer.
ExprResult TransformInitializer(Expr *Init, bool NotCopyInit);
/// Transform the given list of expressions.
///
/// This routine transforms a list of expressions by invoking
/// \c TransformExpr() for each subexpression. However, it also provides
/// support for variadic templates by expanding any pack expansions (if the
/// derived class permits such expansion) along the way. When pack expansions
/// are present, the number of outputs may not equal the number of inputs.
///
/// \param Inputs The set of expressions to be transformed.
///
/// \param NumInputs The number of expressions in \c Inputs.
///
/// \param IsCall If \c true, then this transform is being performed on
/// function-call arguments, and any arguments that \c DropCallArgument()
/// says should be dropped will be dropped.
///
/// \param Outputs The transformed input expressions will be added to this
/// vector.
///
/// \param ArgChanged If non-NULL, will be set \c true if any argument changed
/// due to transformation.
///
/// \returns true if an error occurred, false otherwise.
bool TransformExprs(Expr *const *Inputs, unsigned NumInputs, bool IsCall,
SmallVectorImpl<Expr *> &Outputs,
bool *ArgChanged = nullptr);
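// A minimal usage sketch, mirroring how call expressions are typically
// rebuilt: transform the arguments of a call, letting the derived class drop
// default arguments and expand packs along the way. Here 'E' stands for a
// hypothetical CallExpr* available in the calling transform.
//
//   SmallVector<Expr *, 8> Args;
//   bool ArgChanged = false;
//   if (getDerived().TransformExprs(E->getArgs(), E->getNumArgs(),
//                                   /*IsCall=*/true, Args, &ArgChanged))
//     return ExprError();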
/// Transform the given declaration, which is referenced from a type
/// or expression.
///
/// By default, acts as the identity function on declarations, unless the
/// transformer has had to transform the declaration itself. Subclasses
/// may override this function to provide alternate behavior.
Decl *TransformDecl(SourceLocation Loc, Decl *D) {
llvm::DenseMap<Decl *, Decl *>::iterator Known
= TransformedLocalDecls.find(D);
if (Known != TransformedLocalDecls.end())
return Known->second;
return D;
}
/// Transform the specified condition.
///
/// By default, this transforms the variable and expression and rebuilds
/// the condition.
Sema::ConditionResult TransformCondition(SourceLocation Loc, VarDecl *Var,
Expr *Expr,
Sema::ConditionKind Kind);
/// Transform the attributes associated with the given declaration and
/// place them on the new declaration.
///
/// By default, this operation does nothing. Subclasses may override this
/// behavior to transform attributes.
void transformAttrs(Decl *Old, Decl *New) { }
/// Note that a local declaration has been transformed by this
/// transformer.
///
/// Local declarations are typically transformed via a call to
/// TransformDefinition. However, in some cases (e.g., lambda expressions),
/// the transformer itself has to transform the declarations. This routine
/// can be overridden by a subclass that keeps track of such mappings.
void transformedLocalDecl(Decl *Old, ArrayRef<Decl *> New) {
assert(New.size() == 1 &&
"must override transformedLocalDecl if performing pack expansion");
TransformedLocalDecls[Old] = New.front();
}
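// A minimal usage sketch of the interplay with TransformDecl() above: a
// derived transformer that rewrites a local declaration records the mapping
// first, so that later references pick up the replacement. OldVD and NewVD
// are hypothetical VarDecl pointers produced elsewhere by the subclass.
//
//   transformedLocalDecl(OldVD, {NewVD});
//   // ... later, while transforming an expression that refers to OldVD:
//   Decl *D = getDerived().TransformDecl(Loc, OldVD);  // yields NewVD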
/// Transform the definition of the given declaration.
///
/// By default, invokes TransformDecl() to transform the declaration.
/// Subclasses may override this function to provide alternate behavior.
Decl *TransformDefinition(SourceLocation Loc, Decl *D) {
return getDerived().TransformDecl(Loc, D);
}
/// Transform the given declaration, which was the first part of a
/// nested-name-specifier in a member access expression.
///
/// This specific declaration transformation only applies to the first
/// identifier in a nested-name-specifier of a member access expression, e.g.,
/// the \c T in \c x->T::member
///
/// By default, invokes TransformDecl() to transform the declaration.
/// Subclasses may override this function to provide alternate behavior.
NamedDecl *TransformFirstQualifierInScope(NamedDecl *D, SourceLocation Loc) {
return cast_or_null<NamedDecl>(getDerived().TransformDecl(Loc, D));
}
/// Transform the set of declarations in an OverloadExpr.
bool TransformOverloadExprDecls(OverloadExpr *Old, bool RequiresADL,
LookupResult &R);
/// Transform the given nested-name-specifier with source-location
/// information.
///
/// By default, transforms all of the types and declarations within the
/// nested-name-specifier. Subclasses may override this function to provide
/// alternate behavior.
NestedNameSpecifierLoc
TransformNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS,
QualType ObjectType = QualType(),
NamedDecl *FirstQualifierInScope = nullptr);
/// Transform the given declaration name.
///
/// By default, transforms the types of conversion function, constructor,
/// and destructor names and then (if needed) rebuilds the declaration name.
/// Identifiers and selectors are returned unmodified. Subclasses may
/// override this function to provide alternate behavior.
DeclarationNameInfo
TransformDeclarationNameInfo(const DeclarationNameInfo &NameInfo);
bool TransformRequiresExprRequirements(ArrayRef<concepts::Requirement *> Reqs,
llvm::SmallVectorImpl<concepts::Requirement *> &Transformed);
concepts::TypeRequirement *
TransformTypeRequirement(concepts::TypeRequirement *Req);
concepts::ExprRequirement *
TransformExprRequirement(concepts::ExprRequirement *Req);
concepts::NestedRequirement *
TransformNestedRequirement(concepts::NestedRequirement *Req);
/// Transform the given template name.
///
/// \param SS The nested-name-specifier that qualifies the template
/// name. This nested-name-specifier must already have been transformed.
///
/// \param Name The template name to transform.
///
/// \param NameLoc The source location of the template name.
///
/// \param ObjectType If we're translating a template name within a member
/// access expression, this is the type of the object whose member template
/// is being referenced.
///
/// \param FirstQualifierInScope If the first part of a nested-name-specifier
/// also refers to a name within the current (lexical) scope, this is the
/// declaration it refers to.
///
/// By default, transforms the template name by transforming the declarations
/// and nested-name-specifiers that occur within the template name.
/// Subclasses may override this function to provide alternate behavior.
TemplateName
TransformTemplateName(CXXScopeSpec &SS, TemplateName Name,
SourceLocation NameLoc,
QualType ObjectType = QualType(),
NamedDecl *FirstQualifierInScope = nullptr,
bool AllowInjectedClassName = false);
/// Transform the given template argument.
///
/// By default, this operation transforms the type, expression, or
/// declaration stored within the template argument and constructs a
/// new template argument from the transformed result. Subclasses may
/// override this function to provide alternate behavior.
///
/// Returns true if there was an error.
bool TransformTemplateArgument(const TemplateArgumentLoc &Input,
TemplateArgumentLoc &Output,
bool Uneval = false);
/// Transform the given set of template arguments.
///
/// By default, this operation transforms all of the template arguments
/// in the input set using \c TransformTemplateArgument(), and appends
/// the transformed arguments to the output list.
///
/// Note that this overload of \c TransformTemplateArguments() is merely
/// a convenience function. Subclasses that wish to override this behavior
/// should override the iterator-based member template version.
///
/// \param Inputs The set of template arguments to be transformed.
///
/// \param NumInputs The number of template arguments in \p Inputs.
///
/// \param Outputs The set of transformed template arguments output by this
/// routine.
///
/// Returns true if an error occurred.
bool TransformTemplateArguments(const TemplateArgumentLoc *Inputs,
unsigned NumInputs,
TemplateArgumentListInfo &Outputs,
bool Uneval = false) {
return TransformTemplateArguments(Inputs, Inputs + NumInputs, Outputs,
Uneval);
}
/// Transform the given set of template arguments.
///
/// By default, this operation transforms all of the template arguments
/// in the input set using \c TransformTemplateArgument(), and appends
/// the transformed arguments to the output list.
///
/// \param First An iterator to the first template argument.
///
/// \param Last An iterator one step past the last template argument.
///
/// \param Outputs The set of transformed template arguments output by this
/// routine.
///
/// Returns true if an error occurred.
template<typename InputIterator>
bool TransformTemplateArguments(InputIterator First,
InputIterator Last,
TemplateArgumentListInfo &Outputs,
bool Uneval = false);
/// Fakes up a TemplateArgumentLoc for a given TemplateArgument.
void InventTemplateArgumentLoc(const TemplateArgument &Arg,
TemplateArgumentLoc &ArgLoc);
/// Fakes up a TypeSourceInfo for a type.
TypeSourceInfo *InventTypeSourceInfo(QualType T) {
return SemaRef.Context.getTrivialTypeSourceInfo(T,
getDerived().getBaseLocation());
}
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
QualType Transform##CLASS##Type(TypeLocBuilder &TLB, CLASS##TypeLoc T);
#include "clang/AST/TypeLocNodes.def"
template<typename Fn>
QualType TransformFunctionProtoType(TypeLocBuilder &TLB,
FunctionProtoTypeLoc TL,
CXXRecordDecl *ThisContext,
Qualifiers ThisTypeQuals,
Fn TransformExceptionSpec);
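// A minimal usage sketch: callers generally pass the exception-spec transform
// as a lambda. ExceptionStorage is a hypothetical SmallVector<QualType, 4>
// that must outlive the call.
//
//   getDerived().TransformFunctionProtoType(
//       TLB, TL, /*ThisContext=*/nullptr, Qualifiers(),
//       [&](FunctionProtoType::ExceptionSpecInfo &ESI, bool &Changed) {
//         return getDerived().TransformExceptionSpec(TL.getBeginLoc(), ESI,
//                                                    ExceptionStorage, Changed);
//       });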
bool TransformExceptionSpec(SourceLocation Loc,
FunctionProtoType::ExceptionSpecInfo &ESI,
SmallVectorImpl<QualType> &Exceptions,
bool &Changed);
StmtResult TransformSEHHandler(Stmt *Handler);
QualType
TransformTemplateSpecializationType(TypeLocBuilder &TLB,
TemplateSpecializationTypeLoc TL,
TemplateName Template);
QualType
TransformDependentTemplateSpecializationType(TypeLocBuilder &TLB,
DependentTemplateSpecializationTypeLoc TL,
TemplateName Template,
CXXScopeSpec &SS);
QualType TransformDependentTemplateSpecializationType(
TypeLocBuilder &TLB, DependentTemplateSpecializationTypeLoc TL,
NestedNameSpecifierLoc QualifierLoc);
/// Transforms the parameters of a function type into the
/// given vectors.
///
/// The result vectors should be kept in sync; null entries in the
/// variables vector are acceptable.
///
/// Return true on error.
bool TransformFunctionTypeParams(
SourceLocation Loc, ArrayRef<ParmVarDecl *> Params,
const QualType *ParamTypes,
const FunctionProtoType::ExtParameterInfo *ParamInfos,
SmallVectorImpl<QualType> &PTypes, SmallVectorImpl<ParmVarDecl *> *PVars,
Sema::ExtParameterInfoBuilder &PInfos);
/// Transforms a single function-type parameter. Return null
/// on error.
///
/// \param indexAdjustment - A number to add to the parameter's
/// scope index; can be negative
ParmVarDecl *TransformFunctionTypeParam(ParmVarDecl *OldParm,
int indexAdjustment,
Optional<unsigned> NumExpansions,
bool ExpectParameterPack);
/// Transform the body of a lambda-expression.
StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body);
/// Alternative implementation of TransformLambdaBody that skips transforming
/// the body.
StmtResult SkipLambdaBody(LambdaExpr *E, Stmt *Body);
QualType TransformReferenceType(TypeLocBuilder &TLB, ReferenceTypeLoc TL);
StmtResult TransformCompoundStmt(CompoundStmt *S, bool IsStmtExpr);
ExprResult TransformCXXNamedCastExpr(CXXNamedCastExpr *E);
TemplateParameterList *TransformTemplateParameterList(
TemplateParameterList *TPL) {
return TPL;
}
ExprResult TransformAddressOfOperand(Expr *E);
ExprResult TransformDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E,
bool IsAddressOfOperand,
TypeSourceInfo **RecoveryTSI);
ExprResult TransformParenDependentScopeDeclRefExpr(
ParenExpr *PE, DependentScopeDeclRefExpr *DRE, bool IsAddressOfOperand,
TypeSourceInfo **RecoveryTSI);
StmtResult TransformOMPExecutableDirective(OMPExecutableDirective *S);
// FIXME: We use LLVM_ATTRIBUTE_NOINLINE because inlining causes a ridiculous
// amount of stack usage with clang.
#define STMT(Node, Parent) \
LLVM_ATTRIBUTE_NOINLINE \
StmtResult Transform##Node(Node *S);
#define VALUESTMT(Node, Parent) \
LLVM_ATTRIBUTE_NOINLINE \
StmtResult Transform##Node(Node *S, StmtDiscardKind SDK);
#define EXPR(Node, Parent) \
LLVM_ATTRIBUTE_NOINLINE \
ExprResult Transform##Node(Node *E);
#define ABSTRACT_STMT(Stmt)
#include "clang/AST/StmtNodes.inc"
#define GEN_CLANG_CLAUSE_CLASS
#define CLAUSE_CLASS(Enum, Str, Class) \
LLVM_ATTRIBUTE_NOINLINE \
OMPClause *Transform##Class(Class *S);
#include "llvm/Frontend/OpenMP/OMP.inc"
/// Build a new qualified type given its unqualified type and type location.
///
/// By default, this routine adds type qualifiers only to types that can
/// have qualifiers, and silently suppresses those qualifiers that are not
/// permitted. Subclasses may override this routine to provide different
/// behavior.
QualType RebuildQualifiedType(QualType T, QualifiedTypeLoc TL);
/// Build a new pointer type given its pointee type.
///
/// By default, performs semantic analysis when building the pointer type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildPointerType(QualType PointeeType, SourceLocation Sigil);
/// Build a new block pointer type given its pointee type.
///
/// By default, performs semantic analysis when building the block pointer
/// type. Subclasses may override this routine to provide different behavior.
QualType RebuildBlockPointerType(QualType PointeeType, SourceLocation Sigil);
/// Build a new reference type given the type it references.
///
/// By default, performs semantic analysis when building the
/// reference type. Subclasses may override this routine to provide
/// different behavior.
///
/// \param LValue whether the type was written with an lvalue sigil
/// or an rvalue sigil.
QualType RebuildReferenceType(QualType ReferentType,
bool LValue,
SourceLocation Sigil);
/// Build a new member pointer type given the pointee type and the
/// class type it refers into.
///
/// By default, performs semantic analysis when building the member pointer
/// type. Subclasses may override this routine to provide different behavior.
QualType RebuildMemberPointerType(QualType PointeeType, QualType ClassType,
SourceLocation Sigil);
QualType RebuildObjCTypeParamType(const ObjCTypeParamDecl *Decl,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc);
/// Build an Objective-C object type.
///
/// By default, performs semantic analysis when building the object type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildObjCObjectType(QualType BaseType,
SourceLocation Loc,
SourceLocation TypeArgsLAngleLoc,
ArrayRef<TypeSourceInfo *> TypeArgs,
SourceLocation TypeArgsRAngleLoc,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc);
/// Build a new Objective-C object pointer type given the pointee type.
///
/// By default, directly builds the pointer type, with no additional semantic
/// analysis.
QualType RebuildObjCObjectPointerType(QualType PointeeType,
SourceLocation Star);
/// Build a new array type given the element type, size
/// modifier, size of the array (if known), size expression, and index type
/// qualifiers.
///
/// By default, performs semantic analysis when building the array type.
/// Subclasses may override this routine to provide different behavior.
/// Also by default, all of the other Rebuild*Array routines delegate to
/// this routine to build the array type.
QualType RebuildArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
const llvm::APInt *Size,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange);
/// Build a new constant array type given the element type, size
/// modifier, (known) size of the array, and index type qualifiers.
///
/// By default, performs semantic analysis when building the array type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildConstantArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
const llvm::APInt &Size,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange);
/// Build a new incomplete array type given the element type, size
/// modifier, and index type qualifiers.
///
/// By default, performs semantic analysis when building the array type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildIncompleteArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
unsigned IndexTypeQuals,
SourceRange BracketsRange);
/// Build a new variable-length array type given the element type,
/// size modifier, size expression, and index type qualifiers.
///
/// By default, performs semantic analysis when building the array type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildVariableArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange);
/// Build a new dependent-sized array type given the element type,
/// size modifier, size expression, and index type qualifiers.
///
/// By default, performs semantic analysis when building the array type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildDependentSizedArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange);
/// Build a new vector type given the element type and
/// number of elements.
///
/// By default, performs semantic analysis when building the vector type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildVectorType(QualType ElementType, unsigned NumElements,
VectorType::VectorKind VecKind);
/// Build a new potentially dependently-sized vector type
/// given the element type and number of elements.
///
/// By default, performs semantic analysis when building the vector type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildDependentVectorType(QualType ElementType, Expr *SizeExpr,
SourceLocation AttributeLoc,
VectorType::VectorKind);
/// Build a new extended vector type given the element type and
/// number of elements.
///
/// By default, performs semantic analysis when building the vector type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildExtVectorType(QualType ElementType, unsigned NumElements,
SourceLocation AttributeLoc);
/// Build a new potentially dependently-sized extended vector type
/// given the element type and number of elements.
///
/// By default, performs semantic analysis when building the vector type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildDependentSizedExtVectorType(QualType ElementType,
Expr *SizeExpr,
SourceLocation AttributeLoc);
/// Build a new matrix type given the element type and dimensions.
QualType RebuildConstantMatrixType(QualType ElementType, unsigned NumRows,
unsigned NumColumns);
/// Build a new matrix type given the type and dependently-defined
/// dimensions.
QualType RebuildDependentSizedMatrixType(QualType ElementType, Expr *RowExpr,
Expr *ColumnExpr,
SourceLocation AttributeLoc);
/// Build a new DependentAddressSpaceType or return the pointee
/// type variable with the correct address space (retrieved from
/// AddrSpaceExpr) applied to it. The former will be returned in cases
/// where the address space remains dependent.
///
/// By default, performs semantic analysis when building the type with address
/// space applied. Subclasses may override this routine to provide different
/// behavior.
QualType RebuildDependentAddressSpaceType(QualType PointeeType,
Expr *AddrSpaceExpr,
SourceLocation AttributeLoc);
/// Build a new function type.
///
/// By default, performs semantic analysis when building the function type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildFunctionProtoType(QualType T,
MutableArrayRef<QualType> ParamTypes,
const FunctionProtoType::ExtProtoInfo &EPI);
/// Build a new unprototyped function type.
QualType RebuildFunctionNoProtoType(QualType ResultType);
/// Rebuild an unresolved typename type, given the decl that
/// the UnresolvedUsingTypenameDecl was transformed to.
QualType RebuildUnresolvedUsingType(SourceLocation NameLoc, Decl *D);
/// Build a new typedef type.
QualType RebuildTypedefType(TypedefNameDecl *Typedef) {
return SemaRef.Context.getTypeDeclType(Typedef);
}
/// Build a new MacroQualified type.
QualType RebuildMacroQualifiedType(QualType T,
const IdentifierInfo *MacroII) {
return SemaRef.Context.getMacroQualifiedType(T, MacroII);
}
/// Build a new class/struct/union type.
QualType RebuildRecordType(RecordDecl *Record) {
return SemaRef.Context.getTypeDeclType(Record);
}
/// Build a new Enum type.
QualType RebuildEnumType(EnumDecl *Enum) {
return SemaRef.Context.getTypeDeclType(Enum);
}
/// Build a new typeof(expr) type.
///
/// By default, performs semantic analysis when building the typeof type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildTypeOfExprType(Expr *Underlying, SourceLocation Loc);
/// Build a new typeof(type) type.
///
/// By default, builds a new TypeOfType with the given underlying type.
QualType RebuildTypeOfType(QualType Underlying);
/// Build a new unary transform type.
QualType RebuildUnaryTransformType(QualType BaseType,
UnaryTransformType::UTTKind UKind,
SourceLocation Loc);
/// Build a new C++11 decltype type.
///
/// By default, performs semantic analysis when building the decltype type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildDecltypeType(Expr *Underlying, SourceLocation Loc);
/// Build a new C++11 auto type.
///
/// By default, builds a new AutoType with the given deduced type.
QualType RebuildAutoType(QualType Deduced, AutoTypeKeyword Keyword,
ConceptDecl *TypeConstraintConcept,
ArrayRef<TemplateArgument> TypeConstraintArgs) {
// Note, IsDependent is always false here: we implicitly convert an 'auto'
// which has been deduced to a dependent type into an undeduced 'auto', so
// that we'll retry deduction after the transformation.
return SemaRef.Context.getAutoType(Deduced, Keyword,
/*IsDependent*/ false, /*IsPack=*/false,
TypeConstraintConcept,
TypeConstraintArgs);
}
/// By default, builds a new DeducedTemplateSpecializationType with the given
/// deduced type.
QualType RebuildDeducedTemplateSpecializationType(TemplateName Template,
QualType Deduced) {
return SemaRef.Context.getDeducedTemplateSpecializationType(
Template, Deduced, /*IsDependent*/ false);
}
/// Build a new template specialization type.
///
/// By default, performs semantic analysis when building the template
/// specialization type. Subclasses may override this routine to provide
/// different behavior.
QualType RebuildTemplateSpecializationType(TemplateName Template,
SourceLocation TemplateLoc,
TemplateArgumentListInfo &Args);
/// Build a new parenthesized type.
///
/// By default, builds a new ParenType type from the inner type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildParenType(QualType InnerType) {
return SemaRef.BuildParenType(InnerType);
}
/// Build a new qualified name type.
///
/// By default, builds a new ElaboratedType type from the keyword,
/// the nested-name-specifier and the named type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildElaboratedType(SourceLocation KeywordLoc,
ElaboratedTypeKeyword Keyword,
NestedNameSpecifierLoc QualifierLoc,
QualType Named) {
return SemaRef.Context.getElaboratedType(Keyword,
QualifierLoc.getNestedNameSpecifier(),
Named);
}
/// Build a new typename type that refers to a template-id.
///
/// By default, builds a new DependentNameType type from the
/// nested-name-specifier and the given type. Subclasses may override
/// this routine to provide different behavior.
QualType RebuildDependentTemplateSpecializationType(
ElaboratedTypeKeyword Keyword,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKWLoc,
const IdentifierInfo *Name,
SourceLocation NameLoc,
TemplateArgumentListInfo &Args,
bool AllowInjectedClassName) {
// Rebuild the template name.
// TODO: avoid TemplateName abstraction
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
TemplateName InstName = getDerived().RebuildTemplateName(
SS, TemplateKWLoc, *Name, NameLoc, QualType(), nullptr,
AllowInjectedClassName);
if (InstName.isNull())
return QualType();
// If it's still dependent, make a dependent specialization.
if (InstName.getAsDependentTemplateName())
return SemaRef.Context.getDependentTemplateSpecializationType(Keyword,
QualifierLoc.getNestedNameSpecifier(),
Name,
Args);
// Otherwise, make an elaborated type wrapping a non-dependent
// specialization.
QualType T =
getDerived().RebuildTemplateSpecializationType(InstName, NameLoc, Args);
if (T.isNull()) return QualType();
if (Keyword == ETK_None && QualifierLoc.getNestedNameSpecifier() == nullptr)
return T;
return SemaRef.Context.getElaboratedType(Keyword,
QualifierLoc.getNestedNameSpecifier(),
T);
}
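// A worked example of the branches above, for a hypothetical input: when
// rebuilding 'typename T::template Inner<int>', a still-dependent template
// name takes the DependentTemplateSpecializationType branch; once T has been
// substituted with a concrete class, the non-dependent branch builds
// Inner<int> and wraps it in an ElaboratedType unless there is no keyword
// and no qualifier left.
//
//   template <typename T> struct Use {
//     typename T::template Inner<int> Member;  // rebuilt through here
//   };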
/// Build a new typename type that refers to an identifier.
///
/// By default, performs semantic analysis when building the typename type
/// (or elaborated type). Subclasses may override this routine to provide
/// different behavior.
QualType RebuildDependentNameType(ElaboratedTypeKeyword Keyword,
SourceLocation KeywordLoc,
NestedNameSpecifierLoc QualifierLoc,
const IdentifierInfo *Id,
SourceLocation IdLoc,
bool DeducedTSTContext) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
if (QualifierLoc.getNestedNameSpecifier()->isDependent()) {
// If the name is still dependent, just build a new dependent name type.
if (!SemaRef.computeDeclContext(SS))
return SemaRef.Context.getDependentNameType(Keyword,
QualifierLoc.getNestedNameSpecifier(),
Id);
}
if (Keyword == ETK_None || Keyword == ETK_Typename) {
return SemaRef.CheckTypenameType(Keyword, KeywordLoc, QualifierLoc,
*Id, IdLoc, DeducedTSTContext);
}
TagTypeKind Kind = TypeWithKeyword::getTagTypeKindForKeyword(Keyword);
// We had a dependent elaborated-type-specifier that has been transformed
// into a non-dependent elaborated-type-specifier. Find the tag we're
// referring to.
LookupResult Result(SemaRef, Id, IdLoc, Sema::LookupTagName);
DeclContext *DC = SemaRef.computeDeclContext(SS, false);
if (!DC)
return QualType();
if (SemaRef.RequireCompleteDeclContext(SS, DC))
return QualType();
TagDecl *Tag = nullptr;
SemaRef.LookupQualifiedName(Result, DC);
switch (Result.getResultKind()) {
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
break;
case LookupResult::Found:
Tag = Result.getAsSingle<TagDecl>();
break;
case LookupResult::FoundOverloaded:
case LookupResult::FoundUnresolvedValue:
llvm_unreachable("Tag lookup cannot find non-tags");
case LookupResult::Ambiguous:
// Let the LookupResult structure handle ambiguities.
return QualType();
}
if (!Tag) {
// Check where the name exists but isn't a tag type and use that to emit
// better diagnostics.
LookupResult Result(SemaRef, Id, IdLoc, Sema::LookupTagName);
SemaRef.LookupQualifiedName(Result, DC);
switch (Result.getResultKind()) {
case LookupResult::Found:
case LookupResult::FoundOverloaded:
case LookupResult::FoundUnresolvedValue: {
NamedDecl *SomeDecl = Result.getRepresentativeDecl();
Sema::NonTagKind NTK = SemaRef.getNonTagTypeDeclKind(SomeDecl, Kind);
SemaRef.Diag(IdLoc, diag::err_tag_reference_non_tag) << SomeDecl
<< NTK << Kind;
SemaRef.Diag(SomeDecl->getLocation(), diag::note_declared_at);
break;
}
default:
SemaRef.Diag(IdLoc, diag::err_not_tag_in_scope)
<< Kind << Id << DC << QualifierLoc.getSourceRange();
break;
}
return QualType();
}
if (!SemaRef.isAcceptableTagRedeclaration(Tag, Kind, /*isDefinition*/false,
IdLoc, Id)) {
SemaRef.Diag(KeywordLoc, diag::err_use_with_wrong_tag) << Id;
SemaRef.Diag(Tag->getLocation(), diag::note_previous_use);
return QualType();
}
// Build the elaborated-type-specifier type.
QualType T = SemaRef.Context.getTypeDeclType(Tag);
return SemaRef.Context.getElaboratedType(Keyword,
QualifierLoc.getNestedNameSpecifier(),
T);
}
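// A worked example of the lookup above, for a hypothetical input: given
// 'struct T::Node *Head;' inside a template, once T is substituted with a
// concrete class the elaborated-type-specifier is no longer dependent, so
// tag lookup must find a struct named 'Node' in T; a non-tag result triggers
// err_tag_reference_non_tag and a missing name triggers err_not_tag_in_scope,
// as emitted above.
//
//   template <typename T> struct List {
//     struct T::Node *Head;  // rebuilt through RebuildDependentNameType
//   };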
/// Build a new pack expansion type.
///
/// By default, builds a new PackExpansionType type from the given pattern.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildPackExpansionType(QualType Pattern,
SourceRange PatternRange,
SourceLocation EllipsisLoc,
Optional<unsigned> NumExpansions) {
return getSema().CheckPackExpansion(Pattern, PatternRange, EllipsisLoc,
NumExpansions);
}
/// Build a new atomic type given its value type.
///
/// By default, performs semantic analysis when building the atomic type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildAtomicType(QualType ValueType, SourceLocation KWLoc);
/// Build a new pipe type given its value type.
QualType RebuildPipeType(QualType ValueType, SourceLocation KWLoc,
bool isReadPipe);
/// Build a new extended integer type given its signedness and bit width.
QualType RebuildExtIntType(bool IsUnsigned, unsigned NumBits,
SourceLocation Loc);
/// Build a new dependent extended integer type given its signedness and a
/// dependent bit-width expression.
QualType RebuildDependentExtIntType(bool IsUnsigned, Expr *NumBitsExpr,
SourceLocation Loc);
/// Build a new template name given a nested name specifier, a flag
/// indicating whether the "template" keyword was provided, and the template
/// that the template name refers to.
///
/// By default, builds the new template name directly. Subclasses may override
/// this routine to provide different behavior.
TemplateName RebuildTemplateName(CXXScopeSpec &SS,
bool TemplateKW,
TemplateDecl *Template);
/// Build a new template name given a nested name specifier and the
/// name that is referred to as a template.
///
/// By default, performs semantic analysis to determine whether the name can
/// be resolved to a specific template, then builds the appropriate kind of
/// template name. Subclasses may override this routine to provide different
/// behavior.
TemplateName RebuildTemplateName(CXXScopeSpec &SS,
SourceLocation TemplateKWLoc,
const IdentifierInfo &Name,
SourceLocation NameLoc, QualType ObjectType,
NamedDecl *FirstQualifierInScope,
bool AllowInjectedClassName);
/// Build a new template name given a nested name specifier and the
/// overloaded operator name that is referred to as a template.
///
/// By default, performs semantic analysis to determine whether the name can
/// be resolved to a specific template, then builds the appropriate kind of
/// template name. Subclasses may override this routine to provide different
/// behavior.
TemplateName RebuildTemplateName(CXXScopeSpec &SS,
SourceLocation TemplateKWLoc,
OverloadedOperatorKind Operator,
SourceLocation NameLoc, QualType ObjectType,
bool AllowInjectedClassName);
/// Build a new template name given a template template parameter pack
/// and the argument pack that it has been substituted with.
///
/// By default, builds the template name directly as a substituted template
/// template parameter pack. Subclasses may override this routine to provide
/// different behavior.
TemplateName RebuildTemplateName(TemplateTemplateParmDecl *Param,
const TemplateArgument &ArgPack) {
return getSema().Context.getSubstTemplateTemplateParmPack(Param, ArgPack);
}
/// Build a new compound statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCompoundStmt(SourceLocation LBraceLoc,
MultiStmtArg Statements,
SourceLocation RBraceLoc,
bool IsStmtExpr) {
return getSema().ActOnCompoundStmt(LBraceLoc, RBraceLoc, Statements,
IsStmtExpr);
}
/// Build a new case statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCaseStmt(SourceLocation CaseLoc,
Expr *LHS,
SourceLocation EllipsisLoc,
Expr *RHS,
SourceLocation ColonLoc) {
return getSema().ActOnCaseStmt(CaseLoc, LHS, EllipsisLoc, RHS,
ColonLoc);
}
/// Attach the body to a new case statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCaseStmtBody(Stmt *S, Stmt *Body) {
getSema().ActOnCaseStmtBody(S, Body);
return S;
}
/// Build a new default statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildDefaultStmt(SourceLocation DefaultLoc,
SourceLocation ColonLoc,
Stmt *SubStmt) {
return getSema().ActOnDefaultStmt(DefaultLoc, ColonLoc, SubStmt,
/*CurScope=*/nullptr);
}
/// Build a new label statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildLabelStmt(SourceLocation IdentLoc, LabelDecl *L,
SourceLocation ColonLoc, Stmt *SubStmt) {
return SemaRef.ActOnLabelStmt(IdentLoc, L, ColonLoc, SubStmt);
}
/// Build a new attributed statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildAttributedStmt(SourceLocation AttrLoc,
ArrayRef<const Attr *> Attrs,
Stmt *SubStmt) {
return SemaRef.BuildAttributedStmt(AttrLoc, Attrs, SubStmt);
}
/// Build a new "if" statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildIfStmt(SourceLocation IfLoc, bool IsConstexpr,
SourceLocation LParenLoc, Sema::ConditionResult Cond,
SourceLocation RParenLoc, Stmt *Init, Stmt *Then,
SourceLocation ElseLoc, Stmt *Else) {
return getSema().ActOnIfStmt(IfLoc, IsConstexpr, LParenLoc, Init, Cond,
RParenLoc, Then, ElseLoc, Else);
}
/// Start building a new switch statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildSwitchStmtStart(SourceLocation SwitchLoc,
SourceLocation LParenLoc, Stmt *Init,
Sema::ConditionResult Cond,
SourceLocation RParenLoc) {
return getSema().ActOnStartOfSwitchStmt(SwitchLoc, LParenLoc, Init, Cond,
RParenLoc);
}
/// Attach the body to the switch statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildSwitchStmtBody(SourceLocation SwitchLoc,
Stmt *Switch, Stmt *Body) {
return getSema().ActOnFinishSwitchStmt(SwitchLoc, Switch, Body);
}
/// Build a new while statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildWhileStmt(SourceLocation WhileLoc, SourceLocation LParenLoc,
Sema::ConditionResult Cond,
SourceLocation RParenLoc, Stmt *Body) {
return getSema().ActOnWhileStmt(WhileLoc, LParenLoc, Cond, RParenLoc, Body);
}
/// Build a new do-while statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildDoStmt(SourceLocation DoLoc, Stmt *Body,
SourceLocation WhileLoc, SourceLocation LParenLoc,
Expr *Cond, SourceLocation RParenLoc) {
return getSema().ActOnDoStmt(DoLoc, Body, WhileLoc, LParenLoc,
Cond, RParenLoc);
}
/// Build a new for statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildForStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
Stmt *Init, Sema::ConditionResult Cond,
Sema::FullExprArg Inc, SourceLocation RParenLoc,
Stmt *Body) {
return getSema().ActOnForStmt(ForLoc, LParenLoc, Init, Cond,
Inc, RParenLoc, Body);
}
/// Build a new goto statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildGotoStmt(SourceLocation GotoLoc, SourceLocation LabelLoc,
LabelDecl *Label) {
return getSema().ActOnGotoStmt(GotoLoc, LabelLoc, Label);
}
/// Build a new indirect goto statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildIndirectGotoStmt(SourceLocation GotoLoc,
SourceLocation StarLoc,
Expr *Target) {
return getSema().ActOnIndirectGotoStmt(GotoLoc, StarLoc, Target);
}
/// Build a new return statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildReturnStmt(SourceLocation ReturnLoc, Expr *Result) {
return getSema().BuildReturnStmt(ReturnLoc, Result);
}
/// Build a new declaration statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildDeclStmt(MutableArrayRef<Decl *> Decls,
SourceLocation StartLoc, SourceLocation EndLoc) {
Sema::DeclGroupPtrTy DG = getSema().BuildDeclaratorGroup(Decls);
return getSema().ActOnDeclStmt(DG, StartLoc, EndLoc);
}
/// Build a new inline asm statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
bool IsVolatile, unsigned NumOutputs,
unsigned NumInputs, IdentifierInfo **Names,
MultiExprArg Constraints, MultiExprArg Exprs,
Expr *AsmString, MultiExprArg Clobbers,
unsigned NumLabels,
SourceLocation RParenLoc) {
return getSema().ActOnGCCAsmStmt(AsmLoc, IsSimple, IsVolatile, NumOutputs,
NumInputs, Names, Constraints, Exprs,
AsmString, Clobbers, NumLabels, RParenLoc);
}
/// Build a new MS style inline asm statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildMSAsmStmt(SourceLocation AsmLoc, SourceLocation LBraceLoc,
ArrayRef<Token> AsmToks,
StringRef AsmString,
unsigned NumOutputs, unsigned NumInputs,
ArrayRef<StringRef> Constraints,
ArrayRef<StringRef> Clobbers,
ArrayRef<Expr*> Exprs,
SourceLocation EndLoc) {
return getSema().ActOnMSAsmStmt(AsmLoc, LBraceLoc, AsmToks, AsmString,
NumOutputs, NumInputs,
Constraints, Clobbers, Exprs, EndLoc);
}
/// Build a new co_return statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCoreturnStmt(SourceLocation CoreturnLoc, Expr *Result,
bool IsImplicit) {
return getSema().BuildCoreturnStmt(CoreturnLoc, Result, IsImplicit);
}
/// Build a new co_await expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCoawaitExpr(SourceLocation CoawaitLoc, Expr *Result,
bool IsImplicit) {
return getSema().BuildResolvedCoawaitExpr(CoawaitLoc, Result, IsImplicit);
}
/// Build a new co_await expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildDependentCoawaitExpr(SourceLocation CoawaitLoc,
Expr *Result,
UnresolvedLookupExpr *Lookup) {
return getSema().BuildUnresolvedCoawaitExpr(CoawaitLoc, Result, Lookup);
}
/// Build a new co_yield expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCoyieldExpr(SourceLocation CoyieldLoc, Expr *Result) {
return getSema().BuildCoyieldExpr(CoyieldLoc, Result);
}
/// Build a new coroutine body statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCoroutineBodyStmt(CoroutineBodyStmt::CtorArgs Args) {
return getSema().BuildCoroutineBodyStmt(Args);
}
/// Build a new Objective-C \@try statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAtTryStmt(SourceLocation AtLoc,
Stmt *TryBody,
MultiStmtArg CatchStmts,
Stmt *Finally) {
return getSema().ActOnObjCAtTryStmt(AtLoc, TryBody, CatchStmts,
Finally);
}
/// Rebuild an Objective-C exception declaration.
///
/// By default, performs semantic analysis to build the new declaration.
/// Subclasses may override this routine to provide different behavior.
VarDecl *RebuildObjCExceptionDecl(VarDecl *ExceptionDecl,
TypeSourceInfo *TInfo, QualType T) {
return getSema().BuildObjCExceptionDecl(TInfo, T,
ExceptionDecl->getInnerLocStart(),
ExceptionDecl->getLocation(),
ExceptionDecl->getIdentifier());
}
/// Build a new Objective-C \@catch statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAtCatchStmt(SourceLocation AtLoc,
SourceLocation RParenLoc,
VarDecl *Var,
Stmt *Body) {
return getSema().ActOnObjCAtCatchStmt(AtLoc, RParenLoc,
Var, Body);
}
/// Build a new Objective-C \@finally statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAtFinallyStmt(SourceLocation AtLoc,
Stmt *Body) {
return getSema().ActOnObjCAtFinallyStmt(AtLoc, Body);
}
/// Build a new Objective-C \@throw statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAtThrowStmt(SourceLocation AtLoc,
Expr *Operand) {
return getSema().BuildObjCAtThrowStmt(AtLoc, Operand);
}
/// Build a new OpenMP Canonical loop.
///
/// Ensures that the outermost loop in \p LoopStmt is wrapped by an
/// OMPCanonicalLoop.
StmtResult RebuildOMPCanonicalLoop(Stmt *LoopStmt) {
return getSema().ActOnOpenMPCanonicalLoop(LoopStmt);
}
/// Build a new OpenMP executable directive.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildOMPExecutableDirective(OpenMPDirectiveKind Kind,
DeclarationNameInfo DirName,
OpenMPDirectiveKind CancelRegion,
ArrayRef<OMPClause *> Clauses,
Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPExecutableDirective(
Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc);
}
/// Build a new OpenMP 'if' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPIfClause(OpenMPDirectiveKind NameModifier,
Expr *Condition, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation NameModifierLoc,
SourceLocation ColonLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPIfClause(NameModifier, Condition, StartLoc,
LParenLoc, NameModifierLoc, ColonLoc,
EndLoc);
}
/// Build a new OpenMP 'final' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPFinalClause(Expr *Condition, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPFinalClause(Condition, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'num_threads' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNumThreadsClause(Expr *NumThreads,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNumThreadsClause(NumThreads, StartLoc,
LParenLoc, EndLoc);
}
/// Build a new OpenMP 'safelen' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPSafelenClause(Expr *Len, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPSafelenClause(Len, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'simdlen' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPSimdlenClause(Expr *Len, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPSimdlenClause(Len, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'sizes' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPSizesClause(ArrayRef<Expr *> Sizes,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPSizesClause(Sizes, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'full' clause.
OMPClause *RebuildOMPFullClause(SourceLocation StartLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPFullClause(StartLoc, EndLoc);
}
/// Build a new OpenMP 'partial' clause.
OMPClause *RebuildOMPPartialClause(Expr *Factor, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPPartialClause(Factor, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'allocator' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPAllocatorClause(Expr *A, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPAllocatorClause(A, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'collapse' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPCollapseClause(Expr *Num, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPCollapseClause(Num, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'default' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDefaultClause(DefaultKind Kind, SourceLocation KindKwLoc,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDefaultClause(Kind, KindKwLoc,
StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'proc_bind' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPProcBindClause(ProcBindKind Kind,
SourceLocation KindKwLoc,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPProcBindClause(Kind, KindKwLoc,
StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'schedule' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPScheduleClause(
OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
OpenMPScheduleClauseKind Kind, Expr *ChunkSize, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation M1Loc, SourceLocation M2Loc,
SourceLocation KindLoc, SourceLocation CommaLoc, SourceLocation EndLoc) {
return getSema().ActOnOpenMPScheduleClause(
M1, M2, Kind, ChunkSize, StartLoc, LParenLoc, M1Loc, M2Loc, KindLoc,
CommaLoc, EndLoc);
}
/// Build a new OpenMP 'ordered' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPOrderedClause(SourceLocation StartLoc,
SourceLocation EndLoc,
SourceLocation LParenLoc, Expr *Num) {
return getSema().ActOnOpenMPOrderedClause(StartLoc, EndLoc, LParenLoc, Num);
}
/// Build a new OpenMP 'private' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPPrivateClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPPrivateClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'firstprivate' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPFirstprivateClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPFirstprivateClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'lastprivate' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPLastprivateClause(ArrayRef<Expr *> VarList,
OpenMPLastprivateModifier LPKind,
SourceLocation LPKindLoc,
SourceLocation ColonLoc,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPLastprivateClause(
VarList, LPKind, LPKindLoc, ColonLoc, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'shared' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPSharedClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPSharedClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'reduction' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPReductionClause(
ArrayRef<Expr *> VarList, OpenMPReductionClauseModifier Modifier,
SourceLocation StartLoc, SourceLocation LParenLoc,
SourceLocation ModifierLoc, SourceLocation ColonLoc,
SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec,
const DeclarationNameInfo &ReductionId,
ArrayRef<Expr *> UnresolvedReductions) {
return getSema().ActOnOpenMPReductionClause(
VarList, Modifier, StartLoc, LParenLoc, ModifierLoc, ColonLoc, EndLoc,
ReductionIdScopeSpec, ReductionId, UnresolvedReductions);
}
/// Build a new OpenMP 'task_reduction' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPTaskReductionClause(
ArrayRef<Expr *> VarList, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc,
CXXScopeSpec &ReductionIdScopeSpec,
const DeclarationNameInfo &ReductionId,
ArrayRef<Expr *> UnresolvedReductions) {
return getSema().ActOnOpenMPTaskReductionClause(
VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec,
ReductionId, UnresolvedReductions);
}
/// Build a new OpenMP 'in_reduction' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *
RebuildOMPInReductionClause(ArrayRef<Expr *> VarList, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation ColonLoc,
SourceLocation EndLoc,
CXXScopeSpec &ReductionIdScopeSpec,
const DeclarationNameInfo &ReductionId,
ArrayRef<Expr *> UnresolvedReductions) {
return getSema().ActOnOpenMPInReductionClause(
VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec,
ReductionId, UnresolvedReductions);
}
/// Build a new OpenMP 'linear' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPLinearClause(ArrayRef<Expr *> VarList, Expr *Step,
SourceLocation StartLoc,
SourceLocation LParenLoc,
OpenMPLinearClauseKind Modifier,
SourceLocation ModifierLoc,
SourceLocation ColonLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPLinearClause(VarList, Step, StartLoc, LParenLoc,
Modifier, ModifierLoc, ColonLoc,
EndLoc);
}
/// Build a new OpenMP 'aligned' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPAlignedClause(ArrayRef<Expr *> VarList, Expr *Alignment,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation ColonLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPAlignedClause(VarList, Alignment, StartLoc,
LParenLoc, ColonLoc, EndLoc);
}
/// Build a new OpenMP 'copyin' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPCopyinClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPCopyinClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'copyprivate' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPCopyprivateClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPCopyprivateClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'flush' pseudo clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPFlushClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPFlushClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'depobj' pseudo clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDepobjClause(Expr *Depobj, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDepobjClause(Depobj, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'depend' pseudo clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *
RebuildOMPDependClause(Expr *DepModifier, OpenMPDependClauseKind DepKind,
SourceLocation DepLoc, SourceLocation ColonLoc,
ArrayRef<Expr *> VarList, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation EndLoc) {
return getSema().ActOnOpenMPDependClause(DepModifier, DepKind, DepLoc,
ColonLoc, VarList, StartLoc,
LParenLoc, EndLoc);
}
/// Build a new OpenMP 'device' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDeviceClause(OpenMPDeviceClauseModifier Modifier,
Expr *Device, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation ModifierLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDeviceClause(Modifier, Device, StartLoc,
LParenLoc, ModifierLoc, EndLoc);
}
/// Build a new OpenMP 'map' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPMapClause(
ArrayRef<OpenMPMapModifierKind> MapTypeModifiers,
ArrayRef<SourceLocation> MapTypeModifiersLoc,
CXXScopeSpec MapperIdScopeSpec, DeclarationNameInfo MapperId,
OpenMPMapClauseKind MapType, bool IsMapTypeImplicit,
SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VarList,
const OMPVarListLocTy &Locs, ArrayRef<Expr *> UnresolvedMappers) {
return getSema().ActOnOpenMPMapClause(MapTypeModifiers, MapTypeModifiersLoc,
MapperIdScopeSpec, MapperId, MapType,
IsMapTypeImplicit, MapLoc, ColonLoc,
VarList, Locs, UnresolvedMappers);
}
/// Build a new OpenMP 'allocate' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPAllocateClause(Expr *Allocate, ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation ColonLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPAllocateClause(Allocate, VarList, StartLoc,
LParenLoc, ColonLoc, EndLoc);
}
/// Build a new OpenMP 'num_teams' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNumTeamsClause(NumTeams, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'thread_limit' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPThreadLimitClause(Expr *ThreadLimit,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPThreadLimitClause(ThreadLimit, StartLoc,
LParenLoc, EndLoc);
}
/// Build a new OpenMP 'priority' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPPriorityClause(Expr *Priority, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPPriorityClause(Priority, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'grainsize' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPGrainsizeClause(Expr *Grainsize, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPGrainsizeClause(Grainsize, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'num_tasks' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNumTasksClause(Expr *NumTasks, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNumTasksClause(NumTasks, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'hint' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPHintClause(Expr *Hint, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPHintClause(Hint, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'detach' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDetachClause(Expr *Evt, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDetachClause(Evt, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'dist_schedule' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *
RebuildOMPDistScheduleClause(OpenMPDistScheduleClauseKind Kind,
Expr *ChunkSize, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation KindLoc,
SourceLocation CommaLoc, SourceLocation EndLoc) {
return getSema().ActOnOpenMPDistScheduleClause(
Kind, ChunkSize, StartLoc, LParenLoc, KindLoc, CommaLoc, EndLoc);
}
/// Build a new OpenMP 'to' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *
RebuildOMPToClause(ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
ArrayRef<SourceLocation> MotionModifiersLoc,
CXXScopeSpec &MapperIdScopeSpec,
DeclarationNameInfo &MapperId, SourceLocation ColonLoc,
ArrayRef<Expr *> VarList, const OMPVarListLocTy &Locs,
ArrayRef<Expr *> UnresolvedMappers) {
return getSema().ActOnOpenMPToClause(MotionModifiers, MotionModifiersLoc,
MapperIdScopeSpec, MapperId, ColonLoc,
VarList, Locs, UnresolvedMappers);
}
/// Build a new OpenMP 'from' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *
RebuildOMPFromClause(ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
ArrayRef<SourceLocation> MotionModifiersLoc,
CXXScopeSpec &MapperIdScopeSpec,
DeclarationNameInfo &MapperId, SourceLocation ColonLoc,
ArrayRef<Expr *> VarList, const OMPVarListLocTy &Locs,
ArrayRef<Expr *> UnresolvedMappers) {
return getSema().ActOnOpenMPFromClause(
MotionModifiers, MotionModifiersLoc, MapperIdScopeSpec, MapperId,
ColonLoc, VarList, Locs, UnresolvedMappers);
}
/// Build a new OpenMP 'use_device_ptr' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPUseDevicePtrClause(ArrayRef<Expr *> VarList,
const OMPVarListLocTy &Locs) {
return getSema().ActOnOpenMPUseDevicePtrClause(VarList, Locs);
}
/// Build a new OpenMP 'use_device_addr' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPUseDeviceAddrClause(ArrayRef<Expr *> VarList,
const OMPVarListLocTy &Locs) {
return getSema().ActOnOpenMPUseDeviceAddrClause(VarList, Locs);
}
/// Build a new OpenMP 'is_device_ptr' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPIsDevicePtrClause(ArrayRef<Expr *> VarList,
const OMPVarListLocTy &Locs) {
return getSema().ActOnOpenMPIsDevicePtrClause(VarList, Locs);
}
/// Build a new OpenMP 'defaultmap' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDefaultmapClause(OpenMPDefaultmapClauseModifier M,
OpenMPDefaultmapClauseKind Kind,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation MLoc,
SourceLocation KindLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDefaultmapClause(M, Kind, StartLoc, LParenLoc,
MLoc, KindLoc, EndLoc);
}
/// Build a new OpenMP 'nontemporal' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNontemporalClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNontemporalClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'inclusive' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPInclusiveClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPInclusiveClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'exclusive' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPExclusiveClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPExclusiveClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'uses_allocators' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPUsesAllocatorsClause(
ArrayRef<Sema::UsesAllocatorsData> Data, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation EndLoc) {
return getSema().ActOnOpenMPUsesAllocatorClause(StartLoc, LParenLoc, EndLoc,
Data);
}
/// Build a new OpenMP 'affinity' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPAffinityClause(SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation ColonLoc,
SourceLocation EndLoc, Expr *Modifier,
ArrayRef<Expr *> Locators) {
return getSema().ActOnOpenMPAffinityClause(StartLoc, LParenLoc, ColonLoc,
EndLoc, Modifier, Locators);
}
/// Build a new OpenMP 'order' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPOrderClause(OpenMPOrderClauseKind Kind,
SourceLocation KindKwLoc,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPOrderClause(Kind, KindKwLoc, StartLoc,
LParenLoc, EndLoc);
}
/// Build a new OpenMP 'init' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPInitClause(Expr *InteropVar, ArrayRef<Expr *> PrefExprs,
bool IsTarget, bool IsTargetSync,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation VarLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPInitClause(InteropVar, PrefExprs, IsTarget,
IsTargetSync, StartLoc, LParenLoc,
VarLoc, EndLoc);
}
/// Build a new OpenMP 'use' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPUseClause(Expr *InteropVar, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation VarLoc, SourceLocation EndLoc) {
return getSema().ActOnOpenMPUseClause(InteropVar, StartLoc, LParenLoc,
VarLoc, EndLoc);
}
/// Build a new OpenMP 'destroy' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDestroyClause(Expr *InteropVar, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation VarLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDestroyClause(InteropVar, StartLoc, LParenLoc,
VarLoc, EndLoc);
}
/// Build a new OpenMP 'novariants' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNovariantsClause(Expr *Condition,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNovariantsClause(Condition, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'nocontext' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNocontextClause(Expr *Condition, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNocontextClause(Condition, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'filter' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPFilterClause(Expr *ThreadID, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPFilterClause(ThreadID, StartLoc, LParenLoc,
EndLoc);
}
/// Rebuild the operand to an Objective-C \@synchronized statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCAtSynchronizedOperand(SourceLocation atLoc,
Expr *object) {
return getSema().ActOnObjCAtSynchronizedOperand(atLoc, object);
}
/// Build a new Objective-C \@synchronized statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAtSynchronizedStmt(SourceLocation AtLoc,
Expr *Object, Stmt *Body) {
return getSema().ActOnObjCAtSynchronizedStmt(AtLoc, Object, Body);
}
/// Build a new Objective-C \@autoreleasepool statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAutoreleasePoolStmt(SourceLocation AtLoc,
Stmt *Body) {
return getSema().ActOnObjCAutoreleasePoolStmt(AtLoc, Body);
}
/// Build a new Objective-C fast enumeration statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCForCollectionStmt(SourceLocation ForLoc,
Stmt *Element,
Expr *Collection,
SourceLocation RParenLoc,
Stmt *Body) {
StmtResult ForEachStmt = getSema().ActOnObjCForCollectionStmt(ForLoc,
Element,
Collection,
RParenLoc);
if (ForEachStmt.isInvalid())
return StmtError();
return getSema().FinishObjCForCollectionStmt(ForEachStmt.get(), Body);
}
/// Build a new C++ exception declaration.
///
/// By default, performs semantic analysis to build the new declaration.
/// Subclasses may override this routine to provide different behavior.
VarDecl *RebuildExceptionDecl(VarDecl *ExceptionDecl,
TypeSourceInfo *Declarator,
SourceLocation StartLoc,
SourceLocation IdLoc,
IdentifierInfo *Id) {
VarDecl *Var = getSema().BuildExceptionDeclaration(nullptr, Declarator,
StartLoc, IdLoc, Id);
if (Var)
getSema().CurContext->addDecl(Var);
return Var;
}
/// Build a new C++ catch statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCXXCatchStmt(SourceLocation CatchLoc,
VarDecl *ExceptionDecl,
Stmt *Handler) {
return Owned(new (getSema().Context) CXXCatchStmt(CatchLoc, ExceptionDecl,
Handler));
}
/// Build a new C++ try statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCXXTryStmt(SourceLocation TryLoc, Stmt *TryBlock,
ArrayRef<Stmt *> Handlers) {
return getSema().ActOnCXXTryBlock(TryLoc, TryBlock, Handlers);
}
/// Build a new C++11 range-based for statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCXXForRangeStmt(SourceLocation ForLoc,
SourceLocation CoawaitLoc, Stmt *Init,
SourceLocation ColonLoc, Stmt *Range,
Stmt *Begin, Stmt *End, Expr *Cond,
Expr *Inc, Stmt *LoopVar,
SourceLocation RParenLoc) {
// If we've just learned that the range is actually an Objective-C
// collection, treat this as an Objective-C fast enumeration loop.
if (DeclStmt *RangeStmt = dyn_cast<DeclStmt>(Range)) {
if (RangeStmt->isSingleDecl()) {
if (VarDecl *RangeVar = dyn_cast<VarDecl>(RangeStmt->getSingleDecl())) {
if (RangeVar->isInvalidDecl())
return StmtError();
Expr *RangeExpr = RangeVar->getInit();
if (!RangeExpr->isTypeDependent() &&
RangeExpr->getType()->isObjCObjectPointerType()) {
// FIXME: Support init-statements in Objective-C++20 ranged for
// statement.
if (Init) {
return SemaRef.Diag(Init->getBeginLoc(),
diag::err_objc_for_range_init_stmt)
<< Init->getSourceRange();
}
return getSema().ActOnObjCForCollectionStmt(ForLoc, LoopVar,
RangeExpr, RParenLoc);
}
}
}
}
return getSema().BuildCXXForRangeStmt(ForLoc, CoawaitLoc, Init, ColonLoc,
Range, Begin, End, Cond, Inc, LoopVar,
RParenLoc, Sema::BFRK_Rebuild);
}
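/// As a rough illustration, the Objective-C fast-enumeration rewrite above
/// fires when the transformed range turns out to have Objective-C object
/// pointer type, e.g.:
/// \code
///   NSArray *Items = ...;
///   for (id Obj : Items) { /* rebuilt as an ObjCForCollectionStmt */ }
/// \endcode
/// A C++20 init-statement before the range is rejected on this path via the
/// diagnostic emitted above.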
/// Build a new Microsoft __if_exists or __if_not_exists statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildMSDependentExistsStmt(SourceLocation KeywordLoc,
bool IsIfExists,
NestedNameSpecifierLoc QualifierLoc,
DeclarationNameInfo NameInfo,
Stmt *Nested) {
return getSema().BuildMSDependentExistsStmt(KeywordLoc, IsIfExists,
QualifierLoc, NameInfo, Nested);
}
/// Attach body to a C++11 range-based for statement.
///
/// By default, performs semantic analysis to finish the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult FinishCXXForRangeStmt(Stmt *ForRange, Stmt *Body) {
return getSema().FinishCXXForRangeStmt(ForRange, Body);
}
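/// Build a new SEH '__try' statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.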
StmtResult RebuildSEHTryStmt(bool IsCXXTry, SourceLocation TryLoc,
Stmt *TryBlock, Stmt *Handler) {
return getSema().ActOnSEHTryBlock(IsCXXTry, TryLoc, TryBlock, Handler);
}
StmtResult RebuildSEHExceptStmt(SourceLocation Loc, Expr *FilterExpr,
Stmt *Block) {
return getSema().ActOnSEHExceptBlock(Loc, FilterExpr, Block);
}
StmtResult RebuildSEHFinallyStmt(SourceLocation Loc, Stmt *Block) {
return SEHFinallyStmt::Create(getSema().getASTContext(), Loc, Block);
}
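/// Build a new SYCL unique stable name expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.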
ExprResult RebuildSYCLUniqueStableNameExpr(SourceLocation OpLoc,
SourceLocation LParen,
SourceLocation RParen,
TypeSourceInfo *TSI) {
return getSema().BuildSYCLUniqueStableNameExpr(OpLoc, LParen, RParen, TSI);
}
/// Build a new predefined expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildPredefinedExpr(SourceLocation Loc,
PredefinedExpr::IdentKind IK) {
return getSema().BuildPredefinedExpr(Loc, IK);
}
/// Build a new expression that references a declaration.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildDeclarationNameExpr(const CXXScopeSpec &SS,
LookupResult &R,
bool RequiresADL) {
return getSema().BuildDeclarationNameExpr(SS, R, RequiresADL);
}
/// Build a new expression that references a declaration.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildDeclRefExpr(NestedNameSpecifierLoc QualifierLoc,
ValueDecl *VD,
const DeclarationNameInfo &NameInfo,
NamedDecl *Found,
TemplateArgumentListInfo *TemplateArgs) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
return getSema().BuildDeclarationNameExpr(SS, NameInfo, VD, Found,
TemplateArgs);
}
/// Build a new expression in parentheses.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildParenExpr(Expr *SubExpr, SourceLocation LParen,
SourceLocation RParen) {
return getSema().ActOnParenExpr(LParen, RParen, SubExpr);
}
/// Build a new pseudo-destructor expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXPseudoDestructorExpr(Expr *Base,
SourceLocation OperatorLoc,
bool isArrow,
CXXScopeSpec &SS,
TypeSourceInfo *ScopeType,
SourceLocation CCLoc,
SourceLocation TildeLoc,
PseudoDestructorTypeStorage Destroyed);
/// Build a new unary operator expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildUnaryOperator(SourceLocation OpLoc,
UnaryOperatorKind Opc,
Expr *SubExpr) {
return getSema().BuildUnaryOp(/*Scope=*/nullptr, OpLoc, Opc, SubExpr);
}
/// Build a new builtin offsetof expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildOffsetOfExpr(SourceLocation OperatorLoc,
TypeSourceInfo *Type,
ArrayRef<Sema::OffsetOfComponent> Components,
SourceLocation RParenLoc) {
return getSema().BuildBuiltinOffsetOf(OperatorLoc, Type, Components,
RParenLoc);
}
/// Build a new sizeof, alignof or vec_step expression with a
/// type argument.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildUnaryExprOrTypeTrait(TypeSourceInfo *TInfo,
SourceLocation OpLoc,
UnaryExprOrTypeTrait ExprKind,
SourceRange R) {
return getSema().CreateUnaryExprOrTypeTraitExpr(TInfo, OpLoc, ExprKind, R);
}
/// Build a new sizeof, alignof or vec_step expression with an
/// expression argument.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildUnaryExprOrTypeTrait(Expr *SubExpr, SourceLocation OpLoc,
UnaryExprOrTypeTrait ExprKind,
SourceRange R) {
ExprResult Result
= getSema().CreateUnaryExprOrTypeTraitExpr(SubExpr, OpLoc, ExprKind);
if (Result.isInvalid())
return ExprError();
return Result;
}
/// Build a new array subscript expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildArraySubscriptExpr(Expr *LHS,
SourceLocation LBracketLoc,
Expr *RHS,
SourceLocation RBracketLoc) {
return getSema().ActOnArraySubscriptExpr(/*Scope=*/nullptr, LHS,
LBracketLoc, RHS,
RBracketLoc);
}
/// Build a new matrix subscript expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildMatrixSubscriptExpr(Expr *Base, Expr *RowIdx,
Expr *ColumnIdx,
SourceLocation RBracketLoc) {
return getSema().CreateBuiltinMatrixSubscriptExpr(Base, RowIdx, ColumnIdx,
RBracketLoc);
}
/// Build a new array section expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildOMPArraySectionExpr(Expr *Base, SourceLocation LBracketLoc,
Expr *LowerBound,
SourceLocation ColonLocFirst,
SourceLocation ColonLocSecond,
Expr *Length, Expr *Stride,
SourceLocation RBracketLoc) {
return getSema().ActOnOMPArraySectionExpr(Base, LBracketLoc, LowerBound,
ColonLocFirst, ColonLocSecond,
Length, Stride, RBracketLoc);
}
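/// As a rough illustration, the OpenMP array sections rebuilt here appear in
/// clauses such as 'map':
/// \code
///   #pragma omp target map(tofrom : A[0:N])   // [lower-bound : length]
///   // OpenMP 5.0 additionally allows a stride, e.g. A[0:N:2].
/// \endcode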
/// Build a new array shaping expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildOMPArrayShapingExpr(Expr *Base, SourceLocation LParenLoc,
SourceLocation RParenLoc,
ArrayRef<Expr *> Dims,
ArrayRef<SourceRange> BracketsRanges) {
return getSema().ActOnOMPArrayShapingExpr(Base, LParenLoc, RParenLoc, Dims,
BracketsRanges);
}
/// Build a new iterator expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildOMPIteratorExpr(
SourceLocation IteratorKwLoc, SourceLocation LLoc, SourceLocation RLoc,
ArrayRef<Sema::OMPIteratorData> Data) {
return getSema().ActOnOMPIteratorExpr(/*Scope=*/nullptr, IteratorKwLoc,
LLoc, RLoc, Data);
}
/// Build a new call expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCallExpr(Expr *Callee, SourceLocation LParenLoc,
MultiExprArg Args,
SourceLocation RParenLoc,
Expr *ExecConfig = nullptr) {
return getSema().ActOnCallExpr(
/*Scope=*/nullptr, Callee, LParenLoc, Args, RParenLoc, ExecConfig);
}
/// Build a new member access expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildMemberExpr(Expr *Base, SourceLocation OpLoc,
bool isArrow,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKWLoc,
const DeclarationNameInfo &MemberNameInfo,
ValueDecl *Member,
NamedDecl *FoundDecl,
const TemplateArgumentListInfo *ExplicitTemplateArgs,
NamedDecl *FirstQualifierInScope) {
ExprResult BaseResult = getSema().PerformMemberExprBaseConversion(Base,
isArrow);
if (!Member->getDeclName()) {
// We have a reference to an unnamed field. This is always the
// base of an anonymous struct/union member access, i.e. the
// field is always of record type.
assert(Member->getType()->isRecordType() &&
"unnamed member not of record type?");
BaseResult =
getSema().PerformObjectMemberConversion(BaseResult.get(),
QualifierLoc.getNestedNameSpecifier(),
FoundDecl, Member);
if (BaseResult.isInvalid())
return ExprError();
Base = BaseResult.get();
CXXScopeSpec EmptySS;
return getSema().BuildFieldReferenceExpr(
Base, isArrow, OpLoc, EmptySS, cast<FieldDecl>(Member),
DeclAccessPair::make(FoundDecl, FoundDecl->getAccess()), MemberNameInfo);
}
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
Base = BaseResult.get();
QualType BaseType = Base->getType();
if (isArrow && !BaseType->isPointerType())
return ExprError();
// FIXME: this involves duplicating earlier analysis in a lot of
// cases; we should avoid this when possible.
LookupResult R(getSema(), MemberNameInfo, Sema::LookupMemberName);
R.addDecl(FoundDecl);
R.resolveKind();
return getSema().BuildMemberReferenceExpr(Base, BaseType, OpLoc, isArrow,
SS, TemplateKWLoc,
FirstQualifierInScope,
R, ExplicitTemplateArgs,
/*S*/nullptr);
}
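/// As a rough illustration, the unnamed-member path above handles accesses
/// that go through an anonymous struct/union member, e.g.:
/// \code
///   struct S { union { int X; float F; }; };  // unnamed union field
///   int get(S &V) { return V.X; }             // base of '.X' is that field
/// \endcode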
/// Build a new binary operator expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildBinaryOperator(SourceLocation OpLoc,
BinaryOperatorKind Opc,
Expr *LHS, Expr *RHS) {
return getSema().BuildBinOp(/*Scope=*/nullptr, OpLoc, Opc, LHS, RHS);
}
/// Build a new rewritten operator expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXRewrittenBinaryOperator(
SourceLocation OpLoc, BinaryOperatorKind Opcode,
const UnresolvedSetImpl &UnqualLookups, Expr *LHS, Expr *RHS) {
return getSema().CreateOverloadedBinOp(OpLoc, Opcode, UnqualLookups, LHS,
RHS, /*RequiresADL*/false);
}
/// Build a new conditional operator expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildConditionalOperator(Expr *Cond,
SourceLocation QuestionLoc,
Expr *LHS,
SourceLocation ColonLoc,
Expr *RHS) {
return getSema().ActOnConditionalOp(QuestionLoc, ColonLoc, Cond,
LHS, RHS);
}
/// Build a new C-style cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCStyleCastExpr(SourceLocation LParenLoc,
TypeSourceInfo *TInfo,
SourceLocation RParenLoc,
Expr *SubExpr) {
return getSema().BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc,
SubExpr);
}
/// Build a new compound literal expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCompoundLiteralExpr(SourceLocation LParenLoc,
TypeSourceInfo *TInfo,
SourceLocation RParenLoc,
Expr *Init) {
return getSema().BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc,
Init);
}
/// Build a new extended vector element access expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildExtVectorElementExpr(Expr *Base,
SourceLocation OpLoc,
SourceLocation AccessorLoc,
IdentifierInfo &Accessor) {
CXXScopeSpec SS;
DeclarationNameInfo NameInfo(&Accessor, AccessorLoc);
return getSema().BuildMemberReferenceExpr(Base, Base->getType(),
OpLoc, /*IsArrow*/ false,
SS, SourceLocation(),
/*FirstQualifierInScope*/ nullptr,
NameInfo,
/* TemplateArgs */ nullptr,
/*S*/ nullptr);
}
/// Build a new initializer list expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildInitList(SourceLocation LBraceLoc,
MultiExprArg Inits,
SourceLocation RBraceLoc) {
return SemaRef.BuildInitList(LBraceLoc, Inits, RBraceLoc);
}
/// Build a new designated initializer expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildDesignatedInitExpr(Designation &Desig,
MultiExprArg ArrayExprs,
SourceLocation EqualOrColonLoc,
bool GNUSyntax,
Expr *Init) {
ExprResult Result
= SemaRef.ActOnDesignatedInitializer(Desig, EqualOrColonLoc, GNUSyntax,
Init);
if (Result.isInvalid())
return ExprError();
return Result;
}
/// Build a new value-initialized expression.
///
/// By default, builds the implicit value initialization without performing
/// any semantic analysis. Subclasses may override this routine to provide
/// different behavior.
ExprResult RebuildImplicitValueInitExpr(QualType T) {
return new (SemaRef.Context) ImplicitValueInitExpr(T);
}
/// Build a new \c va_arg expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildVAArgExpr(SourceLocation BuiltinLoc,
Expr *SubExpr, TypeSourceInfo *TInfo,
SourceLocation RParenLoc) {
return getSema().BuildVAArgExpr(BuiltinLoc,
SubExpr, TInfo,
RParenLoc);
}
/// Build a new expression list in parentheses.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildParenListExpr(SourceLocation LParenLoc,
MultiExprArg SubExprs,
SourceLocation RParenLoc) {
return getSema().ActOnParenListExpr(LParenLoc, RParenLoc, SubExprs);
}
/// Build a new address-of-label expression.
///
/// By default, performs semantic analysis, using the name of the label
/// rather than attempting to map the label statement itself.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildAddrLabelExpr(SourceLocation AmpAmpLoc,
SourceLocation LabelLoc, LabelDecl *Label) {
return getSema().ActOnAddrLabel(AmpAmpLoc, LabelLoc, Label);
}
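/// As a reminder, this rebuilds the GNU '&&label' extension, by label name
/// rather than by mapping the LabelStmt itself, e.g.:
/// \code
///   void *Target = &&done;
///   goto *Target;
/// done:;
/// \endcode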
/// Build a new GNU statement expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildStmtExpr(SourceLocation LParenLoc, Stmt *SubStmt,
SourceLocation RParenLoc, unsigned TemplateDepth) {
return getSema().BuildStmtExpr(LParenLoc, SubStmt, RParenLoc,
TemplateDepth);
}
/// Build a new __builtin_choose_expr expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildChooseExpr(SourceLocation BuiltinLoc,
Expr *Cond, Expr *LHS, Expr *RHS,
SourceLocation RParenLoc) {
return SemaRef.ActOnChooseExpr(BuiltinLoc,
Cond, LHS, RHS,
RParenLoc);
}
/// Build a new generic selection expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildGenericSelectionExpr(SourceLocation KeyLoc,
SourceLocation DefaultLoc,
SourceLocation RParenLoc,
Expr *ControllingExpr,
ArrayRef<TypeSourceInfo *> Types,
ArrayRef<Expr *> Exprs) {
return getSema().CreateGenericSelectionExpr(KeyLoc, DefaultLoc, RParenLoc,
ControllingExpr, Types, Exprs);
}
/// Build a new overloaded operator call expression.
///
/// By default, performs semantic analysis to build the new expression.
/// The semantic analysis provides the behavior of template instantiation,
/// copying with transformations that turn what looks like an overloaded
/// operator call into a use of a builtin operator, performing
/// argument-dependent lookup, etc. Subclasses may override this routine to
/// provide different behavior.
ExprResult RebuildCXXOperatorCallExpr(OverloadedOperatorKind Op,
SourceLocation OpLoc,
Expr *Callee,
Expr *First,
Expr *Second);
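/// As a sketch, this hook is exercised when instantiation resolves a
/// dependent operator expression either to an overload or to a builtin
/// operator:
/// \code
///   template <typename T> auto add(T A, T B) { return A + B; }
///   // add<std::string> rebuilds a CXXOperatorCallExpr (operator+),
///   // add<int> rebuilds a plain builtin BinaryOperator.
/// \endcode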
/// Build a new C++ "named" cast expression, such as static_cast or
/// reinterpret_cast.
///
/// By default, this routine dispatches to one of the more-specific routines
/// for a particular named cast, e.g., RebuildCXXStaticCastExpr().
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXNamedCastExpr(SourceLocation OpLoc,
Stmt::StmtClass Class,
SourceLocation LAngleLoc,
TypeSourceInfo *TInfo,
SourceLocation RAngleLoc,
SourceLocation LParenLoc,
Expr *SubExpr,
SourceLocation RParenLoc) {
switch (Class) {
case Stmt::CXXStaticCastExprClass:
return getDerived().RebuildCXXStaticCastExpr(OpLoc, LAngleLoc, TInfo,
RAngleLoc, LParenLoc,
SubExpr, RParenLoc);
case Stmt::CXXDynamicCastExprClass:
return getDerived().RebuildCXXDynamicCastExpr(OpLoc, LAngleLoc, TInfo,
RAngleLoc, LParenLoc,
SubExpr, RParenLoc);
case Stmt::CXXReinterpretCastExprClass:
return getDerived().RebuildCXXReinterpretCastExpr(OpLoc, LAngleLoc, TInfo,
RAngleLoc, LParenLoc,
SubExpr,
RParenLoc);
case Stmt::CXXConstCastExprClass:
return getDerived().RebuildCXXConstCastExpr(OpLoc, LAngleLoc, TInfo,
RAngleLoc, LParenLoc,
SubExpr, RParenLoc);
case Stmt::CXXAddrspaceCastExprClass:
return getDerived().RebuildCXXAddrspaceCastExpr(
OpLoc, LAngleLoc, TInfo, RAngleLoc, LParenLoc, SubExpr, RParenLoc);
default:
llvm_unreachable("Invalid C++ named cast");
}
}
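/// For reference, the dispatch above covers the named casts
/// \code
///   static_cast<T>(E)  dynamic_cast<T&>(R)  reinterpret_cast<T*>(P)
///   const_cast<T&>(C)  addrspace_cast<T>(Q)   // OpenCL extension
/// \endcode
/// Any other statement class is a programming error (hence the
/// llvm_unreachable).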
/// Build a new C++ static_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXStaticCastExpr(SourceLocation OpLoc,
SourceLocation LAngleLoc,
TypeSourceInfo *TInfo,
SourceLocation RAngleLoc,
SourceLocation LParenLoc,
Expr *SubExpr,
SourceLocation RParenLoc) {
return getSema().BuildCXXNamedCast(OpLoc, tok::kw_static_cast,
TInfo, SubExpr,
SourceRange(LAngleLoc, RAngleLoc),
SourceRange(LParenLoc, RParenLoc));
}
/// Build a new C++ dynamic_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXDynamicCastExpr(SourceLocation OpLoc,
SourceLocation LAngleLoc,
TypeSourceInfo *TInfo,
SourceLocation RAngleLoc,
SourceLocation LParenLoc,
Expr *SubExpr,
SourceLocation RParenLoc) {
return getSema().BuildCXXNamedCast(OpLoc, tok::kw_dynamic_cast,
TInfo, SubExpr,
SourceRange(LAngleLoc, RAngleLoc),
SourceRange(LParenLoc, RParenLoc));
}
/// Build a new C++ reinterpret_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXReinterpretCastExpr(SourceLocation OpLoc,
SourceLocation LAngleLoc,
TypeSourceInfo *TInfo,
SourceLocation RAngleLoc,
SourceLocation LParenLoc,
Expr *SubExpr,
SourceLocation RParenLoc) {
return getSema().BuildCXXNamedCast(OpLoc, tok::kw_reinterpret_cast,
TInfo, SubExpr,
SourceRange(LAngleLoc, RAngleLoc),
SourceRange(LParenLoc, RParenLoc));
}
/// Build a new C++ const_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXConstCastExpr(SourceLocation OpLoc,
SourceLocation LAngleLoc,
TypeSourceInfo *TInfo,
SourceLocation RAngleLoc,
SourceLocation LParenLoc,
Expr *SubExpr,
SourceLocation RParenLoc) {
return getSema().BuildCXXNamedCast(OpLoc, tok::kw_const_cast,
TInfo, SubExpr,
SourceRange(LAngleLoc, RAngleLoc),
SourceRange(LParenLoc, RParenLoc));
}
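/// Build a new C++ addrspace_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.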
ExprResult
RebuildCXXAddrspaceCastExpr(SourceLocation OpLoc, SourceLocation LAngleLoc,
TypeSourceInfo *TInfo, SourceLocation RAngleLoc,
SourceLocation LParenLoc, Expr *SubExpr,
SourceLocation RParenLoc) {
return getSema().BuildCXXNamedCast(
OpLoc, tok::kw_addrspace_cast, TInfo, SubExpr,
SourceRange(LAngleLoc, RAngleLoc), SourceRange(LParenLoc, RParenLoc));
}
/// Build a new C++ functional-style cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXFunctionalCastExpr(TypeSourceInfo *TInfo,
SourceLocation LParenLoc,
Expr *Sub,
SourceLocation RParenLoc,
bool ListInitialization) {
return getSema().BuildCXXTypeConstructExpr(TInfo, LParenLoc,
MultiExprArg(&Sub, 1), RParenLoc,
ListInitialization);
}
/// Build a new C++ __builtin_bit_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildBuiltinBitCastExpr(SourceLocation KWLoc,
TypeSourceInfo *TSI, Expr *Sub,
SourceLocation RParenLoc) {
return getSema().BuildBuiltinBitCastExpr(KWLoc, TSI, Sub, RParenLoc);
}
/// Build a new C++ typeid(type) expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXTypeidExpr(QualType TypeInfoType,
SourceLocation TypeidLoc,
TypeSourceInfo *Operand,
SourceLocation RParenLoc) {
return getSema().BuildCXXTypeId(TypeInfoType, TypeidLoc, Operand,
RParenLoc);
}
/// Build a new C++ typeid(expr) expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXTypeidExpr(QualType TypeInfoType,
SourceLocation TypeidLoc,
Expr *Operand,
SourceLocation RParenLoc) {
return getSema().BuildCXXTypeId(TypeInfoType, TypeidLoc, Operand,
RParenLoc);
}
/// Build a new C++ __uuidof(type) expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXUuidofExpr(QualType Type, SourceLocation TypeidLoc,
TypeSourceInfo *Operand,
SourceLocation RParenLoc) {
return getSema().BuildCXXUuidof(Type, TypeidLoc, Operand, RParenLoc);
}
/// Build a new C++ __uuidof(expr) expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXUuidofExpr(QualType Type, SourceLocation TypeidLoc,
Expr *Operand, SourceLocation RParenLoc) {
return getSema().BuildCXXUuidof(Type, TypeidLoc, Operand, RParenLoc);
}
/// Build a new C++ "this" expression.
///
/// By default, builds a new "this" expression without performing any
/// semantic analysis. Subclasses may override this routine to provide
/// different behavior.
ExprResult RebuildCXXThisExpr(SourceLocation ThisLoc,
QualType ThisType,
bool isImplicit) {
return getSema().BuildCXXThisExpr(ThisLoc, ThisType, isImplicit);
}
/// Build a new C++ throw expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXThrowExpr(SourceLocation ThrowLoc, Expr *Sub,
bool IsThrownVariableInScope) {
return getSema().BuildCXXThrow(ThrowLoc, Sub, IsThrownVariableInScope);
}
/// Build a new C++ default-argument expression.
///
/// By default, builds a new default-argument expression, which does not
/// require any semantic analysis. Subclasses may override this routine to
/// provide different behavior.
ExprResult RebuildCXXDefaultArgExpr(SourceLocation Loc, ParmVarDecl *Param) {
return CXXDefaultArgExpr::Create(getSema().Context, Loc, Param,
getSema().CurContext);
}
/// Build a new C++11 default-initialization expression.
///
/// By default, builds a new default field initialization expression, which
/// does not require any semantic analysis. Subclasses may override this
/// routine to provide different behavior.
ExprResult RebuildCXXDefaultInitExpr(SourceLocation Loc,
FieldDecl *Field) {
return CXXDefaultInitExpr::Create(getSema().Context, Loc, Field,
getSema().CurContext);
}
/// Build a new C++ zero-initialization expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXScalarValueInitExpr(TypeSourceInfo *TSInfo,
SourceLocation LParenLoc,
SourceLocation RParenLoc) {
return getSema().BuildCXXTypeConstructExpr(
TSInfo, LParenLoc, None, RParenLoc, /*ListInitialization=*/false);
}
/// Build a new C++ "new" expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXNewExpr(SourceLocation StartLoc,
bool UseGlobal,
SourceLocation PlacementLParen,
MultiExprArg PlacementArgs,
SourceLocation PlacementRParen,
SourceRange TypeIdParens,
QualType AllocatedType,
TypeSourceInfo *AllocatedTypeInfo,
Optional<Expr *> ArraySize,
SourceRange DirectInitRange,
Expr *Initializer) {
return getSema().BuildCXXNew(StartLoc, UseGlobal,
PlacementLParen,
PlacementArgs,
PlacementRParen,
TypeIdParens,
AllocatedType,
AllocatedTypeInfo,
ArraySize,
DirectInitRange,
Initializer);
}
/// Build a new C++ "delete" expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXDeleteExpr(SourceLocation StartLoc,
bool IsGlobalDelete,
bool IsArrayForm,
Expr *Operand) {
return getSema().ActOnCXXDelete(StartLoc, IsGlobalDelete, IsArrayForm,
Operand);
}
/// Build a new type trait expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildTypeTrait(TypeTrait Trait,
SourceLocation StartLoc,
ArrayRef<TypeSourceInfo *> Args,
SourceLocation RParenLoc) {
return getSema().BuildTypeTrait(Trait, StartLoc, Args, RParenLoc);
}
/// Build a new array type trait expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildArrayTypeTrait(ArrayTypeTrait Trait,
SourceLocation StartLoc,
TypeSourceInfo *TSInfo,
Expr *DimExpr,
SourceLocation RParenLoc) {
return getSema().BuildArrayTypeTrait(Trait, StartLoc, TSInfo, DimExpr, RParenLoc);
}
/// Build a new expression trait expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildExpressionTrait(ExpressionTrait Trait,
SourceLocation StartLoc,
Expr *Queried,
SourceLocation RParenLoc) {
return getSema().BuildExpressionTrait(Trait, StartLoc, Queried, RParenLoc);
}
/// Build a new (previously unresolved) declaration reference
/// expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildDependentScopeDeclRefExpr(
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKWLoc,
const DeclarationNameInfo &NameInfo,
const TemplateArgumentListInfo *TemplateArgs,
bool IsAddressOfOperand,
TypeSourceInfo **RecoveryTSI) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
if (TemplateArgs || TemplateKWLoc.isValid())
return getSema().BuildQualifiedTemplateIdExpr(SS, TemplateKWLoc, NameInfo,
TemplateArgs);
return getSema().BuildQualifiedDeclarationNameExpr(
SS, NameInfo, IsAddressOfOperand, /*S*/nullptr, RecoveryTSI);
}
/// Build a new template-id expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildTemplateIdExpr(const CXXScopeSpec &SS,
SourceLocation TemplateKWLoc,
LookupResult &R,
bool RequiresADL,
const TemplateArgumentListInfo *TemplateArgs) {
return getSema().BuildTemplateIdExpr(SS, TemplateKWLoc, R, RequiresADL,
TemplateArgs);
}
/// Build a new object-construction expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXConstructExpr(QualType T,
SourceLocation Loc,
CXXConstructorDecl *Constructor,
bool IsElidable,
MultiExprArg Args,
bool HadMultipleCandidates,
bool ListInitialization,
bool StdInitListInitialization,
bool RequiresZeroInit,
CXXConstructExpr::ConstructionKind ConstructKind,
SourceRange ParenRange) {
// Reconstruct the constructor we originally found, which might be
// different if this is a call to an inherited constructor.
CXXConstructorDecl *FoundCtor = Constructor;
if (Constructor->isInheritingConstructor())
FoundCtor = Constructor->getInheritedConstructor().getConstructor();
SmallVector<Expr *, 8> ConvertedArgs;
if (getSema().CompleteConstructorCall(FoundCtor, T, Args, Loc,
ConvertedArgs))
return ExprError();
return getSema().BuildCXXConstructExpr(Loc, T, Constructor,
IsElidable,
ConvertedArgs,
HadMultipleCandidates,
ListInitialization,
StdInitListInitialization,
RequiresZeroInit, ConstructKind,
ParenRange);
}
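/// As a brief illustration of the inherited-constructor case handled above:
/// \code
///   struct Base { Base(int); };
///   struct Derived : Base { using Base::Base; };
///   Derived D(42);  // Constructor is the inheriting constructor; argument
///                   // checking is completed against Base::Base(int).
/// \endcode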
/// Build a new implicit construction via inherited constructor
/// expression.
ExprResult RebuildCXXInheritedCtorInitExpr(QualType T, SourceLocation Loc,
CXXConstructorDecl *Constructor,
bool ConstructsVBase,
bool InheritedFromVBase) {
return new (getSema().Context) CXXInheritedCtorInitExpr(
Loc, T, Constructor, ConstructsVBase, InheritedFromVBase);
}
/// Build a new C++ temporary-object construction expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXTemporaryObjectExpr(TypeSourceInfo *TSInfo,
SourceLocation LParenOrBraceLoc,
MultiExprArg Args,
SourceLocation RParenOrBraceLoc,
bool ListInitialization) {
return getSema().BuildCXXTypeConstructExpr(
TSInfo, LParenOrBraceLoc, Args, RParenOrBraceLoc, ListInitialization);
}
/// Build a new type-dependent (unresolved) object-construction expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXUnresolvedConstructExpr(TypeSourceInfo *TSInfo,
SourceLocation LParenLoc,
MultiExprArg Args,
SourceLocation RParenLoc,
bool ListInitialization) {
return getSema().BuildCXXTypeConstructExpr(TSInfo, LParenLoc, Args,
RParenLoc, ListInitialization);
}
/// Build a new member reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXDependentScopeMemberExpr(Expr *BaseE,
QualType BaseType,
bool IsArrow,
SourceLocation OperatorLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKWLoc,
NamedDecl *FirstQualifierInScope,
const DeclarationNameInfo &MemberNameInfo,
const TemplateArgumentListInfo *TemplateArgs) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
return SemaRef.BuildMemberReferenceExpr(BaseE, BaseType,
OperatorLoc, IsArrow,
SS, TemplateKWLoc,
FirstQualifierInScope,
MemberNameInfo,
TemplateArgs, /*S*/nullptr);
}
/// Build a new member reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildUnresolvedMemberExpr(Expr *BaseE, QualType BaseType,
SourceLocation OperatorLoc,
bool IsArrow,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKWLoc,
NamedDecl *FirstQualifierInScope,
LookupResult &R,
const TemplateArgumentListInfo *TemplateArgs) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
return SemaRef.BuildMemberReferenceExpr(BaseE, BaseType,
OperatorLoc, IsArrow,
SS, TemplateKWLoc,
FirstQualifierInScope,
R, TemplateArgs, /*S*/nullptr);
}
/// Build a new noexcept expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXNoexceptExpr(SourceRange Range, Expr *Arg) {
return SemaRef.BuildCXXNoexceptExpr(Range.getBegin(), Arg, Range.getEnd());
}
/// Build a new expression to compute the length of a parameter pack.
ExprResult RebuildSizeOfPackExpr(SourceLocation OperatorLoc,
NamedDecl *Pack,
SourceLocation PackLoc,
SourceLocation RParenLoc,
Optional<unsigned> Length,
ArrayRef<TemplateArgument> PartialArgs) {
return SizeOfPackExpr::Create(SemaRef.Context, OperatorLoc, Pack, PackLoc,
RParenLoc, Length, PartialArgs);
}
/// Build a new expression representing a call to a source location
/// builtin.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildSourceLocExpr(SourceLocExpr::IdentKind Kind,
SourceLocation BuiltinLoc,
SourceLocation RPLoc,
DeclContext *ParentContext) {
return getSema().BuildSourceLocExpr(Kind, BuiltinLoc, RPLoc, ParentContext);
}
/// Build a new concept specialization expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildConceptSpecializationExpr(NestedNameSpecifierLoc NNS,
SourceLocation TemplateKWLoc, DeclarationNameInfo ConceptNameInfo,
NamedDecl *FoundDecl, ConceptDecl *NamedConcept,
TemplateArgumentListInfo *TALI) {
CXXScopeSpec SS;
SS.Adopt(NNS);
ExprResult Result = getSema().CheckConceptTemplateId(SS, TemplateKWLoc,
ConceptNameInfo,
FoundDecl,
NamedConcept, TALI);
if (Result.isInvalid())
return ExprError();
return Result;
}
/// \brief Build a new requires expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildRequiresExpr(SourceLocation RequiresKWLoc,
RequiresExprBodyDecl *Body,
ArrayRef<ParmVarDecl *> LocalParameters,
ArrayRef<concepts::Requirement *> Requirements,
SourceLocation ClosingBraceLoc) {
return RequiresExpr::Create(SemaRef.Context, RequiresKWLoc, Body,
LocalParameters, Requirements, ClosingBraceLoc);
}
concepts::TypeRequirement *
RebuildTypeRequirement(
concepts::Requirement::SubstitutionDiagnostic *SubstDiag) {
return SemaRef.BuildTypeRequirement(SubstDiag);
}
concepts::TypeRequirement *RebuildTypeRequirement(TypeSourceInfo *T) {
return SemaRef.BuildTypeRequirement(T);
}
concepts::ExprRequirement *
RebuildExprRequirement(
concepts::Requirement::SubstitutionDiagnostic *SubstDiag, bool IsSimple,
SourceLocation NoexceptLoc,
concepts::ExprRequirement::ReturnTypeRequirement Ret) {
return SemaRef.BuildExprRequirement(SubstDiag, IsSimple, NoexceptLoc,
std::move(Ret));
}
concepts::ExprRequirement *
RebuildExprRequirement(Expr *E, bool IsSimple, SourceLocation NoexceptLoc,
concepts::ExprRequirement::ReturnTypeRequirement Ret) {
return SemaRef.BuildExprRequirement(E, IsSimple, NoexceptLoc,
std::move(Ret));
}
concepts::NestedRequirement *
RebuildNestedRequirement(
concepts::Requirement::SubstitutionDiagnostic *SubstDiag) {
return SemaRef.BuildNestedRequirement(SubstDiag);
}
concepts::NestedRequirement *RebuildNestedRequirement(Expr *Constraint) {
return SemaRef.BuildNestedRequirement(Constraint);
}
/// \brief Build a new Objective-C boxed expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) {
return getSema().BuildObjCBoxedExpr(SR, ValueExpr);
}
/// Build a new Objective-C array literal.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCArrayLiteral(SourceRange Range,
Expr **Elements, unsigned NumElements) {
return getSema().BuildObjCArrayLiteral(Range,
MultiExprArg(Elements, NumElements));
}
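/// Build a new Objective-C subscripting reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.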
ExprResult RebuildObjCSubscriptRefExpr(SourceLocation RB,
Expr *Base, Expr *Key,
ObjCMethodDecl *getterMethod,
ObjCMethodDecl *setterMethod) {
return getSema().BuildObjCSubscriptExpression(RB, Base, Key,
getterMethod, setterMethod);
}
/// Build a new Objective-C dictionary literal.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCDictionaryLiteral(SourceRange Range,
MutableArrayRef<ObjCDictionaryElement> Elements) {
return getSema().BuildObjCDictionaryLiteral(Range, Elements);
}
/// Build a new Objective-C \@encode expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCEncodeExpr(SourceLocation AtLoc,
TypeSourceInfo *EncodeTypeInfo,
SourceLocation RParenLoc) {
return SemaRef.BuildObjCEncodeExpression(AtLoc, EncodeTypeInfo, RParenLoc);
}
/// Build a new Objective-C class message.
ExprResult RebuildObjCMessageExpr(TypeSourceInfo *ReceiverTypeInfo,
Selector Sel,
ArrayRef<SourceLocation> SelectorLocs,
ObjCMethodDecl *Method,
SourceLocation LBracLoc,
MultiExprArg Args,
SourceLocation RBracLoc) {
return SemaRef.BuildClassMessage(ReceiverTypeInfo,
ReceiverTypeInfo->getType(),
/*SuperLoc=*/SourceLocation(),
Sel, Method, LBracLoc, SelectorLocs,
RBracLoc, Args);
}
/// Build a new Objective-C instance message.
ExprResult RebuildObjCMessageExpr(Expr *Receiver,
Selector Sel,
ArrayRef<SourceLocation> SelectorLocs,
ObjCMethodDecl *Method,
SourceLocation LBracLoc,
MultiExprArg Args,
SourceLocation RBracLoc) {
return SemaRef.BuildInstanceMessage(Receiver,
Receiver->getType(),
/*SuperLoc=*/SourceLocation(),
Sel, Method, LBracLoc, SelectorLocs,
RBracLoc, Args);
}
/// Build a new Objective-C instance/class message to 'super'.
ExprResult RebuildObjCMessageExpr(SourceLocation SuperLoc,
Selector Sel,
ArrayRef<SourceLocation> SelectorLocs,
QualType SuperType,
ObjCMethodDecl *Method,
SourceLocation LBracLoc,
MultiExprArg Args,
SourceLocation RBracLoc) {
return Method->isInstanceMethod() ? SemaRef.BuildInstanceMessage(nullptr,
SuperType,
SuperLoc,
Sel, Method, LBracLoc, SelectorLocs,
RBracLoc, Args)
: SemaRef.BuildClassMessage(nullptr,
SuperType,
SuperLoc,
Sel, Method, LBracLoc, SelectorLocs,
RBracLoc, Args);
}
/// Build a new Objective-C ivar reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCIvarRefExpr(Expr *BaseArg, ObjCIvarDecl *Ivar,
SourceLocation IvarLoc,
bool IsArrow, bool IsFreeIvar) {
CXXScopeSpec SS;
DeclarationNameInfo NameInfo(Ivar->getDeclName(), IvarLoc);
ExprResult Result = getSema().BuildMemberReferenceExpr(
BaseArg, BaseArg->getType(),
/*FIXME:*/ IvarLoc, IsArrow, SS, SourceLocation(),
/*FirstQualifierInScope=*/nullptr, NameInfo,
/*TemplateArgs=*/nullptr,
/*S=*/nullptr);
if (IsFreeIvar && Result.isUsable())
cast<ObjCIvarRefExpr>(Result.get())->setIsFreeIvar(IsFreeIvar);
return Result;
}
/// Build a new Objective-C property reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCPropertyRefExpr(Expr *BaseArg,
ObjCPropertyDecl *Property,
SourceLocation PropertyLoc) {
CXXScopeSpec SS;
DeclarationNameInfo NameInfo(Property->getDeclName(), PropertyLoc);
return getSema().BuildMemberReferenceExpr(BaseArg, BaseArg->getType(),
/*FIXME:*/PropertyLoc,
/*IsArrow=*/false,
SS, SourceLocation(),
/*FirstQualifierInScope=*/nullptr,
NameInfo,
/*TemplateArgs=*/nullptr,
/*S=*/nullptr);
}
/// Build a new Objective-C property reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCPropertyRefExpr(Expr *Base, QualType T,
ObjCMethodDecl *Getter,
ObjCMethodDecl *Setter,
SourceLocation PropertyLoc) {
// Since these expressions can only be value-dependent, we do not
// need to perform semantic analysis again.
return Owned(
new (getSema().Context) ObjCPropertyRefExpr(Getter, Setter, T,
VK_LValue, OK_ObjCProperty,
PropertyLoc, Base));
}
/// Build a new Objective-C "isa" expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCIsaExpr(Expr *BaseArg, SourceLocation IsaLoc,
SourceLocation OpLoc, bool IsArrow) {
CXXScopeSpec SS;
DeclarationNameInfo NameInfo(&getSema().Context.Idents.get("isa"), IsaLoc);
return getSema().BuildMemberReferenceExpr(BaseArg, BaseArg->getType(),
OpLoc, IsArrow,
SS, SourceLocation(),
/*FirstQualifierInScope=*/nullptr,
NameInfo,
/*TemplateArgs=*/nullptr,
/*S=*/nullptr);
}
/// Build a new shuffle vector expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildShuffleVectorExpr(SourceLocation BuiltinLoc,
MultiExprArg SubExprs,
SourceLocation RParenLoc) {
// Find the declaration for __builtin_shufflevector
const IdentifierInfo &Name
= SemaRef.Context.Idents.get("__builtin_shufflevector");
TranslationUnitDecl *TUDecl = SemaRef.Context.getTranslationUnitDecl();
DeclContext::lookup_result Lookup = TUDecl->lookup(DeclarationName(&Name));
assert(!Lookup.empty() && "No __builtin_shufflevector?");
// Build a reference to the __builtin_shufflevector builtin
FunctionDecl *Builtin = cast<FunctionDecl>(Lookup.front());
Expr *Callee = new (SemaRef.Context)
DeclRefExpr(SemaRef.Context, Builtin, false,
SemaRef.Context.BuiltinFnTy, VK_PRValue, BuiltinLoc);
QualType CalleePtrTy = SemaRef.Context.getPointerType(Builtin->getType());
Callee = SemaRef.ImpCastExprToType(Callee, CalleePtrTy,
CK_BuiltinFnToFnPtr).get();
// Build the CallExpr
ExprResult TheCall = CallExpr::Create(
SemaRef.Context, Callee, SubExprs, Builtin->getCallResultType(),
Expr::getValueKindForType(Builtin->getReturnType()), RParenLoc,
FPOptionsOverride());
// Type-check the __builtin_shufflevector expression.
return SemaRef.SemaBuiltinShuffleVector(cast<CallExpr>(TheCall.get()));
}
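// The sequence above mirrors what Sema normally does for a call to a
// custom-type-checked builtin: __builtin_shufflevector is looked up in the
// translation unit, a DeclRefExpr of the special BuiltinFnTy is decayed to a
// function pointer via CK_BuiltinFnToFnPtr, a CallExpr is created over the
// already-transformed arguments, and SemaBuiltinShuffleVector then re-runs
// the builtin's own type checking, since the builtin's result type depends
// on its argument types.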
/// Build a new convert vector expression.
ExprResult RebuildConvertVectorExpr(SourceLocation BuiltinLoc,
Expr *SrcExpr, TypeSourceInfo *DstTInfo,
SourceLocation RParenLoc) {
return SemaRef.SemaConvertVectorExpr(SrcExpr, DstTInfo,
BuiltinLoc, RParenLoc);
}
/// Build a new template argument pack expansion.
///
/// By default, performs semantic analysis to build a new pack expansion
/// for a template argument. Subclasses may override this routine to provide
/// different behavior.
TemplateArgumentLoc RebuildPackExpansion(TemplateArgumentLoc Pattern,
SourceLocation EllipsisLoc,
Optional<unsigned> NumExpansions) {
switch (Pattern.getArgument().getKind()) {
case TemplateArgument::Expression: {
ExprResult Result
= getSema().CheckPackExpansion(Pattern.getSourceExpression(),
EllipsisLoc, NumExpansions);
if (Result.isInvalid())
return TemplateArgumentLoc();
return TemplateArgumentLoc(Result.get(), Result.get());
}
case TemplateArgument::Template:
return TemplateArgumentLoc(
SemaRef.Context,
TemplateArgument(Pattern.getArgument().getAsTemplate(),
NumExpansions),
Pattern.getTemplateQualifierLoc(), Pattern.getTemplateNameLoc(),
EllipsisLoc);
case TemplateArgument::Null:
case TemplateArgument::Integral:
case TemplateArgument::Declaration:
case TemplateArgument::Pack:
case TemplateArgument::TemplateExpansion:
case TemplateArgument::NullPtr:
llvm_unreachable("Pack expansion pattern has no parameter packs");
case TemplateArgument::Type:
if (TypeSourceInfo *Expansion
= getSema().CheckPackExpansion(Pattern.getTypeSourceInfo(),
EllipsisLoc,
NumExpansions))
return TemplateArgumentLoc(TemplateArgument(Expansion->getType()),
Expansion);
break;
}
return TemplateArgumentLoc();
}
/// Build a new expression pack expansion.
///
/// By default, performs semantic analysis to build a new pack expansion
/// for an expression. Subclasses may override this routine to provide
/// different behavior.
ExprResult RebuildPackExpansion(Expr *Pattern, SourceLocation EllipsisLoc,
Optional<unsigned> NumExpansions) {
return getSema().CheckPackExpansion(Pattern, EllipsisLoc, NumExpansions);
}
/// Build a new C++1z fold-expression.
///
/// By default, performs semantic analysis in order to build a new fold
/// expression.
ExprResult RebuildCXXFoldExpr(UnresolvedLookupExpr *ULE,
SourceLocation LParenLoc, Expr *LHS,
BinaryOperatorKind Operator,
SourceLocation EllipsisLoc, Expr *RHS,
SourceLocation RParenLoc,
Optional<unsigned> NumExpansions) {
return getSema().BuildCXXFoldExpr(ULE, LParenLoc, LHS, Operator,
EllipsisLoc, RHS, RParenLoc,
NumExpansions);
}
/// Build an empty C++1z fold-expression with the given operator.
///
/// By default, produces the fallback value for the fold-expression, or
/// produces an error if there is no fallback value.
ExprResult RebuildEmptyCXXFoldExpr(SourceLocation EllipsisLoc,
BinaryOperatorKind Operator) {
return getSema().BuildEmptyCXXFoldExpr(EllipsisLoc, Operator);
}
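// As an illustration of the fallback values (roughly C++17 [temp.variadic]):
// a unary fold over '&&' collapses to 'true' for an empty pack, over '||' to
// 'false', and over ',' to 'void()'; any other operator makes an empty
// expansion ill-formed, which is when this routine emits the error. E.g.:
//
//   template <typename... Ts> bool all(Ts... ts) { return (ts && ...); }
//   bool b = all();   // empty pack: the fold rebuilds to 'true'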
/// Build a new atomic operation expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildAtomicExpr(SourceLocation BuiltinLoc, MultiExprArg SubExprs,
AtomicExpr::AtomicOp Op,
SourceLocation RParenLoc) {
// Use this for all of the locations, since we don't know the difference
// between the call and the expr at this point.
SourceRange Range{BuiltinLoc, RParenLoc};
return getSema().BuildAtomicExpr(Range, Range, RParenLoc, SubExprs, Op,
Sema::AtomicArgumentOrder::AST);
}
ExprResult RebuildRecoveryExpr(SourceLocation BeginLoc, SourceLocation EndLoc,
ArrayRef<Expr *> SubExprs, QualType Type) {
return getSema().CreateRecoveryExpr(BeginLoc, EndLoc, SubExprs, Type);
}
private:
TypeLoc TransformTypeInObjectScope(TypeLoc TL,
QualType ObjectType,
NamedDecl *FirstQualifierInScope,
CXXScopeSpec &SS);
TypeSourceInfo *TransformTypeInObjectScope(TypeSourceInfo *TSInfo,
QualType ObjectType,
NamedDecl *FirstQualifierInScope,
CXXScopeSpec &SS);
TypeSourceInfo *TransformTSIInObjectScope(TypeLoc TL, QualType ObjectType,
NamedDecl *FirstQualifierInScope,
CXXScopeSpec &SS);
QualType TransformDependentNameType(TypeLocBuilder &TLB,
DependentNameTypeLoc TL,
bool DeducibleTSTContext);
};
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformStmt(Stmt *S, StmtDiscardKind SDK) {
if (!S)
return S;
switch (S->getStmtClass()) {
case Stmt::NoStmtClass: break;
// Transform individual statement nodes
// Pass SDK into statements that can produce a value
#define STMT(Node, Parent) \
case Stmt::Node##Class: return getDerived().Transform##Node(cast<Node>(S));
#define VALUESTMT(Node, Parent) \
case Stmt::Node##Class: \
return getDerived().Transform##Node(cast<Node>(S), SDK);
#define ABSTRACT_STMT(Node)
#define EXPR(Node, Parent)
#include "clang/AST/StmtNodes.inc"
// Transform expressions by calling TransformExpr.
#define STMT(Node, Parent)
#define ABSTRACT_STMT(Stmt)
#define EXPR(Node, Parent) case Stmt::Node##Class:
#include "clang/AST/StmtNodes.inc"
{
ExprResult E = getDerived().TransformExpr(cast<Expr>(S));
if (SDK == SDK_StmtExprResult)
E = getSema().ActOnStmtExprResult(E);
return getSema().ActOnExprStmt(E, SDK == SDK_Discarded);
}
}
return S;
}
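// The dispatch above is generated from the StmtNodes.inc x-macro table. For
// a concrete statement class such as ForStmt, the STMT() entry expands, in
// effect, to
//
//   case Stmt::ForStmtClass:
//     return getDerived().TransformForStmt(cast<ForStmt>(S));
//
// Statements that can carry a value additionally receive the
// StmtDiscardKind through the VALUESTMT() entry, and all expression classes
// funnel into the trailing block so that an expression in statement position
// is transformed as an expression and wrapped back into an expression
// statement (or a statement-expression result) as requested by SDK.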
template<typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPClause(OMPClause *S) {
if (!S)
return S;
switch (S->getClauseKind()) {
default: break;
// Transform individual clause nodes
#define GEN_CLANG_CLAUSE_CLASS
#define CLAUSE_CLASS(Enum, Str, Class) \
case Enum: \
return getDerived().Transform##Class(cast<Class>(S));
#include "llvm/Frontend/OpenMP/OMP.inc"
}
return S;
}
template<typename Derived>
ExprResult TreeTransform<Derived>::TransformExpr(Expr *E) {
if (!E)
return E;
switch (E->getStmtClass()) {
case Stmt::NoStmtClass: break;
#define STMT(Node, Parent) case Stmt::Node##Class: break;
#define ABSTRACT_STMT(Stmt)
#define EXPR(Node, Parent) \
case Stmt::Node##Class: return getDerived().Transform##Node(cast<Node>(E));
#include "clang/AST/StmtNodes.inc"
}
return E;
}
template<typename Derived>
ExprResult TreeTransform<Derived>::TransformInitializer(Expr *Init,
bool NotCopyInit) {
// Initializers are instantiated like expressions, except that various outer
// layers are stripped.
if (!Init)
return Init;
if (auto *FE = dyn_cast<FullExpr>(Init))
Init = FE->getSubExpr();
if (auto *AIL = dyn_cast<ArrayInitLoopExpr>(Init))
Init = AIL->getCommonExpr();
if (MaterializeTemporaryExpr *MTE = dyn_cast<MaterializeTemporaryExpr>(Init))
Init = MTE->getSubExpr();
while (CXXBindTemporaryExpr *Binder = dyn_cast<CXXBindTemporaryExpr>(Init))
Init = Binder->getSubExpr();
if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Init))
Init = ICE->getSubExprAsWritten();
if (CXXStdInitializerListExpr *ILE =
dyn_cast<CXXStdInitializerListExpr>(Init))
return TransformInitializer(ILE->getSubExpr(), NotCopyInit);
// If this is copy-initialization, we only need to reconstruct
// InitListExprs. Other forms of copy-initialization will be a no-op if
// the initializer is already the right type.
CXXConstructExpr *Construct = dyn_cast<CXXConstructExpr>(Init);
if (!NotCopyInit && !(Construct && Construct->isListInitialization()))
return getDerived().TransformExpr(Init);
// Revert value-initialization back to empty parens.
if (CXXScalarValueInitExpr *VIE = dyn_cast<CXXScalarValueInitExpr>(Init)) {
SourceRange Parens = VIE->getSourceRange();
return getDerived().RebuildParenListExpr(Parens.getBegin(), None,
Parens.getEnd());
}
// FIXME: We shouldn't build ImplicitValueInitExprs for direct-initialization.
if (isa<ImplicitValueInitExpr>(Init))
return getDerived().RebuildParenListExpr(SourceLocation(), None,
SourceLocation());
// Revert initialization by constructor back to a parenthesized or braced list
// of expressions. Any other form of initializer can just be reused directly.
if (!Construct || isa<CXXTemporaryObjectExpr>(Construct))
return getDerived().TransformExpr(Init);
// If the initialization implicitly converted an initializer list to a
// std::initializer_list object, unwrap the std::initializer_list too.
if (Construct && Construct->isStdInitListInitialization())
return TransformInitializer(Construct->getArg(0), NotCopyInit);
// Enter a list-init context if this was list initialization.
EnterExpressionEvaluationContext Context(
getSema(), EnterExpressionEvaluationContext::InitList,
Construct->isListInitialization());
SmallVector<Expr*, 8> NewArgs;
bool ArgChanged = false;
if (getDerived().TransformExprs(Construct->getArgs(), Construct->getNumArgs(),
/*IsCall*/true, NewArgs, &ArgChanged))
return ExprError();
// If this was list initialization, revert to syntactic list form.
if (Construct->isListInitialization())
return getDerived().RebuildInitList(Construct->getBeginLoc(), NewArgs,
Construct->getEndLoc());
// Build a ParenListExpr to represent anything else.
SourceRange Parens = Construct->getParenOrBraceRange();
if (Parens.isInvalid()) {
// This was a variable declaration's initialization for which no initializer
// was specified.
assert(NewArgs.empty() &&
"no parens or braces but have direct init with arguments?");
return ExprEmpty();
}
return getDerived().RebuildParenListExpr(Parens.getBegin(), NewArgs,
Parens.getEnd());
}
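// Roughly, the reversal above means that a direct-initializer Sema has
// already analyzed into a CXXConstructExpr, e.g. the stored form of
//
//   X x(a, b);
//
// is rebuilt as its written form: a ParenListExpr holding the transformed
// 'a' and 'b' (or an InitListExpr for braced initialization), so that
// initialization semantics -- including constructor overload resolution --
// are recomputed for the transformed type and arguments.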
template<typename Derived>
bool TreeTransform<Derived>::TransformExprs(Expr *const *Inputs,
unsigned NumInputs,
bool IsCall,
SmallVectorImpl<Expr *> &Outputs,
bool *ArgChanged) {
for (unsigned I = 0; I != NumInputs; ++I) {
// If requested, drop call arguments that need to be dropped.
if (IsCall && getDerived().DropCallArgument(Inputs[I])) {
if (ArgChanged)
*ArgChanged = true;
break;
}
if (PackExpansionExpr *Expansion = dyn_cast<PackExpansionExpr>(Inputs[I])) {
Expr *Pattern = Expansion->getPattern();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
getSema().collectUnexpandedParameterPacks(Pattern, Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions = Expansion->getNumExpansions();
Optional<unsigned> NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(Expansion->getEllipsisLoc(),
Pattern->getSourceRange(),
Unexpanded,
Expand, RetainExpansion,
NumExpansions))
return true;
if (!Expand) {
// The transform has determined that we should perform a simple
// transformation on the pack expansion, producing another pack
// expansion.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
ExprResult OutPattern = getDerived().TransformExpr(Pattern);
if (OutPattern.isInvalid())
return true;
ExprResult Out = getDerived().RebuildPackExpansion(OutPattern.get(),
Expansion->getEllipsisLoc(),
NumExpansions);
if (Out.isInvalid())
return true;
if (ArgChanged)
*ArgChanged = true;
Outputs.push_back(Out.get());
continue;
}
// Record right away that the argument was changed. This needs
// to happen even if the array expands to nothing.
if (ArgChanged) *ArgChanged = true;
// The transform has determined that we should perform an elementwise
// expansion of the pattern. Do so.
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
ExprResult Out = getDerived().TransformExpr(Pattern);
if (Out.isInvalid())
return true;
if (Out.get()->containsUnexpandedParameterPack()) {
Out = getDerived().RebuildPackExpansion(
Out.get(), Expansion->getEllipsisLoc(), OrigNumExpansions);
if (Out.isInvalid())
return true;
}
Outputs.push_back(Out.get());
}
// If we're supposed to retain a pack expansion, do so by temporarily
// forgetting the partially-substituted parameter pack.
if (RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
ExprResult Out = getDerived().TransformExpr(Pattern);
if (Out.isInvalid())
return true;
Out = getDerived().RebuildPackExpansion(
Out.get(), Expansion->getEllipsisLoc(), OrigNumExpansions);
if (Out.isInvalid())
return true;
Outputs.push_back(Out.get());
}
continue;
}
ExprResult Result =
IsCall ? getDerived().TransformInitializer(Inputs[I], /*DirectInit*/false)
: getDerived().TransformExpr(Inputs[I]);
if (Result.isInvalid())
return true;
if (Result.get() != Inputs[I] && ArgChanged)
*ArgChanged = true;
Outputs.push_back(Result.get());
}
return false;
}
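// For example, when transforming a call argument such as
//
//   g(args...)        // 'args' is a function parameter pack
//
// whose pack is known to have three elements, TryExpandParameterPacks
// reports Expand == true with NumExpansions == 3, and the loop above
// transforms the pattern three times with the substitution index set to
// 0, 1 and 2, pushing one transformed argument per element into Outputs.
// When the pack cannot be expanded yet (e.g. during a partial substitution),
// the pattern is transformed once and wrapped back into a PackExpansionExpr
// instead.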
template <typename Derived>
Sema::ConditionResult TreeTransform<Derived>::TransformCondition(
SourceLocation Loc, VarDecl *Var, Expr *Expr, Sema::ConditionKind Kind) {
if (Var) {
VarDecl *ConditionVar = cast_or_null<VarDecl>(
getDerived().TransformDefinition(Var->getLocation(), Var));
if (!ConditionVar)
return Sema::ConditionError();
return getSema().ActOnConditionVariable(ConditionVar, Loc, Kind);
}
if (Expr) {
ExprResult CondExpr = getDerived().TransformExpr(Expr);
if (CondExpr.isInvalid())
return Sema::ConditionError();
return getSema().ActOnCondition(nullptr, Loc, CondExpr.get(), Kind);
}
return Sema::ConditionResult();
}
template <typename Derived>
NestedNameSpecifierLoc TreeTransform<Derived>::TransformNestedNameSpecifierLoc(
NestedNameSpecifierLoc NNS, QualType ObjectType,
NamedDecl *FirstQualifierInScope) {
SmallVector<NestedNameSpecifierLoc, 4> Qualifiers;
for (NestedNameSpecifierLoc Qualifier = NNS; Qualifier;
Qualifier = Qualifier.getPrefix())
Qualifiers.push_back(Qualifier);
CXXScopeSpec SS;
while (!Qualifiers.empty()) {
NestedNameSpecifierLoc Q = Qualifiers.pop_back_val();
NestedNameSpecifier *QNNS = Q.getNestedNameSpecifier();
switch (QNNS->getKind()) {
case NestedNameSpecifier::Identifier: {
Sema::NestedNameSpecInfo IdInfo(QNNS->getAsIdentifier(),
Q.getLocalBeginLoc(), Q.getLocalEndLoc(),
ObjectType);
if (SemaRef.BuildCXXNestedNameSpecifier(/*Scope=*/nullptr, IdInfo, false,
SS, FirstQualifierInScope, false))
return NestedNameSpecifierLoc();
break;
}
case NestedNameSpecifier::Namespace: {
NamespaceDecl *NS =
cast_or_null<NamespaceDecl>(getDerived().TransformDecl(
Q.getLocalBeginLoc(), QNNS->getAsNamespace()));
SS.Extend(SemaRef.Context, NS, Q.getLocalBeginLoc(), Q.getLocalEndLoc());
break;
}
case NestedNameSpecifier::NamespaceAlias: {
NamespaceAliasDecl *Alias =
cast_or_null<NamespaceAliasDecl>(getDerived().TransformDecl(
Q.getLocalBeginLoc(), QNNS->getAsNamespaceAlias()));
SS.Extend(SemaRef.Context, Alias, Q.getLocalBeginLoc(),
Q.getLocalEndLoc());
break;
}
case NestedNameSpecifier::Global:
// There is no meaningful transformation that one could perform on the
// global scope.
SS.MakeGlobal(SemaRef.Context, Q.getBeginLoc());
break;
case NestedNameSpecifier::Super: {
CXXRecordDecl *RD =
cast_or_null<CXXRecordDecl>(getDerived().TransformDecl(
SourceLocation(), QNNS->getAsRecordDecl()));
SS.MakeSuper(SemaRef.Context, RD, Q.getBeginLoc(), Q.getEndLoc());
break;
}
case NestedNameSpecifier::TypeSpecWithTemplate:
case NestedNameSpecifier::TypeSpec: {
TypeLoc TL = TransformTypeInObjectScope(Q.getTypeLoc(), ObjectType,
FirstQualifierInScope, SS);
if (!TL)
return NestedNameSpecifierLoc();
if (TL.getType()->isDependentType() || TL.getType()->isRecordType() ||
(SemaRef.getLangOpts().CPlusPlus11 &&
TL.getType()->isEnumeralType())) {
assert(!TL.getType().hasLocalQualifiers() &&
"Can't get cv-qualifiers here");
if (TL.getType()->isEnumeralType())
SemaRef.Diag(TL.getBeginLoc(),
diag::warn_cxx98_compat_enum_nested_name_spec);
SS.Extend(SemaRef.Context, /*FIXME:*/ SourceLocation(), TL,
Q.getLocalEndLoc());
break;
}
// If the nested-name-specifier is an invalid typedef, don't emit an
// error because a previous error should have already been emitted.
TypedefTypeLoc TTL = TL.getAs<TypedefTypeLoc>();
if (!TTL || !TTL.getTypedefNameDecl()->isInvalidDecl()) {
SemaRef.Diag(TL.getBeginLoc(), diag::err_nested_name_spec_non_tag)
<< TL.getType() << SS.getRange();
}
return NestedNameSpecifierLoc();
}
}
// The qualifier-in-scope and object type only apply to the leftmost entity.
FirstQualifierInScope = nullptr;
ObjectType = QualType();
}
// Don't rebuild the nested-name-specifier if we don't have to.
if (SS.getScopeRep() == NNS.getNestedNameSpecifier() &&
!getDerived().AlwaysRebuild())
return NNS;
// If we can re-use the source-location data from the original
// nested-name-specifier, do so.
if (SS.location_size() == NNS.getDataLength() &&
memcmp(SS.location_data(), NNS.getOpaqueData(), SS.location_size()) == 0)
return NestedNameSpecifierLoc(SS.getScopeRep(), NNS.getOpaqueData());
// Allocate new nested-name-specifier location information.
return SS.getWithLocInContext(SemaRef.Context);
}
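// The prefix walk above effectively rebuilds a qualifier such as
//
//   A::B<T>::type::
//
// from left to right: each prefix is pushed onto 'Qualifiers' (leftmost
// component last) and then popped, so 'A' is handled before 'B<T>', which is
// handled before 'type'. Only the leftmost component may consult the object
// type and first-qualifier-in-scope information, which is why both are
// cleared at the bottom of the loop.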
template<typename Derived>
DeclarationNameInfo
TreeTransform<Derived>
::TransformDeclarationNameInfo(const DeclarationNameInfo &NameInfo) {
DeclarationName Name = NameInfo.getName();
if (!Name)
return DeclarationNameInfo();
switch (Name.getNameKind()) {
case DeclarationName::Identifier:
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
case DeclarationName::CXXOperatorName:
case DeclarationName::CXXLiteralOperatorName:
case DeclarationName::CXXUsingDirective:
return NameInfo;
case DeclarationName::CXXDeductionGuideName: {
TemplateDecl *OldTemplate = Name.getCXXDeductionGuideTemplate();
TemplateDecl *NewTemplate = cast_or_null<TemplateDecl>(
getDerived().TransformDecl(NameInfo.getLoc(), OldTemplate));
if (!NewTemplate)
return DeclarationNameInfo();
DeclarationNameInfo NewNameInfo(NameInfo);
NewNameInfo.setName(
SemaRef.Context.DeclarationNames.getCXXDeductionGuideName(NewTemplate));
return NewNameInfo;
}
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName: {
TypeSourceInfo *NewTInfo;
CanQualType NewCanTy;
if (TypeSourceInfo *OldTInfo = NameInfo.getNamedTypeInfo()) {
NewTInfo = getDerived().TransformType(OldTInfo);
if (!NewTInfo)
return DeclarationNameInfo();
NewCanTy = SemaRef.Context.getCanonicalType(NewTInfo->getType());
}
else {
NewTInfo = nullptr;
TemporaryBase Rebase(*this, NameInfo.getLoc(), Name);
QualType NewT = getDerived().TransformType(Name.getCXXNameType());
if (NewT.isNull())
return DeclarationNameInfo();
NewCanTy = SemaRef.Context.getCanonicalType(NewT);
}
DeclarationName NewName
= SemaRef.Context.DeclarationNames.getCXXSpecialName(Name.getNameKind(),
NewCanTy);
DeclarationNameInfo NewNameInfo(NameInfo);
NewNameInfo.setName(NewName);
NewNameInfo.setNamedTypeInfo(NewTInfo);
return NewNameInfo;
}
}
llvm_unreachable("Unknown name kind.");
}
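// Constructor, destructor and conversion-function names are the interesting
// cases here because the name itself embeds a type: for example, the
// destructor name in
//
//   t.~T();
//
// names the type 'T'. Transforming such a name therefore means transforming
// the embedded type and rebuilding the special name from the canonical form
// of the result (with its TypeSourceInfo, when one was written).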
template<typename Derived>
TemplateName
TreeTransform<Derived>::TransformTemplateName(CXXScopeSpec &SS,
TemplateName Name,
SourceLocation NameLoc,
QualType ObjectType,
NamedDecl *FirstQualifierInScope,
bool AllowInjectedClassName) {
if (QualifiedTemplateName *QTN = Name.getAsQualifiedTemplateName()) {
TemplateDecl *Template = QTN->getTemplateDecl();
assert(Template && "qualified template name must refer to a template");
TemplateDecl *TransTemplate
= cast_or_null<TemplateDecl>(getDerived().TransformDecl(NameLoc,
Template));
if (!TransTemplate)
return TemplateName();
if (!getDerived().AlwaysRebuild() &&
SS.getScopeRep() == QTN->getQualifier() &&
TransTemplate == Template)
return Name;
return getDerived().RebuildTemplateName(SS, QTN->hasTemplateKeyword(),
TransTemplate);
}
if (DependentTemplateName *DTN = Name.getAsDependentTemplateName()) {
if (SS.getScopeRep()) {
// These apply to the scope specifier, not the template.
ObjectType = QualType();
FirstQualifierInScope = nullptr;
}
if (!getDerived().AlwaysRebuild() &&
SS.getScopeRep() == DTN->getQualifier() &&
ObjectType.isNull())
return Name;
// FIXME: Preserve the location of the "template" keyword.
SourceLocation TemplateKWLoc = NameLoc;
if (DTN->isIdentifier()) {
return getDerived().RebuildTemplateName(SS,
TemplateKWLoc,
*DTN->getIdentifier(),
NameLoc,
ObjectType,
FirstQualifierInScope,
AllowInjectedClassName);
}
return getDerived().RebuildTemplateName(SS, TemplateKWLoc,
DTN->getOperator(), NameLoc,
ObjectType, AllowInjectedClassName);
}
if (TemplateDecl *Template = Name.getAsTemplateDecl()) {
TemplateDecl *TransTemplate
= cast_or_null<TemplateDecl>(getDerived().TransformDecl(NameLoc,
Template));
if (!TransTemplate)
return TemplateName();
if (!getDerived().AlwaysRebuild() &&
TransTemplate == Template)
return Name;
return TemplateName(TransTemplate);
}
if (SubstTemplateTemplateParmPackStorage *SubstPack
= Name.getAsSubstTemplateTemplateParmPack()) {
TemplateTemplateParmDecl *TransParam
= cast_or_null<TemplateTemplateParmDecl>(
getDerived().TransformDecl(NameLoc, SubstPack->getParameterPack()));
if (!TransParam)
return TemplateName();
if (!getDerived().AlwaysRebuild() &&
TransParam == SubstPack->getParameterPack())
return Name;
return getDerived().RebuildTemplateName(TransParam,
SubstPack->getArgumentPack());
}
// These should be getting filtered out before they reach the AST.
llvm_unreachable("overloaded function decl survived to here");
}
template<typename Derived>
void TreeTransform<Derived>::InventTemplateArgumentLoc(
const TemplateArgument &Arg,
TemplateArgumentLoc &Output) {
Output = getSema().getTrivialTemplateArgumentLoc(
Arg, QualType(), getDerived().getBaseLocation());
}
template <typename Derived>
bool TreeTransform<Derived>::TransformTemplateArgument(
const TemplateArgumentLoc &Input, TemplateArgumentLoc &Output,
bool Uneval) {
const TemplateArgument &Arg = Input.getArgument();
switch (Arg.getKind()) {
case TemplateArgument::Null:
case TemplateArgument::Pack:
llvm_unreachable("Unexpected TemplateArgument");
case TemplateArgument::Integral:
case TemplateArgument::NullPtr:
case TemplateArgument::Declaration: {
// Transform a resolved template argument straight to a resolved template
// argument. We get here when substituting into an already-substituted
// template type argument during concept satisfaction checking.
QualType T = Arg.getNonTypeTemplateArgumentType();
QualType NewT = getDerived().TransformType(T);
if (NewT.isNull())
return true;
ValueDecl *D = Arg.getKind() == TemplateArgument::Declaration
? Arg.getAsDecl()
: nullptr;
ValueDecl *NewD = D ? cast_or_null<ValueDecl>(getDerived().TransformDecl(
getDerived().getBaseLocation(), D))
: nullptr;
if (D && !NewD)
return true;
if (NewT == T && D == NewD)
Output = Input;
else if (Arg.getKind() == TemplateArgument::Integral)
Output = TemplateArgumentLoc(
TemplateArgument(getSema().Context, Arg.getAsIntegral(), NewT),
TemplateArgumentLocInfo());
else if (Arg.getKind() == TemplateArgument::NullPtr)
Output = TemplateArgumentLoc(TemplateArgument(NewT, /*IsNullPtr=*/true),
TemplateArgumentLocInfo());
else
Output = TemplateArgumentLoc(TemplateArgument(NewD, NewT),
TemplateArgumentLocInfo());
return false;
}
case TemplateArgument::Type: {
TypeSourceInfo *DI = Input.getTypeSourceInfo();
if (!DI)
DI = InventTypeSourceInfo(Input.getArgument().getAsType());
DI = getDerived().TransformType(DI);
if (!DI)
return true;
Output = TemplateArgumentLoc(TemplateArgument(DI->getType()), DI);
return false;
}
case TemplateArgument::Template: {
NestedNameSpecifierLoc QualifierLoc = Input.getTemplateQualifierLoc();
if (QualifierLoc) {
QualifierLoc = getDerived().TransformNestedNameSpecifierLoc(QualifierLoc);
if (!QualifierLoc)
return true;
}
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
TemplateName Template = getDerived().TransformTemplateName(
SS, Arg.getAsTemplate(), Input.getTemplateNameLoc());
if (Template.isNull())
return true;
Output = TemplateArgumentLoc(SemaRef.Context, TemplateArgument(Template),
QualifierLoc, Input.getTemplateNameLoc());
return false;
}
case TemplateArgument::TemplateExpansion:
llvm_unreachable("Caller should expand pack expansions");
case TemplateArgument::Expression: {
// Template argument expressions are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
getSema(),
Uneval ? Sema::ExpressionEvaluationContext::Unevaluated
: Sema::ExpressionEvaluationContext::ConstantEvaluated,
/*LambdaContextDecl=*/nullptr, /*ExprContext=*/
Sema::ExpressionEvaluationContextRecord::EK_TemplateArgument);
Expr *InputExpr = Input.getSourceExpression();
if (!InputExpr)
InputExpr = Input.getArgument().getAsExpr();
ExprResult E = getDerived().TransformExpr(InputExpr);
E = SemaRef.ActOnConstantExpression(E);
if (E.isInvalid())
return true;
Output = TemplateArgumentLoc(TemplateArgument(E.get()), E.get());
return false;
}
}
// Work around bogus GCC warning
return true;
}
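// Note on the Expression case above: the argument is transformed inside a
// constant-expression evaluation context (or an unevaluated one when the
// caller passes 'Uneval'), and the result goes through
// ActOnConstantExpression, so a substituted non-type argument such as
// 'N + 1' is re-checked as a constant expression in the instantiation.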
/// Iterator adaptor that invents template argument location information
/// for each of the template arguments in its underlying iterator.
template<typename Derived, typename InputIterator>
class TemplateArgumentLocInventIterator {
TreeTransform<Derived> &Self;
InputIterator Iter;
public:
typedef TemplateArgumentLoc value_type;
typedef TemplateArgumentLoc reference;
typedef typename std::iterator_traits<InputIterator>::difference_type
difference_type;
typedef std::input_iterator_tag iterator_category;
class pointer {
TemplateArgumentLoc Arg;
public:
explicit pointer(TemplateArgumentLoc Arg) : Arg(Arg) { }
const TemplateArgumentLoc *operator->() const { return &Arg; }
};
TemplateArgumentLocInventIterator() { }
explicit TemplateArgumentLocInventIterator(TreeTransform<Derived> &Self,
InputIterator Iter)
: Self(Self), Iter(Iter) { }
TemplateArgumentLocInventIterator &operator++() {
++Iter;
return *this;
}
TemplateArgumentLocInventIterator operator++(int) {
TemplateArgumentLocInventIterator Old(*this);
++(*this);
return Old;
}
reference operator*() const {
TemplateArgumentLoc Result;
Self.InventTemplateArgumentLoc(*Iter, Result);
return Result;
}
pointer operator->() const { return pointer(**this); }
friend bool operator==(const TemplateArgumentLocInventIterator &X,
const TemplateArgumentLocInventIterator &Y) {
return X.Iter == Y.Iter;
}
friend bool operator!=(const TemplateArgumentLocInventIterator &X,
const TemplateArgumentLocInventIterator &Y) {
return X.Iter != Y.Iter;
}
};
template<typename Derived>
template<typename InputIterator>
bool TreeTransform<Derived>::TransformTemplateArguments(
InputIterator First, InputIterator Last, TemplateArgumentListInfo &Outputs,
bool Uneval) {
for (; First != Last; ++First) {
TemplateArgumentLoc Out;
TemplateArgumentLoc In = *First;
if (In.getArgument().getKind() == TemplateArgument::Pack) {
// Unpack argument packs, translating them into separate arguments.
// FIXME: We could do much better if we could guarantee that the
// TemplateArgumentLocInfo for the pack expansion would be usable for
// all of the template arguments in the argument pack.
typedef TemplateArgumentLocInventIterator<Derived,
TemplateArgument::pack_iterator>
PackLocIterator;
if (TransformTemplateArguments(PackLocIterator(*this,
In.getArgument().pack_begin()),
PackLocIterator(*this,
In.getArgument().pack_end()),
Outputs, Uneval))
return true;
continue;
}
if (In.getArgument().isPackExpansion()) {
// We have a pack expansion, for which we will be substituting into
// the pattern.
SourceLocation Ellipsis;
Optional<unsigned> OrigNumExpansions;
TemplateArgumentLoc Pattern
= getSema().getTemplateArgumentPackExpansionPattern(
In, Ellipsis, OrigNumExpansions);
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
getSema().collectUnexpandedParameterPacks(Pattern, Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(Ellipsis,
Pattern.getSourceRange(),
Unexpanded,
Expand,
RetainExpansion,
NumExpansions))
return true;
if (!Expand) {
// The transform has determined that we should perform a simple
// transformation on the pack expansion, producing another pack
// expansion.
TemplateArgumentLoc OutPattern;
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
if (getDerived().TransformTemplateArgument(Pattern, OutPattern, Uneval))
return true;
Out = getDerived().RebuildPackExpansion(OutPattern, Ellipsis,
NumExpansions);
if (Out.getArgument().isNull())
return true;
Outputs.addArgument(Out);
continue;
}
// The transform has determined that we should perform an elementwise
// expansion of the pattern. Do so.
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
if (getDerived().TransformTemplateArgument(Pattern, Out, Uneval))
return true;
if (Out.getArgument().containsUnexpandedParameterPack()) {
Out = getDerived().RebuildPackExpansion(Out, Ellipsis,
OrigNumExpansions);
if (Out.getArgument().isNull())
return true;
}
Outputs.addArgument(Out);
}
// If we're supposed to retain a pack expansion, do so by temporarily
// forgetting the partially-substituted parameter pack.
if (RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
if (getDerived().TransformTemplateArgument(Pattern, Out, Uneval))
return true;
Out = getDerived().RebuildPackExpansion(Out, Ellipsis,
OrigNumExpansions);
if (Out.getArgument().isNull())
return true;
Outputs.addArgument(Out);
}
continue;
}
// The simple case:
if (getDerived().TransformTemplateArgument(In, Out, Uneval))
return true;
Outputs.addArgument(Out);
}
return false;
}
//===----------------------------------------------------------------------===//
// Type transformation
//===----------------------------------------------------------------------===//
template<typename Derived>
QualType TreeTransform<Derived>::TransformType(QualType T) {
if (getDerived().AlreadyTransformed(T))
return T;
// Temporary workaround. All of these transformations should
// eventually turn into transformations on TypeLocs.
TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(T,
getDerived().getBaseLocation());
TypeSourceInfo *NewDI = getDerived().TransformType(DI);
if (!NewDI)
return QualType();
return NewDI->getType();
}
template<typename Derived>
TypeSourceInfo *TreeTransform<Derived>::TransformType(TypeSourceInfo *DI) {
// Refine the base location to the type's location.
TemporaryBase Rebase(*this, DI->getTypeLoc().getBeginLoc(),
getDerived().getBaseEntity());
if (getDerived().AlreadyTransformed(DI->getType()))
return DI;
TypeLocBuilder TLB;
TypeLoc TL = DI->getTypeLoc();
TLB.reserve(TL.getFullDataSize());
QualType Result = getDerived().TransformType(TLB, TL);
if (Result.isNull())
return nullptr;
return TLB.getTypeSourceInfo(SemaRef.Context, Result);
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformType(TypeLocBuilder &TLB, TypeLoc T) {
switch (T.getTypeLocClass()) {
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
case TypeLoc::CLASS: \
return getDerived().Transform##CLASS##Type(TLB, \
T.castAs<CLASS##TypeLoc>());
#include "clang/AST/TypeLocNodes.def"
}
llvm_unreachable("unhandled type loc!");
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTypeWithDeducedTST(QualType T) {
if (!isa<DependentNameType>(T))
return TransformType(T);
if (getDerived().AlreadyTransformed(T))
return T;
TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(T,
getDerived().getBaseLocation());
TypeSourceInfo *NewDI = getDerived().TransformTypeWithDeducedTST(DI);
return NewDI ? NewDI->getType() : QualType();
}
template<typename Derived>
TypeSourceInfo *
TreeTransform<Derived>::TransformTypeWithDeducedTST(TypeSourceInfo *DI) {
if (!isa<DependentNameType>(DI->getType()))
return TransformType(DI);
// Refine the base location to the type's location.
TemporaryBase Rebase(*this, DI->getTypeLoc().getBeginLoc(),
getDerived().getBaseEntity());
if (getDerived().AlreadyTransformed(DI->getType()))
return DI;
TypeLocBuilder TLB;
TypeLoc TL = DI->getTypeLoc();
TLB.reserve(TL.getFullDataSize());
auto QTL = TL.getAs<QualifiedTypeLoc>();
if (QTL)
TL = QTL.getUnqualifiedLoc();
auto DNTL = TL.castAs<DependentNameTypeLoc>();
QualType Result = getDerived().TransformDependentNameType(
TLB, DNTL, /*DeducedTSTContext*/true);
if (Result.isNull())
return nullptr;
if (QTL) {
Result = getDerived().RebuildQualifiedType(Result, QTL);
if (Result.isNull())
return nullptr;
TLB.TypeWasModifiedSafely(Result);
}
return TLB.getTypeSourceInfo(SemaRef.Context, Result);
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformQualifiedType(TypeLocBuilder &TLB,
QualifiedTypeLoc T) {
QualType Result = getDerived().TransformType(TLB, T.getUnqualifiedLoc());
if (Result.isNull())
return QualType();
Result = getDerived().RebuildQualifiedType(Result, T);
if (Result.isNull())
return QualType();
// RebuildQualifiedType might have updated the type, but not in a way
// that invalidates the TypeLoc. (There's no location information for
// qualifiers.)
TLB.TypeWasModifiedSafely(Result);
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildQualifiedType(QualType T,
QualifiedTypeLoc TL) {
SourceLocation Loc = TL.getBeginLoc();
Qualifiers Quals = TL.getType().getLocalQualifiers();
if (((T.getAddressSpace() != LangAS::Default &&
Quals.getAddressSpace() != LangAS::Default)) &&
T.getAddressSpace() != Quals.getAddressSpace()) {
SemaRef.Diag(Loc, diag::err_address_space_mismatch_templ_inst)
<< TL.getType() << T;
return QualType();
}
// C++ [dcl.fct]p7:
// [When] adding cv-qualifications on top of the function type [...] the
// cv-qualifiers are ignored.
if (T->isFunctionType()) {
T = SemaRef.getASTContext().getAddrSpaceQualType(T,
Quals.getAddressSpace());
return T;
}
// C++ [dcl.ref]p1:
// when the cv-qualifiers are introduced through the use of a typedef-name
// or decltype-specifier [...] the cv-qualifiers are ignored.
// Note that [dcl.ref]p1 lists all cases in which cv-qualifiers can be
// applied to a reference type.
if (T->isReferenceType()) {
// The only qualifier that applies to a reference type is restrict.
if (!Quals.hasRestrict())
return T;
Quals = Qualifiers::fromCVRMask(Qualifiers::Restrict);
}
// Suppress Objective-C lifetime qualifiers if they don't make sense for the
// resulting type.
if (Quals.hasObjCLifetime()) {
if (!T->isObjCLifetimeType() && !T->isDependentType())
Quals.removeObjCLifetime();
else if (T.getObjCLifetime()) {
// Objective-C ARC:
// A lifetime qualifier applied to a substituted template parameter
// overrides the lifetime qualifier from the template argument.
const AutoType *AutoTy;
if (const SubstTemplateTypeParmType *SubstTypeParam
= dyn_cast<SubstTemplateTypeParmType>(T)) {
QualType Replacement = SubstTypeParam->getReplacementType();
Qualifiers Qs = Replacement.getQualifiers();
Qs.removeObjCLifetime();
Replacement = SemaRef.Context.getQualifiedType(
Replacement.getUnqualifiedType(), Qs);
T = SemaRef.Context.getSubstTemplateTypeParmType(
SubstTypeParam->getReplacedParameter(), Replacement);
} else if ((AutoTy = dyn_cast<AutoType>(T)) && AutoTy->isDeduced()) {
// 'auto' types behave the same way as template parameters.
QualType Deduced = AutoTy->getDeducedType();
Qualifiers Qs = Deduced.getQualifiers();
Qs.removeObjCLifetime();
Deduced =
SemaRef.Context.getQualifiedType(Deduced.getUnqualifiedType(), Qs);
T = SemaRef.Context.getAutoType(Deduced, AutoTy->getKeyword(),
AutoTy->isDependentType(),
/*isPack=*/false,
AutoTy->getTypeConstraintConcept(),
AutoTy->getTypeConstraintArguments());
} else {
// Otherwise, complain about the addition of a qualifier to an
// already-qualified type.
// FIXME: Why is this check not in Sema::BuildQualifiedType?
SemaRef.Diag(Loc, diag::err_attr_objc_ownership_redundant) << T;
Quals.removeObjCLifetime();
}
}
}
return SemaRef.BuildQualifiedType(T, Loc, Quals);
}
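// Two small illustrations of the qualifier-dropping rules applied above,
// per [dcl.fct]p7 and [dcl.ref]p1:
//
//   template <typename T> void f(const T);
//   // T = int&: 'const' added on top of a reference is ignored, so the
//   // parameter type remains int&.
//
//   template <typename T> using C = const T;
//   using FT = C<void()>;   // cv-qualifiers on a function type are ignored
//
// Only 'restrict' is kept on a reference type, and Objective-C lifetime
// qualifiers are removed when the substituted type cannot carry one, or
// diagnosed as redundant when it already has one.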
template<typename Derived>
TypeLoc
TreeTransform<Derived>::TransformTypeInObjectScope(TypeLoc TL,
QualType ObjectType,
NamedDecl *UnqualLookup,
CXXScopeSpec &SS) {
if (getDerived().AlreadyTransformed(TL.getType()))
return TL;
TypeSourceInfo *TSI =
TransformTSIInObjectScope(TL, ObjectType, UnqualLookup, SS);
if (TSI)
return TSI->getTypeLoc();
return TypeLoc();
}
template<typename Derived>
TypeSourceInfo *
TreeTransform<Derived>::TransformTypeInObjectScope(TypeSourceInfo *TSInfo,
QualType ObjectType,
NamedDecl *UnqualLookup,
CXXScopeSpec &SS) {
if (getDerived().AlreadyTransformed(TSInfo->getType()))
return TSInfo;
return TransformTSIInObjectScope(TSInfo->getTypeLoc(), ObjectType,
UnqualLookup, SS);
}
template <typename Derived>
TypeSourceInfo *TreeTransform<Derived>::TransformTSIInObjectScope(
TypeLoc TL, QualType ObjectType, NamedDecl *UnqualLookup,
CXXScopeSpec &SS) {
QualType T = TL.getType();
assert(!getDerived().AlreadyTransformed(T));
TypeLocBuilder TLB;
QualType Result;
if (isa<TemplateSpecializationType>(T)) {
TemplateSpecializationTypeLoc SpecTL =
TL.castAs<TemplateSpecializationTypeLoc>();
TemplateName Template = getDerived().TransformTemplateName(
SS, SpecTL.getTypePtr()->getTemplateName(), SpecTL.getTemplateNameLoc(),
ObjectType, UnqualLookup, /*AllowInjectedClassName*/true);
if (Template.isNull())
return nullptr;
Result = getDerived().TransformTemplateSpecializationType(TLB, SpecTL,
Template);
} else if (isa<DependentTemplateSpecializationType>(T)) {
DependentTemplateSpecializationTypeLoc SpecTL =
TL.castAs<DependentTemplateSpecializationTypeLoc>();
TemplateName Template
= getDerived().RebuildTemplateName(SS,
SpecTL.getTemplateKeywordLoc(),
*SpecTL.getTypePtr()->getIdentifier(),
SpecTL.getTemplateNameLoc(),
ObjectType, UnqualLookup,
/*AllowInjectedClassName*/true);
if (Template.isNull())
return nullptr;
Result = getDerived().TransformDependentTemplateSpecializationType(TLB,
SpecTL,
Template,
SS);
} else {
// Nothing special needs to be done for these.
Result = getDerived().TransformType(TLB, TL);
}
if (Result.isNull())
return nullptr;
return TLB.getTypeSourceInfo(SemaRef.Context, Result);
}
template <class TyLoc> static inline
QualType TransformTypeSpecType(TypeLocBuilder &TLB, TyLoc T) {
TyLoc NewT = TLB.push<TyLoc>(T.getType());
NewT.setNameLoc(T.getNameLoc());
return T.getType();
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformBuiltinType(TypeLocBuilder &TLB,
BuiltinTypeLoc T) {
BuiltinTypeLoc NewT = TLB.push<BuiltinTypeLoc>(T.getType());
NewT.setBuiltinLoc(T.getBuiltinLoc());
if (T.needsExtraLocalData())
NewT.getWrittenBuiltinSpecs() = T.getWrittenBuiltinSpecs();
return T.getType();
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformComplexType(TypeLocBuilder &TLB,
ComplexTypeLoc T) {
// FIXME: recurse?
return TransformTypeSpecType(TLB, T);
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformAdjustedType(TypeLocBuilder &TLB,
AdjustedTypeLoc TL) {
// Adjustments applied during transformation are handled elsewhere.
return getDerived().TransformType(TLB, TL.getOriginalLoc());
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDecayedType(TypeLocBuilder &TLB,
DecayedTypeLoc TL) {
QualType OriginalType = getDerived().TransformType(TLB, TL.getOriginalLoc());
if (OriginalType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
OriginalType != TL.getOriginalLoc().getType())
Result = SemaRef.Context.getDecayedType(OriginalType);
TLB.push<DecayedTypeLoc>(Result);
// Nothing to set for DecayedTypeLoc.
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformPointerType(TypeLocBuilder &TLB,
PointerTypeLoc TL) {
QualType PointeeType
= getDerived().TransformType(TLB, TL.getPointeeLoc());
if (PointeeType.isNull())
return QualType();
QualType Result = TL.getType();
if (PointeeType->getAs<ObjCObjectType>()) {
// A dependent pointer type 'T *' is being transformed such
// that an Objective-C class type is being substituted for 'T'. The
// resulting pointer type is an ObjCObjectPointerType, not a
// PointerType.
Result = SemaRef.Context.getObjCObjectPointerType(PointeeType);
ObjCObjectPointerTypeLoc NewT = TLB.push<ObjCObjectPointerTypeLoc>(Result);
NewT.setStarLoc(TL.getStarLoc());
return Result;
}
if (getDerived().AlwaysRebuild() ||
PointeeType != TL.getPointeeLoc().getType()) {
Result = getDerived().RebuildPointerType(PointeeType, TL.getSigilLoc());
if (Result.isNull())
return QualType();
}
// Objective-C ARC can add lifetime qualifiers to the type that we're
// pointing to.
TLB.TypeWasModifiedSafely(Result->getPointeeType());
PointerTypeLoc NewT = TLB.push<PointerTypeLoc>(Result);
NewT.setSigilLoc(TL.getSigilLoc());
return Result;
}
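// Example of the Objective-C special case above: if 'T *' was written in a
// template and 'T' is substituted with an Objective-C class type such as
// NSString, the rebuilt type must be 'NSString *', which Clang models as an
// ObjCObjectPointerType rather than a PointerType, so the TypeLoc pushed
// onto the builder has to use that node kind as well.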
template<typename Derived>
QualType
TreeTransform<Derived>::TransformBlockPointerType(TypeLocBuilder &TLB,
BlockPointerTypeLoc TL) {
QualType PointeeType
= getDerived().TransformType(TLB, TL.getPointeeLoc());
if (PointeeType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
PointeeType != TL.getPointeeLoc().getType()) {
Result = getDerived().RebuildBlockPointerType(PointeeType,
TL.getSigilLoc());
if (Result.isNull())
return QualType();
}
BlockPointerTypeLoc NewT = TLB.push<BlockPointerTypeLoc>(Result);
NewT.setSigilLoc(TL.getSigilLoc());
return Result;
}
/// Transforms a reference type. Note that somewhat paradoxically we
/// don't care whether the type itself is an l-value type or an r-value
/// type; we only care if the type was *written* as an l-value type
/// or an r-value type.
template<typename Derived>
QualType
TreeTransform<Derived>::TransformReferenceType(TypeLocBuilder &TLB,
ReferenceTypeLoc TL) {
const ReferenceType *T = TL.getTypePtr();
// Note that this works with the pointee-as-written.
QualType PointeeType = getDerived().TransformType(TLB, TL.getPointeeLoc());
if (PointeeType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
PointeeType != T->getPointeeTypeAsWritten()) {
Result = getDerived().RebuildReferenceType(PointeeType,
T->isSpelledAsLValue(),
TL.getSigilLoc());
if (Result.isNull())
return QualType();
}
// Objective-C ARC can add lifetime qualifiers to the type that we're
// referring to.
TLB.TypeWasModifiedSafely(
Result->castAs<ReferenceType>()->getPointeeTypeAsWritten());
// r-value references can be rebuilt as l-value references.
ReferenceTypeLoc NewTL;
if (isa<LValueReferenceType>(Result))
NewTL = TLB.push<LValueReferenceTypeLoc>(Result);
else
NewTL = TLB.push<RValueReferenceTypeLoc>(Result);
NewTL.setSigilLoc(TL.getSigilLoc());
return Result;
}
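// The "r-value references can be rebuilt as l-value references" note covers
// reference collapsing: transforming the written type 'T &&' with 'T'
// substituted to 'int &' yields 'int &', so the TypeLoc pushed here may be an
// LValueReferenceTypeLoc even though the source spelled an r-value reference.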
template<typename Derived>
QualType
TreeTransform<Derived>::TransformLValueReferenceType(TypeLocBuilder &TLB,
LValueReferenceTypeLoc TL) {
return TransformReferenceType(TLB, TL);
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformRValueReferenceType(TypeLocBuilder &TLB,
RValueReferenceTypeLoc TL) {
return TransformReferenceType(TLB, TL);
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformMemberPointerType(TypeLocBuilder &TLB,
MemberPointerTypeLoc TL) {
QualType PointeeType = getDerived().TransformType(TLB, TL.getPointeeLoc());
if (PointeeType.isNull())
return QualType();
TypeSourceInfo* OldClsTInfo = TL.getClassTInfo();
TypeSourceInfo *NewClsTInfo = nullptr;
if (OldClsTInfo) {
NewClsTInfo = getDerived().TransformType(OldClsTInfo);
if (!NewClsTInfo)
return QualType();
}
const MemberPointerType *T = TL.getTypePtr();
QualType OldClsType = QualType(T->getClass(), 0);
QualType NewClsType;
if (NewClsTInfo)
NewClsType = NewClsTInfo->getType();
else {
NewClsType = getDerived().TransformType(OldClsType);
if (NewClsType.isNull())
return QualType();
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
PointeeType != T->getPointeeType() ||
NewClsType != OldClsType) {
Result = getDerived().RebuildMemberPointerType(PointeeType, NewClsType,
TL.getStarLoc());
if (Result.isNull())
return QualType();
}
// If we had to adjust the pointee type when building a member pointer, make
// sure to push TypeLoc info for it.
const MemberPointerType *MPT = Result->getAs<MemberPointerType>();
if (MPT && PointeeType != MPT->getPointeeType()) {
assert(isa<AdjustedType>(MPT->getPointeeType()));
TLB.push<AdjustedTypeLoc>(MPT->getPointeeType());
}
MemberPointerTypeLoc NewTL = TLB.push<MemberPointerTypeLoc>(Result);
NewTL.setSigilLoc(TL.getSigilLoc());
NewTL.setClassTInfo(NewClsTInfo);
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformConstantArrayType(TypeLocBuilder &TLB,
ConstantArrayTypeLoc TL) {
const ConstantArrayType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
// Prefer the expression from the TypeLoc; the other may have been uniqued.
Expr *OldSize = TL.getSizeExpr();
if (!OldSize)
OldSize = const_cast<Expr*>(T->getSizeExpr());
Expr *NewSize = nullptr;
if (OldSize) {
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
NewSize = getDerived().TransformExpr(OldSize).template getAs<Expr>();
NewSize = SemaRef.ActOnConstantExpression(NewSize).get();
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType() ||
(T->getSizeExpr() && NewSize != OldSize)) {
Result = getDerived().RebuildConstantArrayType(ElementType,
T->getSizeModifier(),
T->getSize(), NewSize,
T->getIndexTypeCVRQualifiers(),
TL.getBracketsRange());
if (Result.isNull())
return QualType();
}
// We might have either a ConstantArrayType or a VariableArrayType now:
// a ConstantArrayType is allowed to have an element type which is a
// VariableArrayType if the type is dependent. Fortunately, all array
// types have the same location layout.
ArrayTypeLoc NewTL = TLB.push<ArrayTypeLoc>(Result);
NewTL.setLBracketLoc(TL.getLBracketLoc());
NewTL.setRBracketLoc(TL.getRBracketLoc());
NewTL.setSizeExpr(NewSize);
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformIncompleteArrayType(
TypeLocBuilder &TLB,
IncompleteArrayTypeLoc TL) {
const IncompleteArrayType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType()) {
Result = getDerived().RebuildIncompleteArrayType(ElementType,
T->getSizeModifier(),
T->getIndexTypeCVRQualifiers(),
TL.getBracketsRange());
if (Result.isNull())
return QualType();
}
IncompleteArrayTypeLoc NewTL = TLB.push<IncompleteArrayTypeLoc>(Result);
NewTL.setLBracketLoc(TL.getLBracketLoc());
NewTL.setRBracketLoc(TL.getRBracketLoc());
NewTL.setSizeExpr(nullptr);
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformVariableArrayType(TypeLocBuilder &TLB,
VariableArrayTypeLoc TL) {
const VariableArrayType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
ExprResult SizeResult;
{
EnterExpressionEvaluationContext Context(
SemaRef, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
SizeResult = getDerived().TransformExpr(T->getSizeExpr());
}
if (SizeResult.isInvalid())
return QualType();
SizeResult =
SemaRef.ActOnFinishFullExpr(SizeResult.get(), /*DiscardedValue*/ false);
if (SizeResult.isInvalid())
return QualType();
Expr *Size = SizeResult.get();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType() ||
Size != T->getSizeExpr()) {
Result = getDerived().RebuildVariableArrayType(ElementType,
T->getSizeModifier(),
Size,
T->getIndexTypeCVRQualifiers(),
TL.getBracketsRange());
if (Result.isNull())
return QualType();
}
// We might have a constant-size array now, but fortunately it has the same
// location layout.
ArrayTypeLoc NewTL = TLB.push<ArrayTypeLoc>(Result);
NewTL.setLBracketLoc(TL.getLBracketLoc());
NewTL.setRBracketLoc(TL.getRBracketLoc());
NewTL.setSizeExpr(Size);
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformDependentSizedArrayType(TypeLocBuilder &TLB,
DependentSizedArrayTypeLoc TL) {
const DependentSizedArrayType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
// Array bounds are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
// Prefer the expression from the TypeLoc; the other may have been uniqued.
Expr *origSize = TL.getSizeExpr();
if (!origSize) origSize = T->getSizeExpr();
ExprResult sizeResult
= getDerived().TransformExpr(origSize);
sizeResult = SemaRef.ActOnConstantExpression(sizeResult);
if (sizeResult.isInvalid())
return QualType();
Expr *size = sizeResult.get();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType() ||
size != origSize) {
Result = getDerived().RebuildDependentSizedArrayType(ElementType,
T->getSizeModifier(),
size,
T->getIndexTypeCVRQualifiers(),
TL.getBracketsRange());
if (Result.isNull())
return QualType();
}
// We might have any sort of array type now, but fortunately they
// all have the same location layout.
ArrayTypeLoc NewTL = TLB.push<ArrayTypeLoc>(Result);
NewTL.setLBracketLoc(TL.getLBracketLoc());
NewTL.setRBracketLoc(TL.getRBracketLoc());
NewTL.setSizeExpr(size);
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformDependentVectorType(
TypeLocBuilder &TLB, DependentVectorTypeLoc TL) {
const DependentVectorType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Size = getDerived().TransformExpr(T->getSizeExpr());
Size = SemaRef.ActOnConstantExpression(Size);
if (Size.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ElementType != T->getElementType() ||
Size.get() != T->getSizeExpr()) {
Result = getDerived().RebuildDependentVectorType(
ElementType, Size.get(), T->getAttributeLoc(), T->getVectorKind());
if (Result.isNull())
return QualType();
}
// Result might be dependent or not.
if (isa<DependentVectorType>(Result)) {
DependentVectorTypeLoc NewTL =
TLB.push<DependentVectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
} else {
VectorTypeLoc NewTL = TLB.push<VectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
}
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDependentSizedExtVectorType(
TypeLocBuilder &TLB,
DependentSizedExtVectorTypeLoc TL) {
const DependentSizedExtVectorType *T = TL.getTypePtr();
// FIXME: ext vector locs should be nested
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
// Vector sizes are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Size = getDerived().TransformExpr(T->getSizeExpr());
Size = SemaRef.ActOnConstantExpression(Size);
if (Size.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType() ||
Size.get() != T->getSizeExpr()) {
Result = getDerived().RebuildDependentSizedExtVectorType(ElementType,
Size.get(),
T->getAttributeLoc());
if (Result.isNull())
return QualType();
}
// Result might be dependent or not.
if (isa<DependentSizedExtVectorType>(Result)) {
DependentSizedExtVectorTypeLoc NewTL
= TLB.push<DependentSizedExtVectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
} else {
ExtVectorTypeLoc NewTL = TLB.push<ExtVectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
}
return Result;
}
template <typename Derived>
QualType
TreeTransform<Derived>::TransformConstantMatrixType(TypeLocBuilder &TLB,
ConstantMatrixTypeLoc TL) {
const ConstantMatrixType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(T->getElementType());
if (ElementType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ElementType != T->getElementType()) {
Result = getDerived().RebuildConstantMatrixType(
ElementType, T->getNumRows(), T->getNumColumns());
if (Result.isNull())
return QualType();
}
ConstantMatrixTypeLoc NewTL = TLB.push<ConstantMatrixTypeLoc>(Result);
NewTL.setAttrNameLoc(TL.getAttrNameLoc());
NewTL.setAttrOperandParensRange(TL.getAttrOperandParensRange());
NewTL.setAttrRowOperand(TL.getAttrRowOperand());
NewTL.setAttrColumnOperand(TL.getAttrColumnOperand());
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformDependentSizedMatrixType(
TypeLocBuilder &TLB, DependentSizedMatrixTypeLoc TL) {
const DependentSizedMatrixType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(T->getElementType());
if (ElementType.isNull()) {
return QualType();
}
// Matrix dimensions are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
Expr *origRows = TL.getAttrRowOperand();
if (!origRows)
origRows = T->getRowExpr();
Expr *origColumns = TL.getAttrColumnOperand();
if (!origColumns)
origColumns = T->getColumnExpr();
ExprResult rowResult = getDerived().TransformExpr(origRows);
rowResult = SemaRef.ActOnConstantExpression(rowResult);
if (rowResult.isInvalid())
return QualType();
ExprResult columnResult = getDerived().TransformExpr(origColumns);
columnResult = SemaRef.ActOnConstantExpression(columnResult);
if (columnResult.isInvalid())
return QualType();
Expr *rows = rowResult.get();
Expr *columns = columnResult.get();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ElementType != T->getElementType() ||
rows != origRows || columns != origColumns) {
Result = getDerived().RebuildDependentSizedMatrixType(
ElementType, rows, columns, T->getAttributeLoc());
if (Result.isNull())
return QualType();
}
// We might have any sort of matrix type now, but fortunately they
// all have the same location layout.
MatrixTypeLoc NewTL = TLB.push<MatrixTypeLoc>(Result);
NewTL.setAttrNameLoc(TL.getAttrNameLoc());
NewTL.setAttrOperandParensRange(TL.getAttrOperandParensRange());
NewTL.setAttrRowOperand(rows);
NewTL.setAttrColumnOperand(columns);
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformDependentAddressSpaceType(
TypeLocBuilder &TLB, DependentAddressSpaceTypeLoc TL) {
const DependentAddressSpaceType *T = TL.getTypePtr();
QualType pointeeType = getDerived().TransformType(T->getPointeeType());
if (pointeeType.isNull())
return QualType();
// Address spaces are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult AddrSpace = getDerived().TransformExpr(T->getAddrSpaceExpr());
AddrSpace = SemaRef.ActOnConstantExpression(AddrSpace);
if (AddrSpace.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || pointeeType != T->getPointeeType() ||
AddrSpace.get() != T->getAddrSpaceExpr()) {
Result = getDerived().RebuildDependentAddressSpaceType(
pointeeType, AddrSpace.get(), T->getAttributeLoc());
if (Result.isNull())
return QualType();
}
// Result might be dependent or not.
if (isa<DependentAddressSpaceType>(Result)) {
DependentAddressSpaceTypeLoc NewTL =
TLB.push<DependentAddressSpaceTypeLoc>(Result);
NewTL.setAttrOperandParensRange(TL.getAttrOperandParensRange());
NewTL.setAttrExprOperand(TL.getAttrExprOperand());
NewTL.setAttrNameLoc(TL.getAttrNameLoc());
} else {
TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(
Result, getDerived().getBaseLocation());
TransformType(TLB, DI->getTypeLoc());
}
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformVectorType(TypeLocBuilder &TLB,
VectorTypeLoc TL) {
const VectorType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType()) {
Result = getDerived().RebuildVectorType(ElementType, T->getNumElements(),
T->getVectorKind());
if (Result.isNull())
return QualType();
}
VectorTypeLoc NewTL = TLB.push<VectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformExtVectorType(TypeLocBuilder &TLB,
ExtVectorTypeLoc TL) {
const VectorType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType()) {
Result = getDerived().RebuildExtVectorType(ElementType,
T->getNumElements(),
/*FIXME*/ SourceLocation());
if (Result.isNull())
return QualType();
}
ExtVectorTypeLoc NewTL = TLB.push<ExtVectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template <typename Derived>
ParmVarDecl *TreeTransform<Derived>::TransformFunctionTypeParam(
ParmVarDecl *OldParm, int indexAdjustment, Optional<unsigned> NumExpansions,
bool ExpectParameterPack) {
TypeSourceInfo *OldDI = OldParm->getTypeSourceInfo();
TypeSourceInfo *NewDI = nullptr;
if (NumExpansions && isa<PackExpansionType>(OldDI->getType())) {
// If we're substituting into a pack expansion type and we know the
// length we want to expand to, just substitute for the pattern.
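// Illustrative example (not from this file): for a parameter written as
// "Ts ...ts" the pattern is "Ts"; only that pattern is transformed below,
// and the result is wrapped back into a pack expansion of the known length.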
TypeLoc OldTL = OldDI->getTypeLoc();
PackExpansionTypeLoc OldExpansionTL = OldTL.castAs<PackExpansionTypeLoc>();
TypeLocBuilder TLB;
TypeLoc NewTL = OldDI->getTypeLoc();
TLB.reserve(NewTL.getFullDataSize());
QualType Result = getDerived().TransformType(TLB,
OldExpansionTL.getPatternLoc());
if (Result.isNull())
return nullptr;
Result = RebuildPackExpansionType(Result,
OldExpansionTL.getPatternLoc().getSourceRange(),
OldExpansionTL.getEllipsisLoc(),
NumExpansions);
if (Result.isNull())
return nullptr;
PackExpansionTypeLoc NewExpansionTL
= TLB.push<PackExpansionTypeLoc>(Result);
NewExpansionTL.setEllipsisLoc(OldExpansionTL.getEllipsisLoc());
NewDI = TLB.getTypeSourceInfo(SemaRef.Context, Result);
} else
NewDI = getDerived().TransformType(OldDI);
if (!NewDI)
return nullptr;
if (NewDI == OldDI && indexAdjustment == 0)
return OldParm;
ParmVarDecl *newParm = ParmVarDecl::Create(SemaRef.Context,
OldParm->getDeclContext(),
OldParm->getInnerLocStart(),
OldParm->getLocation(),
OldParm->getIdentifier(),
NewDI->getType(),
NewDI,
OldParm->getStorageClass(),
/* DefArg */ nullptr);
newParm->setScopeInfo(OldParm->getFunctionScopeDepth(),
OldParm->getFunctionScopeIndex() + indexAdjustment);
transformedLocalDecl(OldParm, {newParm});
return newParm;
}
template <typename Derived>
bool TreeTransform<Derived>::TransformFunctionTypeParams(
SourceLocation Loc, ArrayRef<ParmVarDecl *> Params,
const QualType *ParamTypes,
const FunctionProtoType::ExtParameterInfo *ParamInfos,
SmallVectorImpl<QualType> &OutParamTypes,
SmallVectorImpl<ParmVarDecl *> *PVars,
Sema::ExtParameterInfoBuilder &PInfos) {
int indexAdjustment = 0;
unsigned NumParams = Params.size();
for (unsigned i = 0; i != NumParams; ++i) {
if (ParmVarDecl *OldParm = Params[i]) {
assert(OldParm->getFunctionScopeIndex() == i);
Optional<unsigned> NumExpansions;
ParmVarDecl *NewParm = nullptr;
if (OldParm->isParameterPack()) {
// We have a function parameter pack that may need to be expanded.
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
// Find the parameter packs that could be expanded.
TypeLoc TL = OldParm->getTypeSourceInfo()->getTypeLoc();
PackExpansionTypeLoc ExpansionTL = TL.castAs<PackExpansionTypeLoc>();
TypeLoc Pattern = ExpansionTL.getPatternLoc();
SemaRef.collectUnexpandedParameterPacks(Pattern, Unexpanded);
// Determine whether we should expand the parameter packs.
bool ShouldExpand = false;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions;
if (Unexpanded.size() > 0) {
OrigNumExpansions = ExpansionTL.getTypePtr()->getNumExpansions();
NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(ExpansionTL.getEllipsisLoc(),
Pattern.getSourceRange(),
Unexpanded,
ShouldExpand,
RetainExpansion,
NumExpansions)) {
return true;
}
} else {
#ifndef NDEBUG
const AutoType *AT =
Pattern.getType().getTypePtr()->getContainedAutoType();
assert((AT && (!AT->isDeduced() || AT->getDeducedType().isNull())) &&
"Could not find parameter packs or undeduced auto type!");
#endif
}
if (ShouldExpand) {
// Expand the function parameter pack into multiple, separate
// parameters.
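// Illustrative example (not from this file): for
//   template <typename ...Ts> void f(Ts ...ts);
// instantiated with Ts = {int, float}, the single pack parameter 'ts'
// becomes two separate parameters of type int and float.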
getDerived().ExpandingFunctionParameterPack(OldParm);
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
ParmVarDecl *NewParm
= getDerived().TransformFunctionTypeParam(OldParm,
indexAdjustment++,
OrigNumExpansions,
/*ExpectParameterPack=*/false);
if (!NewParm)
return true;
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewParm->getType());
if (PVars)
PVars->push_back(NewParm);
}
// If we're supposed to retain a pack expansion, do so by temporarily
// forgetting the partially-substituted parameter pack.
if (RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
ParmVarDecl *NewParm
= getDerived().TransformFunctionTypeParam(OldParm,
indexAdjustment++,
OrigNumExpansions,
/*ExpectParameterPack=*/false);
if (!NewParm)
return true;
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewParm->getType());
if (PVars)
PVars->push_back(NewParm);
}
// The next parameter should have the same adjustment as the
// last thing we pushed, but we post-incremented indexAdjustment
// on every push. Also, if we push nothing, the adjustment should
// go down by one.
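// Illustrative example (not from this file): starting from 0, a pack that
// expanded into three parameters used adjustments 0, 1 and 2 and left
// indexAdjustment at 3; the decrement below makes the next parameter reuse 2.
// If the pack expanded into nothing, the decrement yields -1, shifting later
// parameters down by one slot.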
indexAdjustment--;
// We're done with the pack expansion.
continue;
}
// We'll substitute the parameter now without expanding the pack
// expansion.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
NewParm = getDerived().TransformFunctionTypeParam(OldParm,
indexAdjustment,
NumExpansions,
/*ExpectParameterPack=*/true);
assert(NewParm->isParameterPack() &&
"Parameter pack no longer a parameter pack after "
"transformation.");
} else {
NewParm = getDerived().TransformFunctionTypeParam(
OldParm, indexAdjustment, None, /*ExpectParameterPack=*/ false);
}
if (!NewParm)
return true;
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewParm->getType());
if (PVars)
PVars->push_back(NewParm);
continue;
}
// Deal with the possibility that we don't have a parameter
// declaration for this parameter.
QualType OldType = ParamTypes[i];
bool IsPackExpansion = false;
Optional<unsigned> NumExpansions;
QualType NewType;
if (const PackExpansionType *Expansion
= dyn_cast<PackExpansionType>(OldType)) {
// We have a function parameter pack that may need to be expanded.
QualType Pattern = Expansion->getPattern();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
getSema().collectUnexpandedParameterPacks(Pattern, Unexpanded);
// Determine whether we should expand the parameter packs.
bool ShouldExpand = false;
bool RetainExpansion = false;
if (getDerived().TryExpandParameterPacks(Loc, SourceRange(),
Unexpanded,
ShouldExpand,
RetainExpansion,
NumExpansions)) {
return true;
}
if (ShouldExpand) {
// Expand the function parameter pack into multiple, separate
// parameters.
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
QualType NewType = getDerived().TransformType(Pattern);
if (NewType.isNull())
return true;
if (NewType->containsUnexpandedParameterPack()) {
NewType =
getSema().getASTContext().getPackExpansionType(NewType, None);
if (NewType.isNull())
return true;
}
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewType);
if (PVars)
PVars->push_back(nullptr);
}
// We're done with the pack expansion.
continue;
}
// If we're supposed to retain a pack expansion, do so by temporarily
// forgetting the partially-substituted parameter pack.
if (RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
QualType NewType = getDerived().TransformType(Pattern);
if (NewType.isNull())
return true;
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewType);
if (PVars)
PVars->push_back(nullptr);
}
// We'll substitute the parameter now without expanding the pack
// expansion.
OldType = Expansion->getPattern();
IsPackExpansion = true;
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
NewType = getDerived().TransformType(OldType);
} else {
NewType = getDerived().TransformType(OldType);
}
if (NewType.isNull())
return true;
if (IsPackExpansion)
NewType = getSema().Context.getPackExpansionType(NewType,
NumExpansions);
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewType);
if (PVars)
PVars->push_back(nullptr);
}
#ifndef NDEBUG
if (PVars) {
for (unsigned i = 0, e = PVars->size(); i != e; ++i)
if (ParmVarDecl *parm = (*PVars)[i])
assert(parm->getFunctionScopeIndex() == i);
}
#endif
return false;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformFunctionProtoType(TypeLocBuilder &TLB,
FunctionProtoTypeLoc TL) {
SmallVector<QualType, 4> ExceptionStorage;
TreeTransform *This = this; // Work around gcc.gnu.org/PR56135.
return getDerived().TransformFunctionProtoType(
TLB, TL, nullptr, Qualifiers(),
[&](FunctionProtoType::ExceptionSpecInfo &ESI, bool &Changed) {
return This->getDerived().TransformExceptionSpec(
TL.getBeginLoc(), ESI, ExceptionStorage, Changed);
});
}
template<typename Derived> template<typename Fn>
QualType TreeTransform<Derived>::TransformFunctionProtoType(
TypeLocBuilder &TLB, FunctionProtoTypeLoc TL, CXXRecordDecl *ThisContext,
Qualifiers ThisTypeQuals, Fn TransformExceptionSpec) {
// Transform the parameters and return type.
//
// We are required to instantiate the params and return type in source order.
// When the function has a trailing return type, we instantiate the
// parameters before the return type, since the return type can then refer
// to the parameters themselves (via decltype, sizeof, etc.).
//
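// Illustrative example (not from this file): given
//   template <typename T> auto f(T a, T b) -> decltype(a + b);
// the trailing return type names the parameters, so the parameters must be
// transformed before the return type.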
SmallVector<QualType, 4> ParamTypes;
SmallVector<ParmVarDecl*, 4> ParamDecls;
Sema::ExtParameterInfoBuilder ExtParamInfos;
const FunctionProtoType *T = TL.getTypePtr();
QualType ResultType;
if (T->hasTrailingReturn()) {
if (getDerived().TransformFunctionTypeParams(
TL.getBeginLoc(), TL.getParams(),
TL.getTypePtr()->param_type_begin(),
T->getExtParameterInfosOrNull(),
ParamTypes, &ParamDecls, ExtParamInfos))
return QualType();
{
// C++11 [expr.prim.general]p3:
// If a declaration declares a member function or member function
// template of a class X, the expression this is a prvalue of type
// "pointer to cv-qualifier-seq X" between the optional cv-qualifer-seq
// and the end of the function-definition, member-declarator, or
// declarator.
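// Illustrative example (not from this file): in
//   auto S::get() const -> decltype(this->value);
// 'this' may appear in the trailing return type, which is why a
// CXXThisScopeRAII is set up while the return type is transformed.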
Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, ThisTypeQuals);
ResultType = getDerived().TransformType(TLB, TL.getReturnLoc());
if (ResultType.isNull())
return QualType();
}
}
else {
ResultType = getDerived().TransformType(TLB, TL.getReturnLoc());
if (ResultType.isNull())
return QualType();
if (getDerived().TransformFunctionTypeParams(
TL.getBeginLoc(), TL.getParams(),
TL.getTypePtr()->param_type_begin(),
T->getExtParameterInfosOrNull(),
ParamTypes, &ParamDecls, ExtParamInfos))
return QualType();
}
FunctionProtoType::ExtProtoInfo EPI = T->getExtProtoInfo();
bool EPIChanged = false;
if (TransformExceptionSpec(EPI.ExceptionSpec, EPIChanged))
return QualType();
// Handle extended parameter information.
if (auto NewExtParamInfos =
ExtParamInfos.getPointerOrNull(ParamTypes.size())) {
if (!EPI.ExtParameterInfos ||
llvm::makeArrayRef(EPI.ExtParameterInfos, TL.getNumParams())
!= llvm::makeArrayRef(NewExtParamInfos, ParamTypes.size())) {
EPIChanged = true;
}
EPI.ExtParameterInfos = NewExtParamInfos;
} else if (EPI.ExtParameterInfos) {
EPIChanged = true;
EPI.ExtParameterInfos = nullptr;
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ResultType != T->getReturnType() ||
T->getParamTypes() != llvm::makeArrayRef(ParamTypes) || EPIChanged) {
Result = getDerived().RebuildFunctionProtoType(ResultType, ParamTypes, EPI);
if (Result.isNull())
return QualType();
}
FunctionProtoTypeLoc NewTL = TLB.push<FunctionProtoTypeLoc>(Result);
NewTL.setLocalRangeBegin(TL.getLocalRangeBegin());
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
NewTL.setExceptionSpecRange(TL.getExceptionSpecRange());
NewTL.setLocalRangeEnd(TL.getLocalRangeEnd());
for (unsigned i = 0, e = NewTL.getNumParams(); i != e; ++i)
NewTL.setParam(i, ParamDecls[i]);
return Result;
}
template<typename Derived>
bool TreeTransform<Derived>::TransformExceptionSpec(
SourceLocation Loc, FunctionProtoType::ExceptionSpecInfo &ESI,
SmallVectorImpl<QualType> &Exceptions, bool &Changed) {
assert(ESI.Type != EST_Uninstantiated && ESI.Type != EST_Unevaluated);
// Instantiate a dynamic noexcept expression, if any.
if (isComputedNoexcept(ESI.Type)) {
EnterExpressionEvaluationContext Unevaluated(
getSema(), Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult NoexceptExpr = getDerived().TransformExpr(ESI.NoexceptExpr);
if (NoexceptExpr.isInvalid())
return true;
ExceptionSpecificationType EST = ESI.Type;
NoexceptExpr =
getSema().ActOnNoexceptSpec(Loc, NoexceptExpr.get(), EST);
if (NoexceptExpr.isInvalid())
return true;
if (ESI.NoexceptExpr != NoexceptExpr.get() || EST != ESI.Type)
Changed = true;
ESI.NoexceptExpr = NoexceptExpr.get();
ESI.Type = EST;
}
if (ESI.Type != EST_Dynamic)
return false;
// Instantiate a dynamic exception specification's type.
for (QualType T : ESI.Exceptions) {
if (const PackExpansionType *PackExpansion =
T->getAs<PackExpansionType>()) {
Changed = true;
// We have a pack expansion. Instantiate it.
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(PackExpansion->getPattern(),
Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = false;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions = PackExpansion->getNumExpansions();
// FIXME: Track the location of the ellipsis (and track source location
// information for the types in the exception specification in general).
if (getDerived().TryExpandParameterPacks(
Loc, SourceRange(), Unexpanded, Expand,
RetainExpansion, NumExpansions))
return true;
if (!Expand) {
// We can't expand this pack expansion into separate arguments yet;
// just substitute into the pattern and create a new pack expansion
// type.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
QualType U = getDerived().TransformType(PackExpansion->getPattern());
if (U.isNull())
return true;
U = SemaRef.Context.getPackExpansionType(U, NumExpansions);
Exceptions.push_back(U);
continue;
}
// Substitute into the pack expansion pattern for each slice of the
// pack.
for (unsigned ArgIdx = 0; ArgIdx != *NumExpansions; ++ArgIdx) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), ArgIdx);
QualType U = getDerived().TransformType(PackExpansion->getPattern());
if (U.isNull() || SemaRef.CheckSpecifiedExceptionType(U, Loc))
return true;
Exceptions.push_back(U);
}
} else {
QualType U = getDerived().TransformType(T);
if (U.isNull() || SemaRef.CheckSpecifiedExceptionType(U, Loc))
return true;
if (T != U)
Changed = true;
Exceptions.push_back(U);
}
}
ESI.Exceptions = Exceptions;
if (ESI.Exceptions.empty())
ESI.Type = EST_DynamicNone;
return false;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformFunctionNoProtoType(
TypeLocBuilder &TLB,
FunctionNoProtoTypeLoc TL) {
const FunctionNoProtoType *T = TL.getTypePtr();
QualType ResultType = getDerived().TransformType(TLB, TL.getReturnLoc());
if (ResultType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ResultType != T->getReturnType())
Result = getDerived().RebuildFunctionNoProtoType(ResultType);
FunctionNoProtoTypeLoc NewTL = TLB.push<FunctionNoProtoTypeLoc>(Result);
NewTL.setLocalRangeBegin(TL.getLocalRangeBegin());
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
NewTL.setLocalRangeEnd(TL.getLocalRangeEnd());
return Result;
}
template<typename Derived> QualType
TreeTransform<Derived>::TransformUnresolvedUsingType(TypeLocBuilder &TLB,
UnresolvedUsingTypeLoc TL) {
const UnresolvedUsingType *T = TL.getTypePtr();
Decl *D = getDerived().TransformDecl(TL.getNameLoc(), T->getDecl());
if (!D)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || D != T->getDecl()) {
Result = getDerived().RebuildUnresolvedUsingType(TL.getNameLoc(), D);
if (Result.isNull())
return QualType();
}
// We might get an arbitrary type spec type back. We should at
// least always get a type spec type, though.
TypeSpecTypeLoc NewTL = TLB.pushTypeSpec(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTypedefType(TypeLocBuilder &TLB,
TypedefTypeLoc TL) {
const TypedefType *T = TL.getTypePtr();
TypedefNameDecl *Typedef
= cast_or_null<TypedefNameDecl>(getDerived().TransformDecl(TL.getNameLoc(),
T->getDecl()));
if (!Typedef)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
Typedef != T->getDecl()) {
Result = getDerived().RebuildTypedefType(Typedef);
if (Result.isNull())
return QualType();
}
TypedefTypeLoc NewTL = TLB.push<TypedefTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTypeOfExprType(TypeLocBuilder &TLB,
TypeOfExprTypeLoc TL) {
// typeof expressions are not potentially evaluated contexts
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated,
Sema::ReuseLambdaContextDecl);
ExprResult E = getDerived().TransformExpr(TL.getUnderlyingExpr());
if (E.isInvalid())
return QualType();
E = SemaRef.HandleExprEvaluationContextForTypeof(E.get());
if (E.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
E.get() != TL.getUnderlyingExpr()) {
Result = getDerived().RebuildTypeOfExprType(E.get(), TL.getTypeofLoc());
if (Result.isNull())
return QualType();
}
else E.get();
TypeOfExprTypeLoc NewTL = TLB.push<TypeOfExprTypeLoc>(Result);
NewTL.setTypeofLoc(TL.getTypeofLoc());
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTypeOfType(TypeLocBuilder &TLB,
TypeOfTypeLoc TL) {
TypeSourceInfo* Old_Under_TI = TL.getUnderlyingTInfo();
TypeSourceInfo* New_Under_TI = getDerived().TransformType(Old_Under_TI);
if (!New_Under_TI)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || New_Under_TI != Old_Under_TI) {
Result = getDerived().RebuildTypeOfType(New_Under_TI->getType());
if (Result.isNull())
return QualType();
}
TypeOfTypeLoc NewTL = TLB.push<TypeOfTypeLoc>(Result);
NewTL.setTypeofLoc(TL.getTypeofLoc());
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
NewTL.setUnderlyingTInfo(New_Under_TI);
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDecltypeType(TypeLocBuilder &TLB,
DecltypeTypeLoc TL) {
const DecltypeType *T = TL.getTypePtr();
// decltype expressions are not potentially evaluated contexts
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated, nullptr,
Sema::ExpressionEvaluationContextRecord::EK_Decltype);
ExprResult E = getDerived().TransformExpr(T->getUnderlyingExpr());
if (E.isInvalid())
return QualType();
E = getSema().ActOnDecltypeExpression(E.get());
if (E.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
E.get() != T->getUnderlyingExpr()) {
Result = getDerived().RebuildDecltypeType(E.get(), TL.getNameLoc());
if (Result.isNull())
return QualType();
}
else E.get();
DecltypeTypeLoc NewTL = TLB.push<DecltypeTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformUnaryTransformType(
TypeLocBuilder &TLB,
UnaryTransformTypeLoc TL) {
QualType Result = TL.getType();
if (Result->isDependentType()) {
const UnaryTransformType *T = TL.getTypePtr();
QualType NewBase =
getDerived().TransformType(TL.getUnderlyingTInfo())->getType();
Result = getDerived().RebuildUnaryTransformType(NewBase,
T->getUTTKind(),
TL.getKWLoc());
if (Result.isNull())
return QualType();
}
UnaryTransformTypeLoc NewTL = TLB.push<UnaryTransformTypeLoc>(Result);
NewTL.setKWLoc(TL.getKWLoc());
NewTL.setParensRange(TL.getParensRange());
NewTL.setUnderlyingTInfo(TL.getUnderlyingTInfo());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDeducedTemplateSpecializationType(
TypeLocBuilder &TLB, DeducedTemplateSpecializationTypeLoc TL) {
const DeducedTemplateSpecializationType *T = TL.getTypePtr();
CXXScopeSpec SS;
TemplateName TemplateName = getDerived().TransformTemplateName(
SS, T->getTemplateName(), TL.getTemplateNameLoc());
if (TemplateName.isNull())
return QualType();
QualType OldDeduced = T->getDeducedType();
QualType NewDeduced;
if (!OldDeduced.isNull()) {
NewDeduced = getDerived().TransformType(OldDeduced);
if (NewDeduced.isNull())
return QualType();
}
QualType Result = getDerived().RebuildDeducedTemplateSpecializationType(
TemplateName, NewDeduced);
if (Result.isNull())
return QualType();
DeducedTemplateSpecializationTypeLoc NewTL =
TLB.push<DeducedTemplateSpecializationTypeLoc>(Result);
NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformRecordType(TypeLocBuilder &TLB,
RecordTypeLoc TL) {
const RecordType *T = TL.getTypePtr();
RecordDecl *Record
= cast_or_null<RecordDecl>(getDerived().TransformDecl(TL.getNameLoc(),
T->getDecl()));
if (!Record)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
Record != T->getDecl()) {
Result = getDerived().RebuildRecordType(Record);
if (Result.isNull())
return QualType();
}
RecordTypeLoc NewTL = TLB.push<RecordTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformEnumType(TypeLocBuilder &TLB,
EnumTypeLoc TL) {
const EnumType *T = TL.getTypePtr();
EnumDecl *Enum
= cast_or_null<EnumDecl>(getDerived().TransformDecl(TL.getNameLoc(),
T->getDecl()));
if (!Enum)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
Enum != T->getDecl()) {
Result = getDerived().RebuildEnumType(Enum);
if (Result.isNull())
return QualType();
}
EnumTypeLoc NewTL = TLB.push<EnumTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformInjectedClassNameType(
TypeLocBuilder &TLB,
InjectedClassNameTypeLoc TL) {
Decl *D = getDerived().TransformDecl(TL.getNameLoc(),
TL.getTypePtr()->getDecl());
if (!D) return QualType();
QualType T = SemaRef.Context.getTypeDeclType(cast<TypeDecl>(D));
TLB.pushTypeSpec(T).setNameLoc(TL.getNameLoc());
return T;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTemplateTypeParmType(
TypeLocBuilder &TLB,
TemplateTypeParmTypeLoc TL) {
return TransformTypeSpecType(TLB, TL);
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformSubstTemplateTypeParmType(
TypeLocBuilder &TLB,
SubstTemplateTypeParmTypeLoc TL) {
const SubstTemplateTypeParmType *T = TL.getTypePtr();
// Substitute into the replacement type, which itself might involve something
// that needs to be transformed. This only tends to occur with default
// template arguments of template template parameters.
TemporaryBase Rebase(*this, TL.getNameLoc(), DeclarationName());
QualType Replacement = getDerived().TransformType(T->getReplacementType());
if (Replacement.isNull())
return QualType();
// Always canonicalize the replacement type.
Replacement = SemaRef.Context.getCanonicalType(Replacement);
QualType Result
= SemaRef.Context.getSubstTemplateTypeParmType(T->getReplacedParameter(),
Replacement);
// Propagate type-source information.
SubstTemplateTypeParmTypeLoc NewTL
= TLB.push<SubstTemplateTypeParmTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformSubstTemplateTypeParmPackType(
TypeLocBuilder &TLB,
SubstTemplateTypeParmPackTypeLoc TL) {
return TransformTypeSpecType(TLB, TL);
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTemplateSpecializationType(
TypeLocBuilder &TLB,
TemplateSpecializationTypeLoc TL) {
const TemplateSpecializationType *T = TL.getTypePtr();
// The nested-name-specifier never matters in a TemplateSpecializationType,
// because we can't have a dependent nested-name-specifier anyway.
CXXScopeSpec SS;
TemplateName Template
= getDerived().TransformTemplateName(SS, T->getTemplateName(),
TL.getTemplateNameLoc());
if (Template.isNull())
return QualType();
return getDerived().TransformTemplateSpecializationType(TLB, TL, Template);
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformAtomicType(TypeLocBuilder &TLB,
AtomicTypeLoc TL) {
QualType ValueType = getDerived().TransformType(TLB, TL.getValueLoc());
if (ValueType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ValueType != TL.getValueLoc().getType()) {
Result = getDerived().RebuildAtomicType(ValueType, TL.getKWLoc());
if (Result.isNull())
return QualType();
}
AtomicTypeLoc NewTL = TLB.push<AtomicTypeLoc>(Result);
NewTL.setKWLoc(TL.getKWLoc());
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformPipeType(TypeLocBuilder &TLB,
PipeTypeLoc TL) {
QualType ValueType = getDerived().TransformType(TLB, TL.getValueLoc());
if (ValueType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ValueType != TL.getValueLoc().getType()) {
const PipeType *PT = Result->castAs<PipeType>();
bool isReadPipe = PT->isReadOnly();
Result = getDerived().RebuildPipeType(ValueType, TL.getKWLoc(), isReadPipe);
if (Result.isNull())
return QualType();
}
PipeTypeLoc NewTL = TLB.push<PipeTypeLoc>(Result);
NewTL.setKWLoc(TL.getKWLoc());
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformExtIntType(TypeLocBuilder &TLB,
ExtIntTypeLoc TL) {
const ExtIntType *EIT = TL.getTypePtr();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild()) {
Result = getDerived().RebuildExtIntType(EIT->isUnsigned(),
EIT->getNumBits(), TL.getNameLoc());
if (Result.isNull())
return QualType();
}
ExtIntTypeLoc NewTL = TLB.push<ExtIntTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformDependentExtIntType(
TypeLocBuilder &TLB, DependentExtIntTypeLoc TL) {
const DependentExtIntType *EIT = TL.getTypePtr();
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult BitsExpr = getDerived().TransformExpr(EIT->getNumBitsExpr());
BitsExpr = SemaRef.ActOnConstantExpression(BitsExpr);
if (BitsExpr.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || BitsExpr.get() != EIT->getNumBitsExpr()) {
Result = getDerived().RebuildDependentExtIntType(
EIT->isUnsigned(), BitsExpr.get(), TL.getNameLoc());
if (Result.isNull())
return QualType();
}
if (isa<DependentExtIntType>(Result)) {
DependentExtIntTypeLoc NewTL = TLB.push<DependentExtIntTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
} else {
ExtIntTypeLoc NewTL = TLB.push<ExtIntTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
}
return Result;
}
/// Simple iterator that traverses the template arguments in a
/// container that provides a \c getArgLoc() member function.
///
/// This iterator is intended to be used with the iterator form of
/// \c TreeTransform<Derived>::TransformTemplateArguments().
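///
/// Typical usage (illustrative, mirroring the calls later in this file):
/// \code
///   typedef TemplateArgumentLocContainerIterator<AutoTypeLoc> ArgIterator;
///   getDerived().TransformTemplateArguments(ArgIterator(TL, 0),
///                                           ArgIterator(TL, TL.getNumArgs()),
///                                           NewTemplateArgs);
/// \endcode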
template<typename ArgLocContainer>
class TemplateArgumentLocContainerIterator {
ArgLocContainer *Container;
unsigned Index;
public:
typedef TemplateArgumentLoc value_type;
typedef TemplateArgumentLoc reference;
typedef int difference_type;
typedef std::input_iterator_tag iterator_category;
class pointer {
TemplateArgumentLoc Arg;
public:
explicit pointer(TemplateArgumentLoc Arg) : Arg(Arg) { }
const TemplateArgumentLoc *operator->() const {
return &Arg;
}
};
TemplateArgumentLocContainerIterator() {}
TemplateArgumentLocContainerIterator(ArgLocContainer &Container,
unsigned Index)
: Container(&Container), Index(Index) { }
TemplateArgumentLocContainerIterator &operator++() {
++Index;
return *this;
}
TemplateArgumentLocContainerIterator operator++(int) {
TemplateArgumentLocContainerIterator Old(*this);
++(*this);
return Old;
}
TemplateArgumentLoc operator*() const {
return Container->getArgLoc(Index);
}
pointer operator->() const {
return pointer(Container->getArgLoc(Index));
}
friend bool operator==(const TemplateArgumentLocContainerIterator &X,
const TemplateArgumentLocContainerIterator &Y) {
return X.Container == Y.Container && X.Index == Y.Index;
}
friend bool operator!=(const TemplateArgumentLocContainerIterator &X,
const TemplateArgumentLocContainerIterator &Y) {
return !(X == Y);
}
};
template<typename Derived>
QualType TreeTransform<Derived>::TransformAutoType(TypeLocBuilder &TLB,
AutoTypeLoc TL) {
const AutoType *T = TL.getTypePtr();
QualType OldDeduced = T->getDeducedType();
QualType NewDeduced;
if (!OldDeduced.isNull()) {
NewDeduced = getDerived().TransformType(OldDeduced);
if (NewDeduced.isNull())
return QualType();
}
ConceptDecl *NewCD = nullptr;
TemplateArgumentListInfo NewTemplateArgs;
NestedNameSpecifierLoc NewNestedNameSpec;
if (T->isConstrained()) {
NewCD = cast_or_null<ConceptDecl>(getDerived().TransformDecl(
TL.getConceptNameLoc(), T->getTypeConstraintConcept()));
NewTemplateArgs.setLAngleLoc(TL.getLAngleLoc());
NewTemplateArgs.setRAngleLoc(TL.getRAngleLoc());
typedef TemplateArgumentLocContainerIterator<AutoTypeLoc> ArgIterator;
if (getDerived().TransformTemplateArguments(ArgIterator(TL, 0),
ArgIterator(TL,
TL.getNumArgs()),
NewTemplateArgs))
return QualType();
if (TL.getNestedNameSpecifierLoc()) {
NewNestedNameSpec
= getDerived().TransformNestedNameSpecifierLoc(
TL.getNestedNameSpecifierLoc());
if (!NewNestedNameSpec)
return QualType();
}
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || NewDeduced != OldDeduced ||
T->isDependentType() || T->isConstrained()) {
// FIXME: Maybe don't rebuild if all template arguments are the same.
llvm::SmallVector<TemplateArgument, 4> NewArgList;
NewArgList.reserve(NewTemplateArgs.size());
for (const auto &ArgLoc : NewTemplateArgs.arguments())
NewArgList.push_back(ArgLoc.getArgument());
Result = getDerived().RebuildAutoType(NewDeduced, T->getKeyword(), NewCD,
NewArgList);
if (Result.isNull())
return QualType();
}
AutoTypeLoc NewTL = TLB.push<AutoTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
NewTL.setNestedNameSpecifierLoc(NewNestedNameSpec);
NewTL.setTemplateKWLoc(TL.getTemplateKWLoc());
NewTL.setConceptNameLoc(TL.getConceptNameLoc());
NewTL.setFoundDecl(TL.getFoundDecl());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
- for (unsigned I = 0; I < TL.getNumArgs(); ++I)
+ for (unsigned I = 0; I < NewTL.getNumArgs(); ++I)
NewTL.setArgLocInfo(I, NewTemplateArgs.arguments()[I].getLocInfo());
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformTemplateSpecializationType(
TypeLocBuilder &TLB,
TemplateSpecializationTypeLoc TL,
TemplateName Template) {
TemplateArgumentListInfo NewTemplateArgs;
NewTemplateArgs.setLAngleLoc(TL.getLAngleLoc());
NewTemplateArgs.setRAngleLoc(TL.getRAngleLoc());
typedef TemplateArgumentLocContainerIterator<TemplateSpecializationTypeLoc>
ArgIterator;
if (getDerived().TransformTemplateArguments(ArgIterator(TL, 0),
ArgIterator(TL, TL.getNumArgs()),
NewTemplateArgs))
return QualType();
// FIXME: maybe don't rebuild if all the template arguments are the same.
QualType Result =
getDerived().RebuildTemplateSpecializationType(Template,
TL.getTemplateNameLoc(),
NewTemplateArgs);
if (!Result.isNull()) {
// Specializations of template template parameters are represented as
// TemplateSpecializationTypes, and substitution of type alias templates
// within a dependent context can transform them into
// DependentTemplateSpecializationTypes.
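// Illustrative example (hypothetical, not from this file): a use of
//   template <typename T> using A = typename T::template B<int>;
// such as A<U> with U still dependent rebuilds into the
// DependentTemplateSpecializationType 'typename U::template B<int>'.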
if (isa<DependentTemplateSpecializationType>(Result)) {
DependentTemplateSpecializationTypeLoc NewTL
= TLB.push<DependentTemplateSpecializationTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(SourceLocation());
NewTL.setQualifierLoc(NestedNameSpecifierLoc());
NewTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned i = 0, e = NewTemplateArgs.size(); i != e; ++i)
NewTL.setArgLocInfo(i, NewTemplateArgs[i].getLocInfo());
return Result;
}
TemplateSpecializationTypeLoc NewTL
= TLB.push<TemplateSpecializationTypeLoc>(Result);
NewTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned i = 0, e = NewTemplateArgs.size(); i != e; ++i)
NewTL.setArgLocInfo(i, NewTemplateArgs[i].getLocInfo());
}
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformDependentTemplateSpecializationType(
TypeLocBuilder &TLB,
DependentTemplateSpecializationTypeLoc TL,
TemplateName Template,
CXXScopeSpec &SS) {
TemplateArgumentListInfo NewTemplateArgs;
NewTemplateArgs.setLAngleLoc(TL.getLAngleLoc());
NewTemplateArgs.setRAngleLoc(TL.getRAngleLoc());
typedef TemplateArgumentLocContainerIterator<
DependentTemplateSpecializationTypeLoc> ArgIterator;
if (getDerived().TransformTemplateArguments(ArgIterator(TL, 0),
ArgIterator(TL, TL.getNumArgs()),
NewTemplateArgs))
return QualType();
// FIXME: maybe don't rebuild if all the template arguments are the same.
if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) {
QualType Result
= getSema().Context.getDependentTemplateSpecializationType(
TL.getTypePtr()->getKeyword(),
DTN->getQualifier(),
DTN->getIdentifier(),
NewTemplateArgs);
DependentTemplateSpecializationTypeLoc NewTL
= TLB.push<DependentTemplateSpecializationTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
NewTL.setQualifierLoc(SS.getWithLocInContext(SemaRef.Context));
NewTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned i = 0, e = NewTemplateArgs.size(); i != e; ++i)
NewTL.setArgLocInfo(i, NewTemplateArgs[i].getLocInfo());
return Result;
}
QualType Result
= getDerived().RebuildTemplateSpecializationType(Template,
TL.getTemplateNameLoc(),
NewTemplateArgs);
if (!Result.isNull()) {
/// FIXME: Wrap this in an elaborated-type-specifier?
TemplateSpecializationTypeLoc NewTL
= TLB.push<TemplateSpecializationTypeLoc>(Result);
NewTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned i = 0, e = NewTemplateArgs.size(); i != e; ++i)
NewTL.setArgLocInfo(i, NewTemplateArgs[i].getLocInfo());
}
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformElaboratedType(TypeLocBuilder &TLB,
ElaboratedTypeLoc TL) {
const ElaboratedType *T = TL.getTypePtr();
NestedNameSpecifierLoc QualifierLoc;
// NOTE: the qualifier in an ElaboratedType is optional.
if (TL.getQualifierLoc()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(TL.getQualifierLoc());
if (!QualifierLoc)
return QualType();
}
QualType NamedT = getDerived().TransformType(TLB, TL.getNamedTypeLoc());
if (NamedT.isNull())
return QualType();
// C++0x [dcl.type.elab]p2:
// If the identifier resolves to a typedef-name or the simple-template-id
// resolves to an alias template specialization, the
// elaborated-type-specifier is ill-formed.
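// Illustrative example (hypothetical, not from this file):
//   template <typename T> using Alias = SomeClass<T>;
//   struct Alias<int> x;   // ill-formed: Alias<int> names an alias template
//                          // specialization, not a class.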
if (T->getKeyword() != ETK_None && T->getKeyword() != ETK_Typename) {
if (const TemplateSpecializationType *TST =
NamedT->getAs<TemplateSpecializationType>()) {
TemplateName Template = TST->getTemplateName();
if (TypeAliasTemplateDecl *TAT = dyn_cast_or_null<TypeAliasTemplateDecl>(
Template.getAsTemplateDecl())) {
SemaRef.Diag(TL.getNamedTypeLoc().getBeginLoc(),
diag::err_tag_reference_non_tag)
<< TAT << Sema::NTK_TypeAliasTemplate
<< ElaboratedType::getTagTypeKindForKeyword(T->getKeyword());
SemaRef.Diag(TAT->getLocation(), diag::note_declared_at);
}
}
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
QualifierLoc != TL.getQualifierLoc() ||
NamedT != T->getNamedType()) {
Result = getDerived().RebuildElaboratedType(TL.getElaboratedKeywordLoc(),
T->getKeyword(),
QualifierLoc, NamedT);
if (Result.isNull())
return QualType();
}
ElaboratedTypeLoc NewTL = TLB.push<ElaboratedTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
NewTL.setQualifierLoc(QualifierLoc);
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformAttributedType(
TypeLocBuilder &TLB,
AttributedTypeLoc TL) {
const AttributedType *oldType = TL.getTypePtr();
QualType modifiedType = getDerived().TransformType(TLB, TL.getModifiedLoc());
if (modifiedType.isNull())
return QualType();
// oldAttr can be null if we started with a QualType rather than a TypeLoc.
const Attr *oldAttr = TL.getAttr();
const Attr *newAttr = oldAttr ? getDerived().TransformAttr(oldAttr) : nullptr;
if (oldAttr && !newAttr)
return QualType();
QualType result = TL.getType();
// FIXME: dependent operand expressions?
if (getDerived().AlwaysRebuild() ||
modifiedType != oldType->getModifiedType()) {
// TODO: this is really lame; we should really be rebuilding the
// equivalent type from first principles.
QualType equivalentType
= getDerived().TransformType(oldType->getEquivalentType());
if (equivalentType.isNull())
return QualType();
// Check whether we can add nullability; it is only represented as
// type sugar, and therefore cannot be diagnosed in any other way.
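// Illustrative example (hypothetical, not from this file): if "T _Nonnull"
// is instantiated with T = int, the modified type can no longer carry
// nullability, so the error has to be diagnosed here.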
if (auto nullability = oldType->getImmediateNullability()) {
if (!modifiedType->canHaveNullability()) {
SemaRef.Diag(TL.getAttr()->getLocation(),
diag::err_nullability_nonpointer)
<< DiagNullabilityKind(*nullability, false) << modifiedType;
return QualType();
}
}
result = SemaRef.Context.getAttributedType(TL.getAttrKind(),
modifiedType,
equivalentType);
}
AttributedTypeLoc newTL = TLB.push<AttributedTypeLoc>(result);
newTL.setAttr(newAttr);
return result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformParenType(TypeLocBuilder &TLB,
ParenTypeLoc TL) {
QualType Inner = getDerived().TransformType(TLB, TL.getInnerLoc());
if (Inner.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
Inner != TL.getInnerLoc().getType()) {
Result = getDerived().RebuildParenType(Inner);
if (Result.isNull())
return QualType();
}
ParenTypeLoc NewTL = TLB.push<ParenTypeLoc>(Result);
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
return Result;
}
template <typename Derived>
QualType
TreeTransform<Derived>::TransformMacroQualifiedType(TypeLocBuilder &TLB,
MacroQualifiedTypeLoc TL) {
QualType Inner = getDerived().TransformType(TLB, TL.getInnerLoc());
if (Inner.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || Inner != TL.getInnerLoc().getType()) {
Result =
getDerived().RebuildMacroQualifiedType(Inner, TL.getMacroIdentifier());
if (Result.isNull())
return QualType();
}
MacroQualifiedTypeLoc NewTL = TLB.push<MacroQualifiedTypeLoc>(Result);
NewTL.setExpansionLoc(TL.getExpansionLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDependentNameType(
TypeLocBuilder &TLB, DependentNameTypeLoc TL) {
return TransformDependentNameType(TLB, TL, false);
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDependentNameType(
TypeLocBuilder &TLB, DependentNameTypeLoc TL, bool DeducedTSTContext) {
const DependentNameType *T = TL.getTypePtr();
NestedNameSpecifierLoc QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(TL.getQualifierLoc());
if (!QualifierLoc)
return QualType();
QualType Result
= getDerived().RebuildDependentNameType(T->getKeyword(),
TL.getElaboratedKeywordLoc(),
QualifierLoc,
T->getIdentifier(),
TL.getNameLoc(),
DeducedTSTContext);
if (Result.isNull())
return QualType();
if (const ElaboratedType* ElabT = Result->getAs<ElaboratedType>()) {
QualType NamedT = ElabT->getNamedType();
TLB.pushTypeSpec(NamedT).setNameLoc(TL.getNameLoc());
ElaboratedTypeLoc NewTL = TLB.push<ElaboratedTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
NewTL.setQualifierLoc(QualifierLoc);
} else {
DependentNameTypeLoc NewTL = TLB.push<DependentNameTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
NewTL.setQualifierLoc(QualifierLoc);
NewTL.setNameLoc(TL.getNameLoc());
}
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::
TransformDependentTemplateSpecializationType(TypeLocBuilder &TLB,
DependentTemplateSpecializationTypeLoc TL) {
NestedNameSpecifierLoc QualifierLoc;
if (TL.getQualifierLoc()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(TL.getQualifierLoc());
if (!QualifierLoc)
return QualType();
}
return getDerived()
.TransformDependentTemplateSpecializationType(TLB, TL, QualifierLoc);
}
template<typename Derived>
QualType TreeTransform<Derived>::
TransformDependentTemplateSpecializationType(TypeLocBuilder &TLB,
DependentTemplateSpecializationTypeLoc TL,
NestedNameSpecifierLoc QualifierLoc) {
const DependentTemplateSpecializationType *T = TL.getTypePtr();
TemplateArgumentListInfo NewTemplateArgs;
NewTemplateArgs.setLAngleLoc(TL.getLAngleLoc());
NewTemplateArgs.setRAngleLoc(TL.getRAngleLoc());
typedef TemplateArgumentLocContainerIterator<
DependentTemplateSpecializationTypeLoc> ArgIterator;
if (getDerived().TransformTemplateArguments(ArgIterator(TL, 0),
ArgIterator(TL, TL.getNumArgs()),
NewTemplateArgs))
return QualType();
QualType Result = getDerived().RebuildDependentTemplateSpecializationType(
T->getKeyword(), QualifierLoc, TL.getTemplateKeywordLoc(),
T->getIdentifier(), TL.getTemplateNameLoc(), NewTemplateArgs,
/*AllowInjectedClassName*/ false);
if (Result.isNull())
return QualType();
if (const ElaboratedType *ElabT = dyn_cast<ElaboratedType>(Result)) {
QualType NamedT = ElabT->getNamedType();
// Copy information relevant to the template specialization.
TemplateSpecializationTypeLoc NamedTL
= TLB.push<TemplateSpecializationTypeLoc>(NamedT);
NamedTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
NamedTL.setTemplateNameLoc(TL.getTemplateNameLoc());
NamedTL.setLAngleLoc(TL.getLAngleLoc());
NamedTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned I = 0, E = NewTemplateArgs.size(); I != E; ++I)
NamedTL.setArgLocInfo(I, NewTemplateArgs[I].getLocInfo());
// Copy information relevant to the elaborated type.
ElaboratedTypeLoc NewTL = TLB.push<ElaboratedTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
NewTL.setQualifierLoc(QualifierLoc);
} else if (isa<DependentTemplateSpecializationType>(Result)) {
DependentTemplateSpecializationTypeLoc SpecTL
= TLB.push<DependentTemplateSpecializationTypeLoc>(Result);
SpecTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
SpecTL.setQualifierLoc(QualifierLoc);
SpecTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
SpecTL.setTemplateNameLoc(TL.getTemplateNameLoc());
SpecTL.setLAngleLoc(TL.getLAngleLoc());
SpecTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned I = 0, E = NewTemplateArgs.size(); I != E; ++I)
SpecTL.setArgLocInfo(I, NewTemplateArgs[I].getLocInfo());
} else {
TemplateSpecializationTypeLoc SpecTL
= TLB.push<TemplateSpecializationTypeLoc>(Result);
SpecTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
SpecTL.setTemplateNameLoc(TL.getTemplateNameLoc());
SpecTL.setLAngleLoc(TL.getLAngleLoc());
SpecTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned I = 0, E = NewTemplateArgs.size(); I != E; ++I)
SpecTL.setArgLocInfo(I, NewTemplateArgs[I].getLocInfo());
}
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformPackExpansionType(TypeLocBuilder &TLB,
PackExpansionTypeLoc TL) {
QualType Pattern
= getDerived().TransformType(TLB, TL.getPatternLoc());
if (Pattern.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
Pattern != TL.getPatternLoc().getType()) {
Result = getDerived().RebuildPackExpansionType(Pattern,
TL.getPatternLoc().getSourceRange(),
TL.getEllipsisLoc(),
TL.getTypePtr()->getNumExpansions());
if (Result.isNull())
return QualType();
}
PackExpansionTypeLoc NewT = TLB.push<PackExpansionTypeLoc>(Result);
NewT.setEllipsisLoc(TL.getEllipsisLoc());
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformObjCInterfaceType(TypeLocBuilder &TLB,
ObjCInterfaceTypeLoc TL) {
// ObjCInterfaceType is never dependent.
TLB.pushFullCopy(TL);
return TL.getType();
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformObjCTypeParamType(TypeLocBuilder &TLB,
ObjCTypeParamTypeLoc TL) {
const ObjCTypeParamType *T = TL.getTypePtr();
ObjCTypeParamDecl *OTP = cast_or_null<ObjCTypeParamDecl>(
getDerived().TransformDecl(T->getDecl()->getLocation(), T->getDecl()));
if (!OTP)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
OTP != T->getDecl()) {
Result = getDerived().RebuildObjCTypeParamType(OTP,
TL.getProtocolLAngleLoc(),
llvm::makeArrayRef(TL.getTypePtr()->qual_begin(),
TL.getNumProtocols()),
TL.getProtocolLocs(),
TL.getProtocolRAngleLoc());
if (Result.isNull())
return QualType();
}
ObjCTypeParamTypeLoc NewTL = TLB.push<ObjCTypeParamTypeLoc>(Result);
if (TL.getNumProtocols()) {
NewTL.setProtocolLAngleLoc(TL.getProtocolLAngleLoc());
for (unsigned i = 0, n = TL.getNumProtocols(); i != n; ++i)
NewTL.setProtocolLoc(i, TL.getProtocolLoc(i));
NewTL.setProtocolRAngleLoc(TL.getProtocolRAngleLoc());
}
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformObjCObjectType(TypeLocBuilder &TLB,
ObjCObjectTypeLoc TL) {
// Transform base type.
QualType BaseType = getDerived().TransformType(TLB, TL.getBaseLoc());
if (BaseType.isNull())
return QualType();
bool AnyChanged = BaseType != TL.getBaseLoc().getType();
// Transform type arguments.
SmallVector<TypeSourceInfo *, 4> NewTypeArgInfos;
for (unsigned i = 0, n = TL.getNumTypeArgs(); i != n; ++i) {
TypeSourceInfo *TypeArgInfo = TL.getTypeArgTInfo(i);
TypeLoc TypeArgLoc = TypeArgInfo->getTypeLoc();
QualType TypeArg = TypeArgInfo->getType();
if (auto PackExpansionLoc = TypeArgLoc.getAs<PackExpansionTypeLoc>()) {
AnyChanged = true;
// We have a pack expansion. Instantiate it.
const auto *PackExpansion = PackExpansionLoc.getType()
->castAs<PackExpansionType>();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(PackExpansion->getPattern(),
Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can
// and should be expanded.
TypeLoc PatternLoc = PackExpansionLoc.getPatternLoc();
bool Expand = false;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions = PackExpansion->getNumExpansions();
if (getDerived().TryExpandParameterPacks(
PackExpansionLoc.getEllipsisLoc(), PatternLoc.getSourceRange(),
Unexpanded, Expand, RetainExpansion, NumExpansions))
return QualType();
if (!Expand) {
// We can't expand this pack expansion into separate arguments yet;
// just substitute into the pattern and create a new pack expansion
// type.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
TypeLocBuilder TypeArgBuilder;
TypeArgBuilder.reserve(PatternLoc.getFullDataSize());
QualType NewPatternType = getDerived().TransformType(TypeArgBuilder,
PatternLoc);
if (NewPatternType.isNull())
return QualType();
QualType NewExpansionType = SemaRef.Context.getPackExpansionType(
NewPatternType, NumExpansions);
auto NewExpansionLoc = TLB.push<PackExpansionTypeLoc>(NewExpansionType);
NewExpansionLoc.setEllipsisLoc(PackExpansionLoc.getEllipsisLoc());
NewTypeArgInfos.push_back(
TypeArgBuilder.getTypeSourceInfo(SemaRef.Context, NewExpansionType));
continue;
}
// Substitute into the pack expansion pattern for each slice of the
// pack.
for (unsigned ArgIdx = 0; ArgIdx != *NumExpansions; ++ArgIdx) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), ArgIdx);
TypeLocBuilder TypeArgBuilder;
TypeArgBuilder.reserve(PatternLoc.getFullDataSize());
QualType NewTypeArg = getDerived().TransformType(TypeArgBuilder,
PatternLoc);
if (NewTypeArg.isNull())
return QualType();
NewTypeArgInfos.push_back(
TypeArgBuilder.getTypeSourceInfo(SemaRef.Context, NewTypeArg));
}
continue;
}
TypeLocBuilder TypeArgBuilder;
TypeArgBuilder.reserve(TypeArgLoc.getFullDataSize());
QualType NewTypeArg = getDerived().TransformType(TypeArgBuilder, TypeArgLoc);
if (NewTypeArg.isNull())
return QualType();
// If nothing changed, just keep the old TypeSourceInfo.
if (NewTypeArg == TypeArg) {
NewTypeArgInfos.push_back(TypeArgInfo);
continue;
}
NewTypeArgInfos.push_back(
TypeArgBuilder.getTypeSourceInfo(SemaRef.Context, NewTypeArg));
AnyChanged = true;
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || AnyChanged) {
// Rebuild the type.
Result = getDerived().RebuildObjCObjectType(
BaseType, TL.getBeginLoc(), TL.getTypeArgsLAngleLoc(), NewTypeArgInfos,
TL.getTypeArgsRAngleLoc(), TL.getProtocolLAngleLoc(),
llvm::makeArrayRef(TL.getTypePtr()->qual_begin(), TL.getNumProtocols()),
TL.getProtocolLocs(), TL.getProtocolRAngleLoc());
if (Result.isNull())
return QualType();
}
ObjCObjectTypeLoc NewT = TLB.push<ObjCObjectTypeLoc>(Result);
NewT.setHasBaseTypeAsWritten(true);
NewT.setTypeArgsLAngleLoc(TL.getTypeArgsLAngleLoc());
for (unsigned i = 0, n = TL.getNumTypeArgs(); i != n; ++i)
NewT.setTypeArgTInfo(i, NewTypeArgInfos[i]);
NewT.setTypeArgsRAngleLoc(TL.getTypeArgsRAngleLoc());
NewT.setProtocolLAngleLoc(TL.getProtocolLAngleLoc());
for (unsigned i = 0, n = TL.getNumProtocols(); i != n; ++i)
NewT.setProtocolLoc(i, TL.getProtocolLoc(i));
NewT.setProtocolRAngleLoc(TL.getProtocolRAngleLoc());
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformObjCObjectPointerType(TypeLocBuilder &TLB,
ObjCObjectPointerTypeLoc TL) {
QualType PointeeType = getDerived().TransformType(TLB, TL.getPointeeLoc());
if (PointeeType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
PointeeType != TL.getPointeeLoc().getType()) {
Result = getDerived().RebuildObjCObjectPointerType(PointeeType,
TL.getStarLoc());
if (Result.isNull())
return QualType();
}
ObjCObjectPointerTypeLoc NewT = TLB.push<ObjCObjectPointerTypeLoc>(Result);
NewT.setStarLoc(TL.getStarLoc());
return Result;
}
//===----------------------------------------------------------------------===//
// Statement transformation
//===----------------------------------------------------------------------===//
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformNullStmt(NullStmt *S) {
return S;
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCompoundStmt(CompoundStmt *S) {
return getDerived().TransformCompoundStmt(S, false);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCompoundStmt(CompoundStmt *S,
bool IsStmtExpr) {
Sema::CompoundScopeRAII CompoundScope(getSema());
const Stmt *ExprResult = S->getStmtExprResult();
bool SubStmtInvalid = false;
bool SubStmtChanged = false;
SmallVector<Stmt*, 8> Statements;
for (auto *B : S->body()) {
StmtResult Result = getDerived().TransformStmt(
B, IsStmtExpr && B == ExprResult ? SDK_StmtExprResult : SDK_Discarded);
if (Result.isInvalid()) {
// Immediately fail if this was a DeclStmt, since it's very
// likely that this will cause problems for future statements.
if (isa<DeclStmt>(B))
return StmtError();
// Otherwise, just keep processing substatements and fail later.
SubStmtInvalid = true;
continue;
}
SubStmtChanged = SubStmtChanged || Result.get() != B;
Statements.push_back(Result.getAs<Stmt>());
}
if (SubStmtInvalid)
return StmtError();
if (!getDerived().AlwaysRebuild() &&
!SubStmtChanged)
return S;
return getDerived().RebuildCompoundStmt(S->getLBracLoc(),
Statements,
S->getRBracLoc(),
IsStmtExpr);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCaseStmt(CaseStmt *S) {
ExprResult LHS, RHS;
{
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
// Transform the left-hand case value.
LHS = getDerived().TransformExpr(S->getLHS());
LHS = SemaRef.ActOnCaseExpr(S->getCaseLoc(), LHS);
if (LHS.isInvalid())
return StmtError();
// Transform the right-hand case value (for the GNU case-range extension).
RHS = getDerived().TransformExpr(S->getRHS());
RHS = SemaRef.ActOnCaseExpr(S->getCaseLoc(), RHS);
if (RHS.isInvalid())
return StmtError();
}
// Build the case statement.
// Case statements are always rebuilt so that they will be attached to their
// transformed switch statement.
StmtResult Case = getDerived().RebuildCaseStmt(S->getCaseLoc(),
LHS.get(),
S->getEllipsisLoc(),
RHS.get(),
S->getColonLoc());
if (Case.isInvalid())
return StmtError();
// Transform the statement following the case
StmtResult SubStmt =
getDerived().TransformStmt(S->getSubStmt());
if (SubStmt.isInvalid())
return StmtError();
// Attach the body to the case statement
return getDerived().RebuildCaseStmtBody(Case.get(), SubStmt.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformDefaultStmt(DefaultStmt *S) {
// Transform the statement following the default case
StmtResult SubStmt =
getDerived().TransformStmt(S->getSubStmt());
if (SubStmt.isInvalid())
return StmtError();
// Default statements are always rebuilt
return getDerived().RebuildDefaultStmt(S->getDefaultLoc(), S->getColonLoc(),
SubStmt.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformLabelStmt(LabelStmt *S, StmtDiscardKind SDK) {
StmtResult SubStmt = getDerived().TransformStmt(S->getSubStmt(), SDK);
if (SubStmt.isInvalid())
return StmtError();
Decl *LD = getDerived().TransformDecl(S->getDecl()->getLocation(),
S->getDecl());
if (!LD)
return StmtError();
// If we're transforming "in-place" (we're not creating new local
// declarations), assume we're replacing the old label statement
// and clear out the reference to it.
if (LD == S->getDecl())
S->getDecl()->setStmt(nullptr);
// FIXME: Pass the real colon location in.
return getDerived().RebuildLabelStmt(S->getIdentLoc(),
cast<LabelDecl>(LD), SourceLocation(),
SubStmt.get());
}
template <typename Derived>
const Attr *TreeTransform<Derived>::TransformAttr(const Attr *R) {
if (!R)
return R;
switch (R->getKind()) {
// Transform attributes with a pragma spelling by calling TransformXXXAttr.
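// For illustration (assuming LoopHint is listed as a PRAGMA_SPELLING_ATTR in
// AttrList.inc), the macro below expands to roughly:
//   case attr::LoopHint:
//     return getDerived().TransformLoopHintAttr(cast<LoopHintAttr>(R));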
#define ATTR(X)
#define PRAGMA_SPELLING_ATTR(X) \
case attr::X: \
return getDerived().Transform##X##Attr(cast<X##Attr>(R));
#include "clang/Basic/AttrList.inc"
default:
return R;
}
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformAttributedStmt(AttributedStmt *S,
StmtDiscardKind SDK) {
bool AttrsChanged = false;
SmallVector<const Attr *, 1> Attrs;
// Visit attributes and keep track if any are transformed.
for (const auto *I : S->getAttrs()) {
const Attr *R = getDerived().TransformAttr(I);
AttrsChanged |= (I != R);
if (R)
Attrs.push_back(R);
}
StmtResult SubStmt = getDerived().TransformStmt(S->getSubStmt(), SDK);
if (SubStmt.isInvalid())
return StmtError();
if (SubStmt.get() == S->getSubStmt() && !AttrsChanged)
return S;
// If transforming every attribute in the statement failed, don't build an
// AttributedStmt without attributes; just return the substatement.
if (Attrs.empty())
return SubStmt;
return getDerived().RebuildAttributedStmt(S->getAttrLoc(), Attrs,
SubStmt.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformIfStmt(IfStmt *S) {
// Transform the initialization statement
StmtResult Init = getDerived().TransformStmt(S->getInit());
if (Init.isInvalid())
return StmtError();
// Transform the condition
Sema::ConditionResult Cond = getDerived().TransformCondition(
S->getIfLoc(), S->getConditionVariable(), S->getCond(),
S->isConstexpr() ? Sema::ConditionKind::ConstexprIf
: Sema::ConditionKind::Boolean);
if (Cond.isInvalid())
return StmtError();
// If this is a constexpr if, determine which arm we should instantiate.
llvm::Optional<bool> ConstexprConditionValue;
if (S->isConstexpr())
ConstexprConditionValue = Cond.getKnownValue();
// Transform the "then" branch.
StmtResult Then;
if (!ConstexprConditionValue || *ConstexprConditionValue) {
Then = getDerived().TransformStmt(S->getThen());
if (Then.isInvalid())
return StmtError();
} else {
Then = new (getSema().Context) NullStmt(S->getThen()->getBeginLoc());
}
// Transform the "else" branch.
StmtResult Else;
if (!ConstexprConditionValue || !*ConstexprConditionValue) {
Else = getDerived().TransformStmt(S->getElse());
if (Else.isInvalid())
return StmtError();
}
if (!getDerived().AlwaysRebuild() &&
Init.get() == S->getInit() &&
Cond.get() == std::make_pair(S->getConditionVariable(), S->getCond()) &&
Then.get() == S->getThen() &&
Else.get() == S->getElse())
return S;
return getDerived().RebuildIfStmt(
S->getIfLoc(), S->isConstexpr(), S->getLParenLoc(), Cond,
S->getRParenLoc(), Init.get(), Then.get(), S->getElseLoc(), Else.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformSwitchStmt(SwitchStmt *S) {
// Transform the initialization statement
StmtResult Init = getDerived().TransformStmt(S->getInit());
if (Init.isInvalid())
return StmtError();
// Transform the condition.
Sema::ConditionResult Cond = getDerived().TransformCondition(
S->getSwitchLoc(), S->getConditionVariable(), S->getCond(),
Sema::ConditionKind::Switch);
if (Cond.isInvalid())
return StmtError();
// Rebuild the switch statement.
StmtResult Switch =
getDerived().RebuildSwitchStmtStart(S->getSwitchLoc(), S->getLParenLoc(),
Init.get(), Cond, S->getRParenLoc());
if (Switch.isInvalid())
return StmtError();
// Transform the body of the switch statement.
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
// Complete the switch statement.
return getDerived().RebuildSwitchStmtBody(S->getSwitchLoc(), Switch.get(),
Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformWhileStmt(WhileStmt *S) {
// Transform the condition
Sema::ConditionResult Cond = getDerived().TransformCondition(
S->getWhileLoc(), S->getConditionVariable(), S->getCond(),
Sema::ConditionKind::Boolean);
if (Cond.isInvalid())
return StmtError();
// Transform the body
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
if (!getDerived().AlwaysRebuild() &&
Cond.get() == std::make_pair(S->getConditionVariable(), S->getCond()) &&
Body.get() == S->getBody())
return Owned(S);
return getDerived().RebuildWhileStmt(S->getWhileLoc(), S->getLParenLoc(),
Cond, S->getRParenLoc(), Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformDoStmt(DoStmt *S) {
// Transform the body
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
// Transform the condition
ExprResult Cond = getDerived().TransformExpr(S->getCond());
if (Cond.isInvalid())
return StmtError();
if (!getDerived().AlwaysRebuild() &&
Cond.get() == S->getCond() &&
Body.get() == S->getBody())
return S;
return getDerived().RebuildDoStmt(S->getDoLoc(), Body.get(), S->getWhileLoc(),
/*FIXME:*/S->getWhileLoc(), Cond.get(),
S->getRParenLoc());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformForStmt(ForStmt *S) {
if (getSema().getLangOpts().OpenMP)
getSema().startOpenMPLoop();
// Transform the initialization statement
StmtResult Init = getDerived().TransformStmt(S->getInit());
if (Init.isInvalid())
return StmtError();
// In an OpenMP loop region, the loop control variable must be captured and be
// private. Perform analysis of the first part (if any).
if (getSema().getLangOpts().OpenMP && Init.isUsable())
getSema().ActOnOpenMPLoopInitialization(S->getForLoc(), Init.get());
// Transform the condition
Sema::ConditionResult Cond = getDerived().TransformCondition(
S->getForLoc(), S->getConditionVariable(), S->getCond(),
Sema::ConditionKind::Boolean);
if (Cond.isInvalid())
return StmtError();
// Transform the increment
ExprResult Inc = getDerived().TransformExpr(S->getInc());
if (Inc.isInvalid())
return StmtError();
Sema::FullExprArg FullInc(getSema().MakeFullDiscardedValueExpr(Inc.get()));
if (S->getInc() && !FullInc.get())
return StmtError();
// Transform the body
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
if (!getDerived().AlwaysRebuild() &&
Init.get() == S->getInit() &&
Cond.get() == std::make_pair(S->getConditionVariable(), S->getCond()) &&
Inc.get() == S->getInc() &&
Body.get() == S->getBody())
return S;
return getDerived().RebuildForStmt(S->getForLoc(), S->getLParenLoc(),
Init.get(), Cond, FullInc,
S->getRParenLoc(), Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformGotoStmt(GotoStmt *S) {
Decl *LD = getDerived().TransformDecl(S->getLabel()->getLocation(),
S->getLabel());
if (!LD)
return StmtError();
// Goto statements must always be rebuilt, to resolve the label.
return getDerived().RebuildGotoStmt(S->getGotoLoc(), S->getLabelLoc(),
cast<LabelDecl>(LD));
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformIndirectGotoStmt(IndirectGotoStmt *S) {
ExprResult Target = getDerived().TransformExpr(S->getTarget());
if (Target.isInvalid())
return StmtError();
Target = SemaRef.MaybeCreateExprWithCleanups(Target.get());
if (!getDerived().AlwaysRebuild() &&
Target.get() == S->getTarget())
return S;
return getDerived().RebuildIndirectGotoStmt(S->getGotoLoc(), S->getStarLoc(),
Target.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformContinueStmt(ContinueStmt *S) {
return S;
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformBreakStmt(BreakStmt *S) {
return S;
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformReturnStmt(ReturnStmt *S) {
ExprResult Result = getDerived().TransformInitializer(S->getRetValue(),
/*NotCopyInit*/false);
if (Result.isInvalid())
return StmtError();
// FIXME: We always rebuild the return statement because there is no way
// to tell whether the return type of the function has changed.
return getDerived().RebuildReturnStmt(S->getReturnLoc(), Result.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformDeclStmt(DeclStmt *S) {
bool DeclChanged = false;
SmallVector<Decl *, 4> Decls;
for (auto *D : S->decls()) {
Decl *Transformed = getDerived().TransformDefinition(D->getLocation(), D);
if (!Transformed)
return StmtError();
if (Transformed != D)
DeclChanged = true;
Decls.push_back(Transformed);
}
if (!getDerived().AlwaysRebuild() && !DeclChanged)
return S;
return getDerived().RebuildDeclStmt(Decls, S->getBeginLoc(), S->getEndLoc());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformGCCAsmStmt(GCCAsmStmt *S) {
SmallVector<Expr*, 8> Constraints;
SmallVector<Expr*, 8> Exprs;
SmallVector<IdentifierInfo *, 4> Names;
ExprResult AsmString;
SmallVector<Expr*, 8> Clobbers;
bool ExprsChanged = false;
// Go through the outputs.
for (unsigned I = 0, E = S->getNumOutputs(); I != E; ++I) {
Names.push_back(S->getOutputIdentifier(I));
// No need to transform the constraint literal.
Constraints.push_back(S->getOutputConstraintLiteral(I));
// Transform the output expr.
Expr *OutputExpr = S->getOutputExpr(I);
ExprResult Result = getDerived().TransformExpr(OutputExpr);
if (Result.isInvalid())
return StmtError();
ExprsChanged |= Result.get() != OutputExpr;
Exprs.push_back(Result.get());
}
// Go through the inputs.
for (unsigned I = 0, E = S->getNumInputs(); I != E; ++I) {
Names.push_back(S->getInputIdentifier(I));
// No need to transform the constraint literal.
Constraints.push_back(S->getInputConstraintLiteral(I));
// Transform the input expr.
Expr *InputExpr = S->getInputExpr(I);
ExprResult Result = getDerived().TransformExpr(InputExpr);
if (Result.isInvalid())
return StmtError();
ExprsChanged |= Result.get() != InputExpr;
Exprs.push_back(Result.get());
}
// Go through the Labels.
for (unsigned I = 0, E = S->getNumLabels(); I != E; ++I) {
Names.push_back(S->getLabelIdentifier(I));
ExprResult Result = getDerived().TransformExpr(S->getLabelExpr(I));
if (Result.isInvalid())
return StmtError();
ExprsChanged |= Result.get() != S->getLabelExpr(I);
Exprs.push_back(Result.get());
}
if (!getDerived().AlwaysRebuild() && !ExprsChanged)
return S;
// Go through the clobbers.
for (unsigned I = 0, E = S->getNumClobbers(); I != E; ++I)
Clobbers.push_back(S->getClobberStringLiteral(I));
// No need to transform the asm string literal.
AsmString = S->getAsmString();
return getDerived().RebuildGCCAsmStmt(S->getAsmLoc(), S->isSimple(),
S->isVolatile(), S->getNumOutputs(),
S->getNumInputs(), Names.data(),
Constraints, Exprs, AsmString.get(),
Clobbers, S->getNumLabels(),
S->getRParenLoc());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformMSAsmStmt(MSAsmStmt *S) {
ArrayRef<Token> AsmToks =
llvm::makeArrayRef(S->getAsmToks(), S->getNumAsmToks());
bool HadError = false, HadChange = false;
ArrayRef<Expr*> SrcExprs = S->getAllExprs();
SmallVector<Expr*, 8> TransformedExprs;
TransformedExprs.reserve(SrcExprs.size());
for (unsigned i = 0, e = SrcExprs.size(); i != e; ++i) {
ExprResult Result = getDerived().TransformExpr(SrcExprs[i]);
if (!Result.isUsable()) {
HadError = true;
} else {
HadChange |= (Result.get() != SrcExprs[i]);
TransformedExprs.push_back(Result.get());
}
}
if (HadError) return StmtError();
if (!HadChange && !getDerived().AlwaysRebuild())
return Owned(S);
return getDerived().RebuildMSAsmStmt(S->getAsmLoc(), S->getLBraceLoc(),
AsmToks, S->getAsmString(),
S->getNumOutputs(), S->getNumInputs(),
S->getAllConstraints(), S->getClobbers(),
TransformedExprs, S->getEndLoc());
}
// C++ Coroutines TS
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCoroutineBodyStmt(CoroutineBodyStmt *S) {
auto *ScopeInfo = SemaRef.getCurFunction();
auto *FD = cast<FunctionDecl>(SemaRef.CurContext);
assert(FD && ScopeInfo && !ScopeInfo->CoroutinePromise &&
ScopeInfo->NeedsCoroutineSuspends &&
ScopeInfo->CoroutineSuspends.first == nullptr &&
ScopeInfo->CoroutineSuspends.second == nullptr &&
"expected clean scope info");
// Set that we have (possibly-invalid) suspend points before we do anything
// that may fail.
ScopeInfo->setNeedsCoroutineSuspends(false);
// We re-build the coroutine promise object (and the coroutine parameters its
// type and constructor depend on) based on the types used in our current
// function. We must do so, and set it on the current FunctionScopeInfo,
// before attempting to transform the other parts of the coroutine body
// statement, such as the implicit suspend statements (because those
// statements reference the FunctionScopeInfo::CoroutinePromise).
if (!SemaRef.buildCoroutineParameterMoves(FD->getLocation()))
return StmtError();
auto *Promise = SemaRef.buildCoroutinePromise(FD->getLocation());
if (!Promise)
return StmtError();
getDerived().transformedLocalDecl(S->getPromiseDecl(), {Promise});
ScopeInfo->CoroutinePromise = Promise;
// Transform the implicit coroutine statements constructed using dependent
// types during the previous parse: initial and final suspensions, the return
// object, and others. We also transform the coroutine function's body.
StmtResult InitSuspend = getDerived().TransformStmt(S->getInitSuspendStmt());
if (InitSuspend.isInvalid())
return StmtError();
StmtResult FinalSuspend =
getDerived().TransformStmt(S->getFinalSuspendStmt());
if (FinalSuspend.isInvalid() ||
!SemaRef.checkFinalSuspendNoThrow(FinalSuspend.get()))
return StmtError();
ScopeInfo->setCoroutineSuspends(InitSuspend.get(), FinalSuspend.get());
assert(isa<Expr>(InitSuspend.get()) && isa<Expr>(FinalSuspend.get()));
StmtResult BodyRes = getDerived().TransformStmt(S->getBody());
if (BodyRes.isInvalid())
return StmtError();
CoroutineStmtBuilder Builder(SemaRef, *FD, *ScopeInfo, BodyRes.get());
if (Builder.isInvalid())
return StmtError();
Expr *ReturnObject = S->getReturnValueInit();
assert(ReturnObject && "the return object is expected to be valid");
ExprResult Res = getDerived().TransformInitializer(ReturnObject,
/*NotCopyInit*/ false);
if (Res.isInvalid())
return StmtError();
Builder.ReturnValue = Res.get();
// If during the previous parse the coroutine still had a dependent promise
// type, we may need to build some implicit coroutine statements
// (such as exception and fallthrough handlers) for the first time.
if (S->hasDependentPromiseType()) {
// We can only build these statements, however, if the current promise type
// is not dependent.
if (!Promise->getType()->isDependentType()) {
assert(!S->getFallthroughHandler() && !S->getExceptionHandler() &&
!S->getReturnStmtOnAllocFailure() && !S->getDeallocate() &&
"these nodes should not have been built yet");
if (!Builder.buildDependentStatements())
return StmtError();
}
} else {
if (auto *OnFallthrough = S->getFallthroughHandler()) {
StmtResult Res = getDerived().TransformStmt(OnFallthrough);
if (Res.isInvalid())
return StmtError();
Builder.OnFallthrough = Res.get();
}
if (auto *OnException = S->getExceptionHandler()) {
StmtResult Res = getDerived().TransformStmt(OnException);
if (Res.isInvalid())
return StmtError();
Builder.OnException = Res.get();
}
if (auto *OnAllocFailure = S->getReturnStmtOnAllocFailure()) {
StmtResult Res = getDerived().TransformStmt(OnAllocFailure);
if (Res.isInvalid())
return StmtError();
Builder.ReturnStmtOnAllocFailure = Res.get();
}
// Transform any additional statements we may have already built
assert(S->getAllocate() && S->getDeallocate() &&
"allocation and deallocation calls must already be built");
ExprResult AllocRes = getDerived().TransformExpr(S->getAllocate());
if (AllocRes.isInvalid())
return StmtError();
Builder.Allocate = AllocRes.get();
ExprResult DeallocRes = getDerived().TransformExpr(S->getDeallocate());
if (DeallocRes.isInvalid())
return StmtError();
Builder.Deallocate = DeallocRes.get();
assert(S->getResultDecl() && "ResultDecl must already be built");
StmtResult ResultDecl = getDerived().TransformStmt(S->getResultDecl());
if (ResultDecl.isInvalid())
return StmtError();
Builder.ResultDecl = ResultDecl.get();
if (auto *ReturnStmt = S->getReturnStmt()) {
StmtResult Res = getDerived().TransformStmt(ReturnStmt);
if (Res.isInvalid())
return StmtError();
Builder.ReturnStmt = Res.get();
}
}
return getDerived().RebuildCoroutineBodyStmt(Builder);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCoreturnStmt(CoreturnStmt *S) {
ExprResult Result = getDerived().TransformInitializer(S->getOperand(),
/*NotCopyInit*/false);
if (Result.isInvalid())
return StmtError();
// Always rebuild; we don't know if this needs to be injected into a new
// context or if the promise type has changed.
return getDerived().RebuildCoreturnStmt(S->getKeywordLoc(), Result.get(),
S->isImplicit());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCoawaitExpr(CoawaitExpr *E) {
ExprResult Result = getDerived().TransformInitializer(E->getOperand(),
/*NotCopyInit*/false);
if (Result.isInvalid())
return ExprError();
// Always rebuild; we don't know if this needs to be injected into a new
// context or if the promise type has changed.
return getDerived().RebuildCoawaitExpr(E->getKeywordLoc(), Result.get(),
E->isImplicit());
}
template <typename Derived>
ExprResult
TreeTransform<Derived>::TransformDependentCoawaitExpr(DependentCoawaitExpr *E) {
ExprResult OperandResult = getDerived().TransformInitializer(E->getOperand(),
/*NotCopyInit*/ false);
if (OperandResult.isInvalid())
return ExprError();
ExprResult LookupResult = getDerived().TransformUnresolvedLookupExpr(
E->getOperatorCoawaitLookup());
if (LookupResult.isInvalid())
return ExprError();
// Always rebuild; we don't know if this needs to be injected into a new
// context or if the promise type has changed.
return getDerived().RebuildDependentCoawaitExpr(
E->getKeywordLoc(), OperandResult.get(),
cast<UnresolvedLookupExpr>(LookupResult.get()));
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCoyieldExpr(CoyieldExpr *E) {
ExprResult Result = getDerived().TransformInitializer(E->getOperand(),
/*NotCopyInit*/false);
if (Result.isInvalid())
return ExprError();
// Always rebuild; we don't know if this needs to be injected into a new
// context or if the promise type has changed.
return getDerived().RebuildCoyieldExpr(E->getKeywordLoc(), Result.get());
}
// Objective-C Statements.
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAtTryStmt(ObjCAtTryStmt *S) {
// Transform the body of the @try.
StmtResult TryBody = getDerived().TransformStmt(S->getTryBody());
if (TryBody.isInvalid())
return StmtError();
// Transform the @catch statements (if present).
bool AnyCatchChanged = false;
SmallVector<Stmt*, 8> CatchStmts;
for (unsigned I = 0, N = S->getNumCatchStmts(); I != N; ++I) {
StmtResult Catch = getDerived().TransformStmt(S->getCatchStmt(I));
if (Catch.isInvalid())
return StmtError();
if (Catch.get() != S->getCatchStmt(I))
AnyCatchChanged = true;
CatchStmts.push_back(Catch.get());
}
// Transform the @finally statement (if present).
StmtResult Finally;
if (S->getFinallyStmt()) {
Finally = getDerived().TransformStmt(S->getFinallyStmt());
if (Finally.isInvalid())
return StmtError();
}
// If nothing changed, just retain this statement.
if (!getDerived().AlwaysRebuild() &&
TryBody.get() == S->getTryBody() &&
!AnyCatchChanged &&
Finally.get() == S->getFinallyStmt())
return S;
// Build a new statement.
return getDerived().RebuildObjCAtTryStmt(S->getAtTryLoc(), TryBody.get(),
CatchStmts, Finally.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAtCatchStmt(ObjCAtCatchStmt *S) {
// Transform the @catch parameter, if there is one.
VarDecl *Var = nullptr;
if (VarDecl *FromVar = S->getCatchParamDecl()) {
TypeSourceInfo *TSInfo = nullptr;
if (FromVar->getTypeSourceInfo()) {
TSInfo = getDerived().TransformType(FromVar->getTypeSourceInfo());
if (!TSInfo)
return StmtError();
}
QualType T;
if (TSInfo)
T = TSInfo->getType();
else {
T = getDerived().TransformType(FromVar->getType());
if (T.isNull())
return StmtError();
}
Var = getDerived().RebuildObjCExceptionDecl(FromVar, TSInfo, T);
if (!Var)
return StmtError();
}
StmtResult Body = getDerived().TransformStmt(S->getCatchBody());
if (Body.isInvalid())
return StmtError();
return getDerived().RebuildObjCAtCatchStmt(S->getAtCatchLoc(),
S->getRParenLoc(),
Var, Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAtFinallyStmt(ObjCAtFinallyStmt *S) {
// Transform the body.
StmtResult Body = getDerived().TransformStmt(S->getFinallyBody());
if (Body.isInvalid())
return StmtError();
// If nothing changed, just retain this statement.
if (!getDerived().AlwaysRebuild() &&
Body.get() == S->getFinallyBody())
return S;
// Build a new statement.
return getDerived().RebuildObjCAtFinallyStmt(S->getAtFinallyLoc(),
Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAtThrowStmt(ObjCAtThrowStmt *S) {
ExprResult Operand;
if (S->getThrowExpr()) {
Operand = getDerived().TransformExpr(S->getThrowExpr());
if (Operand.isInvalid())
return StmtError();
}
if (!getDerived().AlwaysRebuild() &&
Operand.get() == S->getThrowExpr())
return S;
return getDerived().RebuildObjCAtThrowStmt(S->getThrowLoc(), Operand.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAtSynchronizedStmt(
ObjCAtSynchronizedStmt *S) {
// Transform the object we are locking.
ExprResult Object = getDerived().TransformExpr(S->getSynchExpr());
if (Object.isInvalid())
return StmtError();
Object =
getDerived().RebuildObjCAtSynchronizedOperand(S->getAtSynchronizedLoc(),
Object.get());
if (Object.isInvalid())
return StmtError();
// Transform the body.
StmtResult Body = getDerived().TransformStmt(S->getSynchBody());
if (Body.isInvalid())
return StmtError();
// If nothing changed, just retain the current statement.
if (!getDerived().AlwaysRebuild() &&
Object.get() == S->getSynchExpr() &&
Body.get() == S->getSynchBody())
return S;
// Build a new statement.
return getDerived().RebuildObjCAtSynchronizedStmt(S->getAtSynchronizedLoc(),
Object.get(), Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAutoreleasePoolStmt(
ObjCAutoreleasePoolStmt *S) {
// Transform the body.
StmtResult Body = getDerived().TransformStmt(S->getSubStmt());
if (Body.isInvalid())
return StmtError();
// If nothing changed, just retain this statement.
if (!getDerived().AlwaysRebuild() &&
Body.get() == S->getSubStmt())
return S;
// Build a new statement.
return getDerived().RebuildObjCAutoreleasePoolStmt(
S->getAtLoc(), Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCForCollectionStmt(
ObjCForCollectionStmt *S) {
// Transform the element statement.
StmtResult Element =
getDerived().TransformStmt(S->getElement(), SDK_NotDiscarded);
if (Element.isInvalid())
return StmtError();
// Transform the collection expression.
ExprResult Collection = getDerived().TransformExpr(S->getCollection());
if (Collection.isInvalid())
return StmtError();
// Transform the body.
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
// If nothing changed, just retain this statement.
if (!getDerived().AlwaysRebuild() &&
Element.get() == S->getElement() &&
Collection.get() == S->getCollection() &&
Body.get() == S->getBody())
return S;
// Build a new statement.
return getDerived().RebuildObjCForCollectionStmt(S->getForLoc(),
Element.get(),
Collection.get(),
S->getRParenLoc(),
Body.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformCXXCatchStmt(CXXCatchStmt *S) {
// Transform the exception declaration, if any.
VarDecl *Var = nullptr;
if (VarDecl *ExceptionDecl = S->getExceptionDecl()) {
TypeSourceInfo *T =
getDerived().TransformType(ExceptionDecl->getTypeSourceInfo());
if (!T)
return StmtError();
Var = getDerived().RebuildExceptionDecl(
ExceptionDecl, T, ExceptionDecl->getInnerLocStart(),
ExceptionDecl->getLocation(), ExceptionDecl->getIdentifier());
if (!Var || Var->isInvalidDecl())
return StmtError();
}
// Transform the actual exception handler.
StmtResult Handler = getDerived().TransformStmt(S->getHandlerBlock());
if (Handler.isInvalid())
return StmtError();
if (!getDerived().AlwaysRebuild() && !Var &&
Handler.get() == S->getHandlerBlock())
return S;
return getDerived().RebuildCXXCatchStmt(S->getCatchLoc(), Var, Handler.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformCXXTryStmt(CXXTryStmt *S) {
// Transform the try block itself.
StmtResult TryBlock = getDerived().TransformCompoundStmt(S->getTryBlock());
if (TryBlock.isInvalid())
return StmtError();
// Transform the handlers.
bool HandlerChanged = false;
SmallVector<Stmt *, 8> Handlers;
for (unsigned I = 0, N = S->getNumHandlers(); I != N; ++I) {
StmtResult Handler = getDerived().TransformCXXCatchStmt(S->getHandler(I));
if (Handler.isInvalid())
return StmtError();
HandlerChanged = HandlerChanged || Handler.get() != S->getHandler(I);
Handlers.push_back(Handler.getAs<Stmt>());
}
if (!getDerived().AlwaysRebuild() && TryBlock.get() == S->getTryBlock() &&
!HandlerChanged)
return S;
return getDerived().RebuildCXXTryStmt(S->getTryLoc(), TryBlock.get(),
Handlers);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCXXForRangeStmt(CXXForRangeStmt *S) {
StmtResult Init =
S->getInit() ? getDerived().TransformStmt(S->getInit()) : StmtResult();
if (Init.isInvalid())
return StmtError();
StmtResult Range = getDerived().TransformStmt(S->getRangeStmt());
if (Range.isInvalid())
return StmtError();
StmtResult Begin = getDerived().TransformStmt(S->getBeginStmt());
if (Begin.isInvalid())
return StmtError();
StmtResult End = getDerived().TransformStmt(S->getEndStmt());
if (End.isInvalid())
return StmtError();
ExprResult Cond = getDerived().TransformExpr(S->getCond());
if (Cond.isInvalid())
return StmtError();
if (Cond.get())
Cond = SemaRef.CheckBooleanCondition(S->getColonLoc(), Cond.get());
if (Cond.isInvalid())
return StmtError();
if (Cond.get())
Cond = SemaRef.MaybeCreateExprWithCleanups(Cond.get());
ExprResult Inc = getDerived().TransformExpr(S->getInc());
if (Inc.isInvalid())
return StmtError();
if (Inc.get())
Inc = SemaRef.MaybeCreateExprWithCleanups(Inc.get());
StmtResult LoopVar = getDerived().TransformStmt(S->getLoopVarStmt());
if (LoopVar.isInvalid())
return StmtError();
StmtResult NewStmt = S;
if (getDerived().AlwaysRebuild() ||
Init.get() != S->getInit() ||
Range.get() != S->getRangeStmt() ||
Begin.get() != S->getBeginStmt() ||
End.get() != S->getEndStmt() ||
Cond.get() != S->getCond() ||
Inc.get() != S->getInc() ||
LoopVar.get() != S->getLoopVarStmt()) {
NewStmt = getDerived().RebuildCXXForRangeStmt(S->getForLoc(),
S->getCoawaitLoc(), Init.get(),
S->getColonLoc(), Range.get(),
Begin.get(), End.get(),
Cond.get(),
Inc.get(), LoopVar.get(),
S->getRParenLoc());
if (NewStmt.isInvalid() && LoopVar.get() != S->getLoopVarStmt()) {
// Might not have attached any initializer to the loop variable.
getSema().ActOnInitializerError(
cast<DeclStmt>(LoopVar.get())->getSingleDecl());
return StmtError();
}
}
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
// If the body changed but we did not rebuild the for-range statement above,
// rebuild it now so we have a new statement to attach the body to.
if (Body.get() != S->getBody() && NewStmt.get() == S) {
NewStmt = getDerived().RebuildCXXForRangeStmt(S->getForLoc(),
S->getCoawaitLoc(), Init.get(),
S->getColonLoc(), Range.get(),
Begin.get(), End.get(),
Cond.get(),
Inc.get(), LoopVar.get(),
S->getRParenLoc());
if (NewStmt.isInvalid())
return StmtError();
}
if (NewStmt.get() == S)
return S;
return FinishCXXForRangeStmt(NewStmt.get(), Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformMSDependentExistsStmt(
MSDependentExistsStmt *S) {
// Transform the nested-name-specifier, if any.
NestedNameSpecifierLoc QualifierLoc;
if (S->getQualifierLoc()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(S->getQualifierLoc());
if (!QualifierLoc)
return StmtError();
}
// Transform the declaration name.
DeclarationNameInfo NameInfo = S->getNameInfo();
if (NameInfo.getName()) {
NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
if (!NameInfo.getName())
return StmtError();
}
// Check whether anything changed.
if (!getDerived().AlwaysRebuild() &&
QualifierLoc == S->getQualifierLoc() &&
NameInfo.getName() == S->getNameInfo().getName())
return S;
// Determine whether this name exists, if we can.
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
bool Dependent = false;
switch (getSema().CheckMicrosoftIfExistsSymbol(/*S=*/nullptr, SS, NameInfo)) {
case Sema::IER_Exists:
if (S->isIfExists())
break;
return new (getSema().Context) NullStmt(S->getKeywordLoc());
case Sema::IER_DoesNotExist:
if (S->isIfNotExists())
break;
return new (getSema().Context) NullStmt(S->getKeywordLoc());
case Sema::IER_Dependent:
Dependent = true;
break;
case Sema::IER_Error:
return StmtError();
}
// We need to continue with the instantiation, so do so now.
StmtResult SubStmt = getDerived().TransformCompoundStmt(S->getSubStmt());
if (SubStmt.isInvalid())
return StmtError();
// If we have resolved the name, just transform to the substatement.
if (!Dependent)
return SubStmt;
// The name is still dependent, so build a dependent expression again.
return getDerived().RebuildMSDependentExistsStmt(S->getKeywordLoc(),
S->isIfExists(),
QualifierLoc,
NameInfo,
SubStmt.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformMSPropertyRefExpr(MSPropertyRefExpr *E) {
NestedNameSpecifierLoc QualifierLoc;
if (E->getQualifierLoc()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(E->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
}
MSPropertyDecl *PD = cast_or_null<MSPropertyDecl>(
getDerived().TransformDecl(E->getMemberLoc(), E->getPropertyDecl()));
if (!PD)
return ExprError();
ExprResult Base = getDerived().TransformExpr(E->getBaseExpr());
if (Base.isInvalid())
return ExprError();
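// Build the new property reference directly from the transformed base,
// qualifier, and property declaration.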
return new (SemaRef.getASTContext())
MSPropertyRefExpr(Base.get(), PD, E->isArrow(),
SemaRef.getASTContext().PseudoObjectTy, VK_LValue,
QualifierLoc, E->getMemberLoc());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformMSPropertySubscriptExpr(
MSPropertySubscriptExpr *E) {
auto BaseRes = getDerived().TransformExpr(E->getBase());
if (BaseRes.isInvalid())
return ExprError();
auto IdxRes = getDerived().TransformExpr(E->getIdx());
if (IdxRes.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
BaseRes.get() == E->getBase() &&
IdxRes.get() == E->getIdx())
return E;
return getDerived().RebuildArraySubscriptExpr(
BaseRes.get(), SourceLocation(), IdxRes.get(), E->getRBracketLoc());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformSEHTryStmt(SEHTryStmt *S) {
StmtResult TryBlock = getDerived().TransformCompoundStmt(S->getTryBlock());
if (TryBlock.isInvalid())
return StmtError();
StmtResult Handler = getDerived().TransformSEHHandler(S->getHandler());
if (Handler.isInvalid())
return StmtError();
if (!getDerived().AlwaysRebuild() && TryBlock.get() == S->getTryBlock() &&
Handler.get() == S->getHandler())
return S;
return getDerived().RebuildSEHTryStmt(S->getIsCXXTry(), S->getTryLoc(),
TryBlock.get(), Handler.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformSEHFinallyStmt(SEHFinallyStmt *S) {
StmtResult Block = getDerived().TransformCompoundStmt(S->getBlock());
if (Block.isInvalid())
return StmtError();
return getDerived().RebuildSEHFinallyStmt(S->getFinallyLoc(), Block.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformSEHExceptStmt(SEHExceptStmt *S) {
ExprResult FilterExpr = getDerived().TransformExpr(S->getFilterExpr());
if (FilterExpr.isInvalid())
return StmtError();
StmtResult Block = getDerived().TransformCompoundStmt(S->getBlock());
if (Block.isInvalid())
return StmtError();
return getDerived().RebuildSEHExceptStmt(S->getExceptLoc(), FilterExpr.get(),
Block.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformSEHHandler(Stmt *Handler) {
if (isa<SEHFinallyStmt>(Handler))
return getDerived().TransformSEHFinallyStmt(cast<SEHFinallyStmt>(Handler));
else
return getDerived().TransformSEHExceptStmt(cast<SEHExceptStmt>(Handler));
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformSEHLeaveStmt(SEHLeaveStmt *S) {
return S;
}
//===----------------------------------------------------------------------===//
// OpenMP directive transformation
//===----------------------------------------------------------------------===//
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPCanonicalLoop(OMPCanonicalLoop *L) {
// OMPCanonicalLoops are eliminated during transformation, since they will be
// recomputed by semantic analysis of the associated OMPLoopBasedDirective
// after transformation.
return getDerived().TransformStmt(L->getLoopStmt());
}
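// Common worker for all OpenMP executable directives: transform each clause,
// re-enter the OpenMP region to transform the associated statement (using the
// raw statement for most directives), transform the directive name for
// 'critical' and the cancel region for cancellation directives, and finally
// rebuild the directive from the transformed pieces.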
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPExecutableDirective(
OMPExecutableDirective *D) {
// Transform the clauses
llvm::SmallVector<OMPClause *, 16> TClauses;
ArrayRef<OMPClause *> Clauses = D->clauses();
TClauses.reserve(Clauses.size());
for (ArrayRef<OMPClause *>::iterator I = Clauses.begin(), E = Clauses.end();
I != E; ++I) {
if (*I) {
getDerived().getSema().StartOpenMPClause((*I)->getClauseKind());
OMPClause *Clause = getDerived().TransformOMPClause(*I);
getDerived().getSema().EndOpenMPClause();
if (Clause)
TClauses.push_back(Clause);
} else {
TClauses.push_back(nullptr);
}
}
StmtResult AssociatedStmt;
if (D->hasAssociatedStmt() && D->getAssociatedStmt()) {
getDerived().getSema().ActOnOpenMPRegionStart(D->getDirectiveKind(),
/*CurScope=*/nullptr);
StmtResult Body;
{
Sema::CompoundScopeRAII CompoundScope(getSema());
Stmt *CS;
if (D->getDirectiveKind() == OMPD_atomic ||
D->getDirectiveKind() == OMPD_critical ||
D->getDirectiveKind() == OMPD_section ||
D->getDirectiveKind() == OMPD_master)
CS = D->getAssociatedStmt();
else
CS = D->getRawStmt();
Body = getDerived().TransformStmt(CS);
if (Body.isUsable() && isOpenMPLoopDirective(D->getDirectiveKind()) &&
getSema().getLangOpts().OpenMPIRBuilder)
Body = getDerived().RebuildOMPCanonicalLoop(Body.get());
}
AssociatedStmt =
getDerived().getSema().ActOnOpenMPRegionEnd(Body, TClauses);
if (AssociatedStmt.isInvalid()) {
return StmtError();
}
}
if (TClauses.size() != Clauses.size()) {
return StmtError();
}
// Transform the directive name for the 'omp critical' directive.
DeclarationNameInfo DirName;
if (D->getDirectiveKind() == OMPD_critical) {
DirName = cast<OMPCriticalDirective>(D)->getDirectiveName();
DirName = getDerived().TransformDeclarationNameInfo(DirName);
}
OpenMPDirectiveKind CancelRegion = OMPD_unknown;
if (D->getDirectiveKind() == OMPD_cancellation_point) {
CancelRegion = cast<OMPCancellationPointDirective>(D)->getCancelRegion();
} else if (D->getDirectiveKind() == OMPD_cancel) {
CancelRegion = cast<OMPCancelDirective>(D)->getCancelRegion();
}
return getDerived().RebuildOMPExecutableDirective(
D->getDirectiveKind(), DirName, CancelRegion, TClauses,
AssociatedStmt.get(), D->getBeginLoc(), D->getEndLoc());
}
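// The per-directive transforms below all follow the same pattern: start a
// data-sharing-attributes (DSA) block for the directive kind, delegate to
// TransformOMPExecutableDirective, and end the DSA block with the result.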
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPParallelDirective(OMPParallelDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPSimdDirective(OMPSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_simd, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTileDirective(OMPTileDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(D->getDirectiveKind(), DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPUnrollDirective(OMPUnrollDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(D->getDirectiveKind(), DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_for, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPForSimdDirective(OMPForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_for_simd, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPSectionsDirective(OMPSectionsDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_sections, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPSectionDirective(OMPSectionDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_section, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPSingleDirective(OMPSingleDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_single, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPMasterDirective(OMPMasterDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_master, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPCriticalDirective(OMPCriticalDirective *D) {
getDerived().getSema().StartOpenMPDSABlock(
OMPD_critical, D->getDirectiveName(), nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPParallelForDirective(
OMPParallelForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_for, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPParallelForSimdDirective(
OMPParallelForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_for_simd, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPParallelMasterDirective(
OMPParallelMasterDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_master, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPParallelSectionsDirective(
OMPParallelSectionsDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_sections, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTaskDirective(OMPTaskDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_task, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTaskyieldDirective(
OMPTaskyieldDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_taskyield, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPBarrierDirective(OMPBarrierDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_barrier, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTaskwaitDirective(OMPTaskwaitDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_taskwait, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTaskgroupDirective(
OMPTaskgroupDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_taskgroup, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPFlushDirective(OMPFlushDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_flush, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPDepobjDirective(OMPDepobjDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_depobj, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPScanDirective(OMPScanDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_scan, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPOrderedDirective(OMPOrderedDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_ordered, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPAtomicDirective(OMPAtomicDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_atomic, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTargetDirective(OMPTargetDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetDataDirective(
OMPTargetDataDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_data, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetEnterDataDirective(
OMPTargetEnterDataDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_enter_data, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetExitDataDirective(
OMPTargetExitDataDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_exit_data, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetParallelDirective(
OMPTargetParallelDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_parallel, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetParallelForDirective(
OMPTargetParallelForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_parallel_for, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetUpdateDirective(
OMPTargetUpdateDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_update, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTeamsDirective(OMPTeamsDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_teams, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPCancellationPointDirective(
OMPCancellationPointDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_cancellation_point, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPCancelDirective(OMPCancelDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_cancel, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTaskLoopDirective(OMPTaskLoopDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_taskloop, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTaskLoopSimdDirective(
OMPTaskLoopSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_taskloop_simd, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPMasterTaskLoopDirective(
OMPMasterTaskLoopDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_master_taskloop, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPMasterTaskLoopSimdDirective(
OMPMasterTaskLoopSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_master_taskloop_simd, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPParallelMasterTaskLoopDirective(
OMPParallelMasterTaskLoopDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_parallel_master_taskloop, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPParallelMasterTaskLoopSimdDirective(
OMPParallelMasterTaskLoopSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_parallel_master_taskloop_simd, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPDistributeDirective(
OMPDistributeDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_distribute, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPDistributeParallelForDirective(
OMPDistributeParallelForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_distribute_parallel_for, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPDistributeParallelForSimdDirective(
OMPDistributeParallelForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_distribute_parallel_for_simd, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPDistributeSimdDirective(
OMPDistributeSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_distribute_simd, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetParallelForSimdDirective(
OMPTargetParallelForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_target_parallel_for_simd, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetSimdDirective(
OMPTargetSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_simd, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTeamsDistributeDirective(
OMPTeamsDistributeDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_teams_distribute, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTeamsDistributeSimdDirective(
OMPTeamsDistributeSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_teams_distribute_simd, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTeamsDistributeParallelForSimdDirective(
OMPTeamsDistributeParallelForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_teams_distribute_parallel_for_simd, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTeamsDistributeParallelForDirective(
OMPTeamsDistributeParallelForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_teams_distribute_parallel_for, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetTeamsDirective(
OMPTargetTeamsDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_teams, DirName,
nullptr, D->getBeginLoc());
  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetTeamsDistributeDirective(
OMPTargetTeamsDistributeDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_target_teams_distribute, DirName, nullptr, D->getBeginLoc());
  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTargetTeamsDistributeParallelForDirective(
OMPTargetTeamsDistributeParallelForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_target_teams_distribute_parallel_for, DirName, nullptr,
D->getBeginLoc());
  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::
TransformOMPTargetTeamsDistributeParallelForSimdDirective(
OMPTargetTeamsDistributeParallelForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_target_teams_distribute_parallel_for_simd, DirName, nullptr,
D->getBeginLoc());
  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTargetTeamsDistributeSimdDirective(
OMPTargetTeamsDistributeSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_target_teams_distribute_simd, DirName, nullptr, D->getBeginLoc());
  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPInteropDirective(OMPInteropDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_interop, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPDispatchDirective(OMPDispatchDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_dispatch, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPMaskedDirective(OMPMaskedDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_masked, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
//===----------------------------------------------------------------------===//
// OpenMP clause transformation
//===----------------------------------------------------------------------===//
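// Most clause transforms below follow one of two patterns.  Clauses that
// carry expressions (if, final, num_threads, schedule, ...) transform each
// operand and rebuild the clause so that template-dependent expressions are
// re-checked; for example, the condition in
//
//   template <typename T> void f(T n) {
//   #pragma omp parallel if (n > 0)
//     { /* ... */ }
//   }
//
// is only fully checked once f is instantiated.  Clauses with no operands
// (nowait, untied, mergeable, seq_cst, ...) have nothing that can be
// template-dependent and are returned unchanged.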
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPIfClause(OMPIfClause *C) {
ExprResult Cond = getDerived().TransformExpr(C->getCondition());
if (Cond.isInvalid())
return nullptr;
return getDerived().RebuildOMPIfClause(
C->getNameModifier(), Cond.get(), C->getBeginLoc(), C->getLParenLoc(),
C->getNameModifierLoc(), C->getColonLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPFinalClause(OMPFinalClause *C) {
ExprResult Cond = getDerived().TransformExpr(C->getCondition());
if (Cond.isInvalid())
return nullptr;
return getDerived().RebuildOMPFinalClause(Cond.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNumThreadsClause(OMPNumThreadsClause *C) {
ExprResult NumThreads = getDerived().TransformExpr(C->getNumThreads());
if (NumThreads.isInvalid())
return nullptr;
return getDerived().RebuildOMPNumThreadsClause(
NumThreads.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPSafelenClause(OMPSafelenClause *C) {
ExprResult E = getDerived().TransformExpr(C->getSafelen());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPSafelenClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAllocatorClause(OMPAllocatorClause *C) {
ExprResult E = getDerived().TransformExpr(C->getAllocator());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPAllocatorClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPSimdlenClause(OMPSimdlenClause *C) {
ExprResult E = getDerived().TransformExpr(C->getSimdlen());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPSimdlenClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPSizesClause(OMPSizesClause *C) {
SmallVector<Expr *, 4> TransformedSizes;
TransformedSizes.reserve(C->getNumSizes());
bool Changed = false;
for (Expr *E : C->getSizesRefs()) {
if (!E) {
TransformedSizes.push_back(nullptr);
continue;
}
ExprResult T = getDerived().TransformExpr(E);
if (T.isInvalid())
return nullptr;
if (E != T.get())
Changed = true;
TransformedSizes.push_back(T.get());
}
if (!Changed && !getDerived().AlwaysRebuild())
return C;
return RebuildOMPSizesClause(TransformedSizes, C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPFullClause(OMPFullClause *C) {
if (!getDerived().AlwaysRebuild())
return C;
return RebuildOMPFullClause(C->getBeginLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) {
ExprResult T = getDerived().TransformExpr(C->getFactor());
if (T.isInvalid())
return nullptr;
Expr *Factor = T.get();
bool Changed = Factor != C->getFactor();
if (!Changed && !getDerived().AlwaysRebuild())
return C;
return RebuildOMPPartialClause(Factor, C->getBeginLoc(), C->getLParenLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) {
ExprResult E = getDerived().TransformExpr(C->getNumForLoops());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPCollapseClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDefaultClause(OMPDefaultClause *C) {
return getDerived().RebuildOMPDefaultClause(
C->getDefaultKind(), C->getDefaultKindKwLoc(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPProcBindClause(OMPProcBindClause *C) {
return getDerived().RebuildOMPProcBindClause(
C->getProcBindKind(), C->getProcBindKindKwLoc(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPScheduleClause(OMPScheduleClause *C) {
ExprResult E = getDerived().TransformExpr(C->getChunkSize());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPScheduleClause(
C->getFirstScheduleModifier(), C->getSecondScheduleModifier(),
C->getScheduleKind(), E.get(), C->getBeginLoc(), C->getLParenLoc(),
C->getFirstScheduleModifierLoc(), C->getSecondScheduleModifierLoc(),
C->getScheduleKindLoc(), C->getCommaLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPOrderedClause(OMPOrderedClause *C) {
ExprResult E;
if (auto *Num = C->getNumForLoops()) {
E = getDerived().TransformExpr(Num);
if (E.isInvalid())
return nullptr;
}
return getDerived().RebuildOMPOrderedClause(C->getBeginLoc(), C->getEndLoc(),
C->getLParenLoc(), E.get());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDetachClause(OMPDetachClause *C) {
ExprResult E;
if (Expr *Evt = C->getEventHandler()) {
E = getDerived().TransformExpr(Evt);
if (E.isInvalid())
return nullptr;
}
return getDerived().RebuildOMPDetachClause(E.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNowaitClause(OMPNowaitClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPUntiedClause(OMPUntiedClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPMergeableClause(OMPMergeableClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPReadClause(OMPReadClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPWriteClause(OMPWriteClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPUpdateClause(OMPUpdateClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPCaptureClause(OMPCaptureClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPSeqCstClause(OMPSeqCstClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAcqRelClause(OMPAcqRelClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAcquireClause(OMPAcquireClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPReleaseClause(OMPReleaseClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPRelaxedClause(OMPRelaxedClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPThreadsClause(OMPThreadsClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPSIMDClause(OMPSIMDClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNogroupClause(OMPNogroupClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPInitClause(OMPInitClause *C) {
ExprResult IVR = getDerived().TransformExpr(C->getInteropVar());
if (IVR.isInvalid())
return nullptr;
llvm::SmallVector<Expr *, 8> PrefExprs;
PrefExprs.reserve(C->varlist_size() - 1);
for (Expr *E : llvm::drop_begin(C->varlists())) {
ExprResult ER = getDerived().TransformExpr(cast<Expr>(E));
if (ER.isInvalid())
return nullptr;
PrefExprs.push_back(ER.get());
}
return getDerived().RebuildOMPInitClause(
IVR.get(), PrefExprs, C->getIsTarget(), C->getIsTargetSync(),
C->getBeginLoc(), C->getLParenLoc(), C->getVarLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUseClause(OMPUseClause *C) {
ExprResult ER = getDerived().TransformExpr(C->getInteropVar());
if (ER.isInvalid())
return nullptr;
return getDerived().RebuildOMPUseClause(ER.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getVarLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDestroyClause(OMPDestroyClause *C) {
ExprResult ER;
if (Expr *IV = C->getInteropVar()) {
ER = getDerived().TransformExpr(IV);
if (ER.isInvalid())
return nullptr;
}
return getDerived().RebuildOMPDestroyClause(ER.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getVarLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNovariantsClause(OMPNovariantsClause *C) {
ExprResult Cond = getDerived().TransformExpr(C->getCondition());
if (Cond.isInvalid())
return nullptr;
return getDerived().RebuildOMPNovariantsClause(
Cond.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNocontextClause(OMPNocontextClause *C) {
ExprResult Cond = getDerived().TransformExpr(C->getCondition());
if (Cond.isInvalid())
return nullptr;
return getDerived().RebuildOMPNocontextClause(
Cond.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPFilterClause(OMPFilterClause *C) {
ExprResult ThreadID = getDerived().TransformExpr(C->getThreadID());
if (ThreadID.isInvalid())
return nullptr;
return getDerived().RebuildOMPFilterClause(ThreadID.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUnifiedAddressClause(
OMPUnifiedAddressClause *C) {
llvm_unreachable("unified_address clause cannot appear in dependent context");
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUnifiedSharedMemoryClause(
OMPUnifiedSharedMemoryClause *C) {
llvm_unreachable(
"unified_shared_memory clause cannot appear in dependent context");
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPReverseOffloadClause(
OMPReverseOffloadClause *C) {
llvm_unreachable("reverse_offload clause cannot appear in dependent context");
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPDynamicAllocatorsClause(
OMPDynamicAllocatorsClause *C) {
llvm_unreachable(
"dynamic_allocators clause cannot appear in dependent context");
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPAtomicDefaultMemOrderClause(
OMPAtomicDefaultMemOrderClause *C) {
llvm_unreachable(
"atomic_default_mem_order clause cannot appear in dependent context");
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPPrivateClause(OMPPrivateClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPPrivateClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPFirstprivateClause(
OMPFirstprivateClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPFirstprivateClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPLastprivateClause(OMPLastprivateClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPLastprivateClause(
Vars, C->getKind(), C->getKindLoc(), C->getColonLoc(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPSharedClause(OMPSharedClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPSharedClause(Vars, C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPReductionClause(OMPReductionClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
CXXScopeSpec ReductionIdScopeSpec;
ReductionIdScopeSpec.Adopt(C->getQualifierLoc());
DeclarationNameInfo NameInfo = C->getNameInfo();
if (NameInfo.getName()) {
NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
if (!NameInfo.getName())
return nullptr;
}
  // Build a list of all user-defined reduction (UDR) decls with the same
  // name, grouped by scope; a scope boundary is marked by a duplicate of the
  // preceding decl.
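  // For example, a 'reduction(merge : x)' clause may name a reduction 'merge'
  // introduced by '#pragma omp declare reduction'; every candidate is kept in
  // an unresolved set so the rebuild can pick the right one per instantiation.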
llvm::SmallVector<Expr *, 16> UnresolvedReductions;
for (auto *E : C->reduction_ops()) {
// Transform all the decls.
if (E) {
auto *ULE = cast<UnresolvedLookupExpr>(E);
UnresolvedSet<8> Decls;
for (auto *D : ULE->decls()) {
NamedDecl *InstD =
cast<NamedDecl>(getDerived().TransformDecl(E->getExprLoc(), D));
Decls.addDecl(InstD, InstD->getAccess());
}
      UnresolvedReductions.push_back(UnresolvedLookupExpr::Create(
          SemaRef.Context, /*NamingClass=*/nullptr,
          ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo,
          /*ADL=*/true, ULE->isOverloaded(), Decls.begin(), Decls.end()));
} else
UnresolvedReductions.push_back(nullptr);
}
return getDerived().RebuildOMPReductionClause(
Vars, C->getModifier(), C->getBeginLoc(), C->getLParenLoc(),
C->getModifierLoc(), C->getColonLoc(), C->getEndLoc(),
ReductionIdScopeSpec, NameInfo, UnresolvedReductions);
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPTaskReductionClause(
OMPTaskReductionClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
CXXScopeSpec ReductionIdScopeSpec;
ReductionIdScopeSpec.Adopt(C->getQualifierLoc());
DeclarationNameInfo NameInfo = C->getNameInfo();
if (NameInfo.getName()) {
NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
if (!NameInfo.getName())
return nullptr;
}
  // Build a list of all user-defined reduction (UDR) decls with the same
  // name, grouped by scope; a scope boundary is marked by a duplicate of the
  // preceding decl.
llvm::SmallVector<Expr *, 16> UnresolvedReductions;
for (auto *E : C->reduction_ops()) {
// Transform all the decls.
if (E) {
auto *ULE = cast<UnresolvedLookupExpr>(E);
UnresolvedSet<8> Decls;
for (auto *D : ULE->decls()) {
NamedDecl *InstD =
cast<NamedDecl>(getDerived().TransformDecl(E->getExprLoc(), D));
Decls.addDecl(InstD, InstD->getAccess());
}
UnresolvedReductions.push_back(UnresolvedLookupExpr::Create(
SemaRef.Context, /*NamingClass=*/nullptr,
ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo,
/*ADL=*/true, ULE->isOverloaded(), Decls.begin(), Decls.end()));
} else
UnresolvedReductions.push_back(nullptr);
}
return getDerived().RebuildOMPTaskReductionClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getColonLoc(),
C->getEndLoc(), ReductionIdScopeSpec, NameInfo, UnresolvedReductions);
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPInReductionClause(OMPInReductionClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
CXXScopeSpec ReductionIdScopeSpec;
ReductionIdScopeSpec.Adopt(C->getQualifierLoc());
DeclarationNameInfo NameInfo = C->getNameInfo();
if (NameInfo.getName()) {
NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
if (!NameInfo.getName())
return nullptr;
}
  // Build a list of all user-defined reduction (UDR) decls with the same
  // name, grouped by scope; a scope boundary is marked by a duplicate of the
  // preceding decl.
llvm::SmallVector<Expr *, 16> UnresolvedReductions;
for (auto *E : C->reduction_ops()) {
// Transform all the decls.
if (E) {
auto *ULE = cast<UnresolvedLookupExpr>(E);
UnresolvedSet<8> Decls;
for (auto *D : ULE->decls()) {
NamedDecl *InstD =
cast<NamedDecl>(getDerived().TransformDecl(E->getExprLoc(), D));
Decls.addDecl(InstD, InstD->getAccess());
}
UnresolvedReductions.push_back(UnresolvedLookupExpr::Create(
SemaRef.Context, /*NamingClass=*/nullptr,
ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo,
/*ADL=*/true, ULE->isOverloaded(), Decls.begin(), Decls.end()));
} else
UnresolvedReductions.push_back(nullptr);
}
return getDerived().RebuildOMPInReductionClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getColonLoc(),
C->getEndLoc(), ReductionIdScopeSpec, NameInfo, UnresolvedReductions);
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPLinearClause(OMPLinearClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
ExprResult Step = getDerived().TransformExpr(C->getStep());
if (Step.isInvalid())
return nullptr;
return getDerived().RebuildOMPLinearClause(
Vars, Step.get(), C->getBeginLoc(), C->getLParenLoc(), C->getModifier(),
C->getModifierLoc(), C->getColonLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAlignedClause(OMPAlignedClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
ExprResult Alignment = getDerived().TransformExpr(C->getAlignment());
if (Alignment.isInvalid())
return nullptr;
return getDerived().RebuildOMPAlignedClause(
Vars, Alignment.get(), C->getBeginLoc(), C->getLParenLoc(),
C->getColonLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPCopyinClause(OMPCopyinClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPCopyinClause(Vars, C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPCopyprivateClause(OMPCopyprivateClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPCopyprivateClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPFlushClause(OMPFlushClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPFlushClause(Vars, C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDepobjClause(OMPDepobjClause *C) {
ExprResult E = getDerived().TransformExpr(C->getDepobj());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPDepobjClause(E.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDependClause(OMPDependClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Expr *DepModifier = C->getModifier();
if (DepModifier) {
ExprResult DepModRes = getDerived().TransformExpr(DepModifier);
if (DepModRes.isInvalid())
return nullptr;
DepModifier = DepModRes.get();
}
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPDependClause(
DepModifier, C->getDependencyKind(), C->getDependencyLoc(),
C->getColonLoc(), Vars, C->getBeginLoc(), C->getLParenLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDeviceClause(OMPDeviceClause *C) {
ExprResult E = getDerived().TransformExpr(C->getDevice());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPDeviceClause(
C->getModifier(), E.get(), C->getBeginLoc(), C->getLParenLoc(),
C->getModifierLoc(), C->getEndLoc());
}
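// Shared helper for the map/to/from clauses below: transforms the variable
// list together with the optional 'mapper(...)' scope specifier and
// identifier, collecting any still-unresolved mapper lookups.  Returns true
// on error.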
template <typename Derived, class T>
bool transformOMPMappableExprListClause(
TreeTransform<Derived> &TT, OMPMappableExprListClause<T> *C,
llvm::SmallVectorImpl<Expr *> &Vars, CXXScopeSpec &MapperIdScopeSpec,
DeclarationNameInfo &MapperIdInfo,
llvm::SmallVectorImpl<Expr *> &UnresolvedMappers) {
// Transform expressions in the list.
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = TT.getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return true;
Vars.push_back(EVar.get());
}
// Transform mapper scope specifier and identifier.
NestedNameSpecifierLoc QualifierLoc;
if (C->getMapperQualifierLoc()) {
QualifierLoc = TT.getDerived().TransformNestedNameSpecifierLoc(
C->getMapperQualifierLoc());
if (!QualifierLoc)
return true;
}
MapperIdScopeSpec.Adopt(QualifierLoc);
MapperIdInfo = C->getMapperIdInfo();
if (MapperIdInfo.getName()) {
MapperIdInfo = TT.getDerived().TransformDeclarationNameInfo(MapperIdInfo);
if (!MapperIdInfo.getName())
return true;
}
  // Build a list of all candidate OMPDeclareMapperDecls, as provided by the
  // earlier user-defined mapper lookup in the dependent context.
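  // For example, 'map(mapper(id), tofrom: s)' names a mapper 'id' declared
  // with '#pragma omp declare mapper'; in a dependent context the candidate
  // set stays unresolved until instantiation picks the right declaration.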
for (auto *E : C->mapperlists()) {
// Transform all the decls.
if (E) {
auto *ULE = cast<UnresolvedLookupExpr>(E);
UnresolvedSet<8> Decls;
for (auto *D : ULE->decls()) {
NamedDecl *InstD =
cast<NamedDecl>(TT.getDerived().TransformDecl(E->getExprLoc(), D));
Decls.addDecl(InstD, InstD->getAccess());
}
UnresolvedMappers.push_back(UnresolvedLookupExpr::Create(
TT.getSema().Context, /*NamingClass=*/nullptr,
MapperIdScopeSpec.getWithLocInContext(TT.getSema().Context),
MapperIdInfo, /*ADL=*/true, ULE->isOverloaded(), Decls.begin(),
Decls.end()));
} else {
UnresolvedMappers.push_back(nullptr);
}
}
return false;
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPMapClause(OMPMapClause *C) {
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
llvm::SmallVector<Expr *, 16> Vars;
CXXScopeSpec MapperIdScopeSpec;
DeclarationNameInfo MapperIdInfo;
llvm::SmallVector<Expr *, 16> UnresolvedMappers;
if (transformOMPMappableExprListClause<Derived, OMPMapClause>(
*this, C, Vars, MapperIdScopeSpec, MapperIdInfo, UnresolvedMappers))
return nullptr;
return getDerived().RebuildOMPMapClause(
C->getMapTypeModifiers(), C->getMapTypeModifiersLoc(), MapperIdScopeSpec,
MapperIdInfo, C->getMapType(), C->isImplicitMapType(), C->getMapLoc(),
C->getColonLoc(), Vars, Locs, UnresolvedMappers);
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAllocateClause(OMPAllocateClause *C) {
Expr *Allocator = C->getAllocator();
if (Allocator) {
ExprResult AllocatorRes = getDerived().TransformExpr(Allocator);
if (AllocatorRes.isInvalid())
return nullptr;
Allocator = AllocatorRes.get();
}
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPAllocateClause(
Allocator, Vars, C->getBeginLoc(), C->getLParenLoc(), C->getColonLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNumTeamsClause(OMPNumTeamsClause *C) {
ExprResult E = getDerived().TransformExpr(C->getNumTeams());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPNumTeamsClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPThreadLimitClause(OMPThreadLimitClause *C) {
ExprResult E = getDerived().TransformExpr(C->getThreadLimit());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPThreadLimitClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPPriorityClause(OMPPriorityClause *C) {
ExprResult E = getDerived().TransformExpr(C->getPriority());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPPriorityClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPGrainsizeClause(OMPGrainsizeClause *C) {
ExprResult E = getDerived().TransformExpr(C->getGrainsize());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPGrainsizeClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNumTasksClause(OMPNumTasksClause *C) {
ExprResult E = getDerived().TransformExpr(C->getNumTasks());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPNumTasksClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPHintClause(OMPHintClause *C) {
ExprResult E = getDerived().TransformExpr(C->getHint());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPHintClause(E.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPDistScheduleClause(
OMPDistScheduleClause *C) {
ExprResult E = getDerived().TransformExpr(C->getChunkSize());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPDistScheduleClause(
C->getDistScheduleKind(), E.get(), C->getBeginLoc(), C->getLParenLoc(),
C->getDistScheduleKindLoc(), C->getCommaLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDefaultmapClause(OMPDefaultmapClause *C) {
  // Rebuild the defaultmap clause, since the defaultmap(none:variable-category)
  // check must be performed again after template instantiation.
return getDerived().RebuildOMPDefaultmapClause(C->getDefaultmapModifier(),
C->getDefaultmapKind(),
C->getBeginLoc(),
C->getLParenLoc(),
C->getDefaultmapModifierLoc(),
C->getDefaultmapKindLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPToClause(OMPToClause *C) {
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
llvm::SmallVector<Expr *, 16> Vars;
CXXScopeSpec MapperIdScopeSpec;
DeclarationNameInfo MapperIdInfo;
llvm::SmallVector<Expr *, 16> UnresolvedMappers;
if (transformOMPMappableExprListClause<Derived, OMPToClause>(
*this, C, Vars, MapperIdScopeSpec, MapperIdInfo, UnresolvedMappers))
return nullptr;
return getDerived().RebuildOMPToClause(
C->getMotionModifiers(), C->getMotionModifiersLoc(), MapperIdScopeSpec,
MapperIdInfo, C->getColonLoc(), Vars, Locs, UnresolvedMappers);
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPFromClause(OMPFromClause *C) {
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
llvm::SmallVector<Expr *, 16> Vars;
CXXScopeSpec MapperIdScopeSpec;
DeclarationNameInfo MapperIdInfo;
llvm::SmallVector<Expr *, 16> UnresolvedMappers;
if (transformOMPMappableExprListClause<Derived, OMPFromClause>(
*this, C, Vars, MapperIdScopeSpec, MapperIdInfo, UnresolvedMappers))
return nullptr;
return getDerived().RebuildOMPFromClause(
C->getMotionModifiers(), C->getMotionModifiersLoc(), MapperIdScopeSpec,
MapperIdInfo, C->getColonLoc(), Vars, Locs, UnresolvedMappers);
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUseDevicePtrClause(
OMPUseDevicePtrClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
return getDerived().RebuildOMPUseDevicePtrClause(Vars, Locs);
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUseDeviceAddrClause(
OMPUseDeviceAddrClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
return getDerived().RebuildOMPUseDeviceAddrClause(Vars, Locs);
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
return getDerived().RebuildOMPIsDevicePtrClause(Vars, Locs);
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNontemporalClause(OMPNontemporalClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPNontemporalClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPInclusiveClause(OMPInclusiveClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPInclusiveClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPExclusiveClause(OMPExclusiveClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPExclusiveClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUsesAllocatorsClause(
OMPUsesAllocatorsClause *C) {
SmallVector<Sema::UsesAllocatorsData, 16> Data;
Data.reserve(C->getNumberOfAllocators());
for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
ExprResult Allocator = getDerived().TransformExpr(D.Allocator);
if (Allocator.isInvalid())
continue;
ExprResult AllocatorTraits;
if (Expr *AT = D.AllocatorTraits) {
AllocatorTraits = getDerived().TransformExpr(AT);
if (AllocatorTraits.isInvalid())
continue;
}
Sema::UsesAllocatorsData &NewD = Data.emplace_back();
NewD.Allocator = Allocator.get();
NewD.AllocatorTraits = AllocatorTraits.get();
NewD.LParenLoc = D.LParenLoc;
NewD.RParenLoc = D.RParenLoc;
}
return getDerived().RebuildOMPUsesAllocatorsClause(
Data, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAffinityClause(OMPAffinityClause *C) {
SmallVector<Expr *, 4> Locators;
Locators.reserve(C->varlist_size());
ExprResult ModifierRes;
if (Expr *Modifier = C->getModifier()) {
ModifierRes = getDerived().TransformExpr(Modifier);
if (ModifierRes.isInvalid())
return nullptr;
}
for (Expr *E : C->varlists()) {
ExprResult Locator = getDerived().TransformExpr(E);
if (Locator.isInvalid())
continue;
Locators.push_back(Locator.get());
}
return getDerived().RebuildOMPAffinityClause(
C->getBeginLoc(), C->getLParenLoc(), C->getColonLoc(), C->getEndLoc(),
ModifierRes.get(), Locators);
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPOrderClause(OMPOrderClause *C) {
return getDerived().RebuildOMPOrderClause(C->getKind(), C->getKindKwLoc(),
C->getBeginLoc(), C->getLParenLoc(),
C->getEndLoc());
}
//===----------------------------------------------------------------------===//
// Expression transformation
//===----------------------------------------------------------------------===//
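// Most expression transforms below share the same shape: transform the
// operands, and if nothing changed (and the derived transform does not force
// AlwaysRebuild()) return the original expression; otherwise call the
// corresponding Rebuild*() hook so the expression is re-analyzed with the new
// operands.  Literals have no operands and are simply returned as-is.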
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformConstantExpr(ConstantExpr *E) {
return TransformExpr(E->getSubExpr());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformSYCLUniqueStableNameExpr(
SYCLUniqueStableNameExpr *E) {
if (!E->isTypeDependent())
return E;
TypeSourceInfo *NewT = getDerived().TransformType(E->getTypeSourceInfo());
if (!NewT)
return ExprError();
if (!getDerived().AlwaysRebuild() && E->getTypeSourceInfo() == NewT)
return E;
return getDerived().RebuildSYCLUniqueStableNameExpr(
E->getLocation(), E->getLParenLocation(), E->getRParenLocation(), NewT);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformPredefinedExpr(PredefinedExpr *E) {
if (!E->isTypeDependent())
return E;
return getDerived().RebuildPredefinedExpr(E->getLocation(),
E->getIdentKind());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformDeclRefExpr(DeclRefExpr *E) {
NestedNameSpecifierLoc QualifierLoc;
if (E->getQualifierLoc()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(E->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
}
ValueDecl *ND
= cast_or_null<ValueDecl>(getDerived().TransformDecl(E->getLocation(),
E->getDecl()));
if (!ND)
return ExprError();
NamedDecl *Found = ND;
if (E->getFoundDecl() != E->getDecl()) {
Found = cast_or_null<NamedDecl>(
getDerived().TransformDecl(E->getLocation(), E->getFoundDecl()));
if (!Found)
return ExprError();
}
DeclarationNameInfo NameInfo = E->getNameInfo();
if (NameInfo.getName()) {
NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
if (!NameInfo.getName())
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
QualifierLoc == E->getQualifierLoc() &&
ND == E->getDecl() &&
Found == E->getFoundDecl() &&
NameInfo.getName() == E->getDecl()->getDeclName() &&
!E->hasExplicitTemplateArgs()) {
// Mark it referenced in the new context regardless.
// FIXME: this is a bit instantiation-specific.
SemaRef.MarkDeclRefReferenced(E);
return E;
}
TemplateArgumentListInfo TransArgs, *TemplateArgs = nullptr;
if (E->hasExplicitTemplateArgs()) {
TemplateArgs = &TransArgs;
TransArgs.setLAngleLoc(E->getLAngleLoc());
TransArgs.setRAngleLoc(E->getRAngleLoc());
if (getDerived().TransformTemplateArguments(E->getTemplateArgs(),
E->getNumTemplateArgs(),
TransArgs))
return ExprError();
}
return getDerived().RebuildDeclRefExpr(QualifierLoc, ND, NameInfo,
Found, TemplateArgs);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformIntegerLiteral(IntegerLiteral *E) {
return E;
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformFixedPointLiteral(
FixedPointLiteral *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformFloatingLiteral(FloatingLiteral *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformImaginaryLiteral(ImaginaryLiteral *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformStringLiteral(StringLiteral *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCharacterLiteral(CharacterLiteral *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformUserDefinedLiteral(UserDefinedLiteral *E) {
if (FunctionDecl *FD = E->getDirectCallee())
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), FD);
return SemaRef.MaybeBindToTemporary(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformGenericSelectionExpr(GenericSelectionExpr *E) {
ExprResult ControllingExpr =
getDerived().TransformExpr(E->getControllingExpr());
if (ControllingExpr.isInvalid())
return ExprError();
SmallVector<Expr *, 4> AssocExprs;
SmallVector<TypeSourceInfo *, 4> AssocTypes;
for (const GenericSelectionExpr::Association Assoc : E->associations()) {
TypeSourceInfo *TSI = Assoc.getTypeSourceInfo();
if (TSI) {
TypeSourceInfo *AssocType = getDerived().TransformType(TSI);
if (!AssocType)
return ExprError();
AssocTypes.push_back(AssocType);
} else {
AssocTypes.push_back(nullptr);
}
ExprResult AssocExpr =
getDerived().TransformExpr(Assoc.getAssociationExpr());
if (AssocExpr.isInvalid())
return ExprError();
AssocExprs.push_back(AssocExpr.get());
}
return getDerived().RebuildGenericSelectionExpr(E->getGenericLoc(),
E->getDefaultLoc(),
E->getRParenLoc(),
ControllingExpr.get(),
AssocTypes,
AssocExprs);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformParenExpr(ParenExpr *E) {
ExprResult SubExpr = getDerived().TransformExpr(E->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildParenExpr(SubExpr.get(), E->getLParen(),
E->getRParen());
}
/// The operand of a unary address-of operator has special rules: it's
/// allowed to refer to a non-static member of a class even if there's no 'this'
/// object available.
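/// For example, '&T::f' may name a non-static member function in order to
/// form a pointer to member, so the operand is transformed without requiring
/// an implicit 'this'.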
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformAddressOfOperand(Expr *E) {
if (DependentScopeDeclRefExpr *DRE = dyn_cast<DependentScopeDeclRefExpr>(E))
return getDerived().TransformDependentScopeDeclRefExpr(DRE, true, nullptr);
else
return getDerived().TransformExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformUnaryOperator(UnaryOperator *E) {
ExprResult SubExpr;
if (E->getOpcode() == UO_AddrOf)
SubExpr = TransformAddressOfOperand(E->getSubExpr());
else
SubExpr = TransformExpr(E->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildUnaryOperator(E->getOperatorLoc(),
E->getOpcode(),
SubExpr.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformOffsetOfExpr(OffsetOfExpr *E) {
// Transform the type.
TypeSourceInfo *Type = getDerived().TransformType(E->getTypeSourceInfo());
if (!Type)
return ExprError();
// Transform all of the components into components similar to what the
// parser uses.
// FIXME: It would be slightly more efficient in the non-dependent case to
// just map FieldDecls, rather than requiring the rebuilder to look for
// the fields again. However, __builtin_offsetof is rare enough in
// template code that we don't care.
bool ExprChanged = false;
typedef Sema::OffsetOfComponent Component;
SmallVector<Component, 4> Components;
for (unsigned I = 0, N = E->getNumComponents(); I != N; ++I) {
const OffsetOfNode &ON = E->getComponent(I);
Component Comp;
Comp.isBrackets = true;
Comp.LocStart = ON.getSourceRange().getBegin();
Comp.LocEnd = ON.getSourceRange().getEnd();
switch (ON.getKind()) {
case OffsetOfNode::Array: {
Expr *FromIndex = E->getIndexExpr(ON.getArrayExprIndex());
ExprResult Index = getDerived().TransformExpr(FromIndex);
if (Index.isInvalid())
return ExprError();
ExprChanged = ExprChanged || Index.get() != FromIndex;
Comp.isBrackets = true;
Comp.U.E = Index.get();
break;
}
case OffsetOfNode::Field:
case OffsetOfNode::Identifier:
Comp.isBrackets = false;
Comp.U.IdentInfo = ON.getFieldName();
if (!Comp.U.IdentInfo)
continue;
break;
case OffsetOfNode::Base:
// Will be recomputed during the rebuild.
continue;
}
Components.push_back(Comp);
}
// If nothing changed, retain the existing expression.
if (!getDerived().AlwaysRebuild() &&
Type == E->getTypeSourceInfo() &&
!ExprChanged)
return E;
// Build a new offsetof expression.
return getDerived().RebuildOffsetOfExpr(E->getOperatorLoc(), Type,
Components, E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformOpaqueValueExpr(OpaqueValueExpr *E) {
assert((!E->getSourceExpr() || getDerived().AlreadyTransformed(E->getType())) &&
"opaque value expression requires transformation");
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformTypoExpr(TypoExpr *E) {
return E;
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformRecoveryExpr(RecoveryExpr *E) {
llvm::SmallVector<Expr *, 8> Children;
bool Changed = false;
for (Expr *C : E->subExpressions()) {
ExprResult NewC = getDerived().TransformExpr(C);
if (NewC.isInvalid())
return ExprError();
Children.push_back(NewC.get());
Changed |= NewC.get() != C;
}
if (!getDerived().AlwaysRebuild() && !Changed)
return E;
return getDerived().RebuildRecoveryExpr(E->getBeginLoc(), E->getEndLoc(),
Children, E->getType());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformPseudoObjectExpr(PseudoObjectExpr *E) {
// Rebuild the syntactic form. The original syntactic form has
// opaque-value expressions in it, so strip those away and rebuild
// the result. This is a really awful way of doing this, but the
// better solution (rebuilding the semantic expressions and
// rebinding OVEs as necessary) doesn't work; we'd need
// TreeTransform to not strip away implicit conversions.
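  // (Pseudo-object expressions cover constructs such as Objective-C property
  // and subscript accesses and MS __declspec(property) references, where the
  // syntactic form 'obj.prop = x' is backed by separately built getter/setter
  // calls.)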
Expr *newSyntacticForm = SemaRef.recreateSyntacticForm(E);
ExprResult result = getDerived().TransformExpr(newSyntacticForm);
if (result.isInvalid()) return ExprError();
// If that gives us a pseudo-object result back, the pseudo-object
// expression must have been an lvalue-to-rvalue conversion which we
// should reapply.
if (result.get()->hasPlaceholderType(BuiltinType::PseudoObject))
result = SemaRef.checkPseudoObjectRValue(result.get());
return result;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformUnaryExprOrTypeTraitExpr(
UnaryExprOrTypeTraitExpr *E) {
if (E->isArgumentType()) {
TypeSourceInfo *OldT = E->getArgumentTypeInfo();
TypeSourceInfo *NewT = getDerived().TransformType(OldT);
if (!NewT)
return ExprError();
if (!getDerived().AlwaysRebuild() && OldT == NewT)
return E;
return getDerived().RebuildUnaryExprOrTypeTrait(NewT, E->getOperatorLoc(),
E->getKind(),
E->getSourceRange());
}
// C++0x [expr.sizeof]p1:
// The operand is either an expression, which is an unevaluated operand
// [...]
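  // For example, in 'sizeof(g())' the call to g() is never evaluated; the
  // unevaluated context keeps the transformed operand from producing odr-uses
  // or side effects.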
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated,
Sema::ReuseLambdaContextDecl);
// Try to recover if we have something like sizeof(T::X) where X is a type.
// Notably, there must be *exactly* one set of parens if X is a type.
TypeSourceInfo *RecoveryTSI = nullptr;
ExprResult SubExpr;
auto *PE = dyn_cast<ParenExpr>(E->getArgumentExpr());
if (auto *DRE =
PE ? dyn_cast<DependentScopeDeclRefExpr>(PE->getSubExpr()) : nullptr)
SubExpr = getDerived().TransformParenDependentScopeDeclRefExpr(
PE, DRE, false, &RecoveryTSI);
else
SubExpr = getDerived().TransformExpr(E->getArgumentExpr());
if (RecoveryTSI) {
return getDerived().RebuildUnaryExprOrTypeTrait(
RecoveryTSI, E->getOperatorLoc(), E->getKind(), E->getSourceRange());
} else if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getArgumentExpr())
return E;
return getDerived().RebuildUnaryExprOrTypeTrait(SubExpr.get(),
E->getOperatorLoc(),
E->getKind(),
E->getSourceRange());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformArraySubscriptExpr(ArraySubscriptExpr *E) {
ExprResult LHS = getDerived().TransformExpr(E->getLHS());
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = getDerived().TransformExpr(E->getRHS());
if (RHS.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
LHS.get() == E->getLHS() &&
RHS.get() == E->getRHS())
return E;
return getDerived().RebuildArraySubscriptExpr(
LHS.get(),
/*FIXME:*/ E->getLHS()->getBeginLoc(), RHS.get(), E->getRBracketLoc());
}
template <typename Derived>
ExprResult
TreeTransform<Derived>::TransformMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
ExprResult RowIdx = getDerived().TransformExpr(E->getRowIdx());
if (RowIdx.isInvalid())
return ExprError();
ExprResult ColumnIdx = getDerived().TransformExpr(E->getColumnIdx());
if (ColumnIdx.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && Base.get() == E->getBase() &&
RowIdx.get() == E->getRowIdx() && ColumnIdx.get() == E->getColumnIdx())
return E;
return getDerived().RebuildMatrixSubscriptExpr(
Base.get(), RowIdx.get(), ColumnIdx.get(), E->getRBracketLoc());
}
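// OpenMP array sections such as 'a[lb : len]' or 'a[lb : len : stride]' carry
// an optional lower bound, length and (since OpenMP 5.0) stride; each part is
// transformed independently below.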
template <typename Derived>
ExprResult
TreeTransform<Derived>::TransformOMPArraySectionExpr(OMPArraySectionExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
ExprResult LowerBound;
if (E->getLowerBound()) {
LowerBound = getDerived().TransformExpr(E->getLowerBound());
if (LowerBound.isInvalid())
return ExprError();
}
ExprResult Length;
if (E->getLength()) {
Length = getDerived().TransformExpr(E->getLength());
if (Length.isInvalid())
return ExprError();
}
ExprResult Stride;
if (Expr *Str = E->getStride()) {
Stride = getDerived().TransformExpr(Str);
if (Stride.isInvalid())
return ExprError();
}
if (!getDerived().AlwaysRebuild() && Base.get() == E->getBase() &&
LowerBound.get() == E->getLowerBound() && Length.get() == E->getLength())
return E;
return getDerived().RebuildOMPArraySectionExpr(
Base.get(), E->getBase()->getEndLoc(), LowerBound.get(),
E->getColonLocFirst(), E->getColonLocSecond(), Length.get(), Stride.get(),
E->getRBracketLoc());
}
template <typename Derived>
ExprResult
TreeTransform<Derived>::TransformOMPArrayShapingExpr(OMPArrayShapingExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
SmallVector<Expr *, 4> Dims;
bool ErrorFound = false;
for (Expr *Dim : E->getDimensions()) {
ExprResult DimRes = getDerived().TransformExpr(Dim);
if (DimRes.isInvalid()) {
ErrorFound = true;
continue;
}
Dims.push_back(DimRes.get());
}
if (ErrorFound)
return ExprError();
return getDerived().RebuildOMPArrayShapingExpr(Base.get(), E->getLParenLoc(),
E->getRParenLoc(), Dims,
E->getBracketsRanges());
}
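// OpenMP 5.0 iterator modifiers, e.g. 'depend(iterator(i = 0:n), in: a[i])',
// declare their own iterator variables; each variable's type, range bounds
// and step may be template-dependent and are transformed one by one below.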
template <typename Derived>
ExprResult
TreeTransform<Derived>::TransformOMPIteratorExpr(OMPIteratorExpr *E) {
unsigned NumIterators = E->numOfIterators();
SmallVector<Sema::OMPIteratorData, 4> Data(NumIterators);
bool ErrorFound = false;
bool NeedToRebuild = getDerived().AlwaysRebuild();
for (unsigned I = 0; I < NumIterators; ++I) {
auto *D = cast<VarDecl>(E->getIteratorDecl(I));
Data[I].DeclIdent = D->getIdentifier();
Data[I].DeclIdentLoc = D->getLocation();
if (D->getLocation() == D->getBeginLoc()) {
assert(SemaRef.Context.hasSameType(D->getType(), SemaRef.Context.IntTy) &&
"Implicit type must be int.");
} else {
TypeSourceInfo *TSI = getDerived().TransformType(D->getTypeSourceInfo());
QualType DeclTy = getDerived().TransformType(D->getType());
Data[I].Type = SemaRef.CreateParsedType(DeclTy, TSI);
}
OMPIteratorExpr::IteratorRange Range = E->getIteratorRange(I);
ExprResult Begin = getDerived().TransformExpr(Range.Begin);
ExprResult End = getDerived().TransformExpr(Range.End);
ExprResult Step = getDerived().TransformExpr(Range.Step);
ErrorFound = ErrorFound ||
!(!D->getTypeSourceInfo() || (Data[I].Type.getAsOpaquePtr() &&
!Data[I].Type.get().isNull())) ||
Begin.isInvalid() || End.isInvalid() || Step.isInvalid();
if (ErrorFound)
continue;
Data[I].Range.Begin = Begin.get();
Data[I].Range.End = End.get();
Data[I].Range.Step = Step.get();
Data[I].AssignLoc = E->getAssignLoc(I);
Data[I].ColonLoc = E->getColonLoc(I);
Data[I].SecColonLoc = E->getSecondColonLoc(I);
NeedToRebuild =
NeedToRebuild ||
(D->getTypeSourceInfo() && Data[I].Type.get().getTypePtrOrNull() !=
D->getType().getTypePtrOrNull()) ||
Range.Begin != Data[I].Range.Begin || Range.End != Data[I].Range.End ||
Range.Step != Data[I].Range.Step;
}
if (ErrorFound)
return ExprError();
if (!NeedToRebuild)
return E;
ExprResult Res = getDerived().RebuildOMPIteratorExpr(
E->getIteratorKwLoc(), E->getLParenLoc(), E->getRParenLoc(), Data);
if (!Res.isUsable())
return Res;
auto *IE = cast<OMPIteratorExpr>(Res.get());
for (unsigned I = 0; I < NumIterators; ++I)
getDerived().transformedLocalDecl(E->getIteratorDecl(I),
IE->getIteratorDecl(I));
return Res;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCallExpr(CallExpr *E) {
// Transform the callee.
ExprResult Callee = getDerived().TransformExpr(E->getCallee());
if (Callee.isInvalid())
return ExprError();
// Transform arguments.
bool ArgChanged = false;
SmallVector<Expr*, 8> Args;
if (getDerived().TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
&ArgChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Callee.get() == E->getCallee() &&
!ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
// FIXME: Wrong source location information for the '('.
SourceLocation FakeLParenLoc
= ((Expr *)Callee.get())->getSourceRange().getBegin();
Sema::FPFeaturesStateRAII FPFeaturesState(getSema());
if (E->hasStoredFPFeatures()) {
FPOptionsOverride NewOverrides = E->getFPFeatures();
getSema().CurFPFeatures =
NewOverrides.applyOverrides(getSema().getLangOpts());
getSema().FpPragmaStack.CurrentValue = NewOverrides;
}
return getDerived().RebuildCallExpr(Callee.get(), FakeLParenLoc,
Args,
E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformMemberExpr(MemberExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
NestedNameSpecifierLoc QualifierLoc;
if (E->hasQualifier()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(E->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
}
SourceLocation TemplateKWLoc = E->getTemplateKeywordLoc();
ValueDecl *Member
= cast_or_null<ValueDecl>(getDerived().TransformDecl(E->getMemberLoc(),
E->getMemberDecl()));
if (!Member)
return ExprError();
NamedDecl *FoundDecl = E->getFoundDecl();
if (FoundDecl == E->getMemberDecl()) {
FoundDecl = Member;
} else {
FoundDecl = cast_or_null<NamedDecl>(
getDerived().TransformDecl(E->getMemberLoc(), FoundDecl));
if (!FoundDecl)
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
Base.get() == E->getBase() &&
QualifierLoc == E->getQualifierLoc() &&
Member == E->getMemberDecl() &&
FoundDecl == E->getFoundDecl() &&
!E->hasExplicitTemplateArgs()) {
// Mark it referenced in the new context regardless.
// FIXME: this is a bit instantiation-specific.
SemaRef.MarkMemberReferenced(E);
return E;
}
TemplateArgumentListInfo TransArgs;
if (E->hasExplicitTemplateArgs()) {
TransArgs.setLAngleLoc(E->getLAngleLoc());
TransArgs.setRAngleLoc(E->getRAngleLoc());
if (getDerived().TransformTemplateArguments(E->getTemplateArgs(),
E->getNumTemplateArgs(),
TransArgs))
return ExprError();
}
// FIXME: Bogus source location for the operator
SourceLocation FakeOperatorLoc =
SemaRef.getLocForEndOfToken(E->getBase()->getSourceRange().getEnd());
// FIXME: to do this check properly, we will need to preserve the
// first-qualifier-in-scope here, just in case we had a dependent
// base (and therefore couldn't do the check) and a
// nested-name-qualifier (and therefore could do the lookup).
NamedDecl *FirstQualifierInScope = nullptr;
DeclarationNameInfo MemberNameInfo = E->getMemberNameInfo();
if (MemberNameInfo.getName()) {
MemberNameInfo = getDerived().TransformDeclarationNameInfo(MemberNameInfo);
if (!MemberNameInfo.getName())
return ExprError();
}
return getDerived().RebuildMemberExpr(Base.get(), FakeOperatorLoc,
E->isArrow(),
QualifierLoc,
TemplateKWLoc,
MemberNameInfo,
Member,
FoundDecl,
(E->hasExplicitTemplateArgs()
? &TransArgs : nullptr),
FirstQualifierInScope);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformBinaryOperator(BinaryOperator *E) {
ExprResult LHS = getDerived().TransformExpr(E->getLHS());
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = getDerived().TransformExpr(E->getRHS());
if (RHS.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
LHS.get() == E->getLHS() &&
RHS.get() == E->getRHS())
return E;
if (E->isCompoundAssignmentOp())
// FPFeatures has already been established from trailing storage
return getDerived().RebuildBinaryOperator(
E->getOperatorLoc(), E->getOpcode(), LHS.get(), RHS.get());
Sema::FPFeaturesStateRAII FPFeaturesState(getSema());
FPOptionsOverride NewOverrides(E->getFPFeatures(getSema().getLangOpts()));
getSema().CurFPFeatures =
NewOverrides.applyOverrides(getSema().getLangOpts());
getSema().FpPragmaStack.CurrentValue = NewOverrides;
return getDerived().RebuildBinaryOperator(E->getOperatorLoc(), E->getOpcode(),
LHS.get(), RHS.get());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformCXXRewrittenBinaryOperator(
CXXRewrittenBinaryOperator *E) {
CXXRewrittenBinaryOperator::DecomposedForm Decomp = E->getDecomposedForm();
ExprResult LHS = getDerived().TransformExpr(const_cast<Expr*>(Decomp.LHS));
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = getDerived().TransformExpr(const_cast<Expr*>(Decomp.RHS));
if (RHS.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
LHS.get() == Decomp.LHS &&
RHS.get() == Decomp.RHS)
return E;
// Extract the already-resolved callee declarations so that we can restrict
// ourselves to using them as the unqualified lookup results when rebuilding.
UnresolvedSet<2> UnqualLookups;
Expr *PossibleBinOps[] = {E->getSemanticForm(),
const_cast<Expr *>(Decomp.InnerBinOp)};
for (Expr *PossibleBinOp : PossibleBinOps) {
auto *Op = dyn_cast<CXXOperatorCallExpr>(PossibleBinOp->IgnoreImplicit());
if (!Op)
continue;
auto *Callee = dyn_cast<DeclRefExpr>(Op->getCallee()->IgnoreImplicit());
if (!Callee || isa<CXXMethodDecl>(Callee->getDecl()))
continue;
// Transform the callee in case we built a call to a local extern
// declaration.
NamedDecl *Found = cast_or_null<NamedDecl>(getDerived().TransformDecl(
E->getOperatorLoc(), Callee->getFoundDecl()));
if (!Found)
return ExprError();
UnqualLookups.addDecl(Found);
}
return getDerived().RebuildCXXRewrittenBinaryOperator(
E->getOperatorLoc(), Decomp.Opcode, UnqualLookups, LHS.get(), RHS.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCompoundAssignOperator(
CompoundAssignOperator *E) {
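// Compound assignments keep their FP pragma state in trailing storage;
// install it here before delegating to the generic binary-operator
// transform, which deliberately skips this step for compound assignments.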
Sema::FPFeaturesStateRAII FPFeaturesState(getSema());
FPOptionsOverride NewOverrides(E->getFPFeatures(getSema().getLangOpts()));
getSema().CurFPFeatures =
NewOverrides.applyOverrides(getSema().getLangOpts());
getSema().FpPragmaStack.CurrentValue = NewOverrides;
return getDerived().TransformBinaryOperator(E);
}
template<typename Derived>
ExprResult TreeTransform<Derived>::
TransformBinaryConditionalOperator(BinaryConditionalOperator *e) {
// Just rebuild the common and RHS expressions and see whether we
// get any changes.
ExprResult commonExpr = getDerived().TransformExpr(e->getCommon());
if (commonExpr.isInvalid())
return ExprError();
ExprResult rhs = getDerived().TransformExpr(e->getFalseExpr());
if (rhs.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
commonExpr.get() == e->getCommon() &&
rhs.get() == e->getFalseExpr())
return e;
return getDerived().RebuildConditionalOperator(commonExpr.get(),
e->getQuestionLoc(),
nullptr,
e->getColonLoc(),
rhs.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformConditionalOperator(ConditionalOperator *E) {
ExprResult Cond = getDerived().TransformExpr(E->getCond());
if (Cond.isInvalid())
return ExprError();
ExprResult LHS = getDerived().TransformExpr(E->getLHS());
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = getDerived().TransformExpr(E->getRHS());
if (RHS.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Cond.get() == E->getCond() &&
LHS.get() == E->getLHS() &&
RHS.get() == E->getRHS())
return E;
return getDerived().RebuildConditionalOperator(Cond.get(),
E->getQuestionLoc(),
LHS.get(),
E->getColonLoc(),
RHS.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformImplicitCastExpr(ImplicitCastExpr *E) {
// Implicit casts are eliminated during transformation, since they
// will be recomputed by semantic analysis after transformation.
return getDerived().TransformExpr(E->getSubExprAsWritten());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCStyleCastExpr(CStyleCastExpr *E) {
TypeSourceInfo *Type = getDerived().TransformType(E->getTypeInfoAsWritten());
if (!Type)
return ExprError();
ExprResult SubExpr
= getDerived().TransformExpr(E->getSubExprAsWritten());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Type == E->getTypeInfoAsWritten() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildCStyleCastExpr(E->getLParenLoc(),
Type,
E->getRParenLoc(),
SubExpr.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCompoundLiteralExpr(CompoundLiteralExpr *E) {
TypeSourceInfo *OldT = E->getTypeSourceInfo();
TypeSourceInfo *NewT = getDerived().TransformType(OldT);
if (!NewT)
return ExprError();
ExprResult Init = getDerived().TransformExpr(E->getInitializer());
if (Init.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
OldT == NewT &&
Init.get() == E->getInitializer())
return SemaRef.MaybeBindToTemporary(E);
// Note: the expression type doesn't necessarily match the
// type-as-written, but that's okay, because it should always be
// derivable from the initializer.
return getDerived().RebuildCompoundLiteralExpr(
E->getLParenLoc(), NewT,
/*FIXME:*/ E->getInitializer()->getEndLoc(), Init.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformExtVectorElementExpr(ExtVectorElementExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Base.get() == E->getBase())
return E;
// FIXME: Bad source location
SourceLocation FakeOperatorLoc =
SemaRef.getLocForEndOfToken(E->getBase()->getEndLoc());
return getDerived().RebuildExtVectorElementExpr(Base.get(), FakeOperatorLoc,
E->getAccessorLoc(),
E->getAccessor());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformInitListExpr(InitListExpr *E) {
if (InitListExpr *Syntactic = E->getSyntacticForm())
E = Syntactic;
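// Only the syntactic form is transformed here; Sema recomputes the semantic
// form when the rebuilt initializer list is checked.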
bool InitChanged = false;
EnterExpressionEvaluationContext Context(
getSema(), EnterExpressionEvaluationContext::InitList);
SmallVector<Expr*, 4> Inits;
if (getDerived().TransformExprs(E->getInits(), E->getNumInits(), false,
Inits, &InitChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() && !InitChanged) {
// FIXME: Attempt to reuse the existing syntactic form of the InitListExpr
// in some cases. We can't reuse it in general, because the syntactic and
// semantic forms are linked, and we can't know that semantic form will
// match even if the syntactic form does.
}
return getDerived().RebuildInitList(E->getLBraceLoc(), Inits,
E->getRBraceLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformDesignatedInitExpr(DesignatedInitExpr *E) {
Designation Desig;
// transform the initializer value
ExprResult Init = getDerived().TransformExpr(E->getInit());
if (Init.isInvalid())
return ExprError();
// transform the designators.
SmallVector<Expr*, 4> ArrayExprs;
bool ExprChanged = false;
for (const DesignatedInitExpr::Designator &D : E->designators()) {
if (D.isFieldDesignator()) {
Desig.AddDesignator(Designator::getField(D.getFieldName(),
D.getDotLoc(),
D.getFieldLoc()));
if (D.getField()) {
FieldDecl *Field = cast_or_null<FieldDecl>(
getDerived().TransformDecl(D.getFieldLoc(), D.getField()));
if (Field != D.getField())
// Rebuild the expression when the transformed FieldDecl is
// different to the already assigned FieldDecl.
ExprChanged = true;
} else {
// Ensure that the designator expression is rebuilt when there isn't
// a resolved FieldDecl in the designator as we don't want to assign
// a FieldDecl to a pattern designator that will be instantiated again.
ExprChanged = true;
}
continue;
}
if (D.isArrayDesignator()) {
ExprResult Index = getDerived().TransformExpr(E->getArrayIndex(D));
if (Index.isInvalid())
return ExprError();
Desig.AddDesignator(
Designator::getArray(Index.get(), D.getLBracketLoc()));
ExprChanged = ExprChanged || Index.get() != E->getArrayIndex(D);
ArrayExprs.push_back(Index.get());
continue;
}
assert(D.isArrayRangeDesignator() && "New kind of designator?");
ExprResult Start
= getDerived().TransformExpr(E->getArrayRangeStart(D));
if (Start.isInvalid())
return ExprError();
ExprResult End = getDerived().TransformExpr(E->getArrayRangeEnd(D));
if (End.isInvalid())
return ExprError();
Desig.AddDesignator(Designator::getArrayRange(Start.get(),
End.get(),
D.getLBracketLoc(),
D.getEllipsisLoc()));
ExprChanged = ExprChanged || Start.get() != E->getArrayRangeStart(D) ||
End.get() != E->getArrayRangeEnd(D);
ArrayExprs.push_back(Start.get());
ArrayExprs.push_back(End.get());
}
if (!getDerived().AlwaysRebuild() &&
Init.get() == E->getInit() &&
!ExprChanged)
return E;
return getDerived().RebuildDesignatedInitExpr(Desig, ArrayExprs,
E->getEqualOrColonLoc(),
E->usesGNUSyntax(), Init.get());
}
// Because TransformInitListExpr() only operates on the syntactic form of an
// InitListExpr, a DesignatedInitUpdateExpr should never be encountered here.
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformDesignatedInitUpdateExpr(
DesignatedInitUpdateExpr *E) {
llvm_unreachable("Unexpected DesignatedInitUpdateExpr in syntactic form of "
"initializer");
return ExprError();
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformNoInitExpr(
NoInitExpr *E) {
llvm_unreachable("Unexpected NoInitExpr in syntactic form of initializer");
return ExprError();
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformArrayInitLoopExpr(ArrayInitLoopExpr *E) {
llvm_unreachable("Unexpected ArrayInitLoopExpr outside of initializer");
return ExprError();
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformArrayInitIndexExpr(ArrayInitIndexExpr *E) {
llvm_unreachable("Unexpected ArrayInitIndexExpr outside of initializer");
return ExprError();
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformImplicitValueInitExpr(
ImplicitValueInitExpr *E) {
TemporaryBase Rebase(*this, E->getBeginLoc(), DeclarationName());
// FIXME: Will we ever have proper type location here? Will we actually
// need to transform the type?
QualType T = getDerived().TransformType(E->getType());
if (T.isNull())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
T == E->getType())
return E;
return getDerived().RebuildImplicitValueInitExpr(T);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformVAArgExpr(VAArgExpr *E) {
TypeSourceInfo *TInfo = getDerived().TransformType(E->getWrittenTypeInfo());
if (!TInfo)
return ExprError();
ExprResult SubExpr = getDerived().TransformExpr(E->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
TInfo == E->getWrittenTypeInfo() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildVAArgExpr(E->getBuiltinLoc(), SubExpr.get(),
TInfo, E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformParenListExpr(ParenListExpr *E) {
bool ArgumentChanged = false;
SmallVector<Expr*, 4> Inits;
if (TransformExprs(E->getExprs(), E->getNumExprs(), true, Inits,
&ArgumentChanged))
return ExprError();
return getDerived().RebuildParenListExpr(E->getLParenLoc(),
Inits,
E->getRParenLoc());
}
/// Transform an address-of-label expression.
///
/// By default, the transformation of an address-of-label expression always
/// rebuilds the expression, so that the label identifier can be resolved to
/// the corresponding label statement by semantic analysis.
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformAddrLabelExpr(AddrLabelExpr *E) {
Decl *LD = getDerived().TransformDecl(E->getLabel()->getLocation(),
E->getLabel());
if (!LD)
return ExprError();
return getDerived().RebuildAddrLabelExpr(E->getAmpAmpLoc(), E->getLabelLoc(),
cast<LabelDecl>(LD));
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformStmtExpr(StmtExpr *E) {
SemaRef.ActOnStartStmtExpr();
StmtResult SubStmt
= getDerived().TransformCompoundStmt(E->getSubStmt(), true);
if (SubStmt.isInvalid()) {
SemaRef.ActOnStmtExprError();
return ExprError();
}
unsigned OldDepth = E->getTemplateDepth();
unsigned NewDepth = getDerived().TransformTemplateDepth(OldDepth);
if (!getDerived().AlwaysRebuild() && OldDepth == NewDepth &&
SubStmt.get() == E->getSubStmt()) {
// Calling this an 'error' is unintuitive, but it does the right thing.
SemaRef.ActOnStmtExprError();
return SemaRef.MaybeBindToTemporary(E);
}
return getDerived().RebuildStmtExpr(E->getLParenLoc(), SubStmt.get(),
E->getRParenLoc(), NewDepth);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformChooseExpr(ChooseExpr *E) {
ExprResult Cond = getDerived().TransformExpr(E->getCond());
if (Cond.isInvalid())
return ExprError();
ExprResult LHS = getDerived().TransformExpr(E->getLHS());
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = getDerived().TransformExpr(E->getRHS());
if (RHS.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Cond.get() == E->getCond() &&
LHS.get() == E->getLHS() &&
RHS.get() == E->getRHS())
return E;
return getDerived().RebuildChooseExpr(E->getBuiltinLoc(),
Cond.get(), LHS.get(), RHS.get(),
E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformGNUNullExpr(GNUNullExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXOperatorCallExpr(CXXOperatorCallExpr *E) {
switch (E->getOperator()) {
case OO_New:
case OO_Delete:
case OO_Array_New:
case OO_Array_Delete:
llvm_unreachable("new and delete operators cannot use CXXOperatorCallExpr");
case OO_Call: {
// This is a call to an object's operator().
assert(E->getNumArgs() >= 1 && "Object call is missing arguments");
// Transform the object itself.
ExprResult Object = getDerived().TransformExpr(E->getArg(0));
if (Object.isInvalid())
return ExprError();
// FIXME: Poor location information
SourceLocation FakeLParenLoc = SemaRef.getLocForEndOfToken(
static_cast<Expr *>(Object.get())->getEndLoc());
// Transform the call arguments.
SmallVector<Expr*, 8> Args;
if (getDerived().TransformExprs(E->getArgs() + 1, E->getNumArgs() - 1, true,
Args))
return ExprError();
return getDerived().RebuildCallExpr(Object.get(), FakeLParenLoc, Args,
E->getEndLoc());
}
#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
case OO_##Name:
#define OVERLOADED_OPERATOR_MULTI(Name,Spelling,Unary,Binary,MemberOnly)
#include "clang/Basic/OperatorKinds.def"
case OO_Subscript:
// Handled below.
break;
case OO_Conditional:
llvm_unreachable("conditional operator is not actually overloadable");
case OO_None:
case NUM_OVERLOADED_OPERATORS:
llvm_unreachable("not an overloaded operator?");
}
ExprResult Callee = getDerived().TransformExpr(E->getCallee());
if (Callee.isInvalid())
return ExprError();
ExprResult First;
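// Give the derived transform a chance to handle the operand of a unary '&'
// specially, so that forming a pointer to member still works after the
// transformation.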
if (E->getOperator() == OO_Amp)
First = getDerived().TransformAddressOfOperand(E->getArg(0));
else
First = getDerived().TransformExpr(E->getArg(0));
if (First.isInvalid())
return ExprError();
ExprResult Second;
if (E->getNumArgs() == 2) {
Second = getDerived().TransformExpr(E->getArg(1));
if (Second.isInvalid())
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
Callee.get() == E->getCallee() &&
First.get() == E->getArg(0) &&
(E->getNumArgs() != 2 || Second.get() == E->getArg(1)))
return SemaRef.MaybeBindToTemporary(E);
Sema::FPFeaturesStateRAII FPFeaturesState(getSema());
FPOptionsOverride NewOverrides(E->getFPFeatures());
getSema().CurFPFeatures =
NewOverrides.applyOverrides(getSema().getLangOpts());
getSema().FpPragmaStack.CurrentValue = NewOverrides;
return getDerived().RebuildCXXOperatorCallExpr(E->getOperator(),
E->getOperatorLoc(),
Callee.get(),
First.get(),
Second.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXMemberCallExpr(CXXMemberCallExpr *E) {
return getDerived().TransformCallExpr(E);
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformSourceLocExpr(SourceLocExpr *E) {
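// __builtin_FUNCTION() depends on the function it appears in, so the
// expression must be rebuilt whenever the enclosing context has changed.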
bool NeedRebuildFunc = E->getIdentKind() == SourceLocExpr::Function &&
getSema().CurContext != E->getParentContext();
if (!getDerived().AlwaysRebuild() && !NeedRebuildFunc)
return E;
return getDerived().RebuildSourceLocExpr(E->getIdentKind(), E->getBeginLoc(),
E->getEndLoc(),
getSema().CurContext);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCUDAKernelCallExpr(CUDAKernelCallExpr *E) {
// Transform the callee.
ExprResult Callee = getDerived().TransformExpr(E->getCallee());
if (Callee.isInvalid())
return ExprError();
// Transform exec config.
ExprResult EC = getDerived().TransformCallExpr(E->getConfig());
if (EC.isInvalid())
return ExprError();
// Transform arguments.
bool ArgChanged = false;
SmallVector<Expr*, 8> Args;
if (getDerived().TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
&ArgChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Callee.get() == E->getCallee() &&
!ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
// FIXME: Wrong source location information for the '('.
SourceLocation FakeLParenLoc
= ((Expr *)Callee.get())->getSourceRange().getBegin();
return getDerived().RebuildCallExpr(Callee.get(), FakeLParenLoc,
Args,
E->getRParenLoc(), EC.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXNamedCastExpr(CXXNamedCastExpr *E) {
TypeSourceInfo *Type = getDerived().TransformType(E->getTypeInfoAsWritten());
if (!Type)
return ExprError();
ExprResult SubExpr
= getDerived().TransformExpr(E->getSubExprAsWritten());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Type == E->getTypeInfoAsWritten() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildCXXNamedCastExpr(
E->getOperatorLoc(), E->getStmtClass(), E->getAngleBrackets().getBegin(),
Type, E->getAngleBrackets().getEnd(),
// FIXME: this should be the '(' location
E->getAngleBrackets().getEnd(), SubExpr.get(), E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformBuiltinBitCastExpr(BuiltinBitCastExpr *BCE) {
TypeSourceInfo *TSI =
getDerived().TransformType(BCE->getTypeInfoAsWritten());
if (!TSI)
return ExprError();
ExprResult Sub = getDerived().TransformExpr(BCE->getSubExpr());
if (Sub.isInvalid())
return ExprError();
return getDerived().RebuildBuiltinBitCastExpr(BCE->getBeginLoc(), TSI,
Sub.get(), BCE->getEndLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXStaticCastExpr(CXXStaticCastExpr *E) {
return getDerived().TransformCXXNamedCastExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXDynamicCastExpr(CXXDynamicCastExpr *E) {
return getDerived().TransformCXXNamedCastExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXReinterpretCastExpr(
CXXReinterpretCastExpr *E) {
return getDerived().TransformCXXNamedCastExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXConstCastExpr(CXXConstCastExpr *E) {
return getDerived().TransformCXXNamedCastExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXAddrspaceCastExpr(CXXAddrspaceCastExpr *E) {
return getDerived().TransformCXXNamedCastExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXFunctionalCastExpr(
CXXFunctionalCastExpr *E) {
TypeSourceInfo *Type =
getDerived().TransformTypeWithDeducedTST(E->getTypeInfoAsWritten());
if (!Type)
return ExprError();
ExprResult SubExpr
= getDerived().TransformExpr(E->getSubExprAsWritten());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Type == E->getTypeInfoAsWritten() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildCXXFunctionalCastExpr(Type,
E->getLParenLoc(),
SubExpr.get(),
E->getRParenLoc(),
E->isListInitialization());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXTypeidExpr(CXXTypeidExpr *E) {
if (E->isTypeOperand()) {
TypeSourceInfo *TInfo
= getDerived().TransformType(E->getTypeOperandSourceInfo());
if (!TInfo)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
TInfo == E->getTypeOperandSourceInfo())
return E;
return getDerived().RebuildCXXTypeidExpr(E->getType(), E->getBeginLoc(),
TInfo, E->getEndLoc());
}
// The operand of typeid is unevaluated unless it is a glvalue of polymorphic
// class type. We must not unilaterally enter an unevaluated context here, as
// then semantic processing can re-transform an already-transformed operand.
Expr *Op = E->getExprOperand();
auto EvalCtx = Sema::ExpressionEvaluationContext::Unevaluated;
if (E->isGLValue())
if (auto *RecordT = Op->getType()->getAs<RecordType>())
if (cast<CXXRecordDecl>(RecordT->getDecl())->isPolymorphic())
EvalCtx = SemaRef.ExprEvalContexts.back().Context;
EnterExpressionEvaluationContext Unevaluated(SemaRef, EvalCtx,
Sema::ReuseLambdaContextDecl);
ExprResult SubExpr = getDerived().TransformExpr(Op);
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
SubExpr.get() == E->getExprOperand())
return E;
return getDerived().RebuildCXXTypeidExpr(E->getType(), E->getBeginLoc(),
SubExpr.get(), E->getEndLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXUuidofExpr(CXXUuidofExpr *E) {
if (E->isTypeOperand()) {
TypeSourceInfo *TInfo
= getDerived().TransformType(E->getTypeOperandSourceInfo());
if (!TInfo)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
TInfo == E->getTypeOperandSourceInfo())
return E;
return getDerived().RebuildCXXUuidofExpr(E->getType(), E->getBeginLoc(),
TInfo, E->getEndLoc());
}
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
ExprResult SubExpr = getDerived().TransformExpr(E->getExprOperand());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
SubExpr.get() == E->getExprOperand())
return E;
return getDerived().RebuildCXXUuidofExpr(E->getType(), E->getBeginLoc(),
SubExpr.get(), E->getEndLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXBoolLiteralExpr(CXXBoolLiteralExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXNullPtrLiteralExpr(
CXXNullPtrLiteralExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXThisExpr(CXXThisExpr *E) {
QualType T = getSema().getCurrentThisType();
if (!getDerived().AlwaysRebuild() && T == E->getType()) {
// Mark it referenced in the new context regardless.
// FIXME: this is a bit instantiation-specific.
getSema().MarkThisReferenced(E);
return E;
}
return getDerived().RebuildCXXThisExpr(E->getBeginLoc(), T, E->isImplicit());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXThrowExpr(CXXThrowExpr *E) {
ExprResult SubExpr = getDerived().TransformExpr(E->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildCXXThrowExpr(E->getThrowLoc(), SubExpr.get(),
E->isThrownVariableInScope());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXDefaultArgExpr(CXXDefaultArgExpr *E) {
ParmVarDecl *Param = cast_or_null<ParmVarDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getParam()));
if (!Param)
return ExprError();
if (!getDerived().AlwaysRebuild() && Param == E->getParam() &&
E->getUsedContext() == SemaRef.CurContext)
return E;
return getDerived().RebuildCXXDefaultArgExpr(E->getUsedLocation(), Param);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXDefaultInitExpr(CXXDefaultInitExpr *E) {
FieldDecl *Field = cast_or_null<FieldDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getField()));
if (!Field)
return ExprError();
if (!getDerived().AlwaysRebuild() && Field == E->getField() &&
E->getUsedContext() == SemaRef.CurContext)
return E;
return getDerived().RebuildCXXDefaultInitExpr(E->getExprLoc(), Field);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXScalarValueInitExpr(
CXXScalarValueInitExpr *E) {
TypeSourceInfo *T = getDerived().TransformType(E->getTypeSourceInfo());
if (!T)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
T == E->getTypeSourceInfo())
return E;
return getDerived().RebuildCXXScalarValueInitExpr(T,
/*FIXME:*/T->getTypeLoc().getEndLoc(),
E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXNewExpr(CXXNewExpr *E) {
// Transform the type that we're allocating
TypeSourceInfo *AllocTypeInfo =
getDerived().TransformTypeWithDeducedTST(E->getAllocatedTypeSourceInfo());
if (!AllocTypeInfo)
return ExprError();
// Transform the size of the array we're allocating (if any).
Optional<Expr *> ArraySize;
if (Optional<Expr *> OldArraySize = E->getArraySize()) {
ExprResult NewArraySize;
if (*OldArraySize) {
NewArraySize = getDerived().TransformExpr(*OldArraySize);
if (NewArraySize.isInvalid())
return ExprError();
}
ArraySize = NewArraySize.get();
}
// Transform the placement arguments (if any).
bool ArgumentChanged = false;
SmallVector<Expr*, 8> PlacementArgs;
if (getDerived().TransformExprs(E->getPlacementArgs(),
E->getNumPlacementArgs(), true,
PlacementArgs, &ArgumentChanged))
return ExprError();
// Transform the initializer (if any).
Expr *OldInit = E->getInitializer();
ExprResult NewInit;
if (OldInit)
NewInit = getDerived().TransformInitializer(OldInit, true);
if (NewInit.isInvalid())
return ExprError();
// Transform new operator and delete operator.
FunctionDecl *OperatorNew = nullptr;
if (E->getOperatorNew()) {
OperatorNew = cast_or_null<FunctionDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getOperatorNew()));
if (!OperatorNew)
return ExprError();
}
FunctionDecl *OperatorDelete = nullptr;
if (E->getOperatorDelete()) {
OperatorDelete = cast_or_null<FunctionDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getOperatorDelete()));
if (!OperatorDelete)
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
AllocTypeInfo == E->getAllocatedTypeSourceInfo() &&
ArraySize == E->getArraySize() &&
NewInit.get() == OldInit &&
OperatorNew == E->getOperatorNew() &&
OperatorDelete == E->getOperatorDelete() &&
!ArgumentChanged) {
// Mark any declarations we need as referenced.
// FIXME: instantiation-specific.
if (OperatorNew)
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), OperatorNew);
if (OperatorDelete)
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), OperatorDelete);
if (E->isArray() && !E->getAllocatedType()->isDependentType()) {
QualType ElementType
= SemaRef.Context.getBaseElementType(E->getAllocatedType());
if (const RecordType *RecordT = ElementType->getAs<RecordType>()) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordT->getDecl());
if (CXXDestructorDecl *Destructor = SemaRef.LookupDestructor(Record)) {
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), Destructor);
}
}
}
return E;
}
QualType AllocType = AllocTypeInfo->getType();
if (!ArraySize) {
// If no array size was specified, but the new expression was
// instantiated with an array type (e.g., "new T" where T is
// instantiated with "int[4]"), extract the outer bound from the
// array type as our array size. We do this with constant and
// dependently-sized array types.
const ArrayType *ArrayT = SemaRef.Context.getAsArrayType(AllocType);
if (!ArrayT) {
// Do nothing
} else if (const ConstantArrayType *ConsArrayT
= dyn_cast<ConstantArrayType>(ArrayT)) {
ArraySize = IntegerLiteral::Create(SemaRef.Context, ConsArrayT->getSize(),
SemaRef.Context.getSizeType(),
/*FIXME:*/ E->getBeginLoc());
AllocType = ConsArrayT->getElementType();
} else if (const DependentSizedArrayType *DepArrayT
= dyn_cast<DependentSizedArrayType>(ArrayT)) {
if (DepArrayT->getSizeExpr()) {
ArraySize = DepArrayT->getSizeExpr();
AllocType = DepArrayT->getElementType();
}
}
}
return getDerived().RebuildCXXNewExpr(
E->getBeginLoc(), E->isGlobalNew(),
/*FIXME:*/ E->getBeginLoc(), PlacementArgs,
/*FIXME:*/ E->getBeginLoc(), E->getTypeIdParens(), AllocType,
AllocTypeInfo, ArraySize, E->getDirectInitRange(), NewInit.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXDeleteExpr(CXXDeleteExpr *E) {
ExprResult Operand = getDerived().TransformExpr(E->getArgument());
if (Operand.isInvalid())
return ExprError();
// Transform the delete operator, if known.
FunctionDecl *OperatorDelete = nullptr;
if (E->getOperatorDelete()) {
OperatorDelete = cast_or_null<FunctionDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getOperatorDelete()));
if (!OperatorDelete)
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
Operand.get() == E->getArgument() &&
OperatorDelete == E->getOperatorDelete()) {
// Mark any declarations we need as referenced.
// FIXME: instantiation-specific.
if (OperatorDelete)
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), OperatorDelete);
if (!E->getArgument()->isTypeDependent()) {
QualType Destroyed = SemaRef.Context.getBaseElementType(
E->getDestroyedType());
if (const RecordType *DestroyedRec = Destroyed->getAs<RecordType>()) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(DestroyedRec->getDecl());
SemaRef.MarkFunctionReferenced(E->getBeginLoc(),
SemaRef.LookupDestructor(Record));
}
}
return E;
}
return getDerived().RebuildCXXDeleteExpr(
E->getBeginLoc(), E->isGlobalDelete(), E->isArrayForm(), Operand.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXPseudoDestructorExpr(
CXXPseudoDestructorExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
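// Re-run the member-access analysis on the transformed base so we recover
// the object type used below to resolve the destroyed type name.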
ParsedType ObjectTypePtr;
bool MayBePseudoDestructor = false;
Base = SemaRef.ActOnStartCXXMemberReference(nullptr, Base.get(),
E->getOperatorLoc(),
E->isArrow()? tok::arrow : tok::period,
ObjectTypePtr,
MayBePseudoDestructor);
if (Base.isInvalid())
return ExprError();
QualType ObjectType = ObjectTypePtr.get();
NestedNameSpecifierLoc QualifierLoc = E->getQualifierLoc();
if (QualifierLoc) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(QualifierLoc, ObjectType);
if (!QualifierLoc)
return ExprError();
}
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
PseudoDestructorTypeStorage Destroyed;
if (E->getDestroyedTypeInfo()) {
TypeSourceInfo *DestroyedTypeInfo
= getDerived().TransformTypeInObjectScope(E->getDestroyedTypeInfo(),
ObjectType, nullptr, SS);
if (!DestroyedTypeInfo)
return ExprError();
Destroyed = DestroyedTypeInfo;
} else if (!ObjectType.isNull() && ObjectType->isDependentType()) {
// We aren't likely to be able to resolve the identifier down to a type
// now anyway, so just retain the identifier.
Destroyed = PseudoDestructorTypeStorage(E->getDestroyedTypeIdentifier(),
E->getDestroyedTypeLoc());
} else {
// Look for a destructor known with the given name.
ParsedType T = SemaRef.getDestructorName(E->getTildeLoc(),
*E->getDestroyedTypeIdentifier(),
E->getDestroyedTypeLoc(),
/*Scope=*/nullptr,
SS, ObjectTypePtr,
false);
if (!T)
return ExprError();
Destroyed
= SemaRef.Context.getTrivialTypeSourceInfo(SemaRef.GetTypeFromParser(T),
E->getDestroyedTypeLoc());
}
TypeSourceInfo *ScopeTypeInfo = nullptr;
if (E->getScopeTypeInfo()) {
CXXScopeSpec EmptySS;
ScopeTypeInfo = getDerived().TransformTypeInObjectScope(
E->getScopeTypeInfo(), ObjectType, nullptr, EmptySS);
if (!ScopeTypeInfo)
return ExprError();
}
return getDerived().RebuildCXXPseudoDestructorExpr(Base.get(),
E->getOperatorLoc(),
E->isArrow(),
SS,
ScopeTypeInfo,
E->getColonColonLoc(),
E->getTildeLoc(),
Destroyed);
}
template <typename Derived>
bool TreeTransform<Derived>::TransformOverloadExprDecls(OverloadExpr *Old,
bool RequiresADL,
LookupResult &R) {
// Transform all the decls.
bool AllEmptyPacks = true;
for (auto *OldD : Old->decls()) {
Decl *InstD = getDerived().TransformDecl(Old->getNameLoc(), OldD);
if (!InstD) {
// Silently ignore these if a UsingShadowDecl instantiated to nothing.
// This can happen because of dependent hiding.
if (isa<UsingShadowDecl>(OldD))
continue;
else {
R.clear();
return true;
}
}
// Expand using pack declarations.
NamedDecl *SingleDecl = cast<NamedDecl>(InstD);
ArrayRef<NamedDecl*> Decls = SingleDecl;
if (auto *UPD = dyn_cast<UsingPackDecl>(InstD))
Decls = UPD->expansions();
// Expand using declarations.
for (auto *D : Decls) {
if (auto *UD = dyn_cast<UsingDecl>(D)) {
for (auto *SD : UD->shadows())
R.addDecl(SD);
} else {
R.addDecl(D);
}
}
AllEmptyPacks &= Decls.empty();
}
// C++ [temp.res]/8.4.2:
// The program is ill-formed, no diagnostic required, if [...] lookup for
// a name in the template definition found a using-declaration, but the
// lookup in the corresponding scope in the instantiation does not find
// any declarations because the using-declaration was a pack expansion and
// the corresponding pack is empty.
if (AllEmptyPacks && !RequiresADL) {
getSema().Diag(Old->getNameLoc(), diag::err_using_pack_expansion_empty)
<< isa<UnresolvedMemberExpr>(Old) << Old->getName();
return true;
}
// Resolve a kind, but don't do any further analysis. If it's
// ambiguous, the callee needs to deal with it.
R.resolveKind();
return false;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformUnresolvedLookupExpr(
UnresolvedLookupExpr *Old) {
LookupResult R(SemaRef, Old->getName(), Old->getNameLoc(),
Sema::LookupOrdinaryName);
// Transform the declaration set.
if (TransformOverloadExprDecls(Old, Old->requiresADL(), R))
return ExprError();
// Rebuild the nested-name qualifier, if present.
CXXScopeSpec SS;
if (Old->getQualifierLoc()) {
NestedNameSpecifierLoc QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(Old->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
SS.Adopt(QualifierLoc);
}
if (Old->getNamingClass()) {
CXXRecordDecl *NamingClass
= cast_or_null<CXXRecordDecl>(getDerived().TransformDecl(
Old->getNameLoc(),
Old->getNamingClass()));
if (!NamingClass) {
R.clear();
return ExprError();
}
R.setNamingClass(NamingClass);
}
SourceLocation TemplateKWLoc = Old->getTemplateKeywordLoc();
// If we have neither explicit template arguments, nor the template keyword,
// it's a normal declaration name or member reference.
if (!Old->hasExplicitTemplateArgs() && !TemplateKWLoc.isValid()) {
NamedDecl *D = R.getAsSingle<NamedDecl>();
// In a C++11 unevaluated context, an UnresolvedLookupExpr might refer to an
// instance member. In other contexts, BuildPossibleImplicitMemberExpr will
// give a good diagnostic.
if (D && D->isCXXInstanceMember()) {
return SemaRef.BuildPossibleImplicitMemberExpr(SS, TemplateKWLoc, R,
/*TemplateArgs=*/nullptr,
/*Scope=*/nullptr);
}
return getDerived().RebuildDeclarationNameExpr(SS, R, Old->requiresADL());
}
// If we have template arguments, rebuild them, then rebuild the
// templateid expression.
TemplateArgumentListInfo TransArgs(Old->getLAngleLoc(), Old->getRAngleLoc());
if (Old->hasExplicitTemplateArgs() &&
getDerived().TransformTemplateArguments(Old->getTemplateArgs(),
Old->getNumTemplateArgs(),
TransArgs)) {
R.clear();
return ExprError();
}
return getDerived().RebuildTemplateIdExpr(SS, TemplateKWLoc, R,
Old->requiresADL(), &TransArgs);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformTypeTraitExpr(TypeTraitExpr *E) {
bool ArgChanged = false;
SmallVector<TypeSourceInfo *, 4> Args;
for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I) {
TypeSourceInfo *From = E->getArg(I);
TypeLoc FromTL = From->getTypeLoc();
if (!FromTL.getAs<PackExpansionTypeLoc>()) {
TypeLocBuilder TLB;
TLB.reserve(FromTL.getFullDataSize());
QualType To = getDerived().TransformType(TLB, FromTL);
if (To.isNull())
return ExprError();
if (To == From->getType())
Args.push_back(From);
else {
Args.push_back(TLB.getTypeSourceInfo(SemaRef.Context, To));
ArgChanged = true;
}
continue;
}
ArgChanged = true;
// We have a pack expansion. Instantiate it.
PackExpansionTypeLoc ExpansionTL = FromTL.castAs<PackExpansionTypeLoc>();
TypeLoc PatternTL = ExpansionTL.getPatternLoc();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(PatternTL, Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions =
ExpansionTL.getTypePtr()->getNumExpansions();
Optional<unsigned> NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(ExpansionTL.getEllipsisLoc(),
PatternTL.getSourceRange(),
Unexpanded,
Expand, RetainExpansion,
NumExpansions))
return ExprError();
if (!Expand) {
// The transform has determined that we should perform a simple
// transformation on the pack expansion, producing another pack
// expansion.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
TypeLocBuilder TLB;
TLB.reserve(From->getTypeLoc().getFullDataSize());
QualType To = getDerived().TransformType(TLB, PatternTL);
if (To.isNull())
return ExprError();
To = getDerived().RebuildPackExpansionType(To,
PatternTL.getSourceRange(),
ExpansionTL.getEllipsisLoc(),
NumExpansions);
if (To.isNull())
return ExprError();
PackExpansionTypeLoc ToExpansionTL
= TLB.push<PackExpansionTypeLoc>(To);
ToExpansionTL.setEllipsisLoc(ExpansionTL.getEllipsisLoc());
Args.push_back(TLB.getTypeSourceInfo(SemaRef.Context, To));
continue;
}
// Expand the pack expansion by substituting for each argument in the
// pack(s).
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I);
TypeLocBuilder TLB;
TLB.reserve(PatternTL.getFullDataSize());
QualType To = getDerived().TransformType(TLB, PatternTL);
if (To.isNull())
return ExprError();
if (To->containsUnexpandedParameterPack()) {
To = getDerived().RebuildPackExpansionType(To,
PatternTL.getSourceRange(),
ExpansionTL.getEllipsisLoc(),
NumExpansions);
if (To.isNull())
return ExprError();
PackExpansionTypeLoc ToExpansionTL
= TLB.push<PackExpansionTypeLoc>(To);
ToExpansionTL.setEllipsisLoc(ExpansionTL.getEllipsisLoc());
}
Args.push_back(TLB.getTypeSourceInfo(SemaRef.Context, To));
}
if (!RetainExpansion)
continue;
// If we're supposed to retain a pack expansion, do so by temporarily
// forgetting the partially-substituted parameter pack.
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
TypeLocBuilder TLB;
TLB.reserve(From->getTypeLoc().getFullDataSize());
QualType To = getDerived().TransformType(TLB, PatternTL);
if (To.isNull())
return ExprError();
To = getDerived().RebuildPackExpansionType(To,
PatternTL.getSourceRange(),
ExpansionTL.getEllipsisLoc(),
NumExpansions);
if (To.isNull())
return ExprError();
PackExpansionTypeLoc ToExpansionTL
= TLB.push<PackExpansionTypeLoc>(To);
ToExpansionTL.setEllipsisLoc(ExpansionTL.getEllipsisLoc());
Args.push_back(TLB.getTypeSourceInfo(SemaRef.Context, To));
}
if (!getDerived().AlwaysRebuild() && !ArgChanged)
return E;
return getDerived().RebuildTypeTrait(E->getTrait(), E->getBeginLoc(), Args,
E->getEndLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformConceptSpecializationExpr(
ConceptSpecializationExpr *E) {
const ASTTemplateArgumentListInfo *Old = E->getTemplateArgsAsWritten();
TemplateArgumentListInfo TransArgs(Old->LAngleLoc, Old->RAngleLoc);
if (getDerived().TransformTemplateArguments(Old->getTemplateArgs(),
Old->NumTemplateArgs, TransArgs))
return ExprError();
return getDerived().RebuildConceptSpecializationExpr(
E->getNestedNameSpecifierLoc(), E->getTemplateKWLoc(),
E->getConceptNameInfo(), E->getFoundDecl(), E->getNamedConcept(),
&TransArgs);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformRequiresExpr(RequiresExpr *E) {
SmallVector<ParmVarDecl*, 4> TransParams;
SmallVector<QualType, 4> TransParamTypes;
Sema::ExtParameterInfoBuilder ExtParamInfos;
// C++2a [expr.prim.req]p2
// Expressions appearing within a requirement-body are unevaluated operands.
EnterExpressionEvaluationContext Ctx(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
RequiresExprBodyDecl *Body = RequiresExprBodyDecl::Create(
getSema().Context, getSema().CurContext,
E->getBody()->getBeginLoc());
Sema::ContextRAII SavedContext(getSema(), Body, /*NewThisContext*/false);
if (getDerived().TransformFunctionTypeParams(E->getRequiresKWLoc(),
E->getLocalParameters(),
/*ParamTypes=*/nullptr,
/*ParamInfos=*/nullptr,
TransParamTypes, &TransParams,
ExtParamInfos))
return ExprError();
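// Reparent the transformed local parameters into the new requires-expression
// body so they end up in the correct declaration context.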
for (ParmVarDecl *Param : TransParams)
Param->setDeclContext(Body);
SmallVector<concepts::Requirement *, 4> TransReqs;
if (getDerived().TransformRequiresExprRequirements(E->getRequirements(),
TransReqs))
return ExprError();
for (concepts::Requirement *Req : TransReqs) {
if (auto *ER = dyn_cast<concepts::ExprRequirement>(Req)) {
if (ER->getReturnTypeRequirement().isTypeConstraint()) {
ER->getReturnTypeRequirement()
.getTypeConstraintTemplateParameterList()->getParam(0)
->setDeclContext(Body);
}
}
}
return getDerived().RebuildRequiresExpr(E->getRequiresKWLoc(), Body,
TransParams, TransReqs,
E->getRBraceLoc());
}
template<typename Derived>
bool TreeTransform<Derived>::TransformRequiresExprRequirements(
ArrayRef<concepts::Requirement *> Reqs,
SmallVectorImpl<concepts::Requirement *> &Transformed) {
for (concepts::Requirement *Req : Reqs) {
concepts::Requirement *TransReq = nullptr;
if (auto *TypeReq = dyn_cast<concepts::TypeRequirement>(Req))
TransReq = getDerived().TransformTypeRequirement(TypeReq);
else if (auto *ExprReq = dyn_cast<concepts::ExprRequirement>(Req))
TransReq = getDerived().TransformExprRequirement(ExprReq);
else
TransReq = getDerived().TransformNestedRequirement(
cast<concepts::NestedRequirement>(Req));
if (!TransReq)
return true;
Transformed.push_back(TransReq);
}
return false;
}
template<typename Derived>
concepts::TypeRequirement *
TreeTransform<Derived>::TransformTypeRequirement(
concepts::TypeRequirement *Req) {
if (Req->isSubstitutionFailure()) {
if (getDerived().AlwaysRebuild())
return getDerived().RebuildTypeRequirement(
Req->getSubstitutionDiagnostic());
return Req;
}
TypeSourceInfo *TransType = getDerived().TransformType(Req->getType());
if (!TransType)
return nullptr;
return getDerived().RebuildTypeRequirement(TransType);
}
template<typename Derived>
concepts::ExprRequirement *
TreeTransform<Derived>::TransformExprRequirement(concepts::ExprRequirement *Req) {
llvm::PointerUnion<Expr *, concepts::Requirement::SubstitutionDiagnostic *> TransExpr;
if (Req->isExprSubstitutionFailure())
TransExpr = Req->getExprSubstitutionDiagnostic();
else {
ExprResult TransExprRes = getDerived().TransformExpr(Req->getExpr());
if (TransExprRes.isInvalid())
return nullptr;
TransExpr = TransExprRes.get();
}
llvm::Optional<concepts::ExprRequirement::ReturnTypeRequirement> TransRetReq;
const auto &RetReq = Req->getReturnTypeRequirement();
if (RetReq.isEmpty())
TransRetReq.emplace();
else if (RetReq.isSubstitutionFailure())
TransRetReq.emplace(RetReq.getSubstitutionDiagnostic());
else if (RetReq.isTypeConstraint()) {
TemplateParameterList *OrigTPL =
RetReq.getTypeConstraintTemplateParameterList();
TemplateParameterList *TPL =
getDerived().TransformTemplateParameterList(OrigTPL);
if (!TPL)
return nullptr;
TransRetReq.emplace(TPL);
}
assert(TransRetReq.hasValue() &&
"All code paths leading here must set TransRetReq");
if (Expr *E = TransExpr.dyn_cast<Expr *>())
return getDerived().RebuildExprRequirement(E, Req->isSimple(),
Req->getNoexceptLoc(),
std::move(*TransRetReq));
return getDerived().RebuildExprRequirement(
TransExpr.get<concepts::Requirement::SubstitutionDiagnostic *>(),
Req->isSimple(), Req->getNoexceptLoc(), std::move(*TransRetReq));
}
template<typename Derived>
concepts::NestedRequirement *
TreeTransform<Derived>::TransformNestedRequirement(
concepts::NestedRequirement *Req) {
if (Req->isSubstitutionFailure()) {
if (getDerived().AlwaysRebuild())
return getDerived().RebuildNestedRequirement(
Req->getSubstitutionDiagnostic());
return Req;
}
ExprResult TransConstraint =
getDerived().TransformExpr(Req->getConstraintExpr());
if (TransConstraint.isInvalid())
return nullptr;
return getDerived().RebuildNestedRequirement(TransConstraint.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformArrayTypeTraitExpr(ArrayTypeTraitExpr *E) {
TypeSourceInfo *T = getDerived().TransformType(E->getQueriedTypeSourceInfo());
if (!T)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
T == E->getQueriedTypeSourceInfo())
return E;
ExprResult SubExpr;
{
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
SubExpr = getDerived().TransformExpr(E->getDimensionExpression());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getDimensionExpression())
return E;
}
return getDerived().RebuildArrayTypeTrait(E->getTrait(), E->getBeginLoc(), T,
SubExpr.get(), E->getEndLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformExpressionTraitExpr(ExpressionTraitExpr *E) {
ExprResult SubExpr;
{
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
SubExpr = getDerived().TransformExpr(E->getQueriedExpression());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getQueriedExpression())
return E;
}
return getDerived().RebuildExpressionTrait(E->getTrait(), E->getBeginLoc(),
SubExpr.get(), E->getEndLoc());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformParenDependentScopeDeclRefExpr(
ParenExpr *PE, DependentScopeDeclRefExpr *DRE, bool AddrTaken,
TypeSourceInfo **RecoveryTSI) {
ExprResult NewDRE = getDerived().TransformDependentScopeDeclRefExpr(
DRE, AddrTaken, RecoveryTSI);
// Propagate both errors and recovered types, which return ExprEmpty.
if (!NewDRE.isUsable())
return NewDRE;
// We got an expr, wrap it up in parens.
if (!getDerived().AlwaysRebuild() && NewDRE.get() == DRE)
return PE;
return getDerived().RebuildParenExpr(NewDRE.get(), PE->getLParen(),
PE->getRParen());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformDependentScopeDeclRefExpr(
DependentScopeDeclRefExpr *E) {
return TransformDependentScopeDeclRefExpr(E, /*IsAddressOfOperand=*/false,
nullptr);
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformDependentScopeDeclRefExpr(
DependentScopeDeclRefExpr *E, bool IsAddressOfOperand,
TypeSourceInfo **RecoveryTSI) {
assert(E->getQualifierLoc());
NestedNameSpecifierLoc QualifierLoc =
getDerived().TransformNestedNameSpecifierLoc(E->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
SourceLocation TemplateKWLoc = E->getTemplateKeywordLoc();
// TODO: If this is a conversion-function-id, verify that the
// destination type name (if present) resolves the same way after
// instantiation as it did in the local scope.
DeclarationNameInfo NameInfo =
getDerived().TransformDeclarationNameInfo(E->getNameInfo());
if (!NameInfo.getName())
return ExprError();
if (!E->hasExplicitTemplateArgs()) {
if (!getDerived().AlwaysRebuild() && QualifierLoc == E->getQualifierLoc() &&
// Note: it is sufficient to compare the Name component of NameInfo:
// if name has not changed, DNLoc has not changed either.
NameInfo.getName() == E->getDeclName())
return E;
return getDerived().RebuildDependentScopeDeclRefExpr(
QualifierLoc, TemplateKWLoc, NameInfo, /*TemplateArgs=*/nullptr,
IsAddressOfOperand, RecoveryTSI);
}
TemplateArgumentListInfo TransArgs(E->getLAngleLoc(), E->getRAngleLoc());
if (getDerived().TransformTemplateArguments(
E->getTemplateArgs(), E->getNumTemplateArgs(), TransArgs))
return ExprError();
return getDerived().RebuildDependentScopeDeclRefExpr(
QualifierLoc, TemplateKWLoc, NameInfo, &TransArgs, IsAddressOfOperand,
RecoveryTSI);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXConstructExpr(CXXConstructExpr *E) {
// CXXConstructExprs other than for list-initialization and
// CXXTemporaryObjectExpr are always implicit, so when we have
// a 1-argument construction we just transform that argument.
if (getDerived().AllowSkippingCXXConstructExpr() &&
((E->getNumArgs() == 1 ||
(E->getNumArgs() > 1 && getDerived().DropCallArgument(E->getArg(1)))) &&
(!getDerived().DropCallArgument(E->getArg(0))) &&
!E->isListInitialization()))
return getDerived().TransformInitializer(E->getArg(0),
/*DirectInit*/ false);
TemporaryBase Rebase(*this, /*FIXME*/ E->getBeginLoc(), DeclarationName());
QualType T = getDerived().TransformType(E->getType());
if (T.isNull())
return ExprError();
CXXConstructorDecl *Constructor = cast_or_null<CXXConstructorDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getConstructor()));
if (!Constructor)
return ExprError();
bool ArgumentChanged = false;
SmallVector<Expr*, 8> Args;
{
EnterExpressionEvaluationContext Context(
getSema(), EnterExpressionEvaluationContext::InitList,
E->isListInitialization());
if (getDerived().TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
&ArgumentChanged))
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
T == E->getType() &&
Constructor == E->getConstructor() &&
!ArgumentChanged) {
// Mark the constructor as referenced.
// FIXME: Instantiation-specific
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), Constructor);
return E;
}
return getDerived().RebuildCXXConstructExpr(
T, /*FIXME:*/ E->getBeginLoc(), Constructor, E->isElidable(), Args,
E->hadMultipleCandidates(), E->isListInitialization(),
E->isStdInitListInitialization(), E->requiresZeroInitialization(),
E->getConstructionKind(), E->getParenOrBraceRange());
}
template<typename Derived>
ExprResult TreeTransform<Derived>::TransformCXXInheritedCtorInitExpr(
CXXInheritedCtorInitExpr *E) {
QualType T = getDerived().TransformType(E->getType());
if (T.isNull())
return ExprError();
CXXConstructorDecl *Constructor = cast_or_null<CXXConstructorDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getConstructor()));
if (!Constructor)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
T == E->getType() &&
Constructor == E->getConstructor()) {
// Mark the constructor as referenced.
// FIXME: Instantiation-specific
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), Constructor);
return E;
}
return getDerived().RebuildCXXInheritedCtorInitExpr(
T, E->getLocation(), Constructor,
E->constructsVBase(), E->inheritedFromVBase());
}
/// Transform a C++ temporary-binding expression.
///
/// Since CXXBindTemporaryExpr nodes are implicitly generated, we just
/// transform the subexpression and return that.
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
return getDerived().TransformExpr(E->getSubExpr());
}
/// Transform a C++ expression that contains cleanups that should
/// be run after the expression is evaluated.
///
/// Since ExprWithCleanups nodes are implicitly generated, we
/// just transform the subexpression and return that.
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformExprWithCleanups(ExprWithCleanups *E) {
return getDerived().TransformExpr(E->getSubExpr());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXTemporaryObjectExpr(
CXXTemporaryObjectExpr *E) {
TypeSourceInfo *T =
getDerived().TransformTypeWithDeducedTST(E->getTypeSourceInfo());
if (!T)
return ExprError();
CXXConstructorDecl *Constructor = cast_or_null<CXXConstructorDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getConstructor()));
if (!Constructor)
return ExprError();
bool ArgumentChanged = false;
SmallVector<Expr*, 8> Args;
Args.reserve(E->getNumArgs());
{
EnterExpressionEvaluationContext Context(
getSema(), EnterExpressionEvaluationContext::InitList,
E->isListInitialization());
if (TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
&ArgumentChanged))
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
T == E->getTypeSourceInfo() &&
Constructor == E->getConstructor() &&
!ArgumentChanged) {
// FIXME: Instantiation-specific
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), Constructor);
return SemaRef.MaybeBindToTemporary(E);
}
// FIXME: We should just pass E->isListInitialization(), but we're not
// prepared to handle list-initialization without a child InitListExpr.
SourceLocation LParenLoc = T->getTypeLoc().getEndLoc();
return getDerived().RebuildCXXTemporaryObjectExpr(
T, LParenLoc, Args, E->getEndLoc(),
/*ListInitialization=*/LParenLoc.isInvalid());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
// Transform any init-capture expressions before entering the scope of the
// lambda body, because they are not semantically within that scope.
typedef std::pair<ExprResult, QualType> InitCaptureInfoTy;
struct TransformedInitCapture {
// The location of the ... if the result is retaining a pack expansion.
SourceLocation EllipsisLoc;
// Zero or more expansions of the init-capture.
SmallVector<InitCaptureInfoTy, 4> Expansions;
};
SmallVector<TransformedInitCapture, 4> InitCaptures;
InitCaptures.resize(E->explicit_capture_end() - E->explicit_capture_begin());
for (LambdaExpr::capture_iterator C = E->capture_begin(),
CEnd = E->capture_end();
C != CEnd; ++C) {
if (!E->isInitCapture(C))
continue;
TransformedInitCapture &Result = InitCaptures[C - E->capture_begin()];
VarDecl *OldVD = C->getCapturedVar();
auto SubstInitCapture = [&](SourceLocation EllipsisLoc,
Optional<unsigned> NumExpansions) {
ExprResult NewExprInitResult = getDerived().TransformInitializer(
OldVD->getInit(), OldVD->getInitStyle() == VarDecl::CallInit);
if (NewExprInitResult.isInvalid()) {
Result.Expansions.push_back(InitCaptureInfoTy(ExprError(), QualType()));
return;
}
Expr *NewExprInit = NewExprInitResult.get();
QualType NewInitCaptureType =
getSema().buildLambdaInitCaptureInitialization(
C->getLocation(), OldVD->getType()->isReferenceType(),
EllipsisLoc, NumExpansions, OldVD->getIdentifier(),
C->getCapturedVar()->getInitStyle() != VarDecl::CInit,
NewExprInit);
Result.Expansions.push_back(
InitCaptureInfoTy(NewExprInit, NewInitCaptureType));
};
// If this is an init-capture pack, consider expanding the pack now.
if (OldVD->isParameterPack()) {
PackExpansionTypeLoc ExpansionTL = OldVD->getTypeSourceInfo()
->getTypeLoc()
.castAs<PackExpansionTypeLoc>();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(OldVD->getInit(), Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions =
ExpansionTL.getTypePtr()->getNumExpansions();
Optional<unsigned> NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(
ExpansionTL.getEllipsisLoc(),
OldVD->getInit()->getSourceRange(), Unexpanded, Expand,
RetainExpansion, NumExpansions))
return ExprError();
if (Expand) {
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
SubstInitCapture(SourceLocation(), None);
}
}
if (!Expand || RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
SubstInitCapture(ExpansionTL.getEllipsisLoc(), NumExpansions);
Result.EllipsisLoc = ExpansionTL.getEllipsisLoc();
}
} else {
SubstInitCapture(SourceLocation(), None);
}
}
LambdaScopeInfo *LSI = getSema().PushLambdaScope();
Sema::FunctionScopeRAII FuncScopeCleanup(getSema());
// Transform the template parameters, and add them to the current
// instantiation scope. The null case is handled correctly.
auto TPL = getDerived().TransformTemplateParameterList(
E->getTemplateParameterList());
LSI->GLTemplateParameterList = TPL;
// Transform the type of the original lambda's call operator.
// The transformation MUST be done in the CurrentInstantiationScope since
// it introduces a mapping of the original to the newly created
// transformed parameters.
TypeSourceInfo *NewCallOpTSI = nullptr;
{
TypeSourceInfo *OldCallOpTSI = E->getCallOperator()->getTypeSourceInfo();
FunctionProtoTypeLoc OldCallOpFPTL =
OldCallOpTSI->getTypeLoc().getAs<FunctionProtoTypeLoc>();
TypeLocBuilder NewCallOpTLBuilder;
SmallVector<QualType, 4> ExceptionStorage;
TreeTransform *This = this; // Work around gcc.gnu.org/PR56135.
QualType NewCallOpType = TransformFunctionProtoType(
NewCallOpTLBuilder, OldCallOpFPTL, nullptr, Qualifiers(),
[&](FunctionProtoType::ExceptionSpecInfo &ESI, bool &Changed) {
return This->TransformExceptionSpec(OldCallOpFPTL.getBeginLoc(), ESI,
ExceptionStorage, Changed);
});
if (NewCallOpType.isNull())
return ExprError();
NewCallOpTSI = NewCallOpTLBuilder.getTypeSourceInfo(getSema().Context,
NewCallOpType);
}
// Transform the trailing requires clause
ExprResult NewTrailingRequiresClause;
if (Expr *TRC = E->getCallOperator()->getTrailingRequiresClause())
// FIXME: Concepts: Substitution into requires clause should only happen
// when checking satisfaction.
NewTrailingRequiresClause = getDerived().TransformExpr(TRC);
// Create the local class that will describe the lambda.
// FIXME: KnownDependent below is wrong when substituting inside a templated
// context that isn't a DeclContext (such as a variable template).
CXXRecordDecl *OldClass = E->getLambdaClass();
CXXRecordDecl *Class
= getSema().createLambdaClosureType(E->getIntroducerRange(),
NewCallOpTSI,
/*KnownDependent=*/false,
E->getCaptureDefault());
getDerived().transformedLocalDecl(OldClass, {Class});
Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling;
if (getDerived().ReplacingOriginal())
Mangling = std::make_tuple(OldClass->hasKnownLambdaInternalLinkage(),
OldClass->getLambdaManglingNumber(),
OldClass->getDeviceLambdaManglingNumber(),
OldClass->getLambdaContextDecl());
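// Note: the saved numbering is reapplied below via handleLambdaNumbering so
// that the rebuilt closure type is numbered and mangled consistently with the
// original.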
// Build the call operator.
CXXMethodDecl *NewCallOperator = getSema().startLambdaDefinition(
Class, E->getIntroducerRange(), NewCallOpTSI,
E->getCallOperator()->getEndLoc(),
NewCallOpTSI->getTypeLoc().castAs<FunctionProtoTypeLoc>().getParams(),
E->getCallOperator()->getConstexprKind(),
NewTrailingRequiresClause.get());
LSI->CallOperator = NewCallOperator;
getDerived().transformAttrs(E->getCallOperator(), NewCallOperator);
getDerived().transformedLocalDecl(E->getCallOperator(), {NewCallOperator});
// Number the lambda for linkage purposes if necessary.
getSema().handleLambdaNumbering(Class, NewCallOperator, Mangling);
// Introduce the context of the call operator.
Sema::ContextRAII SavedContext(getSema(), NewCallOperator,
/*NewThisContext*/false);
// Enter the scope of the lambda.
getSema().buildLambdaScope(LSI, NewCallOperator,
E->getIntroducerRange(),
E->getCaptureDefault(),
E->getCaptureDefaultLoc(),
E->hasExplicitParameters(),
E->hasExplicitResultType(),
E->isMutable());
bool Invalid = false;
// Transform captures.
for (LambdaExpr::capture_iterator C = E->capture_begin(),
CEnd = E->capture_end();
C != CEnd; ++C) {
// When we hit the first implicit capture, tell Sema that we've finished
// the list of explicit captures.
if (C->isImplicit())
break;
// Capturing 'this' is trivial.
if (C->capturesThis()) {
getSema().CheckCXXThisCapture(C->getLocation(), C->isExplicit(),
/*BuildAndDiagnose*/ true, nullptr,
C->getCaptureKind() == LCK_StarThis);
continue;
}
// Captured expression will be recaptured during captured variables
// rebuilding.
if (C->capturesVLAType())
continue;
// Rebuild init-captures, including the implied field declaration.
if (E->isInitCapture(C)) {
TransformedInitCapture &NewC = InitCaptures[C - E->capture_begin()];
VarDecl *OldVD = C->getCapturedVar();
llvm::SmallVector<Decl*, 4> NewVDs;
for (InitCaptureInfoTy &Info : NewC.Expansions) {
ExprResult Init = Info.first;
QualType InitQualType = Info.second;
if (Init.isInvalid() || InitQualType.isNull()) {
Invalid = true;
break;
}
VarDecl *NewVD = getSema().createLambdaInitCaptureVarDecl(
OldVD->getLocation(), InitQualType, NewC.EllipsisLoc,
OldVD->getIdentifier(), OldVD->getInitStyle(), Init.get());
if (!NewVD) {
Invalid = true;
break;
}
NewVDs.push_back(NewVD);
getSema().addInitCapture(LSI, NewVD);
}
if (Invalid)
break;
getDerived().transformedLocalDecl(OldVD, NewVDs);
continue;
}
assert(C->capturesVariable() && "unexpected kind of lambda capture");
// Determine the capture kind for Sema.
Sema::TryCaptureKind Kind
= C->isImplicit()? Sema::TryCapture_Implicit
: C->getCaptureKind() == LCK_ByCopy
? Sema::TryCapture_ExplicitByVal
: Sema::TryCapture_ExplicitByRef;
SourceLocation EllipsisLoc;
if (C->isPackExpansion()) {
UnexpandedParameterPack Unexpanded(C->getCapturedVar(), C->getLocation());
bool ShouldExpand = false;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions;
if (getDerived().TryExpandParameterPacks(C->getEllipsisLoc(),
C->getLocation(),
Unexpanded,
ShouldExpand, RetainExpansion,
NumExpansions)) {
Invalid = true;
continue;
}
if (ShouldExpand) {
// The transform has determined that we should perform an elementwise
// expansion of the pattern; transform and capture each of the arguments.
VarDecl *Pack = C->getCapturedVar();
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
VarDecl *CapturedVar
= cast_or_null<VarDecl>(getDerived().TransformDecl(C->getLocation(),
Pack));
if (!CapturedVar) {
Invalid = true;
continue;
}
// Capture the transformed variable.
getSema().tryCaptureVariable(CapturedVar, C->getLocation(), Kind);
}
// FIXME: Retain a pack expansion if RetainExpansion is true.
continue;
}
EllipsisLoc = C->getEllipsisLoc();
}
// Transform the captured variable.
VarDecl *CapturedVar
= cast_or_null<VarDecl>(getDerived().TransformDecl(C->getLocation(),
C->getCapturedVar()));
if (!CapturedVar || CapturedVar->isInvalidDecl()) {
Invalid = true;
continue;
}
// Capture the transformed variable.
getSema().tryCaptureVariable(CapturedVar, C->getLocation(), Kind,
EllipsisLoc);
}
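// Implicit captures are deliberately not handled in the loop above; they are
// recomputed as the body is transformed and captured variables are referenced.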
getSema().finishLambdaExplicitCaptures(LSI);
// FIXME: Sema's lambda-building mechanism expects us to push an expression
// evaluation context even if we're not transforming the function body.
getSema().PushExpressionEvaluationContext(
Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
// Instantiate the body of the lambda expression.
StmtResult Body =
Invalid ? StmtError() : getDerived().TransformLambdaBody(E, E->getBody());
// ActOnLambda* will pop the function scope for us.
FuncScopeCleanup.disable();
if (Body.isInvalid()) {
SavedContext.pop();
getSema().ActOnLambdaError(E->getBeginLoc(), /*CurScope=*/nullptr,
/*IsInstantiation=*/true);
return ExprError();
}
// Copy the LSI before ActOnFinishFunctionBody removes it.
// FIXME: This is dumb. Store the lambda information somewhere that outlives
// the call operator.
auto LSICopy = *LSI;
getSema().ActOnFinishFunctionBody(NewCallOperator, Body.get(),
/*IsInstantiation*/ true);
SavedContext.pop();
return getSema().BuildLambdaExpr(E->getBeginLoc(), Body.get()->getEndLoc(),
&LSICopy);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformLambdaBody(LambdaExpr *E, Stmt *S) {
return TransformStmt(S);
}
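// SkipLambdaBody re-establishes the lambda's captures without transforming the
// body statement; it is intended for derived transforms that need the captures
// but can reuse the body as-is.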
template<typename Derived>
StmtResult
TreeTransform<Derived>::SkipLambdaBody(LambdaExpr *E, Stmt *S) {
// Transform captures.
for (LambdaExpr::capture_iterator C = E->capture_begin(),
CEnd = E->capture_end();
C != CEnd; ++C) {
// Only implicit captures need to be handled here; explicit captures were
// already processed when the enclosing lambda expression was transformed.
if (!C->isImplicit())
continue;
// Capturing 'this' is trivial.
if (C->capturesThis()) {
getSema().CheckCXXThisCapture(C->getLocation(), C->isExplicit(),
/*BuildAndDiagnose*/ true, nullptr,
C->getCaptureKind() == LCK_StarThis);
continue;
}
// Captured expression will be recaptured during captured variables
// rebuilding.
if (C->capturesVLAType())
continue;
assert(C->capturesVariable() && "unexpected kind of lambda capture");
assert(!E->isInitCapture(C) && "implicit init-capture?");
// Transform the captured variable.
VarDecl *CapturedVar = cast_or_null<VarDecl>(
getDerived().TransformDecl(C->getLocation(), C->getCapturedVar()));
if (!CapturedVar || CapturedVar->isInvalidDecl())
return StmtError();
// Capture the transformed variable.
getSema().tryCaptureVariable(CapturedVar, C->getLocation());
}
return S;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXUnresolvedConstructExpr(
CXXUnresolvedConstructExpr *E) {
TypeSourceInfo *T =
getDerived().TransformTypeWithDeducedTST(E->getTypeSourceInfo());
if (!T)
return ExprError();
bool ArgumentChanged = false;
SmallVector<Expr*, 8> Args;
Args.reserve(E->getNumArgs());
{
EnterExpressionEvaluationContext Context(
getSema(), EnterExpressionEvaluationContext::InitList,
E->isListInitialization());
if (getDerived().TransformExprs(E->arg_begin(), E->getNumArgs(), true, Args,
&ArgumentChanged))
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
T == E->getTypeSourceInfo() &&
!ArgumentChanged)
return E;
// FIXME: we're faking the locations of the commas
return getDerived().RebuildCXXUnresolvedConstructExpr(
T, E->getLParenLoc(), Args, E->getRParenLoc(), E->isListInitialization());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXDependentScopeMemberExpr(
CXXDependentScopeMemberExpr *E) {
// Transform the base of the expression.
ExprResult Base((Expr*) nullptr);
Expr *OldBase;
QualType BaseType;
QualType ObjectType;
if (!E->isImplicitAccess()) {
OldBase = E->getBase();
Base = getDerived().TransformExpr(OldBase);
if (Base.isInvalid())
return ExprError();
// Start the member reference and compute the object's type.
ParsedType ObjectTy;
bool MayBePseudoDestructor = false;
Base = SemaRef.ActOnStartCXXMemberReference(nullptr, Base.get(),
E->getOperatorLoc(),
E->isArrow()? tok::arrow : tok::period,
ObjectTy,
MayBePseudoDestructor);
if (Base.isInvalid())
return ExprError();
ObjectType = ObjectTy.get();
BaseType = ((Expr*) Base.get())->getType();
} else {
OldBase = nullptr;
BaseType = getDerived().TransformType(E->getBaseType());
ObjectType = BaseType->castAs<PointerType>()->getPointeeType();
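// For an implicit member access the stored base type is the type of 'this',
// i.e. a pointer to the enclosing class, so the object type is its pointee.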
}
// Transform the first part of the nested-name-specifier that qualifies
// the member name.
NamedDecl *FirstQualifierInScope
= getDerived().TransformFirstQualifierInScope(
E->getFirstQualifierFoundInScope(),
E->getQualifierLoc().getBeginLoc());
NestedNameSpecifierLoc QualifierLoc;
if (E->getQualifier()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(E->getQualifierLoc(),
ObjectType,
FirstQualifierInScope);
if (!QualifierLoc)
return ExprError();
}
SourceLocation TemplateKWLoc = E->getTemplateKeywordLoc();
// TODO: If this is a conversion-function-id, verify that the
// destination type name (if present) resolves the same way after
// instantiation as it did in the local scope.
DeclarationNameInfo NameInfo
= getDerived().TransformDeclarationNameInfo(E->getMemberNameInfo());
if (!NameInfo.getName())
return ExprError();
if (!E->hasExplicitTemplateArgs()) {
// This is a reference to a member without an explicitly-specified
// template argument list. Optimize for this common case.
if (!getDerived().AlwaysRebuild() &&
Base.get() == OldBase &&
BaseType == E->getBaseType() &&
QualifierLoc == E->getQualifierLoc() &&
NameInfo.getName() == E->getMember() &&
FirstQualifierInScope == E->getFirstQualifierFoundInScope())
return E;
return getDerived().RebuildCXXDependentScopeMemberExpr(Base.get(),
BaseType,
E->isArrow(),
E->getOperatorLoc(),
QualifierLoc,
TemplateKWLoc,
FirstQualifierInScope,
NameInfo,
/*TemplateArgs*/nullptr);
}
TemplateArgumentListInfo TransArgs(E->getLAngleLoc(), E->getRAngleLoc());
if (getDerived().TransformTemplateArguments(E->getTemplateArgs(),
E->getNumTemplateArgs(),
TransArgs))
return ExprError();
return getDerived().RebuildCXXDependentScopeMemberExpr(Base.get(),
BaseType,
E->isArrow(),
E->getOperatorLoc(),
QualifierLoc,
TemplateKWLoc,
FirstQualifierInScope,
NameInfo,
&TransArgs);
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformUnresolvedMemberExpr(
UnresolvedMemberExpr *Old) {
// Transform the base of the expression.
ExprResult Base((Expr *)nullptr);
QualType BaseType;
if (!Old->isImplicitAccess()) {
Base = getDerived().TransformExpr(Old->getBase());
if (Base.isInvalid())
return ExprError();
Base =
getSema().PerformMemberExprBaseConversion(Base.get(), Old->isArrow());
if (Base.isInvalid())
return ExprError();
BaseType = Base.get()->getType();
} else {
BaseType = getDerived().TransformType(Old->getBaseType());
}
NestedNameSpecifierLoc QualifierLoc;
if (Old->getQualifierLoc()) {
QualifierLoc =
getDerived().TransformNestedNameSpecifierLoc(Old->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
}
SourceLocation TemplateKWLoc = Old->getTemplateKeywordLoc();
LookupResult R(SemaRef, Old->getMemberNameInfo(), Sema::LookupOrdinaryName);
// Transform the declaration set.
if (TransformOverloadExprDecls(Old, /*RequiresADL*/ false, R))
return ExprError();
// Determine the naming class.
if (Old->getNamingClass()) {
CXXRecordDecl *NamingClass = cast_or_null<CXXRecordDecl>(
getDerived().TransformDecl(Old->getMemberLoc(), Old->getNamingClass()));
if (!NamingClass)
return ExprError();
R.setNamingClass(NamingClass);
}
TemplateArgumentListInfo TransArgs;
if (Old->hasExplicitTemplateArgs()) {
TransArgs.setLAngleLoc(Old->getLAngleLoc());
TransArgs.setRAngleLoc(Old->getRAngleLoc());
if (getDerived().TransformTemplateArguments(
Old->getTemplateArgs(), Old->getNumTemplateArgs(), TransArgs))
return ExprError();
}
// FIXME: to do this check properly, we will need to preserve the
// first-qualifier-in-scope here, just in case we had a dependent
// base (and therefore couldn't do the check) and a
// nested-name-qualifier (and therefore could do the lookup).
NamedDecl *FirstQualifierInScope = nullptr;
return getDerived().RebuildUnresolvedMemberExpr(
Base.get(), BaseType, Old->getOperatorLoc(), Old->isArrow(), QualifierLoc,
TemplateKWLoc, FirstQualifierInScope, R,
(Old->hasExplicitTemplateArgs() ? &TransArgs : nullptr));
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXNoexceptExpr(CXXNoexceptExpr *E) {
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
ExprResult SubExpr = getDerived().TransformExpr(E->getOperand());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getOperand())
return E;
return getDerived().RebuildCXXNoexceptExpr(E->getSourceRange(),SubExpr.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformPackExpansionExpr(PackExpansionExpr *E) {
ExprResult Pattern = getDerived().TransformExpr(E->getPattern());
if (Pattern.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && Pattern.get() == E->getPattern())
return E;
return getDerived().RebuildPackExpansion(Pattern.get(), E->getEllipsisLoc(),
E->getNumExpansions());
}
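// sizeof...(Pack) can often be computed without substituting the whole pack:
// e.g. for sizeof...(Ts) with Ts partially substituted to <int, Us...>, the
// result is 1 plus the (possibly known) size of Us. The transform below first
// tries to count expansions directly and only falls back to substituting the
// arguments when a nested pack's size cannot be determined.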
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformSizeOfPackExpr(SizeOfPackExpr *E) {
// If E is not value-dependent, then nothing will change when we transform it.
// Note: This is an instantiation-centric view.
if (!E->isValueDependent())
return E;
EnterExpressionEvaluationContext Unevaluated(
getSema(), Sema::ExpressionEvaluationContext::Unevaluated);
ArrayRef<TemplateArgument> PackArgs;
TemplateArgument ArgStorage;
// Find the argument list to transform.
if (E->isPartiallySubstituted()) {
PackArgs = E->getPartialArguments();
} else if (E->isValueDependent()) {
UnexpandedParameterPack Unexpanded(E->getPack(), E->getPackLoc());
bool ShouldExpand = false;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions;
if (getDerived().TryExpandParameterPacks(E->getOperatorLoc(), E->getPackLoc(),
Unexpanded,
ShouldExpand, RetainExpansion,
NumExpansions))
return ExprError();
// If we need to expand the pack, build a template argument from it and
// expand that.
if (ShouldExpand) {
auto *Pack = E->getPack();
if (auto *TTPD = dyn_cast<TemplateTypeParmDecl>(Pack)) {
ArgStorage = getSema().Context.getPackExpansionType(
getSema().Context.getTypeDeclType(TTPD), None);
} else if (auto *TTPD = dyn_cast<TemplateTemplateParmDecl>(Pack)) {
ArgStorage = TemplateArgument(TemplateName(TTPD), None);
} else {
auto *VD = cast<ValueDecl>(Pack);
ExprResult DRE = getSema().BuildDeclRefExpr(
VD, VD->getType().getNonLValueExprType(getSema().Context),
VD->getType()->isReferenceType() ? VK_LValue : VK_PRValue,
E->getPackLoc());
if (DRE.isInvalid())
return ExprError();
ArgStorage = new (getSema().Context) PackExpansionExpr(
getSema().Context.DependentTy, DRE.get(), E->getPackLoc(), None);
}
PackArgs = ArgStorage;
}
}
// If we're not expanding the pack, just transform the decl.
if (!PackArgs.size()) {
auto *Pack = cast_or_null<NamedDecl>(
getDerived().TransformDecl(E->getPackLoc(), E->getPack()));
if (!Pack)
return ExprError();
return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), Pack,
E->getPackLoc(),
E->getRParenLoc(), None, None);
}
// Try to compute the result without performing a partial substitution.
Optional<unsigned> Result = 0;
for (const TemplateArgument &Arg : PackArgs) {
if (!Arg.isPackExpansion()) {
Result = *Result + 1;
continue;
}
TemplateArgumentLoc ArgLoc;
InventTemplateArgumentLoc(Arg, ArgLoc);
// Find the pattern of the pack expansion.
SourceLocation Ellipsis;
Optional<unsigned> OrigNumExpansions;
TemplateArgumentLoc Pattern =
getSema().getTemplateArgumentPackExpansionPattern(ArgLoc, Ellipsis,
OrigNumExpansions);
// Substitute under the pack expansion. Do not expand the pack (yet).
TemplateArgumentLoc OutPattern;
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
if (getDerived().TransformTemplateArgument(Pattern, OutPattern,
/*Uneval*/ true))
return true;
// See if we can determine the number of arguments from the result.
Optional<unsigned> NumExpansions =
getSema().getFullyPackExpandedSize(OutPattern.getArgument());
if (!NumExpansions) {
// No: we must be in an alias template expansion, and we're going to need
// to actually expand the packs.
Result = None;
break;
}
Result = *Result + *NumExpansions;
}
// Common case: we could determine the number of expansions without
// substituting.
if (Result)
return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), E->getPack(),
E->getPackLoc(),
E->getRParenLoc(), *Result, None);
TemplateArgumentListInfo TransformedPackArgs(E->getPackLoc(),
E->getPackLoc());
{
TemporaryBase Rebase(*this, E->getPackLoc(), getBaseEntity());
typedef TemplateArgumentLocInventIterator<
Derived, const TemplateArgument*> PackLocIterator;
if (TransformTemplateArguments(PackLocIterator(*this, PackArgs.begin()),
PackLocIterator(*this, PackArgs.end()),
TransformedPackArgs, /*Uneval*/true))
return ExprError();
}
// Check whether we managed to fully-expand the pack.
// FIXME: Is it possible for us to do so and not hit the early exit path?
SmallVector<TemplateArgument, 8> Args;
bool PartialSubstitution = false;
for (auto &Loc : TransformedPackArgs.arguments()) {
Args.push_back(Loc.getArgument());
if (Loc.getArgument().isPackExpansion())
PartialSubstitution = true;
}
if (PartialSubstitution)
return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), E->getPack(),
E->getPackLoc(),
E->getRParenLoc(), None, Args);
return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), E->getPack(),
E->getPackLoc(), E->getRParenLoc(),
Args.size(), None);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformSubstNonTypeTemplateParmPackExpr(
SubstNonTypeTemplateParmPackExpr *E) {
// Default behavior is to do nothing with this transformation.
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformSubstNonTypeTemplateParmExpr(
SubstNonTypeTemplateParmExpr *E) {
// Default behavior is to do nothing with this transformation.
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformFunctionParmPackExpr(FunctionParmPackExpr *E) {
// Default behavior is to do nothing with this transformation.
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformMaterializeTemporaryExpr(
MaterializeTemporaryExpr *E) {
return getDerived().TransformExpr(E->getSubExpr());
}
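// Fold expressions are expanded elementwise into nested binary operations.
// For example, with args = {a, b, c}:
//   (args + ... + init)  // right fold: a + (b + (c + init))
//   (init + ... + args)  // left fold:  ((init + a) + b) + c
// The loop below builds the tree in the corresponding order, retaining a pack
// expansion for any element that still contains an unexpanded pack.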
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXFoldExpr(CXXFoldExpr *E) {
UnresolvedLookupExpr *Callee = nullptr;
if (Expr *OldCallee = E->getCallee()) {
ExprResult CalleeResult = getDerived().TransformExpr(OldCallee);
if (CalleeResult.isInvalid())
return ExprError();
Callee = cast<UnresolvedLookupExpr>(CalleeResult.get());
}
Expr *Pattern = E->getPattern();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
getSema().collectUnexpandedParameterPacks(Pattern, Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions = E->getNumExpansions(),
NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(E->getEllipsisLoc(),
Pattern->getSourceRange(),
Unexpanded,
Expand, RetainExpansion,
NumExpansions))
return true;
if (!Expand) {
// Do not expand any packs here, just transform and rebuild a fold
// expression.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
ExprResult LHS =
E->getLHS() ? getDerived().TransformExpr(E->getLHS()) : ExprResult();
if (LHS.isInvalid())
return true;
ExprResult RHS =
E->getRHS() ? getDerived().TransformExpr(E->getRHS()) : ExprResult();
if (RHS.isInvalid())
return true;
if (!getDerived().AlwaysRebuild() &&
LHS.get() == E->getLHS() && RHS.get() == E->getRHS())
return E;
return getDerived().RebuildCXXFoldExpr(
Callee, E->getBeginLoc(), LHS.get(), E->getOperator(),
E->getEllipsisLoc(), RHS.get(), E->getEndLoc(), NumExpansions);
}
// Formally a fold expression expands to nested parenthesized expressions.
// Enforce this limit to avoid creating trees so deep we can't safely traverse
// them.
if (NumExpansions && SemaRef.getLangOpts().BracketDepth < NumExpansions) {
SemaRef.Diag(E->getEllipsisLoc(),
clang::diag::err_fold_expression_limit_exceeded)
<< *NumExpansions << SemaRef.getLangOpts().BracketDepth
<< E->getSourceRange();
SemaRef.Diag(E->getEllipsisLoc(), diag::note_bracket_depth);
return ExprError();
}
// The transform has determined that we should perform an elementwise
// expansion of the pattern. Do so.
ExprResult Result = getDerived().TransformExpr(E->getInit());
if (Result.isInvalid())
return true;
bool LeftFold = E->isLeftFold();
// If we're retaining an expansion for a right fold, it is the innermost
// component and takes the init (if any).
if (!LeftFold && RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
ExprResult Out = getDerived().TransformExpr(Pattern);
if (Out.isInvalid())
return true;
Result = getDerived().RebuildCXXFoldExpr(
Callee, E->getBeginLoc(), Out.get(), E->getOperator(),
E->getEllipsisLoc(), Result.get(), E->getEndLoc(), OrigNumExpansions);
if (Result.isInvalid())
return true;
}
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(
getSema(), LeftFold ? I : *NumExpansions - I - 1);
ExprResult Out = getDerived().TransformExpr(Pattern);
if (Out.isInvalid())
return true;
if (Out.get()->containsUnexpandedParameterPack()) {
// We still have a pack; retain a pack expansion for this slice.
Result = getDerived().RebuildCXXFoldExpr(
Callee, E->getBeginLoc(), LeftFold ? Result.get() : Out.get(),
E->getOperator(), E->getEllipsisLoc(),
LeftFold ? Out.get() : Result.get(), E->getEndLoc(),
OrigNumExpansions);
} else if (Result.isUsable()) {
// We've got down to a single element; build a binary operator.
Expr *LHS = LeftFold ? Result.get() : Out.get();
Expr *RHS = LeftFold ? Out.get() : Result.get();
if (Callee)
Result = getDerived().RebuildCXXOperatorCallExpr(
BinaryOperator::getOverloadedOperator(E->getOperator()),
E->getEllipsisLoc(), Callee, LHS, RHS);
else
Result = getDerived().RebuildBinaryOperator(E->getEllipsisLoc(),
E->getOperator(), LHS, RHS);
} else
Result = Out;
if (Result.isInvalid())
return true;
}
// If we're retaining an expansion for a left fold, it is the outermost
// component and takes the complete expansion so far as its init (if any).
if (LeftFold && RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
ExprResult Out = getDerived().TransformExpr(Pattern);
if (Out.isInvalid())
return true;
Result = getDerived().RebuildCXXFoldExpr(
Callee, E->getBeginLoc(), Result.get(), E->getOperator(),
E->getEllipsisLoc(), Out.get(), E->getEndLoc(), OrigNumExpansions);
if (Result.isInvalid())
return true;
}
// If we had no init and an empty pack, and we're not retaining an expansion,
// then produce a fallback value or error.
if (Result.isUnset())
return getDerived().RebuildEmptyCXXFoldExpr(E->getEllipsisLoc(),
E->getOperator());
return Result;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXStdInitializerListExpr(
CXXStdInitializerListExpr *E) {
return getDerived().TransformExpr(E->getSubExpr());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCStringLiteral(ObjCStringLiteral *E) {
return SemaRef.MaybeBindToTemporary(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCBoolLiteralExpr(ObjCBoolLiteralExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCBoxedExpr(ObjCBoxedExpr *E) {
ExprResult SubExpr = getDerived().TransformExpr(E->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildObjCBoxedExpr(E->getSourceRange(), SubExpr.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCArrayLiteral(ObjCArrayLiteral *E) {
// Transform each of the elements.
SmallVector<Expr *, 8> Elements;
bool ArgChanged = false;
if (getDerived().TransformExprs(E->getElements(), E->getNumElements(),
/*IsCall=*/false, Elements, &ArgChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() && !ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
return getDerived().RebuildObjCArrayLiteral(E->getSourceRange(),
Elements.data(),
Elements.size());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCDictionaryLiteral(
ObjCDictionaryLiteral *E) {
// Transform each of the elements.
SmallVector<ObjCDictionaryElement, 8> Elements;
bool ArgChanged = false;
for (unsigned I = 0, N = E->getNumElements(); I != N; ++I) {
ObjCDictionaryElement OrigElement = E->getKeyValueElement(I);
if (OrigElement.isPackExpansion()) {
// This key/value element is a pack expansion.
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
getSema().collectUnexpandedParameterPacks(OrigElement.Key, Unexpanded);
getSema().collectUnexpandedParameterPacks(OrigElement.Value, Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can
// and should be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions = OrigElement.NumExpansions;
Optional<unsigned> NumExpansions = OrigNumExpansions;
SourceRange PatternRange(OrigElement.Key->getBeginLoc(),
OrigElement.Value->getEndLoc());
if (getDerived().TryExpandParameterPacks(OrigElement.EllipsisLoc,
PatternRange, Unexpanded, Expand,
RetainExpansion, NumExpansions))
return ExprError();
if (!Expand) {
// The transform has determined that we should perform a simple
// transformation on the pack expansion, producing another pack
// expansion.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
ExprResult Key = getDerived().TransformExpr(OrigElement.Key);
if (Key.isInvalid())
return ExprError();
if (Key.get() != OrigElement.Key)
ArgChanged = true;
ExprResult Value = getDerived().TransformExpr(OrigElement.Value);
if (Value.isInvalid())
return ExprError();
if (Value.get() != OrigElement.Value)
ArgChanged = true;
ObjCDictionaryElement Expansion = {
Key.get(), Value.get(), OrigElement.EllipsisLoc, NumExpansions
};
Elements.push_back(Expansion);
continue;
}
// Record right away that the argument was changed. This needs
// to happen even if the array expands to nothing.
ArgChanged = true;
// The transform has determined that we should perform an elementwise
// expansion of the pattern. Do so.
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
ExprResult Key = getDerived().TransformExpr(OrigElement.Key);
if (Key.isInvalid())
return ExprError();
ExprResult Value = getDerived().TransformExpr(OrigElement.Value);
if (Value.isInvalid())
return ExprError();
ObjCDictionaryElement Element = {
Key.get(), Value.get(), SourceLocation(), NumExpansions
};
// If any unexpanded parameter packs remain, we still have a
// pack expansion.
// FIXME: Can this really happen?
if (Key.get()->containsUnexpandedParameterPack() ||
Value.get()->containsUnexpandedParameterPack())
Element.EllipsisLoc = OrigElement.EllipsisLoc;
Elements.push_back(Element);
}
// FIXME: Retain a pack expansion if RetainExpansion is true.
// We've finished with this pack expansion.
continue;
}
// Transform and check key.
ExprResult Key = getDerived().TransformExpr(OrigElement.Key);
if (Key.isInvalid())
return ExprError();
if (Key.get() != OrigElement.Key)
ArgChanged = true;
// Transform and check value.
ExprResult Value
= getDerived().TransformExpr(OrigElement.Value);
if (Value.isInvalid())
return ExprError();
if (Value.get() != OrigElement.Value)
ArgChanged = true;
ObjCDictionaryElement Element = {
Key.get(), Value.get(), SourceLocation(), None
};
Elements.push_back(Element);
}
if (!getDerived().AlwaysRebuild() && !ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
return getDerived().RebuildObjCDictionaryLiteral(E->getSourceRange(),
Elements);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCEncodeExpr(ObjCEncodeExpr *E) {
TypeSourceInfo *EncodedTypeInfo
= getDerived().TransformType(E->getEncodedTypeSourceInfo());
if (!EncodedTypeInfo)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
EncodedTypeInfo == E->getEncodedTypeSourceInfo())
return E;
return getDerived().RebuildObjCEncodeExpr(E->getAtLoc(),
EncodedTypeInfo,
E->getRParenLoc());
}
template<typename Derived>
ExprResult TreeTransform<Derived>::
TransformObjCIndirectCopyRestoreExpr(ObjCIndirectCopyRestoreExpr *E) {
// This is a kind of implicit conversion, and it needs to get dropped
// and recomputed for the same general reasons that ImplicitCastExprs
// do, as well as a more specific one: this expression is only valid when
// it appears *immediately* as an argument expression.
return getDerived().TransformExpr(E->getSubExpr());
}
template<typename Derived>
ExprResult TreeTransform<Derived>::
TransformObjCBridgedCastExpr(ObjCBridgedCastExpr *E) {
TypeSourceInfo *TSInfo
= getDerived().TransformType(E->getTypeInfoAsWritten());
if (!TSInfo)
return ExprError();
ExprResult Result = getDerived().TransformExpr(E->getSubExpr());
if (Result.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
TSInfo == E->getTypeInfoAsWritten() &&
Result.get() == E->getSubExpr())
return E;
return SemaRef.BuildObjCBridgedCast(E->getLParenLoc(), E->getBridgeKind(),
E->getBridgeKeywordLoc(), TSInfo,
Result.get());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformObjCAvailabilityCheckExpr(
ObjCAvailabilityCheckExpr *E) {
return E;
}
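// Message sends are rebuilt according to their receiver kind: class messages
// re-transform the receiver type, 'super' messages reuse the super location
// and receiver type, and instance messages transform the receiver expression.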
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCMessageExpr(ObjCMessageExpr *E) {
// Transform arguments.
bool ArgChanged = false;
SmallVector<Expr*, 8> Args;
Args.reserve(E->getNumArgs());
if (getDerived().TransformExprs(E->getArgs(), E->getNumArgs(), false, Args,
&ArgChanged))
return ExprError();
if (E->getReceiverKind() == ObjCMessageExpr::Class) {
// Class message: transform the receiver type.
TypeSourceInfo *ReceiverTypeInfo
= getDerived().TransformType(E->getClassReceiverTypeInfo());
if (!ReceiverTypeInfo)
return ExprError();
// If nothing changed, just retain the existing message send.
if (!getDerived().AlwaysRebuild() &&
ReceiverTypeInfo == E->getClassReceiverTypeInfo() && !ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
// Build a new class message send.
SmallVector<SourceLocation, 16> SelLocs;
E->getSelectorLocs(SelLocs);
return getDerived().RebuildObjCMessageExpr(ReceiverTypeInfo,
E->getSelector(),
SelLocs,
E->getMethodDecl(),
E->getLeftLoc(),
Args,
E->getRightLoc());
} else if (E->getReceiverKind() == ObjCMessageExpr::SuperClass ||
E->getReceiverKind() == ObjCMessageExpr::SuperInstance) {
if (!E->getMethodDecl())
return ExprError();
// Build a new class message send to 'super'.
SmallVector<SourceLocation, 16> SelLocs;
E->getSelectorLocs(SelLocs);
return getDerived().RebuildObjCMessageExpr(E->getSuperLoc(),
E->getSelector(),
SelLocs,
E->getReceiverType(),
E->getMethodDecl(),
E->getLeftLoc(),
Args,
E->getRightLoc());
}
// Instance message: transform the receiver
assert(E->getReceiverKind() == ObjCMessageExpr::Instance &&
"Only class and instance messages may be instantiated");
ExprResult Receiver
= getDerived().TransformExpr(E->getInstanceReceiver());
if (Receiver.isInvalid())
return ExprError();
// If nothing changed, just retain the existing message send.
if (!getDerived().AlwaysRebuild() &&
Receiver.get() == E->getInstanceReceiver() && !ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
// Build a new instance message send.
SmallVector<SourceLocation, 16> SelLocs;
E->getSelectorLocs(SelLocs);
return getDerived().RebuildObjCMessageExpr(Receiver.get(),
E->getSelector(),
SelLocs,
E->getMethodDecl(),
E->getLeftLoc(),
Args,
E->getRightLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCSelectorExpr(ObjCSelectorExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCProtocolExpr(ObjCProtocolExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCIvarRefExpr(ObjCIvarRefExpr *E) {
// Transform the base expression.
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
// We don't need to transform the ivar; it will never change.
// If nothing changed, just retain the existing expression.
if (!getDerived().AlwaysRebuild() &&
Base.get() == E->getBase())
return E;
return getDerived().RebuildObjCIvarRefExpr(Base.get(), E->getDecl(),
E->getLocation(),
E->isArrow(), E->isFreeIvar());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCPropertyRefExpr(ObjCPropertyRefExpr *E) {
// 'super' and types never change. Property never changes. Just
// retain the existing expression.
if (!E->isObjectReceiver())
return E;
// Transform the base expression.
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
// We don't need to transform the property; it will never change.
// If nothing changed, just retain the existing expression.
if (!getDerived().AlwaysRebuild() &&
Base.get() == E->getBase())
return E;
if (E->isExplicitProperty())
return getDerived().RebuildObjCPropertyRefExpr(Base.get(),
E->getExplicitProperty(),
E->getLocation());
return getDerived().RebuildObjCPropertyRefExpr(Base.get(),
SemaRef.Context.PseudoObjectTy,
E->getImplicitPropertyGetter(),
E->getImplicitPropertySetter(),
E->getLocation());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCSubscriptRefExpr(ObjCSubscriptRefExpr *E) {
// Transform the base expression.
ExprResult Base = getDerived().TransformExpr(E->getBaseExpr());
if (Base.isInvalid())
return ExprError();
// Transform the key expression.
ExprResult Key = getDerived().TransformExpr(E->getKeyExpr());
if (Key.isInvalid())
return ExprError();
// If nothing changed, just retain the existing expression.
if (!getDerived().AlwaysRebuild() &&
Key.get() == E->getKeyExpr() && Base.get() == E->getBaseExpr())
return E;
return getDerived().RebuildObjCSubscriptRefExpr(E->getRBracket(),
Base.get(), Key.get(),
E->getAtIndexMethodDecl(),
E->setAtIndexMethodDecl());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCIsaExpr(ObjCIsaExpr *E) {
// Transform the base expression.
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
// If nothing changed, just retain the existing expression.
if (!getDerived().AlwaysRebuild() &&
Base.get() == E->getBase())
return E;
return getDerived().RebuildObjCIsaExpr(Base.get(), E->getIsaMemberLoc(),
E->getOpLoc(),
E->isArrow());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformShuffleVectorExpr(ShuffleVectorExpr *E) {
bool ArgumentChanged = false;
SmallVector<Expr*, 8> SubExprs;
SubExprs.reserve(E->getNumSubExprs());
if (getDerived().TransformExprs(E->getSubExprs(), E->getNumSubExprs(), false,
SubExprs, &ArgumentChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() &&
!ArgumentChanged)
return E;
return getDerived().RebuildShuffleVectorExpr(E->getBuiltinLoc(),
SubExprs,
E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformConvertVectorExpr(ConvertVectorExpr *E) {
ExprResult SrcExpr = getDerived().TransformExpr(E->getSrcExpr());
if (SrcExpr.isInvalid())
return ExprError();
TypeSourceInfo *Type = getDerived().TransformType(E->getTypeSourceInfo());
if (!Type)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Type == E->getTypeSourceInfo() &&
SrcExpr.get() == E->getSrcExpr())
return E;
return getDerived().RebuildConvertVectorExpr(E->getBuiltinLoc(),
SrcExpr.get(), Type,
E->getRParenLoc());
}
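// Blocks are rebuilt by re-entering a block scope (ActOnBlockStart),
// substituting the parameter and return types, transforming the body, and
// then finishing the block via ActOnBlockStmtExpr; captures are recomputed
// as the body is transformed.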
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformBlockExpr(BlockExpr *E) {
BlockDecl *oldBlock = E->getBlockDecl();
SemaRef.ActOnBlockStart(E->getCaretLocation(), /*Scope=*/nullptr);
BlockScopeInfo *blockScope = SemaRef.getCurBlock();
blockScope->TheDecl->setIsVariadic(oldBlock->isVariadic());
blockScope->TheDecl->setBlockMissingReturnType(
oldBlock->blockMissingReturnType());
SmallVector<ParmVarDecl*, 4> params;
SmallVector<QualType, 4> paramTypes;
const FunctionProtoType *exprFunctionType = E->getFunctionType();
// Parameter substitution.
Sema::ExtParameterInfoBuilder extParamInfos;
if (getDerived().TransformFunctionTypeParams(
E->getCaretLocation(), oldBlock->parameters(), nullptr,
exprFunctionType->getExtParameterInfosOrNull(), paramTypes, &params,
extParamInfos)) {
getSema().ActOnBlockError(E->getCaretLocation(), /*Scope=*/nullptr);
return ExprError();
}
QualType exprResultType =
getDerived().TransformType(exprFunctionType->getReturnType());
auto epi = exprFunctionType->getExtProtoInfo();
epi.ExtParameterInfos = extParamInfos.getPointerOrNull(paramTypes.size());
QualType functionType =
getDerived().RebuildFunctionProtoType(exprResultType, paramTypes, epi);
blockScope->FunctionType = functionType;
// Set the parameters on the block decl.
if (!params.empty())
blockScope->TheDecl->setParams(params);
if (!oldBlock->blockMissingReturnType()) {
blockScope->HasImplicitReturnType = false;
blockScope->ReturnType = exprResultType;
}
// Transform the body
StmtResult body = getDerived().TransformStmt(E->getBody());
if (body.isInvalid()) {
getSema().ActOnBlockError(E->getCaretLocation(), /*Scope=*/nullptr);
return ExprError();
}
#ifndef NDEBUG
// In builds with assertions, make sure that we captured everything we
// captured before.
if (!SemaRef.getDiagnostics().hasErrorOccurred()) {
for (const auto &I : oldBlock->captures()) {
VarDecl *oldCapture = I.getVariable();
// Ignore parameter packs.
if (oldCapture->isParameterPack())
continue;
VarDecl *newCapture =
cast<VarDecl>(getDerived().TransformDecl(E->getCaretLocation(),
oldCapture));
assert(blockScope->CaptureMap.count(newCapture));
}
assert(oldBlock->capturesCXXThis() == blockScope->isCXXThisCaptured());
}
#endif
return SemaRef.ActOnBlockStmtExpr(E->getCaretLocation(), body.get(),
/*Scope=*/nullptr);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformAsTypeExpr(AsTypeExpr *E) {
ExprResult SrcExpr = getDerived().TransformExpr(E->getSrcExpr());
if (SrcExpr.isInvalid())
return ExprError();
QualType Type = getDerived().TransformType(E->getType());
return SemaRef.BuildAsTypeExpr(SrcExpr.get(), Type, E->getBuiltinLoc(),
E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformAtomicExpr(AtomicExpr *E) {
bool ArgumentChanged = false;
SmallVector<Expr*, 8> SubExprs;
SubExprs.reserve(E->getNumSubExprs());
if (getDerived().TransformExprs(E->getSubExprs(), E->getNumSubExprs(), false,
SubExprs, &ArgumentChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() &&
!ArgumentChanged)
return E;
return getDerived().RebuildAtomicExpr(E->getBuiltinLoc(), SubExprs,
E->getOp(), E->getRParenLoc());
}
//===----------------------------------------------------------------------===//
// Type reconstruction
//===----------------------------------------------------------------------===//
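// The Rebuild* helpers below delegate to the corresponding Sema::Build* entry
// points, re-running semantic checking on the transformed components.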
template<typename Derived>
QualType TreeTransform<Derived>::RebuildPointerType(QualType PointeeType,
SourceLocation Star) {
return SemaRef.BuildPointerType(PointeeType, Star,
getDerived().getBaseEntity());
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildBlockPointerType(QualType PointeeType,
SourceLocation Star) {
return SemaRef.BuildBlockPointerType(PointeeType, Star,
getDerived().getBaseEntity());
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildReferenceType(QualType ReferentType,
bool WrittenAsLValue,
SourceLocation Sigil) {
return SemaRef.BuildReferenceType(ReferentType, WrittenAsLValue,
Sigil, getDerived().getBaseEntity());
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildMemberPointerType(QualType PointeeType,
QualType ClassType,
SourceLocation Sigil) {
return SemaRef.BuildMemberPointerType(PointeeType, ClassType, Sigil,
getDerived().getBaseEntity());
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildObjCTypeParamType(
const ObjCTypeParamDecl *Decl,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc) {
return SemaRef.BuildObjCTypeParamType(Decl,
ProtocolLAngleLoc, Protocols,
ProtocolLocs, ProtocolRAngleLoc,
/*FailOnError=*/true);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildObjCObjectType(
QualType BaseType,
SourceLocation Loc,
SourceLocation TypeArgsLAngleLoc,
ArrayRef<TypeSourceInfo *> TypeArgs,
SourceLocation TypeArgsRAngleLoc,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc) {
return SemaRef.BuildObjCObjectType(BaseType, Loc, TypeArgsLAngleLoc,
TypeArgs, TypeArgsRAngleLoc,
ProtocolLAngleLoc, Protocols, ProtocolLocs,
ProtocolRAngleLoc,
/*FailOnError=*/true);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildObjCObjectPointerType(
QualType PointeeType,
SourceLocation Star) {
return SemaRef.Context.getObjCObjectPointerType(PointeeType);
}
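// RebuildArrayType synthesizes an IntegerLiteral for a known constant bound so
// that Sema::BuildArrayType can re-check it; the literal's type is chosen to
// match the bit width of the stored APInt size.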
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
const llvm::APInt *Size,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange) {
if (SizeExpr || !Size)
return SemaRef.BuildArrayType(ElementType, SizeMod, SizeExpr,
IndexTypeQuals, BracketsRange,
getDerived().getBaseEntity());
QualType Types[] = {
SemaRef.Context.UnsignedCharTy, SemaRef.Context.UnsignedShortTy,
SemaRef.Context.UnsignedIntTy, SemaRef.Context.UnsignedLongTy,
SemaRef.Context.UnsignedLongLongTy, SemaRef.Context.UnsignedInt128Ty
};
const unsigned NumTypes = llvm::array_lengthof(Types);
QualType SizeType;
for (unsigned I = 0; I != NumTypes; ++I)
if (Size->getBitWidth() == SemaRef.Context.getIntWidth(Types[I])) {
SizeType = Types[I];
break;
}
// Note that we can return a VariableArrayType here in the case where
// the element type was a dependent VariableArrayType.
IntegerLiteral *ArraySize
= IntegerLiteral::Create(SemaRef.Context, *Size, SizeType,
/*FIXME*/BracketsRange.getBegin());
return SemaRef.BuildArrayType(ElementType, SizeMod, ArraySize,
IndexTypeQuals, BracketsRange,
getDerived().getBaseEntity());
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildConstantArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
const llvm::APInt &Size,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange) {
return getDerived().RebuildArrayType(ElementType, SizeMod, &Size, SizeExpr,
IndexTypeQuals, BracketsRange);
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildIncompleteArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
unsigned IndexTypeQuals,
SourceRange BracketsRange) {
return getDerived().RebuildArrayType(ElementType, SizeMod, nullptr, nullptr,
IndexTypeQuals, BracketsRange);
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildVariableArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange) {
return getDerived().RebuildArrayType(ElementType, SizeMod, nullptr,
SizeExpr,
IndexTypeQuals, BracketsRange);
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildDependentSizedArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange) {
return getDerived().RebuildArrayType(ElementType, SizeMod, nullptr,
SizeExpr,
IndexTypeQuals, BracketsRange);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildDependentAddressSpaceType(
QualType PointeeType, Expr *AddrSpaceExpr, SourceLocation AttributeLoc) {
return SemaRef.BuildAddressSpaceAttr(PointeeType, AddrSpaceExpr,
AttributeLoc);
}
template <typename Derived>
QualType
TreeTransform<Derived>::RebuildVectorType(QualType ElementType,
unsigned NumElements,
VectorType::VectorKind VecKind) {
// FIXME: semantic checking!
return SemaRef.Context.getVectorType(ElementType, NumElements, VecKind);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildDependentVectorType(
QualType ElementType, Expr *SizeExpr, SourceLocation AttributeLoc,
VectorType::VectorKind VecKind) {
return SemaRef.BuildVectorType(ElementType, SizeExpr, AttributeLoc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildExtVectorType(QualType ElementType,
unsigned NumElements,
SourceLocation AttributeLoc) {
llvm::APInt numElements(SemaRef.Context.getIntWidth(SemaRef.Context.IntTy),
NumElements, true);
IntegerLiteral *VectorSize
= IntegerLiteral::Create(SemaRef.Context, numElements, SemaRef.Context.IntTy,
AttributeLoc);
return SemaRef.BuildExtVectorType(ElementType, VectorSize, AttributeLoc);
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildDependentSizedExtVectorType(QualType ElementType,
Expr *SizeExpr,
SourceLocation AttributeLoc) {
return SemaRef.BuildExtVectorType(ElementType, SizeExpr, AttributeLoc);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildConstantMatrixType(
QualType ElementType, unsigned NumRows, unsigned NumColumns) {
return SemaRef.Context.getConstantMatrixType(ElementType, NumRows,
NumColumns);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildDependentSizedMatrixType(
QualType ElementType, Expr *RowExpr, Expr *ColumnExpr,
SourceLocation AttributeLoc) {
return SemaRef.BuildMatrixType(ElementType, RowExpr, ColumnExpr,
AttributeLoc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildFunctionProtoType(
QualType T,
MutableArrayRef<QualType> ParamTypes,
const FunctionProtoType::ExtProtoInfo &EPI) {
return SemaRef.BuildFunctionType(T, ParamTypes,
getDerived().getBaseLocation(),
getDerived().getBaseEntity(),
EPI);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildFunctionNoProtoType(QualType T) {
return SemaRef.Context.getFunctionNoProtoType(T);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildUnresolvedUsingType(SourceLocation Loc,
Decl *D) {
assert(D && "no decl found");
if (D->isInvalidDecl()) return QualType();
// FIXME: Doesn't account for ObjCInterfaceDecl!
TypeDecl *Ty;
if (auto *UPD = dyn_cast<UsingPackDecl>(D)) {
// A valid resolved using typename pack expansion decl can have multiple
// UsingDecls, but they must each have exactly one type, and it must be
// the same type in every case. But we must have at least one expansion!
if (UPD->expansions().empty()) {
getSema().Diag(Loc, diag::err_using_pack_expansion_empty)
<< UPD->isCXXClassMember() << UPD;
return QualType();
}
// We might still have some unresolved types. Try to pick a resolved type
// if we can. The final instantiation will check that the remaining
// unresolved types instantiate to the type we pick.
QualType FallbackT;
QualType T;
for (auto *E : UPD->expansions()) {
QualType ThisT = RebuildUnresolvedUsingType(Loc, E);
if (ThisT.isNull())
continue;
else if (ThisT->getAs<UnresolvedUsingType>())
FallbackT = ThisT;
else if (T.isNull())
T = ThisT;
else
assert(getSema().Context.hasSameType(ThisT, T) &&
"mismatched resolved types in using pack expansion");
}
return T.isNull() ? FallbackT : T;
} else if (auto *Using = dyn_cast<UsingDecl>(D)) {
assert(Using->hasTypename() &&
"UnresolvedUsingTypenameDecl transformed to non-typename using");
// A valid resolved using typename decl points to exactly one type decl.
assert(++Using->shadow_begin() == Using->shadow_end());
NamedDecl *Target = Using->shadow_begin()->getTargetDecl();
if (SemaRef.DiagnoseUseOfDecl(Target, Loc))
return QualType();
Ty = cast<TypeDecl>(Target);
} else {
assert(isa<UnresolvedUsingTypenameDecl>(D) &&
"UnresolvedUsingTypenameDecl transformed to non-using decl");
Ty = cast<UnresolvedUsingTypenameDecl>(D);
}
return SemaRef.Context.getTypeDeclType(Ty);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildTypeOfExprType(Expr *E,
SourceLocation Loc) {
return SemaRef.BuildTypeofExprType(E, Loc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildTypeOfType(QualType Underlying) {
return SemaRef.Context.getTypeOfType(Underlying);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildDecltypeType(Expr *E,
SourceLocation Loc) {
return SemaRef.BuildDecltypeType(E, Loc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildUnaryTransformType(QualType BaseType,
UnaryTransformType::UTTKind UKind,
SourceLocation Loc) {
return SemaRef.BuildUnaryTransformType(BaseType, UKind, Loc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildTemplateSpecializationType(
TemplateName Template,
SourceLocation TemplateNameLoc,
TemplateArgumentListInfo &TemplateArgs) {
return SemaRef.CheckTemplateIdType(Template, TemplateNameLoc, TemplateArgs);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildAtomicType(QualType ValueType,
SourceLocation KWLoc) {
return SemaRef.BuildAtomicType(ValueType, KWLoc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildPipeType(QualType ValueType,
SourceLocation KWLoc,
bool isReadPipe) {
return isReadPipe ? SemaRef.BuildReadPipeType(ValueType, KWLoc)
: SemaRef.BuildWritePipeType(ValueType, KWLoc);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildExtIntType(bool IsUnsigned,
unsigned NumBits,
SourceLocation Loc) {
llvm::APInt NumBitsAP(SemaRef.Context.getIntWidth(SemaRef.Context.IntTy),
NumBits, true);
IntegerLiteral *Bits = IntegerLiteral::Create(SemaRef.Context, NumBitsAP,
SemaRef.Context.IntTy, Loc);
return SemaRef.BuildExtIntType(IsUnsigned, Bits, Loc);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildDependentExtIntType(
bool IsUnsigned, Expr *NumBitsExpr, SourceLocation Loc) {
return SemaRef.BuildExtIntType(IsUnsigned, NumBitsExpr, Loc);
}
template<typename Derived>
TemplateName
TreeTransform<Derived>::RebuildTemplateName(CXXScopeSpec &SS,
bool TemplateKW,
TemplateDecl *Template) {
return SemaRef.Context.getQualifiedTemplateName(SS.getScopeRep(), TemplateKW,
Template);
}
template<typename Derived>
TemplateName
TreeTransform<Derived>::RebuildTemplateName(CXXScopeSpec &SS,
SourceLocation TemplateKWLoc,
const IdentifierInfo &Name,
SourceLocation NameLoc,
QualType ObjectType,
NamedDecl *FirstQualifierInScope,
bool AllowInjectedClassName) {
UnqualifiedId TemplateName;
TemplateName.setIdentifier(&Name, NameLoc);
Sema::TemplateTy Template;
getSema().ActOnTemplateName(/*Scope=*/nullptr, SS, TemplateKWLoc,
TemplateName, ParsedType::make(ObjectType),
/*EnteringContext=*/false, Template,
AllowInjectedClassName);
return Template.get();
}
template<typename Derived>
TemplateName
TreeTransform<Derived>::RebuildTemplateName(CXXScopeSpec &SS,
SourceLocation TemplateKWLoc,
OverloadedOperatorKind Operator,
SourceLocation NameLoc,
QualType ObjectType,
bool AllowInjectedClassName) {
UnqualifiedId Name;
// FIXME: Bogus location information.
SourceLocation SymbolLocations[3] = { NameLoc, NameLoc, NameLoc };
Name.setOperatorFunctionId(NameLoc, Operator, SymbolLocations);
Sema::TemplateTy Template;
getSema().ActOnTemplateName(
/*Scope=*/nullptr, SS, TemplateKWLoc, Name, ParsedType::make(ObjectType),
/*EnteringContext=*/false, Template, AllowInjectedClassName);
return Template.get();
}
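// RebuildCXXOperatorCallExpr decides between a builtin operation and an
// overloaded call: if no operand has an overloadable type (broadly, class or
// enumeration types), the corresponding builtin expression is created
// directly; otherwise the candidate set is rebuilt and overload resolution is
// redone.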
template<typename Derived>
ExprResult
TreeTransform<Derived>::RebuildCXXOperatorCallExpr(OverloadedOperatorKind Op,
SourceLocation OpLoc,
Expr *OrigCallee,
Expr *First,
Expr *Second) {
Expr *Callee = OrigCallee->IgnoreParenCasts();
bool isPostIncDec = Second && (Op == OO_PlusPlus || Op == OO_MinusMinus);
if (First->getObjectKind() == OK_ObjCProperty) {
BinaryOperatorKind Opc = BinaryOperator::getOverloadedOpcode(Op);
if (BinaryOperator::isAssignmentOp(Opc))
return SemaRef.checkPseudoObjectAssignment(/*Scope=*/nullptr, OpLoc, Opc,
First, Second);
ExprResult Result = SemaRef.CheckPlaceholderExpr(First);
if (Result.isInvalid())
return ExprError();
First = Result.get();
}
if (Second && Second->getObjectKind() == OK_ObjCProperty) {
ExprResult Result = SemaRef.CheckPlaceholderExpr(Second);
if (Result.isInvalid())
return ExprError();
Second = Result.get();
}
// Determine whether this should be a builtin operation.
if (Op == OO_Subscript) {
if (!First->getType()->isOverloadableType() &&
!Second->getType()->isOverloadableType())
return getSema().CreateBuiltinArraySubscriptExpr(
First, Callee->getBeginLoc(), Second, OpLoc);
} else if (Op == OO_Arrow) {
// -> is never a builtin operation.
return SemaRef.BuildOverloadedArrowExpr(nullptr, First, OpLoc);
} else if (Second == nullptr || isPostIncDec) {
if (!First->getType()->isOverloadableType() ||
(Op == OO_Amp && getSema().isQualifiedMemberAccess(First))) {
// The argument is not of overloadable type, or this is an expression
// of the form &Class::member, so try to create a built-in unary
// operation.
UnaryOperatorKind Opc
= UnaryOperator::getOverloadedOpcode(Op, isPostIncDec);
return getSema().CreateBuiltinUnaryOp(OpLoc, Opc, First);
}
} else {
if (!First->getType()->isOverloadableType() &&
!Second->getType()->isOverloadableType()) {
// Neither of the arguments is an overloadable type, so try to
// create a built-in binary operation.
BinaryOperatorKind Opc = BinaryOperator::getOverloadedOpcode(Op);
ExprResult Result
= SemaRef.CreateBuiltinBinOp(OpLoc, Opc, First, Second);
if (Result.isInvalid())
return ExprError();
return Result;
}
}
// Compute the transformed set of functions (and function templates) to be
// used during overload resolution.
UnresolvedSet<16> Functions;
bool RequiresADL;
if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(Callee)) {
Functions.append(ULE->decls_begin(), ULE->decls_end());
// If the overload could not be resolved in the template definition
// (because we had a dependent argument), ADL is performed as part of
// template instantiation.
RequiresADL = ULE->requiresADL();
} else {
// If we've resolved this to a particular non-member function, just call
// that function. If we resolved it to a member function,
// CreateOverloaded* will find that function for us.
NamedDecl *ND = cast<DeclRefExpr>(Callee)->getDecl();
if (!isa<CXXMethodDecl>(ND))
Functions.addDecl(ND);
RequiresADL = false;
}
// Build the argument list; argument-dependent lookup, when required, is
// performed by the CreateOverloaded* calls below.
Expr *Args[2] = { First, Second };
unsigned NumArgs = 1 + (Second != nullptr);
// Create the overloaded operator invocation for unary operators.
if (NumArgs == 1 || isPostIncDec) {
UnaryOperatorKind Opc
= UnaryOperator::getOverloadedOpcode(Op, isPostIncDec);
return SemaRef.CreateOverloadedUnaryOp(OpLoc, Opc, Functions, First,
RequiresADL);
}
if (Op == OO_Subscript) {
SourceLocation LBrace;
SourceLocation RBrace;
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Callee)) {
DeclarationNameLoc NameLoc = DRE->getNameInfo().getInfo();
LBrace = NameLoc.getCXXOperatorNameBeginLoc();
RBrace = NameLoc.getCXXOperatorNameEndLoc();
} else {
LBrace = Callee->getBeginLoc();
RBrace = OpLoc;
}
return SemaRef.CreateOverloadedArraySubscriptExpr(LBrace, RBrace,
First, Second);
}
// Create the overloaded operator invocation for binary operators.
BinaryOperatorKind Opc = BinaryOperator::getOverloadedOpcode(Op);
ExprResult Result = SemaRef.CreateOverloadedBinOp(
OpLoc, Opc, Functions, Args[0], Args[1], RequiresADL);
if (Result.isInvalid())
return ExprError();
return Result;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::RebuildCXXPseudoDestructorExpr(Expr *Base,
SourceLocation OperatorLoc,
bool isArrow,
CXXScopeSpec &SS,
TypeSourceInfo *ScopeType,
SourceLocation CCLoc,
SourceLocation TildeLoc,
PseudoDestructorTypeStorage Destroyed) {
QualType BaseType = Base->getType();
if (Base->isTypeDependent() || Destroyed.getIdentifier() ||
(!isArrow && !BaseType->getAs<RecordType>()) ||
(isArrow && BaseType->getAs<PointerType>() &&
!BaseType->castAs<PointerType>()->getPointeeType()
->template getAs<RecordType>())){
// This pseudo-destructor expression is still a pseudo-destructor.
return SemaRef.BuildPseudoDestructorExpr(
Base, OperatorLoc, isArrow ? tok::arrow : tok::period, SS, ScopeType,
CCLoc, TildeLoc, Destroyed);
}
TypeSourceInfo *DestroyedType = Destroyed.getTypeSourceInfo();
DeclarationName Name(SemaRef.Context.DeclarationNames.getCXXDestructorName(
SemaRef.Context.getCanonicalType(DestroyedType->getType())));
DeclarationNameInfo NameInfo(Name, Destroyed.getLocation());
NameInfo.setNamedTypeInfo(DestroyedType);
// The scope type is now known to be a valid nested name specifier
// component. Tack it on to the end of the nested name specifier.
if (ScopeType) {
if (!ScopeType->getType()->getAs<TagType>()) {
getSema().Diag(ScopeType->getTypeLoc().getBeginLoc(),
diag::err_expected_class_or_namespace)
<< ScopeType->getType() << getSema().getLangOpts().CPlusPlus;
return ExprError();
}
SS.Extend(SemaRef.Context, SourceLocation(), ScopeType->getTypeLoc(),
CCLoc);
}
SourceLocation TemplateKWLoc; // FIXME: retrieve it from caller.
return getSema().BuildMemberReferenceExpr(Base, BaseType,
OperatorLoc, isArrow,
SS, TemplateKWLoc,
/*FIXME: FirstQualifier*/ nullptr,
NameInfo,
/*TemplateArgs*/ nullptr,
/*S*/nullptr);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCapturedStmt(CapturedStmt *S) {
SourceLocation Loc = S->getBeginLoc();
CapturedDecl *CD = S->getCapturedDecl();
unsigned NumParams = CD->getNumParams();
unsigned ContextParamPos = CD->getContextParamPosition();
SmallVector<Sema::CapturedParamNameType, 4> Params;
for (unsigned I = 0; I < NumParams; ++I) {
if (I != ContextParamPos) {
Params.push_back(
std::make_pair(
CD->getParam(I)->getName(),
getDerived().TransformType(CD->getParam(I)->getType())));
} else {
Params.push_back(std::make_pair(StringRef(), QualType()));
}
}
getSema().ActOnCapturedRegionStart(Loc, /*CurScope*/nullptr,
S->getCapturedRegionKind(), Params);
StmtResult Body;
{
Sema::CompoundScopeRAII CompoundScope(getSema());
Body = getDerived().TransformStmt(S->getCapturedStmt());
}
if (Body.isInvalid()) {
getSema().ActOnCapturedRegionError();
return StmtError();
}
return getSema().ActOnCapturedRegionEnd(Body.get());
}
} // end namespace clang
#endif // LLVM_CLANG_LIB_SEMA_TREETRANSFORM_H
diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp
index 83bade9941b3..1722572f1a27 100644
--- a/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp
+++ b/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp
@@ -1,13023 +1,13025 @@
//===- ASTReader.cpp - AST File Reader ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the ASTReader class, which reads AST files.
//
//===----------------------------------------------------------------------===//
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Serialization/ASTRecordReader.h"
#include "ASTCommon.h"
#include "ASTReaderInternals.h"
#include "clang/AST/AbstractTypeReader.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/ASTUnresolvedSet.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclFriend.h"
#include "clang/AST/DeclGroup.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/ODRHash.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeLocVisitor.h"
#include "clang/AST/UnresolvedSet.h"
#include "clang/Basic/CommentOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/ExceptionSpecificationType.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/PragmaKinds.h"
#include "clang/Basic/Sanitizers.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/SourceManagerInternals.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Basic/Version.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "clang/Sema/ObjCMethodList.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Weak.h"
#include "clang/Serialization/ASTBitCodes.h"
#include "clang/Serialization/ASTDeserializationListener.h"
#include "clang/Serialization/ContinuousRangeMap.h"
#include "clang/Serialization/GlobalModuleIndex.h"
#include "clang/Serialization/InMemoryModuleCache.h"
#include "clang/Serialization/ModuleFile.h"
#include "clang/Serialization/ModuleFileExtension.h"
#include "clang/Serialization/ModuleManager.h"
#include "clang/Serialization/PCHContainerOperations.h"
#include "clang/Serialization/SerializationDiagnostic.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/DJB.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/VersionTuple.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <ctime>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <system_error>
#include <tuple>
#include <utility>
#include <vector>
using namespace clang;
using namespace clang::serialization;
using namespace clang::serialization::reader;
using llvm::BitstreamCursor;
using llvm::RoundingMode;
//===----------------------------------------------------------------------===//
// ChainedASTReaderListener implementation
//===----------------------------------------------------------------------===//
bool
ChainedASTReaderListener::ReadFullVersionInformation(StringRef FullVersion) {
return First->ReadFullVersionInformation(FullVersion) ||
Second->ReadFullVersionInformation(FullVersion);
}
void ChainedASTReaderListener::ReadModuleName(StringRef ModuleName) {
First->ReadModuleName(ModuleName);
Second->ReadModuleName(ModuleName);
}
void ChainedASTReaderListener::ReadModuleMapFile(StringRef ModuleMapPath) {
First->ReadModuleMapFile(ModuleMapPath);
Second->ReadModuleMapFile(ModuleMapPath);
}
bool
ChainedASTReaderListener::ReadLanguageOptions(const LangOptions &LangOpts,
bool Complain,
bool AllowCompatibleDifferences) {
return First->ReadLanguageOptions(LangOpts, Complain,
AllowCompatibleDifferences) ||
Second->ReadLanguageOptions(LangOpts, Complain,
AllowCompatibleDifferences);
}
bool ChainedASTReaderListener::ReadTargetOptions(
const TargetOptions &TargetOpts, bool Complain,
bool AllowCompatibleDifferences) {
return First->ReadTargetOptions(TargetOpts, Complain,
AllowCompatibleDifferences) ||
Second->ReadTargetOptions(TargetOpts, Complain,
AllowCompatibleDifferences);
}
bool ChainedASTReaderListener::ReadDiagnosticOptions(
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts, bool Complain) {
return First->ReadDiagnosticOptions(DiagOpts, Complain) ||
Second->ReadDiagnosticOptions(DiagOpts, Complain);
}
bool
ChainedASTReaderListener::ReadFileSystemOptions(const FileSystemOptions &FSOpts,
bool Complain) {
return First->ReadFileSystemOptions(FSOpts, Complain) ||
Second->ReadFileSystemOptions(FSOpts, Complain);
}
bool ChainedASTReaderListener::ReadHeaderSearchOptions(
const HeaderSearchOptions &HSOpts, StringRef SpecificModuleCachePath,
bool Complain) {
return First->ReadHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
Complain) ||
Second->ReadHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
Complain);
}
bool ChainedASTReaderListener::ReadPreprocessorOptions(
const PreprocessorOptions &PPOpts, bool Complain,
std::string &SuggestedPredefines) {
return First->ReadPreprocessorOptions(PPOpts, Complain,
SuggestedPredefines) ||
Second->ReadPreprocessorOptions(PPOpts, Complain, SuggestedPredefines);
}
void ChainedASTReaderListener::ReadCounter(const serialization::ModuleFile &M,
unsigned Value) {
First->ReadCounter(M, Value);
Second->ReadCounter(M, Value);
}
bool ChainedASTReaderListener::needsInputFileVisitation() {
return First->needsInputFileVisitation() ||
Second->needsInputFileVisitation();
}
bool ChainedASTReaderListener::needsSystemInputFileVisitation() {
return First->needsSystemInputFileVisitation() ||
Second->needsSystemInputFileVisitation();
}
void ChainedASTReaderListener::visitModuleFile(StringRef Filename,
ModuleKind Kind) {
First->visitModuleFile(Filename, Kind);
Second->visitModuleFile(Filename, Kind);
}
bool ChainedASTReaderListener::visitInputFile(StringRef Filename,
bool isSystem,
bool isOverridden,
bool isExplicitModule) {
bool Continue = false;
if (First->needsInputFileVisitation() &&
(!isSystem || First->needsSystemInputFileVisitation()))
Continue |= First->visitInputFile(Filename, isSystem, isOverridden,
isExplicitModule);
if (Second->needsInputFileVisitation() &&
(!isSystem || Second->needsSystemInputFileVisitation()))
Continue |= Second->visitInputFile(Filename, isSystem, isOverridden,
isExplicitModule);
return Continue;
}
void ChainedASTReaderListener::readModuleFileExtension(
const ModuleFileExtensionMetadata &Metadata) {
First->readModuleFileExtension(Metadata);
Second->readModuleFileExtension(Metadata);
}
//===----------------------------------------------------------------------===//
// PCH validator implementation
//===----------------------------------------------------------------------===//
ASTReaderListener::~ASTReaderListener() = default;
/// Compare the given set of language options against an existing set of
/// language options.
///
/// \param Diags If non-NULL, diagnostics will be emitted via this engine.
/// \param AllowCompatibleDifferences If true, differences between compatible
/// language options will be permitted.
///
/// \returns true if the language options mismatch, false otherwise.
static bool checkLanguageOptions(const LangOptions &LangOpts,
const LangOptions &ExistingLangOpts,
DiagnosticsEngine *Diags,
bool AllowCompatibleDifferences = true) {
#define LANGOPT(Name, Bits, Default, Description) \
if (ExistingLangOpts.Name != LangOpts.Name) { \
if (Diags) \
Diags->Report(diag::err_pch_langopt_mismatch) \
<< Description << LangOpts.Name << ExistingLangOpts.Name; \
return true; \
}
#define VALUE_LANGOPT(Name, Bits, Default, Description) \
if (ExistingLangOpts.Name != LangOpts.Name) { \
if (Diags) \
Diags->Report(diag::err_pch_langopt_value_mismatch) \
<< Description; \
return true; \
}
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
if (ExistingLangOpts.get##Name() != LangOpts.get##Name()) { \
if (Diags) \
Diags->Report(diag::err_pch_langopt_value_mismatch) \
<< Description; \
return true; \
}
#define COMPATIBLE_LANGOPT(Name, Bits, Default, Description) \
if (!AllowCompatibleDifferences) \
LANGOPT(Name, Bits, Default, Description)
#define COMPATIBLE_ENUM_LANGOPT(Name, Bits, Default, Description) \
if (!AllowCompatibleDifferences) \
ENUM_LANGOPT(Name, Bits, Default, Description)
#define COMPATIBLE_VALUE_LANGOPT(Name, Bits, Default, Description) \
if (!AllowCompatibleDifferences) \
VALUE_LANGOPT(Name, Bits, Default, Description)
#define BENIGN_LANGOPT(Name, Bits, Default, Description)
#define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description)
#define BENIGN_VALUE_LANGOPT(Name, Type, Bits, Default, Description)
#include "clang/Basic/LangOptions.def"
if (ExistingLangOpts.ModuleFeatures != LangOpts.ModuleFeatures) {
if (Diags)
Diags->Report(diag::err_pch_langopt_value_mismatch) << "module features";
return true;
}
if (ExistingLangOpts.ObjCRuntime != LangOpts.ObjCRuntime) {
if (Diags)
Diags->Report(diag::err_pch_langopt_value_mismatch)
<< "target Objective-C runtime";
return true;
}
if (ExistingLangOpts.CommentOpts.BlockCommandNames !=
LangOpts.CommentOpts.BlockCommandNames) {
if (Diags)
Diags->Report(diag::err_pch_langopt_value_mismatch)
<< "block command names";
return true;
}
// Sanitizer feature mismatches are treated as compatible differences. If
// compatible differences aren't allowed, we still only want to check for
// mismatches of non-modular sanitizers (the only ones which can affect AST
// generation).
if (!AllowCompatibleDifferences) {
SanitizerMask ModularSanitizers = getPPTransparentSanitizers();
SanitizerSet ExistingSanitizers = ExistingLangOpts.Sanitize;
SanitizerSet ImportedSanitizers = LangOpts.Sanitize;
ExistingSanitizers.clear(ModularSanitizers);
ImportedSanitizers.clear(ModularSanitizers);
if (ExistingSanitizers.Mask != ImportedSanitizers.Mask) {
const std::string Flag = "-fsanitize=";
if (Diags) {
#define SANITIZER(NAME, ID) \
{ \
bool InExistingModule = ExistingSanitizers.has(SanitizerKind::ID); \
bool InImportedModule = ImportedSanitizers.has(SanitizerKind::ID); \
if (InExistingModule != InImportedModule) \
Diags->Report(diag::err_pch_targetopt_feature_mismatch) \
<< InExistingModule << (Flag + NAME); \
}
#include "clang/Basic/Sanitizers.def"
}
return true;
}
}
return false;
}
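// Illustrative sketch, not from the patch above: the X-macro pattern that
// checkLanguageOptions relies on, reduced to a hypothetical two-option list.
// In Clang the option list lives in LangOptions.def and is #included at each
// expansion site; here it is inlined so the sketch stays self-contained.
#include <cstdio>

struct Options {
#define OPTION(Name, Default) int Name = Default;
  OPTION(OptimizeSize, 0)
  OPTION(Exceptions, 1)
#undef OPTION
};

static bool anyMismatch(const Options &A, const Options &B) {
  // The same list, expanded a second time, now generates one comparison and
  // one diagnostic per option.
#define OPTION(Name, Default) \
  if (A.Name != B.Name) { std::printf("option %s differs\n", #Name); return true; }
  OPTION(OptimizeSize, 0)
  OPTION(Exceptions, 1)
#undef OPTION
  return false;
}

int main() {
  Options A, B;
  B.Exceptions = 0;
  return anyMismatch(A, B) ? 0 : 1; // prints "option Exceptions differs"
}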
/// Compare the given set of target options against an existing set of
/// target options.
///
/// \param Diags If non-NULL, diagnostics will be emitted via this engine.
///
/// \returns true if the target options mismatch, false otherwise.
static bool checkTargetOptions(const TargetOptions &TargetOpts,
const TargetOptions &ExistingTargetOpts,
DiagnosticsEngine *Diags,
bool AllowCompatibleDifferences = true) {
#define CHECK_TARGET_OPT(Field, Name) \
if (TargetOpts.Field != ExistingTargetOpts.Field) { \
if (Diags) \
Diags->Report(diag::err_pch_targetopt_mismatch) \
<< Name << TargetOpts.Field << ExistingTargetOpts.Field; \
return true; \
}
// The triple and ABI must match exactly.
CHECK_TARGET_OPT(Triple, "target");
CHECK_TARGET_OPT(ABI, "target ABI");
// We can tolerate different CPUs in many cases, notably when one CPU
// supports a strict superset of another. When allowing compatible
// differences skip this check.
if (!AllowCompatibleDifferences) {
CHECK_TARGET_OPT(CPU, "target CPU");
CHECK_TARGET_OPT(TuneCPU, "tune CPU");
}
#undef CHECK_TARGET_OPT
// Compare feature sets.
SmallVector<StringRef, 4> ExistingFeatures(
ExistingTargetOpts.FeaturesAsWritten.begin(),
ExistingTargetOpts.FeaturesAsWritten.end());
SmallVector<StringRef, 4> ReadFeatures(TargetOpts.FeaturesAsWritten.begin(),
TargetOpts.FeaturesAsWritten.end());
llvm::sort(ExistingFeatures);
llvm::sort(ReadFeatures);
// We compute the set difference in both directions explicitly so that we can
// diagnose the differences differently.
SmallVector<StringRef, 4> UnmatchedExistingFeatures, UnmatchedReadFeatures;
std::set_difference(
ExistingFeatures.begin(), ExistingFeatures.end(), ReadFeatures.begin(),
ReadFeatures.end(), std::back_inserter(UnmatchedExistingFeatures));
std::set_difference(ReadFeatures.begin(), ReadFeatures.end(),
ExistingFeatures.begin(), ExistingFeatures.end(),
std::back_inserter(UnmatchedReadFeatures));
// If we are allowing compatible differences and the read feature set is
// a strict subset of the existing feature set, there is nothing to diagnose.
if (AllowCompatibleDifferences && UnmatchedReadFeatures.empty())
return false;
if (Diags) {
for (StringRef Feature : UnmatchedReadFeatures)
Diags->Report(diag::err_pch_targetopt_feature_mismatch)
<< /* is-existing-feature */ false << Feature;
for (StringRef Feature : UnmatchedExistingFeatures)
Diags->Report(diag::err_pch_targetopt_feature_mismatch)
<< /* is-existing-feature */ true << Feature;
}
return !UnmatchedReadFeatures.empty() || !UnmatchedExistingFeatures.empty();
}
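// Illustrative sketch, not from the patch above: the two-way set-difference
// idiom checkTargetOptions uses, applied to plain strings. Both ranges must be
// sorted before std::set_difference; each direction yields the entries missing
// from the other side, so the caller can word the two diagnostics differently.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Existing = {"+avx", "+sse4.2"};
  std::vector<std::string> Read = {"+neon", "+sse4.2"};
  std::sort(Existing.begin(), Existing.end());
  std::sort(Read.begin(), Read.end());

  std::vector<std::string> OnlyExisting, OnlyRead;
  std::set_difference(Existing.begin(), Existing.end(), Read.begin(),
                      Read.end(), std::back_inserter(OnlyExisting));
  std::set_difference(Read.begin(), Read.end(), Existing.begin(),
                      Existing.end(), std::back_inserter(OnlyRead));

  for (const std::string &F : OnlyExisting)
    std::cout << "only in the existing target: " << F << "\n"; // +avx
  for (const std::string &F : OnlyRead)
    std::cout << "only in the AST file: " << F << "\n";        // +neon
}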
bool
PCHValidator::ReadLanguageOptions(const LangOptions &LangOpts,
bool Complain,
bool AllowCompatibleDifferences) {
const LangOptions &ExistingLangOpts = PP.getLangOpts();
return checkLanguageOptions(LangOpts, ExistingLangOpts,
Complain ? &Reader.Diags : nullptr,
AllowCompatibleDifferences);
}
bool PCHValidator::ReadTargetOptions(const TargetOptions &TargetOpts,
bool Complain,
bool AllowCompatibleDifferences) {
const TargetOptions &ExistingTargetOpts = PP.getTargetInfo().getTargetOpts();
return checkTargetOptions(TargetOpts, ExistingTargetOpts,
Complain ? &Reader.Diags : nullptr,
AllowCompatibleDifferences);
}
namespace {
using MacroDefinitionsMap =
llvm::StringMap<std::pair<StringRef, bool /*IsUndef*/>>;
using DeclsMap = llvm::DenseMap<DeclarationName, SmallVector<NamedDecl *, 8>>;
} // namespace
static bool checkDiagnosticGroupMappings(DiagnosticsEngine &StoredDiags,
DiagnosticsEngine &Diags,
bool Complain) {
using Level = DiagnosticsEngine::Level;
// Check current mappings for new -Werror mappings, and the stored mappings
// for cases that were explicitly mapped to *not* be errors that are now
// errors because of options like -Werror.
DiagnosticsEngine *MappingSources[] = { &Diags, &StoredDiags };
for (DiagnosticsEngine *MappingSource : MappingSources) {
for (auto DiagIDMappingPair : MappingSource->getDiagnosticMappings()) {
diag::kind DiagID = DiagIDMappingPair.first;
Level CurLevel = Diags.getDiagnosticLevel(DiagID, SourceLocation());
if (CurLevel < DiagnosticsEngine::Error)
continue; // not significant
Level StoredLevel =
StoredDiags.getDiagnosticLevel(DiagID, SourceLocation());
if (StoredLevel < DiagnosticsEngine::Error) {
if (Complain)
Diags.Report(diag::err_pch_diagopt_mismatch) << "-Werror=" +
Diags.getDiagnosticIDs()->getWarningOptionForDiag(DiagID).str();
return true;
}
}
}
return false;
}
static bool isExtHandlingFromDiagsError(DiagnosticsEngine &Diags) {
diag::Severity Ext = Diags.getExtensionHandlingBehavior();
if (Ext == diag::Severity::Warning && Diags.getWarningsAsErrors())
return true;
return Ext >= diag::Severity::Error;
}
static bool checkDiagnosticMappings(DiagnosticsEngine &StoredDiags,
DiagnosticsEngine &Diags,
bool IsSystem, bool Complain) {
// Top-level options
if (IsSystem) {
if (Diags.getSuppressSystemWarnings())
return false;
// If -Wsystem-headers was not enabled before, be conservative
if (StoredDiags.getSuppressSystemWarnings()) {
if (Complain)
Diags.Report(diag::err_pch_diagopt_mismatch) << "-Wsystem-headers";
return true;
}
}
if (Diags.getWarningsAsErrors() && !StoredDiags.getWarningsAsErrors()) {
if (Complain)
Diags.Report(diag::err_pch_diagopt_mismatch) << "-Werror";
return true;
}
if (Diags.getWarningsAsErrors() && Diags.getEnableAllWarnings() &&
!StoredDiags.getEnableAllWarnings()) {
if (Complain)
Diags.Report(diag::err_pch_diagopt_mismatch) << "-Weverything -Werror";
return true;
}
if (isExtHandlingFromDiagsError(Diags) &&
!isExtHandlingFromDiagsError(StoredDiags)) {
if (Complain)
Diags.Report(diag::err_pch_diagopt_mismatch) << "-pedantic-errors";
return true;
}
return checkDiagnosticGroupMappings(StoredDiags, Diags, Complain);
}
/// Return the top import module if it is implicit, nullptr otherwise.
static Module *getTopImportImplicitModule(ModuleManager &ModuleMgr,
Preprocessor &PP) {
// If the original import came from a file explicitly generated by the user,
// don't check the diagnostic mappings.
// FIXME: currently this is approximated by checking whether this is not a
// module import of an implicitly-loaded module file.
// Note: ModuleMgr.rbegin() may not be the current module, but it must be in
// the transitive closure of its imports, since unrelated modules cannot be
// imported until after this module finishes validation.
ModuleFile *TopImport = &*ModuleMgr.rbegin();
while (!TopImport->ImportedBy.empty())
TopImport = TopImport->ImportedBy[0];
if (TopImport->Kind != MK_ImplicitModule)
return nullptr;
StringRef ModuleName = TopImport->ModuleName;
assert(!ModuleName.empty() && "diagnostic options read before module name");
Module *M = PP.getHeaderSearchInfo().lookupModule(ModuleName);
assert(M && "missing module");
return M;
}
bool PCHValidator::ReadDiagnosticOptions(
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts, bool Complain) {
DiagnosticsEngine &ExistingDiags = PP.getDiagnostics();
IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(ExistingDiags.getDiagnosticIDs());
IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
new DiagnosticsEngine(DiagIDs, DiagOpts.get()));
// This should never fail, because we would have processed these options
// before writing them to an ASTFile.
ProcessWarningOptions(*Diags, *DiagOpts, /*Report*/false);
ModuleManager &ModuleMgr = Reader.getModuleManager();
assert(ModuleMgr.size() >= 1 && "what ASTFile is this then");
Module *TopM = getTopImportImplicitModule(ModuleMgr, PP);
if (!TopM)
return false;
// FIXME: if the diagnostics are incompatible, save a DiagnosticOptions that
// contains the union of their flags.
return checkDiagnosticMappings(*Diags, ExistingDiags, TopM->IsSystem,
Complain);
}
/// Collect the macro definitions provided by the given preprocessor
/// options.
static void
collectMacroDefinitions(const PreprocessorOptions &PPOpts,
MacroDefinitionsMap &Macros,
SmallVectorImpl<StringRef> *MacroNames = nullptr) {
for (unsigned I = 0, N = PPOpts.Macros.size(); I != N; ++I) {
StringRef Macro = PPOpts.Macros[I].first;
bool IsUndef = PPOpts.Macros[I].second;
std::pair<StringRef, StringRef> MacroPair = Macro.split('=');
StringRef MacroName = MacroPair.first;
StringRef MacroBody = MacroPair.second;
// For an #undef'd macro, we only care about the name.
if (IsUndef) {
if (MacroNames && !Macros.count(MacroName))
MacroNames->push_back(MacroName);
Macros[MacroName] = std::make_pair("", true);
continue;
}
// For a #define'd macro, figure out the actual definition.
if (MacroName.size() == Macro.size())
MacroBody = "1";
else {
// Note: GCC drops anything following an end-of-line character.
StringRef::size_type End = MacroBody.find_first_of("\n\r");
MacroBody = MacroBody.substr(0, End);
}
if (MacroNames && !Macros.count(MacroName))
MacroNames->push_back(MacroName);
Macros[MacroName] = std::make_pair(MacroBody, false);
}
}
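// Illustrative sketch, not from the patch above: the macro-splitting rule
// applied by collectMacroDefinitions, restated with std::string instead of
// StringRef. A "-D"-style entry without '=' behaves like NAME=1, and anything
// after the first end-of-line character in the body is dropped (the GCC
// behaviour noted in the comment above).
#include <iostream>
#include <string>
#include <utility>

static std::pair<std::string, std::string> splitMacro(const std::string &Entry) {
  std::string::size_type Eq = Entry.find('=');
  if (Eq == std::string::npos)
    return {Entry, "1"};                             // "NDEBUG" -> NDEBUG 1
  std::string Name = Entry.substr(0, Eq);
  std::string Body = Entry.substr(Eq + 1);
  Body = Body.substr(0, Body.find_first_of("\n\r")); // drop trailing lines
  return {Name, Body};
}

int main() {
  auto P = splitMacro("DEBUG_LEVEL=2\njunk");
  std::cout << P.first << " " << P.second << "\n";   // DEBUG_LEVEL 2
  std::cout << splitMacro("NDEBUG").second << "\n";  // 1
}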
/// Check the preprocessor options deserialized from the control block
/// against the preprocessor options in an existing preprocessor.
///
/// \param Diags If non-null, produce diagnostics for any mismatches incurred.
/// \param Validate If true, validate preprocessor options. If false, allow
/// macros defined by \p ExistingPPOpts to override those defined by
/// \p PPOpts in SuggestedPredefines.
static bool checkPreprocessorOptions(const PreprocessorOptions &PPOpts,
const PreprocessorOptions &ExistingPPOpts,
DiagnosticsEngine *Diags,
FileManager &FileMgr,
std::string &SuggestedPredefines,
const LangOptions &LangOpts,
bool Validate = true) {
// Check macro definitions.
MacroDefinitionsMap ASTFileMacros;
collectMacroDefinitions(PPOpts, ASTFileMacros);
MacroDefinitionsMap ExistingMacros;
SmallVector<StringRef, 4> ExistingMacroNames;
collectMacroDefinitions(ExistingPPOpts, ExistingMacros, &ExistingMacroNames);
for (unsigned I = 0, N = ExistingMacroNames.size(); I != N; ++I) {
// Dig out the macro definition in the existing preprocessor options.
StringRef MacroName = ExistingMacroNames[I];
std::pair<StringRef, bool> Existing = ExistingMacros[MacroName];
// Check whether we know anything about this macro name or not.
llvm::StringMap<std::pair<StringRef, bool /*IsUndef*/>>::iterator Known =
ASTFileMacros.find(MacroName);
if (!Validate || Known == ASTFileMacros.end()) {
// FIXME: Check whether this identifier was referenced anywhere in the
// AST file. If so, we should reject the AST file. Unfortunately, this
// information isn't in the control block. What shall we do about it?
if (Existing.second) {
SuggestedPredefines += "#undef ";
SuggestedPredefines += MacroName.str();
SuggestedPredefines += '\n';
} else {
SuggestedPredefines += "#define ";
SuggestedPredefines += MacroName.str();
SuggestedPredefines += ' ';
SuggestedPredefines += Existing.first.str();
SuggestedPredefines += '\n';
}
continue;
}
// If the macro was defined in one but undef'd in the other, we have a
// conflict.
if (Existing.second != Known->second.second) {
if (Diags) {
Diags->Report(diag::err_pch_macro_def_undef)
<< MacroName << Known->second.second;
}
return true;
}
// If the macro was #undef'd in both, or if the macro bodies are identical,
// it's fine.
if (Existing.second || Existing.first == Known->second.first)
continue;
// The macro bodies differ; complain.
if (Diags) {
Diags->Report(diag::err_pch_macro_def_conflict)
<< MacroName << Known->second.first << Existing.first;
}
return true;
}
// Check whether we're using predefines.
if (PPOpts.UsePredefines != ExistingPPOpts.UsePredefines && Validate) {
if (Diags) {
Diags->Report(diag::err_pch_undef) << ExistingPPOpts.UsePredefines;
}
return true;
}
// Detailed record is important since it is used for the module cache hash.
if (LangOpts.Modules &&
PPOpts.DetailedRecord != ExistingPPOpts.DetailedRecord && Validate) {
if (Diags) {
Diags->Report(diag::err_pch_pp_detailed_record) << PPOpts.DetailedRecord;
}
return true;
}
// Compute the #include and #include_macros lines we need.
for (unsigned I = 0, N = ExistingPPOpts.Includes.size(); I != N; ++I) {
StringRef File = ExistingPPOpts.Includes[I];
if (!ExistingPPOpts.ImplicitPCHInclude.empty() &&
!ExistingPPOpts.PCHThroughHeader.empty()) {
// In case the through header is an include, we must add all the includes
// to the predefines so the start point can be determined.
SuggestedPredefines += "#include \"";
SuggestedPredefines += File;
SuggestedPredefines += "\"\n";
continue;
}
if (File == ExistingPPOpts.ImplicitPCHInclude)
continue;
if (std::find(PPOpts.Includes.begin(), PPOpts.Includes.end(), File)
!= PPOpts.Includes.end())
continue;
SuggestedPredefines += "#include \"";
SuggestedPredefines += File;
SuggestedPredefines += "\"\n";
}
for (unsigned I = 0, N = ExistingPPOpts.MacroIncludes.size(); I != N; ++I) {
StringRef File = ExistingPPOpts.MacroIncludes[I];
if (std::find(PPOpts.MacroIncludes.begin(), PPOpts.MacroIncludes.end(),
File)
!= PPOpts.MacroIncludes.end())
continue;
SuggestedPredefines += "#__include_macros \"";
SuggestedPredefines += File;
SuggestedPredefines += "\"\n##\n";
}
return false;
}
bool PCHValidator::ReadPreprocessorOptions(const PreprocessorOptions &PPOpts,
bool Complain,
std::string &SuggestedPredefines) {
const PreprocessorOptions &ExistingPPOpts = PP.getPreprocessorOpts();
return checkPreprocessorOptions(PPOpts, ExistingPPOpts,
Complain? &Reader.Diags : nullptr,
PP.getFileManager(),
SuggestedPredefines,
PP.getLangOpts());
}
bool SimpleASTReaderListener::ReadPreprocessorOptions(
const PreprocessorOptions &PPOpts,
bool Complain,
std::string &SuggestedPredefines) {
return checkPreprocessorOptions(PPOpts,
PP.getPreprocessorOpts(),
nullptr,
PP.getFileManager(),
SuggestedPredefines,
PP.getLangOpts(),
false);
}
/// Check the header search options deserialized from the control block
/// against the header search options in an existing preprocessor.
///
/// \param Diags If non-null, produce diagnostics for any mismatches incurred.
static bool checkHeaderSearchOptions(const HeaderSearchOptions &HSOpts,
StringRef SpecificModuleCachePath,
StringRef ExistingModuleCachePath,
DiagnosticsEngine *Diags,
const LangOptions &LangOpts,
const PreprocessorOptions &PPOpts) {
if (LangOpts.Modules) {
if (SpecificModuleCachePath != ExistingModuleCachePath &&
!PPOpts.AllowPCHWithDifferentModulesCachePath) {
if (Diags)
Diags->Report(diag::err_pch_modulecache_mismatch)
<< SpecificModuleCachePath << ExistingModuleCachePath;
return true;
}
}
return false;
}
bool PCHValidator::ReadHeaderSearchOptions(const HeaderSearchOptions &HSOpts,
StringRef SpecificModuleCachePath,
bool Complain) {
return checkHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
PP.getHeaderSearchInfo().getModuleCachePath(),
Complain ? &Reader.Diags : nullptr,
PP.getLangOpts(), PP.getPreprocessorOpts());
}
void PCHValidator::ReadCounter(const ModuleFile &M, unsigned Value) {
PP.setCounterValue(Value);
}
//===----------------------------------------------------------------------===//
// AST reader implementation
//===----------------------------------------------------------------------===//
static uint64_t readULEB(const unsigned char *&P) {
unsigned Length = 0;
const char *Error = nullptr;
uint64_t Val = llvm::decodeULEB128(P, &Length, nullptr, &Error);
if (Error)
llvm::report_fatal_error(Error);
P += Length;
return Val;
}
/// Read ULEB-encoded key length and data length.
static std::pair<unsigned, unsigned>
readULEBKeyDataLength(const unsigned char *&P) {
unsigned KeyLen = readULEB(P);
if ((unsigned)KeyLen != KeyLen)
llvm::report_fatal_error("key too large");
unsigned DataLen = readULEB(P);
if ((unsigned)DataLen != DataLen)
llvm::report_fatal_error("data too large");
return std::make_pair(KeyLen, DataLen);
}
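// Illustrative sketch, not from the patch above: a minimal ULEB128 decoder of
// the kind llvm::decodeULEB128 implements for readULEB. Each byte contributes
// its low seven bits, least-significant group first, and the high bit marks
// "more bytes follow".
#include <cstdint>
#include <iostream>

static uint64_t decodeULEB128Sketch(const unsigned char *&P) {
  uint64_t Value = 0;
  unsigned Shift = 0;
  unsigned char Byte;
  do {
    Byte = *P++;
    Value |= uint64_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  return Value;
}

int main() {
  const unsigned char Buf[] = {0xE5, 0x8E, 0x26}; // 624485 in ULEB128
  const unsigned char *P = Buf;
  std::cout << decodeULEB128Sketch(P) << "\n";    // 624485
}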
void ASTReader::setDeserializationListener(ASTDeserializationListener *Listener,
bool TakeOwnership) {
DeserializationListener = Listener;
OwnsDeserializationListener = TakeOwnership;
}
unsigned ASTSelectorLookupTrait::ComputeHash(Selector Sel) {
return serialization::ComputeHash(Sel);
}
std::pair<unsigned, unsigned>
ASTSelectorLookupTrait::ReadKeyDataLength(const unsigned char*& d) {
return readULEBKeyDataLength(d);
}
ASTSelectorLookupTrait::internal_key_type
ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) {
using namespace llvm::support;
SelectorTable &SelTable = Reader.getContext().Selectors;
unsigned N = endian::readNext<uint16_t, little, unaligned>(d);
IdentifierInfo *FirstII = Reader.getLocalIdentifier(
F, endian::readNext<uint32_t, little, unaligned>(d));
if (N == 0)
return SelTable.getNullarySelector(FirstII);
else if (N == 1)
return SelTable.getUnarySelector(FirstII);
SmallVector<IdentifierInfo *, 16> Args;
Args.push_back(FirstII);
for (unsigned I = 1; I != N; ++I)
Args.push_back(Reader.getLocalIdentifier(
F, endian::readNext<uint32_t, little, unaligned>(d)));
return SelTable.getSelector(N, Args.data());
}
ASTSelectorLookupTrait::data_type
ASTSelectorLookupTrait::ReadData(Selector, const unsigned char* d,
unsigned DataLen) {
using namespace llvm::support;
data_type Result;
Result.ID = Reader.getGlobalSelectorID(
F, endian::readNext<uint32_t, little, unaligned>(d));
unsigned FullInstanceBits = endian::readNext<uint16_t, little, unaligned>(d);
unsigned FullFactoryBits = endian::readNext<uint16_t, little, unaligned>(d);
Result.InstanceBits = FullInstanceBits & 0x3;
Result.InstanceHasMoreThanOneDecl = (FullInstanceBits >> 2) & 0x1;
Result.FactoryBits = FullFactoryBits & 0x3;
Result.FactoryHasMoreThanOneDecl = (FullFactoryBits >> 2) & 0x1;
unsigned NumInstanceMethods = FullInstanceBits >> 3;
unsigned NumFactoryMethods = FullFactoryBits >> 3;
// Load instance methods
for (unsigned I = 0; I != NumInstanceMethods; ++I) {
if (ObjCMethodDecl *Method = Reader.GetLocalDeclAs<ObjCMethodDecl>(
F, endian::readNext<uint32_t, little, unaligned>(d)))
Result.Instance.push_back(Method);
}
// Load factory methods
for (unsigned I = 0; I != NumFactoryMethods; ++I) {
if (ObjCMethodDecl *Method = Reader.GetLocalDeclAs<ObjCMethodDecl>(
F, endian::readNext<uint32_t, little, unaligned>(d)))
Result.Factory.push_back(Method);
}
return Result;
}
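// Illustrative sketch, not from the patch above: manual little-endian decoding
// of the kind llvm::support::endian::readNext performs in the lookup traits,
// advancing the cursor past each field. The field layout here is hypothetical,
// not the real on-disk record format.
#include <cstdint>
#include <iostream>

static uint32_t readLE32(const unsigned char *&P) {
  uint32_t V = uint32_t(P[0]) | uint32_t(P[1]) << 8 | uint32_t(P[2]) << 16 |
               uint32_t(P[3]) << 24;
  P += 4;
  return V;
}

static uint16_t readLE16(const unsigned char *&P) {
  uint16_t V = uint16_t(P[0]) | uint16_t(P[1]) << 8;
  P += 2;
  return V;
}

int main() {
  const unsigned char Buf[] = {0x2A, 0x00, 0x00, 0x00, 0x05, 0x00};
  const unsigned char *P = Buf;
  std::cout << readLE32(P) << " " << readLE16(P) << "\n"; // 42 5
}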
unsigned ASTIdentifierLookupTraitBase::ComputeHash(const internal_key_type& a) {
return llvm::djbHash(a);
}
std::pair<unsigned, unsigned>
ASTIdentifierLookupTraitBase::ReadKeyDataLength(const unsigned char*& d) {
return readULEBKeyDataLength(d);
}
ASTIdentifierLookupTraitBase::internal_key_type
ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) {
assert(n >= 2 && d[n-1] == '\0');
return StringRef((const char*) d, n-1);
}
/// Whether the given identifier is "interesting".
static bool isInterestingIdentifier(ASTReader &Reader, IdentifierInfo &II,
bool IsModule) {
return II.hadMacroDefinition() || II.isPoisoned() ||
(!IsModule && II.getObjCOrBuiltinID()) ||
II.hasRevertedTokenIDToIdentifier() ||
(!(IsModule && Reader.getPreprocessor().getLangOpts().CPlusPlus) &&
II.getFETokenInfo());
}
static bool readBit(unsigned &Bits) {
bool Value = Bits & 0x1;
Bits >>= 1;
return Value;
}
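// Illustrative sketch, not from the patch above: one way a writer could pack
// several boolean flags into a single integer so that a readBit-style loop
// recovers them. Reading from the low bit returns flags in the reverse of the
// order this particular writer pushed them; the flag names are made up.
#include <cassert>

int main() {
  bool HadMacroDefinition = true, ExtensionToken = false, Poisoned = true;
  unsigned Bits = 0;
  Bits = (Bits << 1) | HadMacroDefinition;
  Bits = (Bits << 1) | ExtensionToken;
  Bits = (Bits << 1) | Poisoned;
  auto ReadBit = [&Bits] { bool V = Bits & 1; Bits >>= 1; return V; };
  assert(ReadBit() == Poisoned);
  assert(ReadBit() == ExtensionToken);
  assert(ReadBit() == HadMacroDefinition);
  assert(Bits == 0 && "no extra bits left over");
  return 0;
}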
IdentID ASTIdentifierLookupTrait::ReadIdentifierID(const unsigned char *d) {
using namespace llvm::support;
unsigned RawID = endian::readNext<uint32_t, little, unaligned>(d);
return Reader.getGlobalIdentifierID(F, RawID >> 1);
}
static void markIdentifierFromAST(ASTReader &Reader, IdentifierInfo &II) {
if (!II.isFromAST()) {
II.setIsFromAST();
bool IsModule = Reader.getPreprocessor().getCurrentModule() != nullptr;
if (isInterestingIdentifier(Reader, II, IsModule))
II.setChangedSinceDeserialization();
}
}
IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
const unsigned char* d,
unsigned DataLen) {
using namespace llvm::support;
unsigned RawID = endian::readNext<uint32_t, little, unaligned>(d);
bool IsInteresting = RawID & 0x01;
// Wipe out the "is interesting" bit.
RawID = RawID >> 1;
// Build the IdentifierInfo and link the identifier ID with it.
IdentifierInfo *II = KnownII;
if (!II) {
II = &Reader.getIdentifierTable().getOwn(k);
KnownII = II;
}
markIdentifierFromAST(Reader, *II);
Reader.markIdentifierUpToDate(II);
IdentID ID = Reader.getGlobalIdentifierID(F, RawID);
if (!IsInteresting) {
// For uninteresting identifiers, there's nothing else to do. Just notify
// the reader that we've finished loading this identifier.
Reader.SetIdentifierInfo(ID, II);
return II;
}
unsigned ObjCOrBuiltinID = endian::readNext<uint16_t, little, unaligned>(d);
unsigned Bits = endian::readNext<uint16_t, little, unaligned>(d);
bool CPlusPlusOperatorKeyword = readBit(Bits);
bool HasRevertedTokenIDToIdentifier = readBit(Bits);
bool Poisoned = readBit(Bits);
bool ExtensionToken = readBit(Bits);
bool HadMacroDefinition = readBit(Bits);
assert(Bits == 0 && "Extra bits in the identifier?");
DataLen -= 8;
// Set or check the various bits in the IdentifierInfo structure.
// Token IDs are read-only.
if (HasRevertedTokenIDToIdentifier && II->getTokenID() != tok::identifier)
II->revertTokenIDToIdentifier();
if (!F.isModule())
II->setObjCOrBuiltinID(ObjCOrBuiltinID);
assert(II->isExtensionToken() == ExtensionToken &&
"Incorrect extension token flag");
(void)ExtensionToken;
if (Poisoned)
II->setIsPoisoned(true);
assert(II->isCPlusPlusOperatorKeyword() == CPlusPlusOperatorKeyword &&
"Incorrect C++ operator keyword flag");
(void)CPlusPlusOperatorKeyword;
// If this identifier is a macro, deserialize the macro
// definition.
if (HadMacroDefinition) {
uint32_t MacroDirectivesOffset =
endian::readNext<uint32_t, little, unaligned>(d);
DataLen -= 4;
Reader.addPendingMacro(II, &F, MacroDirectivesOffset);
}
Reader.SetIdentifierInfo(ID, II);
// Read all of the declarations visible at global scope with this
// name.
if (DataLen > 0) {
SmallVector<uint32_t, 4> DeclIDs;
for (; DataLen > 0; DataLen -= 4)
DeclIDs.push_back(Reader.getGlobalDeclID(
F, endian::readNext<uint32_t, little, unaligned>(d)));
Reader.SetGloballyVisibleDecls(II, DeclIDs);
}
return II;
}
DeclarationNameKey::DeclarationNameKey(DeclarationName Name)
: Kind(Name.getNameKind()) {
switch (Kind) {
case DeclarationName::Identifier:
Data = (uint64_t)Name.getAsIdentifierInfo();
break;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
Data = (uint64_t)Name.getObjCSelector().getAsOpaquePtr();
break;
case DeclarationName::CXXOperatorName:
Data = Name.getCXXOverloadedOperator();
break;
case DeclarationName::CXXLiteralOperatorName:
Data = (uint64_t)Name.getCXXLiteralIdentifier();
break;
case DeclarationName::CXXDeductionGuideName:
Data = (uint64_t)Name.getCXXDeductionGuideTemplate()
->getDeclName().getAsIdentifierInfo();
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXUsingDirective:
Data = 0;
break;
}
}
unsigned DeclarationNameKey::getHash() const {
llvm::FoldingSetNodeID ID;
ID.AddInteger(Kind);
switch (Kind) {
case DeclarationName::Identifier:
case DeclarationName::CXXLiteralOperatorName:
case DeclarationName::CXXDeductionGuideName:
ID.AddString(((IdentifierInfo*)Data)->getName());
break;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
ID.AddInteger(serialization::ComputeHash(Selector(Data)));
break;
case DeclarationName::CXXOperatorName:
ID.AddInteger((OverloadedOperatorKind)Data);
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXUsingDirective:
break;
}
return ID.ComputeHash();
}
ModuleFile *
ASTDeclContextNameLookupTrait::ReadFileRef(const unsigned char *&d) {
using namespace llvm::support;
uint32_t ModuleFileID = endian::readNext<uint32_t, little, unaligned>(d);
return Reader.getLocalModuleFile(F, ModuleFileID);
}
std::pair<unsigned, unsigned>
ASTDeclContextNameLookupTrait::ReadKeyDataLength(const unsigned char *&d) {
return readULEBKeyDataLength(d);
}
ASTDeclContextNameLookupTrait::internal_key_type
ASTDeclContextNameLookupTrait::ReadKey(const unsigned char *d, unsigned) {
using namespace llvm::support;
auto Kind = (DeclarationName::NameKind)*d++;
uint64_t Data;
switch (Kind) {
case DeclarationName::Identifier:
case DeclarationName::CXXLiteralOperatorName:
case DeclarationName::CXXDeductionGuideName:
Data = (uint64_t)Reader.getLocalIdentifier(
F, endian::readNext<uint32_t, little, unaligned>(d));
break;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
Data =
(uint64_t)Reader.getLocalSelector(
F, endian::readNext<uint32_t, little, unaligned>(
d)).getAsOpaquePtr();
break;
case DeclarationName::CXXOperatorName:
Data = *d++; // OverloadedOperatorKind
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXUsingDirective:
Data = 0;
break;
}
return DeclarationNameKey(Kind, Data);
}
void ASTDeclContextNameLookupTrait::ReadDataInto(internal_key_type,
const unsigned char *d,
unsigned DataLen,
data_type_builder &Val) {
using namespace llvm::support;
for (unsigned NumDecls = DataLen / 4; NumDecls; --NumDecls) {
uint32_t LocalID = endian::readNext<uint32_t, little, unaligned>(d);
Val.insert(Reader.getGlobalDeclID(F, LocalID));
}
}
bool ASTReader::ReadLexicalDeclContextStorage(ModuleFile &M,
BitstreamCursor &Cursor,
uint64_t Offset,
DeclContext *DC) {
assert(Offset != 0);
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(Offset)) {
Error(std::move(Err));
return true;
}
RecordData Record;
StringRef Blob;
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
Error(MaybeCode.takeError());
return true;
}
unsigned Code = MaybeCode.get();
Expected<unsigned> MaybeRecCode = Cursor.readRecord(Code, Record, &Blob);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return true;
}
unsigned RecCode = MaybeRecCode.get();
if (RecCode != DECL_CONTEXT_LEXICAL) {
Error("Expected lexical block");
return true;
}
assert(!isa<TranslationUnitDecl>(DC) &&
"expected a TU_UPDATE_LEXICAL record for TU");
// If we are handling a C++ class template instantiation, we can see multiple
// lexical updates for the same record. It's important that we select only one
// of them, so that field numbering works properly. Just pick the first one we
// see.
auto &Lex = LexicalDecls[DC];
if (!Lex.first) {
Lex = std::make_pair(
&M, llvm::makeArrayRef(
reinterpret_cast<const llvm::support::unaligned_uint32_t *>(
Blob.data()),
Blob.size() / 4));
}
DC->setHasExternalLexicalStorage(true);
return false;
}
bool ASTReader::ReadVisibleDeclContextStorage(ModuleFile &M,
BitstreamCursor &Cursor,
uint64_t Offset,
DeclID ID) {
assert(Offset != 0);
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(Offset)) {
Error(std::move(Err));
return true;
}
RecordData Record;
StringRef Blob;
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
Error(MaybeCode.takeError());
return true;
}
unsigned Code = MaybeCode.get();
Expected<unsigned> MaybeRecCode = Cursor.readRecord(Code, Record, &Blob);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return true;
}
unsigned RecCode = MaybeRecCode.get();
if (RecCode != DECL_CONTEXT_VISIBLE) {
Error("Expected visible lookup table block");
return true;
}
// We can't safely determine the primary context yet, so delay attaching the
// lookup table until we're done with recursive deserialization.
auto *Data = (const unsigned char*)Blob.data();
PendingVisibleUpdates[ID].push_back(PendingVisibleUpdate{&M, Data});
return false;
}
void ASTReader::Error(StringRef Msg) const {
Error(diag::err_fe_pch_malformed, Msg);
if (PP.getLangOpts().Modules && !Diags.isDiagnosticInFlight() &&
!PP.getHeaderSearchInfo().getModuleCachePath().empty()) {
Diag(diag::note_module_cache_path)
<< PP.getHeaderSearchInfo().getModuleCachePath();
}
}
void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2,
StringRef Arg3) const {
if (Diags.isDiagnosticInFlight())
Diags.SetDelayedDiagnostic(DiagID, Arg1, Arg2, Arg3);
else
Diag(DiagID) << Arg1 << Arg2 << Arg3;
}
void ASTReader::Error(llvm::Error &&Err) const {
Error(toString(std::move(Err)));
}
//===----------------------------------------------------------------------===//
// Source Manager Deserialization
//===----------------------------------------------------------------------===//
/// Read the line table in the source manager block.
/// \returns true if there was an error.
bool ASTReader::ParseLineTable(ModuleFile &F,
const RecordData &Record) {
unsigned Idx = 0;
LineTableInfo &LineTable = SourceMgr.getLineTable();
// Parse the file names
std::map<int, int> FileIDs;
FileIDs[-1] = -1; // For unspecified filenames.
for (unsigned I = 0; Record[Idx]; ++I) {
// Extract the file name
auto Filename = ReadPath(F, Record, Idx);
FileIDs[I] = LineTable.getLineTableFilenameID(Filename);
}
++Idx;
// Parse the line entries
std::vector<LineEntry> Entries;
while (Idx < Record.size()) {
int FID = Record[Idx++];
assert(FID >= 0 && "Serialized line entries for non-local file.");
// Remap FileID from 1-based old view.
FID += F.SLocEntryBaseID - 1;
// Extract the line entries
unsigned NumEntries = Record[Idx++];
assert(NumEntries && "no line entries for file ID");
Entries.clear();
Entries.reserve(NumEntries);
for (unsigned I = 0; I != NumEntries; ++I) {
unsigned FileOffset = Record[Idx++];
unsigned LineNo = Record[Idx++];
int FilenameID = FileIDs[Record[Idx++]];
SrcMgr::CharacteristicKind FileKind
= (SrcMgr::CharacteristicKind)Record[Idx++];
unsigned IncludeOffset = Record[Idx++];
Entries.push_back(LineEntry::get(FileOffset, LineNo, FilenameID,
FileKind, IncludeOffset));
}
LineTable.AddEntry(FileID::get(FID), Entries);
}
return false;
}
/// Read a source manager block
bool ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
using namespace SrcMgr;
BitstreamCursor &SLocEntryCursor = F.SLocEntryCursor;
// Set the source-location entry cursor to the current position in
// the stream. This cursor will be used to read the contents of the
// source manager block initially, and then lazily read
// source-location entries as needed.
SLocEntryCursor = F.Stream;
// The stream itself is going to skip over the source manager block.
if (llvm::Error Err = F.Stream.SkipBlock()) {
Error(std::move(Err));
return true;
}
// Enter the source manager block.
if (llvm::Error Err =
SLocEntryCursor.EnterSubBlock(SOURCE_MANAGER_BLOCK_ID)) {
Error(std::move(Err));
return true;
}
F.SourceManagerBlockStartOffset = SLocEntryCursor.GetCurrentBitNo();
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeE =
SLocEntryCursor.advanceSkippingSubblocks();
if (!MaybeE) {
Error(MaybeE.takeError());
return true;
}
llvm::BitstreamEntry E = MaybeE.get();
switch (E.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return true;
case llvm::BitstreamEntry::EndBlock:
return false;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read a record.
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecord =
SLocEntryCursor.readRecord(E.ID, Record, &Blob);
if (!MaybeRecord) {
Error(MaybeRecord.takeError());
return true;
}
switch (MaybeRecord.get()) {
default: // Default behavior: ignore.
break;
case SM_SLOC_FILE_ENTRY:
case SM_SLOC_BUFFER_ENTRY:
case SM_SLOC_EXPANSION_ENTRY:
// Once we hit one of the source location entries, we're done.
return false;
}
}
}
/// If a header file is not found at the path that we expect it to be
/// and the PCH file was moved from its original location, try to resolve the
/// file by assuming that header+PCH were moved together and the header is in
/// the same place relative to the PCH.
static std::string
resolveFileRelativeToOriginalDir(const std::string &Filename,
const std::string &OriginalDir,
const std::string &CurrDir) {
assert(OriginalDir != CurrDir &&
"No point trying to resolve the file if the PCH dir didn't change");
using namespace llvm::sys;
SmallString<128> filePath(Filename);
fs::make_absolute(filePath);
assert(path::is_absolute(OriginalDir));
SmallString<128> currPCHPath(CurrDir);
path::const_iterator fileDirI = path::begin(path::parent_path(filePath)),
fileDirE = path::end(path::parent_path(filePath));
path::const_iterator origDirI = path::begin(OriginalDir),
origDirE = path::end(OriginalDir);
// Skip the common path components from filePath and OriginalDir.
while (fileDirI != fileDirE && origDirI != origDirE &&
*fileDirI == *origDirI) {
++fileDirI;
++origDirI;
}
for (; origDirI != origDirE; ++origDirI)
path::append(currPCHPath, "..");
path::append(currPCHPath, fileDirI, fileDirE);
path::append(currPCHPath, path::filename(Filename));
return std::string(currPCHPath.str());
}
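// Illustrative sketch, not from the patch above: the same relocation idea as
// resolveFileRelativeToOriginalDir, using std::filesystem instead of
// llvm::sys::path. Strip the components the header shares with the PCH's
// original directory, then re-root the remainder under the current directory.
#include <filesystem>
#include <iostream>

namespace fs = std::filesystem;

static fs::path relocate(const fs::path &Header, const fs::path &OrigDir,
                         const fs::path &CurrDir) {
  auto H = Header.begin(), HE = Header.end();
  auto O = OrigDir.begin(), OE = OrigDir.end();
  while (H != HE && O != OE && *H == *O) { ++H; ++O; } // skip the shared prefix
  fs::path Result = CurrDir;
  for (; O != OE; ++O)
    Result /= "..";  // climb out of the original components that didn't match
  for (; H != HE; ++H)
    Result /= *H;    // append the header's remaining components
  return Result;
}

int main() {
  // A header from /old/proj after the PCH moved from /old/proj to /new/tree.
  std::cout << relocate("/old/proj/include/foo.h", "/old/proj", "/new/tree")
            << "\n"; // "/new/tree/include/foo.h"
}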
bool ASTReader::ReadSLocEntry(int ID) {
if (ID == 0)
return false;
if (unsigned(-ID) - 2 >= getTotalNumSLocs() || ID > 0) {
Error("source location entry ID out-of-range for AST file");
return true;
}
// Local helper to read the (possibly-compressed) buffer data following the
// entry record.
auto ReadBuffer = [this](
BitstreamCursor &SLocEntryCursor,
StringRef Name) -> std::unique_ptr<llvm::MemoryBuffer> {
RecordData Record;
StringRef Blob;
Expected<unsigned> MaybeCode = SLocEntryCursor.ReadCode();
if (!MaybeCode) {
Error(MaybeCode.takeError());
return nullptr;
}
unsigned Code = MaybeCode.get();
Expected<unsigned> MaybeRecCode =
SLocEntryCursor.readRecord(Code, Record, &Blob);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return nullptr;
}
unsigned RecCode = MaybeRecCode.get();
if (RecCode == SM_SLOC_BUFFER_BLOB_COMPRESSED) {
if (!llvm::zlib::isAvailable()) {
Error("zlib is not available");
return nullptr;
}
SmallString<0> Uncompressed;
if (llvm::Error E =
llvm::zlib::uncompress(Blob, Uncompressed, Record[0])) {
Error("could not decompress embedded file contents: " +
llvm::toString(std::move(E)));
return nullptr;
}
return llvm::MemoryBuffer::getMemBufferCopy(Uncompressed, Name);
} else if (RecCode == SM_SLOC_BUFFER_BLOB) {
return llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name, true);
} else {
Error("AST record has invalid code");
return nullptr;
}
};
ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second;
if (llvm::Error Err = F->SLocEntryCursor.JumpToBit(
F->SLocEntryOffsetsBase +
F->SLocEntryOffsets[ID - F->SLocEntryBaseID])) {
Error(std::move(Err));
return true;
}
BitstreamCursor &SLocEntryCursor = F->SLocEntryCursor;
SourceLocation::UIntTy BaseOffset = F->SLocEntryBaseOffset;
++NumSLocEntriesRead;
Expected<llvm::BitstreamEntry> MaybeEntry = SLocEntryCursor.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return true;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind != llvm::BitstreamEntry::Record) {
Error("incorrectly-formatted source location entry in AST file");
return true;
}
RecordData Record;
StringRef Blob;
Expected<unsigned> MaybeSLOC =
SLocEntryCursor.readRecord(Entry.ID, Record, &Blob);
if (!MaybeSLOC) {
Error(MaybeSLOC.takeError());
return true;
}
switch (MaybeSLOC.get()) {
default:
Error("incorrectly-formatted source location entry in AST file");
return true;
case SM_SLOC_FILE_ENTRY: {
// We will detect whether a file changed and return 'Failure' for it, but
// we will also try to fail gracefully by setting up the SLocEntry.
unsigned InputID = Record[4];
InputFile IF = getInputFile(*F, InputID);
Optional<FileEntryRef> File = IF.getFile();
bool OverriddenBuffer = IF.isOverridden();
// Note that we only check if a File was returned. If it was out-of-date
// we have complained but we will continue creating a FileID to recover
// gracefully.
if (!File)
return true;
SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]);
if (IncludeLoc.isInvalid() && F->Kind != MK_MainFile) {
// This is the module's main file.
IncludeLoc = getImportLocation(F);
}
SrcMgr::CharacteristicKind
FileCharacter = (SrcMgr::CharacteristicKind)Record[2];
FileID FID = SourceMgr.createFileID(*File, IncludeLoc, FileCharacter, ID,
BaseOffset + Record[0]);
SrcMgr::FileInfo &FileInfo =
const_cast<SrcMgr::FileInfo&>(SourceMgr.getSLocEntry(FID).getFile());
FileInfo.NumCreatedFIDs = Record[5];
if (Record[3])
FileInfo.setHasLineDirectives();
unsigned NumFileDecls = Record[7];
if (NumFileDecls && ContextObj) {
const DeclID *FirstDecl = F->FileSortedDecls + Record[6];
assert(F->FileSortedDecls && "FILE_SORTED_DECLS not encountered yet ?");
FileDeclIDs[FID] = FileDeclsInfo(F, llvm::makeArrayRef(FirstDecl,
NumFileDecls));
}
const SrcMgr::ContentCache &ContentCache =
SourceMgr.getOrCreateContentCache(*File, isSystem(FileCharacter));
if (OverriddenBuffer && !ContentCache.BufferOverridden &&
ContentCache.ContentsEntry == ContentCache.OrigEntry &&
!ContentCache.getBufferIfLoaded()) {
auto Buffer = ReadBuffer(SLocEntryCursor, File->getName());
if (!Buffer)
return true;
SourceMgr.overrideFileContents(*File, std::move(Buffer));
}
break;
}
case SM_SLOC_BUFFER_ENTRY: {
const char *Name = Blob.data();
unsigned Offset = Record[0];
SrcMgr::CharacteristicKind
FileCharacter = (SrcMgr::CharacteristicKind)Record[2];
SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]);
if (IncludeLoc.isInvalid() && F->isModule()) {
IncludeLoc = getImportLocation(F);
}
auto Buffer = ReadBuffer(SLocEntryCursor, Name);
if (!Buffer)
return true;
SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID,
BaseOffset + Offset, IncludeLoc);
break;
}
case SM_SLOC_EXPANSION_ENTRY: {
SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1]);
SourceMgr.createExpansionLoc(SpellingLoc,
ReadSourceLocation(*F, Record[2]),
ReadSourceLocation(*F, Record[3]),
Record[5],
Record[4],
ID,
BaseOffset + Record[0]);
break;
}
}
return false;
}
std::pair<SourceLocation, StringRef> ASTReader::getModuleImportLoc(int ID) {
if (ID == 0)
return std::make_pair(SourceLocation(), "");
if (unsigned(-ID) - 2 >= getTotalNumSLocs() || ID > 0) {
Error("source location entry ID out-of-range for AST file");
return std::make_pair(SourceLocation(), "");
}
// Find which module file this entry lands in.
ModuleFile *M = GlobalSLocEntryMap.find(-ID)->second;
if (!M->isModule())
return std::make_pair(SourceLocation(), "");
// FIXME: Can we map this down to a particular submodule? That would be
// ideal.
return std::make_pair(M->ImportLoc, StringRef(M->ModuleName));
}
/// Find the location where the module F is imported.
SourceLocation ASTReader::getImportLocation(ModuleFile *F) {
if (F->ImportLoc.isValid())
return F->ImportLoc;
// Otherwise we have a PCH. It's considered to be "imported" at the first
// location of its includer.
if (F->ImportedBy.empty() || !F->ImportedBy[0]) {
// Main file is the importer.
assert(SourceMgr.getMainFileID().isValid() && "missing main file");
return SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
}
return F->ImportedBy[0]->FirstLoc;
}
/// Enter a subblock of the specified BlockID with the specified cursor. Read
/// the abbreviations that are at the top of the block and then leave the cursor
/// pointing into the block.
bool ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor, unsigned BlockID,
uint64_t *StartOfBlockOffset) {
if (llvm::Error Err = Cursor.EnterSubBlock(BlockID)) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return true;
}
if (StartOfBlockOffset)
*StartOfBlockOffset = Cursor.GetCurrentBitNo();
while (true) {
uint64_t Offset = Cursor.GetCurrentBitNo();
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
// FIXME this drops errors on the floor.
consumeError(MaybeCode.takeError());
return true;
}
unsigned Code = MaybeCode.get();
// We expect all abbrevs to be at the start of the block.
if (Code != llvm::bitc::DEFINE_ABBREV) {
if (llvm::Error Err = Cursor.JumpToBit(Offset)) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return true;
}
return false;
}
if (llvm::Error Err = Cursor.ReadAbbrevRecord()) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return true;
}
}
}
Token ASTReader::ReadToken(ModuleFile &F, const RecordDataImpl &Record,
unsigned &Idx) {
Token Tok;
Tok.startToken();
Tok.setLocation(ReadSourceLocation(F, Record, Idx));
Tok.setLength(Record[Idx++]);
if (IdentifierInfo *II = getLocalIdentifier(F, Record[Idx++]))
Tok.setIdentifierInfo(II);
Tok.setKind((tok::TokenKind)Record[Idx++]);
Tok.setFlag((Token::TokenFlags)Record[Idx++]);
return Tok;
}
MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) {
BitstreamCursor &Stream = F.MacroCursor;
// Keep track of where we are in the stream, then jump back there
// after reading this macro.
SavedStreamPosition SavedPosition(Stream);
if (llvm::Error Err = Stream.JumpToBit(Offset)) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return nullptr;
}
RecordData Record;
SmallVector<IdentifierInfo*, 16> MacroParams;
MacroInfo *Macro = nullptr;
while (true) {
// Advance to the next record, but if we get to the end of the block, don't
// pop it (removing all the abbreviations from the cursor) since we want to
// be able to reseek within the block and read entries.
unsigned Flags = BitstreamCursor::AF_DontPopBlockAtEnd;
Expected<llvm::BitstreamEntry> MaybeEntry =
Stream.advanceSkippingSubblocks(Flags);
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Macro;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return Macro;
case llvm::BitstreamEntry::EndBlock:
return Macro;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read a record.
Record.clear();
PreprocessorRecordTypes RecType;
if (Expected<unsigned> MaybeRecType = Stream.readRecord(Entry.ID, Record))
RecType = (PreprocessorRecordTypes)MaybeRecType.get();
else {
Error(MaybeRecType.takeError());
return Macro;
}
switch (RecType) {
case PP_MODULE_MACRO:
case PP_MACRO_DIRECTIVE_HISTORY:
return Macro;
case PP_MACRO_OBJECT_LIKE:
case PP_MACRO_FUNCTION_LIKE: {
// If we already have a macro, that means that we've hit the end
// of the definition of the macro we were looking for. We're
// done.
if (Macro)
return Macro;
unsigned NextIndex = 1; // Skip identifier ID.
SourceLocation Loc = ReadSourceLocation(F, Record, NextIndex);
MacroInfo *MI = PP.AllocateMacroInfo(Loc);
MI->setDefinitionEndLoc(ReadSourceLocation(F, Record, NextIndex));
MI->setIsUsed(Record[NextIndex++]);
MI->setUsedForHeaderGuard(Record[NextIndex++]);
if (RecType == PP_MACRO_FUNCTION_LIKE) {
// Decode function-like macro info.
bool isC99VarArgs = Record[NextIndex++];
bool isGNUVarArgs = Record[NextIndex++];
bool hasCommaPasting = Record[NextIndex++];
MacroParams.clear();
unsigned NumArgs = Record[NextIndex++];
for (unsigned i = 0; i != NumArgs; ++i)
MacroParams.push_back(getLocalIdentifier(F, Record[NextIndex++]));
// Install function-like macro info.
MI->setIsFunctionLike();
if (isC99VarArgs) MI->setIsC99Varargs();
if (isGNUVarArgs) MI->setIsGNUVarargs();
if (hasCommaPasting) MI->setHasCommaPasting();
MI->setParameterList(MacroParams, PP.getPreprocessorAllocator());
}
// Remember that we saw this macro last so that we add the tokens that
// form its body to it.
Macro = MI;
if (NextIndex + 1 == Record.size() && PP.getPreprocessingRecord() &&
Record[NextIndex]) {
// We have a macro definition. Register the association.
PreprocessedEntityID
GlobalID = getGlobalPreprocessedEntityID(F, Record[NextIndex]);
PreprocessingRecord &PPRec = *PP.getPreprocessingRecord();
PreprocessingRecord::PPEntityID PPID =
PPRec.getPPEntityID(GlobalID - 1, /*isLoaded=*/true);
MacroDefinitionRecord *PPDef = cast_or_null<MacroDefinitionRecord>(
PPRec.getPreprocessedEntity(PPID));
if (PPDef)
PPRec.RegisterMacroDefinition(Macro, PPDef);
}
++NumMacrosRead;
break;
}
case PP_TOKEN: {
// If we see a TOKEN before a PP_MACRO_*, then the file is
// erroneous; just pretend we didn't see this.
if (!Macro) break;
unsigned Idx = 0;
Token Tok = ReadToken(F, Record, Idx);
Macro->AddTokenToBody(Tok);
break;
}
}
}
}
PreprocessedEntityID
ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M,
unsigned LocalID) const {
if (!M.ModuleOffsetMap.empty())
ReadModuleOffsetMap(M);
ContinuousRangeMap<uint32_t, int, 2>::const_iterator
I = M.PreprocessedEntityRemap.find(LocalID - NUM_PREDEF_PP_ENTITY_IDS);
assert(I != M.PreprocessedEntityRemap.end()
&& "Invalid index into preprocessed entity index remap");
return LocalID + I->second;
}
unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
return llvm::hash_combine(ikey.Size, ikey.ModTime);
}
HeaderFileInfoTrait::internal_key_type
HeaderFileInfoTrait::GetInternalKey(const FileEntry *FE) {
internal_key_type ikey = {FE->getSize(),
M.HasTimestamps ? FE->getModificationTime() : 0,
FE->getName(), /*Imported*/ false};
return ikey;
}
bool HeaderFileInfoTrait::EqualKey(internal_key_ref a, internal_key_ref b) {
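// Keys can only match if the sizes agree and, when both modification times
// are known (non-zero), the times agree as well.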
if (a.Size != b.Size || (a.ModTime && b.ModTime && a.ModTime != b.ModTime))
return false;
if (llvm::sys::path::is_absolute(a.Filename) && a.Filename == b.Filename)
return true;
// Determine whether the actual files are equivalent.
FileManager &FileMgr = Reader.getFileManager();
auto GetFile = [&](const internal_key_type &Key) -> const FileEntry* {
if (!Key.Imported) {
if (auto File = FileMgr.getFile(Key.Filename))
return *File;
return nullptr;
}
std::string Resolved = std::string(Key.Filename);
Reader.ResolveImportedPath(M, Resolved);
if (auto File = FileMgr.getFile(Resolved))
return *File;
return nullptr;
};
const FileEntry *FEA = GetFile(a);
const FileEntry *FEB = GetFile(b);
return FEA && FEA == FEB;
}
std::pair<unsigned, unsigned>
HeaderFileInfoTrait::ReadKeyDataLength(const unsigned char*& d) {
return readULEBKeyDataLength(d);
}
HeaderFileInfoTrait::internal_key_type
HeaderFileInfoTrait::ReadKey(const unsigned char *d, unsigned) {
using namespace llvm::support;
internal_key_type ikey;
ikey.Size = off_t(endian::readNext<uint64_t, little, unaligned>(d));
ikey.ModTime = time_t(endian::readNext<uint64_t, little, unaligned>(d));
ikey.Filename = (const char *)d;
ikey.Imported = true;
return ikey;
}
HeaderFileInfoTrait::data_type
HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
unsigned DataLen) {
using namespace llvm::support;
const unsigned char *End = d + DataLen;
HeaderFileInfo HFI;
unsigned Flags = *d++;
// FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
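// Flags layout: bit 5 = isImport, bit 4 = isPragmaOnce, bits 1-3 = DirInfo,
// bit 0 = IndexHeaderMapHeader.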
HFI.isImport |= (Flags >> 5) & 0x01;
HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
HFI.DirInfo = (Flags >> 1) & 0x07;
HFI.IndexHeaderMapHeader = Flags & 0x01;
// FIXME: Find a better way to handle this. Maybe just store a
// "has been included" flag?
HFI.NumIncludes = std::max(endian::readNext<uint16_t, little, unaligned>(d),
HFI.NumIncludes);
HFI.ControllingMacroID = Reader.getGlobalIdentifierID(
M, endian::readNext<uint32_t, little, unaligned>(d));
if (unsigned FrameworkOffset =
endian::readNext<uint32_t, little, unaligned>(d)) {
// The framework offset is 1 greater than the actual offset,
// since 0 is used as an indicator for "no framework name".
StringRef FrameworkName(FrameworkStrings + FrameworkOffset - 1);
HFI.Framework = HS->getUniqueFrameworkName(FrameworkName);
}
assert((End - d) % 4 == 0 &&
"Wrong data length in HeaderFileInfo deserialization");
while (d != End) {
uint32_t LocalSMID = endian::readNext<uint32_t, little, unaligned>(d);
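// The low two bits encode the header role; the remaining bits are the local
// submodule ID.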
auto HeaderRole = static_cast<ModuleMap::ModuleHeaderRole>(LocalSMID & 3);
LocalSMID >>= 2;
// This header is part of a module. Associate it with the module to enable
// implicit module import.
SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
Module *Mod = Reader.getSubmodule(GlobalSMID);
FileManager &FileMgr = Reader.getFileManager();
ModuleMap &ModMap =
Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap();
std::string Filename = std::string(key.Filename);
if (key.Imported)
Reader.ResolveImportedPath(M, Filename);
// FIXME: NameAsWritten
Module::Header H = {std::string(key.Filename), "",
*FileMgr.getFile(Filename)};
ModMap.addHeader(Mod, H, HeaderRole, /*Imported*/true);
HFI.isModuleHeader |= !(HeaderRole & ModuleMap::TextualHeader);
}
// This HeaderFileInfo was externally loaded.
HFI.External = true;
HFI.IsValid = true;
return HFI;
}
void ASTReader::addPendingMacro(IdentifierInfo *II, ModuleFile *M,
uint32_t MacroDirectivesOffset) {
assert(NumCurrentElementsDeserializing > 0 && "Missing deserialization guard");
PendingMacroIDs[II].push_back(PendingMacroInfo(M, MacroDirectivesOffset));
}
void ASTReader::ReadDefinedMacros() {
// Note that we are loading defined macros.
Deserializing Macros(this);
for (ModuleFile &I : llvm::reverse(ModuleMgr)) {
BitstreamCursor &MacroCursor = I.MacroCursor;
// If there was no preprocessor block, skip this file.
if (MacroCursor.getBitcodeBytes().empty())
continue;
BitstreamCursor Cursor = MacroCursor;
if (llvm::Error Err = Cursor.JumpToBit(I.MacroStartOffset)) {
Error(std::move(Err));
return;
}
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeE = Cursor.advanceSkippingSubblocks();
if (!MaybeE) {
Error(MaybeE.takeError());
return;
}
llvm::BitstreamEntry E = MaybeE.get();
switch (E.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return;
case llvm::BitstreamEntry::EndBlock:
goto NextCursor;
case llvm::BitstreamEntry::Record: {
Record.clear();
Expected<unsigned> MaybeRecord = Cursor.readRecord(E.ID, Record);
if (!MaybeRecord) {
Error(MaybeRecord.takeError());
return;
}
switch (MaybeRecord.get()) {
default: // Default behavior: ignore.
break;
case PP_MACRO_OBJECT_LIKE:
case PP_MACRO_FUNCTION_LIKE: {
IdentifierInfo *II = getLocalIdentifier(I, Record[0]);
if (II->isOutOfDate())
updateOutOfDateIdentifier(*II);
break;
}
case PP_TOKEN:
// Ignore tokens.
break;
}
break;
}
}
}
NextCursor: ;
}
}
namespace {
/// Visitor class used to look up identifiers in an AST file.
class IdentifierLookupVisitor {
StringRef Name;
unsigned NameHash;
unsigned PriorGeneration;
unsigned &NumIdentifierLookups;
unsigned &NumIdentifierLookupHits;
IdentifierInfo *Found = nullptr;
public:
IdentifierLookupVisitor(StringRef Name, unsigned PriorGeneration,
unsigned &NumIdentifierLookups,
unsigned &NumIdentifierLookupHits)
: Name(Name), NameHash(ASTIdentifierLookupTrait::ComputeHash(Name)),
PriorGeneration(PriorGeneration),
NumIdentifierLookups(NumIdentifierLookups),
NumIdentifierLookupHits(NumIdentifierLookupHits) {}
bool operator()(ModuleFile &M) {
// If we've already searched this module file, skip it now.
if (M.Generation <= PriorGeneration)
return true;
ASTIdentifierLookupTable *IdTable
= (ASTIdentifierLookupTable *)M.IdentifierLookupTable;
if (!IdTable)
return false;
ASTIdentifierLookupTrait Trait(IdTable->getInfoObj().getReader(), M,
Found);
++NumIdentifierLookups;
ASTIdentifierLookupTable::iterator Pos =
IdTable->find_hashed(Name, NameHash, &Trait);
if (Pos == IdTable->end())
return false;
// Dereferencing the iterator has the effect of building the
// IdentifierInfo node and populating it with the various
// declarations it needs.
++NumIdentifierLookupHits;
Found = *Pos;
return true;
}
// Retrieve the identifier info found within the module
// files.
IdentifierInfo *getIdentifierInfo() const { return Found; }
};
} // namespace
void ASTReader::updateOutOfDateIdentifier(IdentifierInfo &II) {
// Note that we are loading an identifier.
Deserializing AnIdentifier(this);
unsigned PriorGeneration = 0;
if (getContext().getLangOpts().Modules)
PriorGeneration = IdentifierGeneration[&II];
// If there is a global index, look there first to determine which modules
// provably do not have any results for this identifier.
GlobalModuleIndex::HitSet Hits;
GlobalModuleIndex::HitSet *HitsPtr = nullptr;
if (!loadGlobalIndex()) {
if (GlobalIndex->lookupIdentifier(II.getName(), Hits)) {
HitsPtr = &Hits;
}
}
IdentifierLookupVisitor Visitor(II.getName(), PriorGeneration,
NumIdentifierLookups,
NumIdentifierLookupHits);
ModuleMgr.visit(Visitor, HitsPtr);
markIdentifierUpToDate(&II);
}
void ASTReader::markIdentifierUpToDate(IdentifierInfo *II) {
if (!II)
return;
II->setOutOfDate(false);
// Update the generation for this identifier.
if (getContext().getLangOpts().Modules)
IdentifierGeneration[II] = getGeneration();
}
void ASTReader::resolvePendingMacro(IdentifierInfo *II,
const PendingMacroInfo &PMInfo) {
ModuleFile &M = *PMInfo.M;
BitstreamCursor &Cursor = M.MacroCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err =
Cursor.JumpToBit(M.MacroOffsetsBase + PMInfo.MacroDirectivesOffset)) {
Error(std::move(Err));
return;
}
struct ModuleMacroRecord {
SubmoduleID SubModID;
MacroInfo *MI;
SmallVector<SubmoduleID, 8> Overrides;
};
llvm::SmallVector<ModuleMacroRecord, 8> ModuleMacros;
// We expect to see a sequence of PP_MODULE_MACRO records listing exported
// macros, followed by a PP_MACRO_DIRECTIVE_HISTORY record with the complete
// macro history.
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
Cursor.advance(BitstreamCursor::AF_DontPopBlockAtEnd);
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind != llvm::BitstreamEntry::Record) {
Error("malformed block record in AST file");
return;
}
Record.clear();
Expected<unsigned> MaybePP = Cursor.readRecord(Entry.ID, Record);
if (!MaybePP) {
Error(MaybePP.takeError());
return;
}
switch ((PreprocessorRecordTypes)MaybePP.get()) {
case PP_MACRO_DIRECTIVE_HISTORY:
break;
case PP_MODULE_MACRO: {
ModuleMacros.push_back(ModuleMacroRecord());
auto &Info = ModuleMacros.back();
Info.SubModID = getGlobalSubmoduleID(M, Record[0]);
Info.MI = getMacro(getGlobalMacroID(M, Record[1]));
for (int I = 2, N = Record.size(); I != N; ++I)
Info.Overrides.push_back(getGlobalSubmoduleID(M, Record[I]));
continue;
}
default:
Error("malformed block record in AST file");
return;
}
// We found the macro directive history; that's the last record
// for this macro.
break;
}
// Module macros are listed in reverse dependency order.
{
std::reverse(ModuleMacros.begin(), ModuleMacros.end());
llvm::SmallVector<ModuleMacro*, 8> Overrides;
for (auto &MMR : ModuleMacros) {
Overrides.clear();
for (unsigned ModID : MMR.Overrides) {
Module *Mod = getSubmodule(ModID);
auto *Macro = PP.getModuleMacro(Mod, II);
assert(Macro && "missing definition for overridden macro");
Overrides.push_back(Macro);
}
bool Inserted = false;
Module *Owner = getSubmodule(MMR.SubModID);
PP.addModuleMacro(Owner, II, MMR.MI, Overrides, Inserted);
}
}
// Don't read the directive history for a module; we don't have anywhere
// to put it.
if (M.isModule())
return;
// Deserialize the macro directives history in reverse source-order.
MacroDirective *Latest = nullptr, *Earliest = nullptr;
unsigned Idx = 0, N = Record.size();
while (Idx < N) {
MacroDirective *MD = nullptr;
SourceLocation Loc = ReadSourceLocation(M, Record, Idx);
MacroDirective::Kind K = (MacroDirective::Kind)Record[Idx++];
switch (K) {
case MacroDirective::MD_Define: {
MacroInfo *MI = getMacro(getGlobalMacroID(M, Record[Idx++]));
MD = PP.AllocateDefMacroDirective(MI, Loc);
break;
}
case MacroDirective::MD_Undefine:
MD = PP.AllocateUndefMacroDirective(Loc);
break;
case MacroDirective::MD_Visibility:
bool isPublic = Record[Idx++];
MD = PP.AllocateVisibilityMacroDirective(Loc, isPublic);
break;
}
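// Directives were emitted in reverse source order, so the first one read is
// the latest; each subsequent directive becomes the previous of the one read
// before it.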
if (!Latest)
Latest = MD;
if (Earliest)
Earliest->setPrevious(MD);
Earliest = MD;
}
if (Latest)
PP.setLoadedMacroDirective(II, Earliest, Latest);
}
bool ASTReader::shouldDisableValidationForFile(
const serialization::ModuleFile &M) const {
if (DisableValidationKind == DisableValidationForModuleKind::None)
return false;
// If a PCH is loaded and validation is disabled for PCH then disable
// validation for the PCH and the modules it loads.
ModuleKind K = CurrentDeserializingModuleKind.getValueOr(M.Kind);
switch (K) {
case MK_MainFile:
case MK_Preamble:
case MK_PCH:
return bool(DisableValidationKind & DisableValidationForModuleKind::PCH);
case MK_ImplicitModule:
case MK_ExplicitModule:
case MK_PrebuiltModule:
return bool(DisableValidationKind & DisableValidationForModuleKind::Module);
}
return false;
}
ASTReader::InputFileInfo
ASTReader::readInputFileInfo(ModuleFile &F, unsigned ID) {
// Go find this input file.
BitstreamCursor &Cursor = F.InputFilesCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(F.InputFileOffsets[ID - 1])) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
}
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
// FIXME this drops errors on the floor.
consumeError(MaybeCode.takeError());
}
unsigned Code = MaybeCode.get();
RecordData Record;
StringRef Blob;
if (Expected<unsigned> Maybe = Cursor.readRecord(Code, Record, &Blob))
assert(static_cast<InputFileRecordTypes>(Maybe.get()) == INPUT_FILE &&
"invalid record type for input file");
else {
// FIXME this drops errors on the floor.
consumeError(Maybe.takeError());
}
assert(Record[0] == ID && "Bogus stored ID or offset");
InputFileInfo R;
R.StoredSize = static_cast<off_t>(Record[1]);
R.StoredTime = static_cast<time_t>(Record[2]);
R.Overridden = static_cast<bool>(Record[3]);
R.Transient = static_cast<bool>(Record[4]);
R.TopLevelModuleMap = static_cast<bool>(Record[5]);
R.Filename = std::string(Blob);
ResolveImportedPath(F, R.Filename);
Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
if (!MaybeEntry) // FIXME this drops errors on the floor.
consumeError(MaybeEntry.takeError());
llvm::BitstreamEntry Entry = MaybeEntry.get();
assert(Entry.Kind == llvm::BitstreamEntry::Record &&
"expected record type for input file hash");
Record.clear();
if (Expected<unsigned> Maybe = Cursor.readRecord(Entry.ID, Record))
assert(static_cast<InputFileRecordTypes>(Maybe.get()) == INPUT_FILE_HASH &&
"invalid record type for input file hash");
else {
// FIXME this drops errors on the floor.
consumeError(Maybe.takeError());
}
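// The 64-bit content hash is stored as two 32-bit words, low word first.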
R.ContentHash = (static_cast<uint64_t>(Record[1]) << 32) |
static_cast<uint64_t>(Record[0]);
return R;
}
static unsigned moduleKindForDiagnostic(ModuleKind Kind);
InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
// If this ID is bogus, just return an empty input file.
if (ID == 0 || ID > F.InputFilesLoaded.size())
return InputFile();
// If we've already loaded this input file, return it.
if (F.InputFilesLoaded[ID-1].getFile())
return F.InputFilesLoaded[ID-1];
if (F.InputFilesLoaded[ID-1].isNotFound())
return InputFile();
// Go find this input file.
BitstreamCursor &Cursor = F.InputFilesCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(F.InputFileOffsets[ID - 1])) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
}
InputFileInfo FI = readInputFileInfo(F, ID);
off_t StoredSize = FI.StoredSize;
time_t StoredTime = FI.StoredTime;
bool Overridden = FI.Overridden;
bool Transient = FI.Transient;
StringRef Filename = FI.Filename;
uint64_t StoredContentHash = FI.ContentHash;
OptionalFileEntryRefDegradesToFileEntryPtr File =
expectedToOptional(FileMgr.getFileRef(Filename, /*OpenFile=*/false));
// If we didn't find the file, resolve it relative to the
// original directory from which this AST file was created.
if (!File && !F.OriginalDir.empty() && !F.BaseDirectory.empty() &&
F.OriginalDir != F.BaseDirectory) {
std::string Resolved = resolveFileRelativeToOriginalDir(
std::string(Filename), F.OriginalDir, F.BaseDirectory);
if (!Resolved.empty())
File = expectedToOptional(FileMgr.getFileRef(Resolved));
}
// For an overridden file, create a virtual file with the stored
// size/timestamp.
if ((Overridden || Transient) && !File)
File = FileMgr.getVirtualFileRef(Filename, StoredSize, StoredTime);
if (!File) {
if (Complain) {
std::string ErrorStr = "could not find file '";
ErrorStr += Filename;
ErrorStr += "' referenced by AST file '";
ErrorStr += F.FileName;
ErrorStr += "'";
Error(ErrorStr);
}
// Record that we didn't find the file.
F.InputFilesLoaded[ID-1] = InputFile::getNotFound();
return InputFile();
}
// Check if there was a request to override the contents of the file
// that was part of the precompiled header. Overriding such a file
// can lead to problems when lexing using the source locations from the
// PCH.
SourceManager &SM = getSourceManager();
// FIXME: Reject if the overrides are different.
if ((!Overridden && !Transient) && SM.isFileOverridden(File)) {
if (Complain)
Error(diag::err_fe_pch_file_overridden, Filename);
// After emitting the diagnostic, bypass the overriding file to recover
// (this creates a separate FileEntry).
File = SM.bypassFileContentsOverride(*File);
if (!File) {
F.InputFilesLoaded[ID - 1] = InputFile::getNotFound();
return InputFile();
}
}
enum ModificationType {
Size,
ModTime,
Content,
None,
};
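// Determine whether the stored size, timestamp, or (when requested) content
// hash no longer matches the file on disk, and if so, which kind of change
// was detected.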
auto HasInputFileChanged = [&]() {
if (StoredSize != File->getSize())
return ModificationType::Size;
if (!shouldDisableValidationForFile(F) && StoredTime &&
StoredTime != File->getModificationTime()) {
// In case the modification time changes but not the content,
// accept the cached file as legit.
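// A stored hash equal to hash_code(-1) is treated as "no content hash
// recorded", in which case we fall back to reporting a timestamp mismatch.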
if (ValidateASTInputFilesContent &&
StoredContentHash != static_cast<uint64_t>(llvm::hash_code(-1))) {
auto MemBuffOrError = FileMgr.getBufferForFile(File);
if (!MemBuffOrError) {
if (!Complain)
return ModificationType::ModTime;
std::string ErrorStr = "could not get buffer for file '";
ErrorStr += File->getName();
ErrorStr += "'";
Error(ErrorStr);
return ModificationType::ModTime;
}
auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer());
if (StoredContentHash == static_cast<uint64_t>(ContentHash))
return ModificationType::None;
return ModificationType::Content;
}
return ModificationType::ModTime;
}
return ModificationType::None;
};
bool IsOutOfDate = false;
auto FileChange = HasInputFileChanged();
// For an overridden file, there is nothing to validate.
if (!Overridden && FileChange != ModificationType::None) {
if (Complain && !Diags.isDiagnosticInFlight()) {
// Build a list of the PCH imports that got us here (in reverse).
SmallVector<ModuleFile *, 4> ImportStack(1, &F);
while (!ImportStack.back()->ImportedBy.empty())
ImportStack.push_back(ImportStack.back()->ImportedBy[0]);
// The top-level PCH is stale.
StringRef TopLevelPCHName(ImportStack.back()->FileName);
Diag(diag::err_fe_ast_file_modified)
<< Filename << moduleKindForDiagnostic(ImportStack.back()->Kind)
<< TopLevelPCHName << FileChange;
// Print the import stack.
if (ImportStack.size() > 1) {
Diag(diag::note_pch_required_by)
<< Filename << ImportStack[0]->FileName;
for (unsigned I = 1; I < ImportStack.size(); ++I)
Diag(diag::note_pch_required_by)
<< ImportStack[I-1]->FileName << ImportStack[I]->FileName;
}
Diag(diag::note_pch_rebuild_required) << TopLevelPCHName;
}
IsOutOfDate = true;
}
// FIXME: If the file is overridden and we've already opened it,
// issue an error (or split it into a separate FileEntry).
InputFile IF = InputFile(*File, Overridden || Transient, IsOutOfDate);
// Note that we've loaded this input file.
F.InputFilesLoaded[ID-1] = IF;
return IF;
}
/// If we are loading a relocatable PCH or module file, and the filename
/// is not an absolute path, add the system or module root to the beginning of
/// the file name.
void ASTReader::ResolveImportedPath(ModuleFile &M, std::string &Filename) {
// Resolve relative to the base directory, if we have one.
if (!M.BaseDirectory.empty())
return ResolveImportedPath(Filename, M.BaseDirectory);
}
void ASTReader::ResolveImportedPath(std::string &Filename, StringRef Prefix) {
if (Filename.empty() || llvm::sys::path::is_absolute(Filename))
return;
SmallString<128> Buffer;
llvm::sys::path::append(Buffer, Prefix, Filename);
Filename.assign(Buffer.begin(), Buffer.end());
}
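/// Whether the given load result is diagnosed rather than silently tolerated:
/// hard failures and errors always are; the recoverable kinds only when the
/// caller lacks the corresponding ARR_* capability.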
static bool isDiagnosedResult(ASTReader::ASTReadResult ARR, unsigned Caps) {
switch (ARR) {
case ASTReader::Failure: return true;
case ASTReader::Missing: return !(Caps & ASTReader::ARR_Missing);
case ASTReader::OutOfDate: return !(Caps & ASTReader::ARR_OutOfDate);
case ASTReader::VersionMismatch: return !(Caps & ASTReader::ARR_VersionMismatch);
case ASTReader::ConfigurationMismatch:
return !(Caps & ASTReader::ARR_ConfigurationMismatch);
case ASTReader::HadErrors: return true;
case ASTReader::Success: return false;
}
llvm_unreachable("unknown ASTReadResult");
}
ASTReader::ASTReadResult ASTReader::ReadOptionsBlock(
BitstreamCursor &Stream, unsigned ClientLoadCapabilities,
bool AllowCompatibleConfigurationMismatch, ASTReaderListener &Listener,
std::string &SuggestedPredefines) {
if (llvm::Error Err = Stream.EnterSubBlock(OPTIONS_BLOCK_ID)) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return Failure;
}
// Read all of the records in the options block.
RecordData Record;
ASTReadResult Result = Success;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
// FIXME this drops errors on the floor.
consumeError(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
case llvm::BitstreamEntry::SubBlock:
return Failure;
case llvm::BitstreamEntry::EndBlock:
return Result;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read and process a record.
Record.clear();
Expected<unsigned> MaybeRecordType = Stream.readRecord(Entry.ID, Record);
if (!MaybeRecordType) {
// FIXME this drops errors on the floor.
consumeError(MaybeRecordType.takeError());
return Failure;
}
switch ((OptionsRecordTypes)MaybeRecordType.get()) {
case LANGUAGE_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (ParseLanguageOptions(Record, Complain, Listener,
AllowCompatibleConfigurationMismatch))
Result = ConfigurationMismatch;
break;
}
case TARGET_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (ParseTargetOptions(Record, Complain, Listener,
AllowCompatibleConfigurationMismatch))
Result = ConfigurationMismatch;
break;
}
case FILE_SYSTEM_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (!AllowCompatibleConfigurationMismatch &&
ParseFileSystemOptions(Record, Complain, Listener))
Result = ConfigurationMismatch;
break;
}
case HEADER_SEARCH_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (!AllowCompatibleConfigurationMismatch &&
ParseHeaderSearchOptions(Record, Complain, Listener))
Result = ConfigurationMismatch;
break;
}
case PREPROCESSOR_OPTIONS:
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (!AllowCompatibleConfigurationMismatch &&
ParsePreprocessorOptions(Record, Complain, Listener,
SuggestedPredefines))
Result = ConfigurationMismatch;
break;
}
}
}
ASTReader::ASTReadResult
ASTReader::ReadControlBlock(ModuleFile &F,
SmallVectorImpl<ImportedModule> &Loaded,
const ModuleFile *ImportedBy,
unsigned ClientLoadCapabilities) {
BitstreamCursor &Stream = F.Stream;
if (llvm::Error Err = Stream.EnterSubBlock(CONTROL_BLOCK_ID)) {
Error(std::move(Err));
return Failure;
}
// Lambda to read the unhashed control block the first time it's called.
//
// For PCM files, the unhashed control block cannot be read until after the
// MODULE_NAME record. However, PCH files have no MODULE_NAME, and yet still
// need to look ahead before reading the IMPORTS record. For consistency,
// this block is always read somehow (see BitstreamEntry::EndBlock).
bool HasReadUnhashedControlBlock = false;
auto readUnhashedControlBlockOnce = [&]() {
if (!HasReadUnhashedControlBlock) {
HasReadUnhashedControlBlock = true;
if (ASTReadResult Result =
readUnhashedControlBlock(F, ImportedBy, ClientLoadCapabilities))
return Result;
}
return Success;
};
bool DisableValidation = shouldDisableValidationForFile(F);
// Read all of the records and blocks in the control block.
RecordData Record;
unsigned NumInputs = 0;
unsigned NumUserInputs = 0;
StringRef BaseDirectoryAsWritten;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return Failure;
case llvm::BitstreamEntry::EndBlock: {
// Validate the module before returning. This call catches an AST with
// no module name and no imports.
if (ASTReadResult Result = readUnhashedControlBlockOnce())
return Result;
// Validate input files.
const HeaderSearchOptions &HSOpts =
PP.getHeaderSearchInfo().getHeaderSearchOpts();
// All user input files reside at the index range [0, NumUserInputs), and
// system input files reside at [NumUserInputs, NumInputs). For explicitly
// loaded module files, ignore missing inputs.
if (!DisableValidation && F.Kind != MK_ExplicitModule &&
F.Kind != MK_PrebuiltModule) {
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
// If we are reading a module, we will create a verification timestamp,
// so we verify all input files. Otherwise, verify only user input
// files.
unsigned N = NumUserInputs;
if (ValidateSystemInputs ||
(HSOpts.ModulesValidateOncePerBuildSession &&
F.InputFilesValidationTimestamp <= HSOpts.BuildSessionTimestamp &&
F.Kind == MK_ImplicitModule))
N = NumInputs;
for (unsigned I = 0; I < N; ++I) {
InputFile IF = getInputFile(F, I+1, Complain);
if (!IF.getFile() || IF.isOutOfDate())
return OutOfDate;
}
}
if (Listener)
Listener->visitModuleFile(F.FileName, F.Kind);
if (Listener && Listener->needsInputFileVisitation()) {
unsigned N = Listener->needsSystemInputFileVisitation() ? NumInputs
: NumUserInputs;
for (unsigned I = 0; I < N; ++I) {
bool IsSystem = I >= NumUserInputs;
InputFileInfo FI = readInputFileInfo(F, I+1);
Listener->visitInputFile(FI.Filename, IsSystem, FI.Overridden,
F.Kind == MK_ExplicitModule ||
F.Kind == MK_PrebuiltModule);
}
}
return Success;
}
case llvm::BitstreamEntry::SubBlock:
switch (Entry.ID) {
case INPUT_FILES_BLOCK_ID:
F.InputFilesCursor = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
if (ReadBlockAbbrevs(F.InputFilesCursor, INPUT_FILES_BLOCK_ID)) {
Error("malformed block record in AST file");
return Failure;
}
continue;
case OPTIONS_BLOCK_ID:
// If we're reading the first module for this group, check its options
// are compatible with ours. For modules it imports, no further checking
// is required, because we checked them when we built it.
if (Listener && !ImportedBy) {
// Should we allow the configuration of the module file to differ from
// the configuration of the current translation unit in a compatible
// way?
//
// FIXME: Allow this for files explicitly specified with -include-pch.
bool AllowCompatibleConfigurationMismatch =
F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule;
ASTReadResult Result =
ReadOptionsBlock(Stream, ClientLoadCapabilities,
AllowCompatibleConfigurationMismatch, *Listener,
SuggestedPredefines);
if (Result == Failure) {
Error("malformed block record in AST file");
return Result;
}
if (DisableValidation ||
(AllowConfigurationMismatch && Result == ConfigurationMismatch))
Result = Success;
// If we can't load the module, exit early since we likely
// will rebuild the module anyway. The stream may be in the
// middle of a block.
if (Result != Success)
return Result;
} else if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
continue;
default:
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
continue;
}
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read and process a record.
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecordType =
Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecordType) {
Error(MaybeRecordType.takeError());
return Failure;
}
switch ((ControlRecordTypes)MaybeRecordType.get()) {
case METADATA: {
if (Record[0] != VERSION_MAJOR && !DisableValidation) {
if ((ClientLoadCapabilities & ARR_VersionMismatch) == 0)
Diag(Record[0] < VERSION_MAJOR? diag::err_pch_version_too_old
: diag::err_pch_version_too_new);
return VersionMismatch;
}
bool hasErrors = Record[6];
if (hasErrors && !DisableValidation) {
// If requested by the caller and the module hasn't already been read
// or compiled, mark modules on error as out-of-date.
if ((ClientLoadCapabilities & ARR_TreatModuleWithErrorsAsOutOfDate) &&
canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
return OutOfDate;
if (!AllowASTWithCompilerErrors) {
Diag(diag::err_pch_with_compiler_errors);
return HadErrors;
}
}
if (hasErrors) {
Diags.ErrorOccurred = true;
Diags.UncompilableErrorOccurred = true;
Diags.UnrecoverableErrorOccurred = true;
}
F.RelocatablePCH = Record[4];
// Relative paths in a relocatable PCH are relative to our sysroot.
if (F.RelocatablePCH)
F.BaseDirectory = isysroot.empty() ? "/" : isysroot;
F.HasTimestamps = Record[5];
const std::string &CurBranch = getClangFullRepositoryVersion();
StringRef ASTBranch = Blob;
if (StringRef(CurBranch) != ASTBranch && !DisableValidation) {
if ((ClientLoadCapabilities & ARR_VersionMismatch) == 0)
Diag(diag::err_pch_different_branch) << ASTBranch << CurBranch;
return VersionMismatch;
}
break;
}
case IMPORTS: {
// Validate the AST before processing any imports (otherwise, untangling
// them can be error-prone and expensive). A module will have a name and
// will already have been validated, but this catches the PCH case.
if (ASTReadResult Result = readUnhashedControlBlockOnce())
return Result;
// Load each of the imported PCH files.
unsigned Idx = 0, N = Record.size();
while (Idx < N) {
// Read information about the AST file.
ModuleKind ImportedKind = (ModuleKind)Record[Idx++];
// The import location will be the local one for now; we will adjust
// all import locations of module imports after the global source
// location info is set up, in ReadAST.
SourceLocation ImportLoc =
ReadUntranslatedSourceLocation(Record[Idx++]);
off_t StoredSize = (off_t)Record[Idx++];
time_t StoredModTime = (time_t)Record[Idx++];
auto FirstSignatureByte = Record.begin() + Idx;
ASTFileSignature StoredSignature = ASTFileSignature::create(
FirstSignatureByte, FirstSignatureByte + ASTFileSignature::size);
Idx += ASTFileSignature::size;
std::string ImportedName = ReadString(Record, Idx);
std::string ImportedFile;
// For prebuilt and explicit modules first consult the file map for
// an override. Note that here we don't search prebuilt module
// directories, only the explicit name to file mappings. Also, we will
// still verify the size/signature making sure it is essentially the
// same file but perhaps in a different location.
if (ImportedKind == MK_PrebuiltModule || ImportedKind == MK_ExplicitModule)
ImportedFile = PP.getHeaderSearchInfo().getPrebuiltModuleFileName(
ImportedName, /*FileMapOnly*/ true);
if (ImportedFile.empty())
// Use BaseDirectoryAsWritten to ensure we use the same path in the
// ModuleCache as when writing.
ImportedFile = ReadPath(BaseDirectoryAsWritten, Record, Idx);
else
SkipPath(Record, Idx);
// If our client can't cope with us being out of date, we can't cope with
// our dependency being missing.
unsigned Capabilities = ClientLoadCapabilities;
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
Capabilities &= ~ARR_Missing;
// Load the AST file.
auto Result = ReadASTCore(ImportedFile, ImportedKind, ImportLoc, &F,
Loaded, StoredSize, StoredModTime,
StoredSignature, Capabilities);
// If we diagnosed a problem, produce a backtrace.
bool recompilingFinalized =
Result == OutOfDate && (Capabilities & ARR_OutOfDate) &&
getModuleManager().getModuleCache().isPCMFinal(F.FileName);
if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized)
Diag(diag::note_module_file_imported_by)
<< F.FileName << !F.ModuleName.empty() << F.ModuleName;
if (recompilingFinalized)
Diag(diag::note_module_file_conflict);
switch (Result) {
case Failure: return Failure;
// If we have to ignore the dependency, we'll have to ignore this too.
case Missing:
case OutOfDate: return OutOfDate;
case VersionMismatch: return VersionMismatch;
case ConfigurationMismatch: return ConfigurationMismatch;
case HadErrors: return HadErrors;
case Success: break;
}
}
break;
}
case ORIGINAL_FILE:
F.OriginalSourceFileID = FileID::get(Record[0]);
F.ActualOriginalSourceFileName = std::string(Blob);
F.OriginalSourceFileName = F.ActualOriginalSourceFileName;
ResolveImportedPath(F, F.OriginalSourceFileName);
break;
case ORIGINAL_FILE_ID:
F.OriginalSourceFileID = FileID::get(Record[0]);
break;
case ORIGINAL_PCH_DIR:
F.OriginalDir = std::string(Blob);
break;
case MODULE_NAME:
F.ModuleName = std::string(Blob);
Diag(diag::remark_module_import)
<< F.ModuleName << F.FileName << (ImportedBy ? true : false)
<< (ImportedBy ? StringRef(ImportedBy->ModuleName) : StringRef());
if (Listener)
Listener->ReadModuleName(F.ModuleName);
// Validate the AST as soon as we have a name so we can exit early on
// failure.
if (ASTReadResult Result = readUnhashedControlBlockOnce())
return Result;
break;
case MODULE_DIRECTORY: {
// Save the BaseDirectory as written in the PCM for computing the module
// filename for the ModuleCache.
BaseDirectoryAsWritten = Blob;
assert(!F.ModuleName.empty() &&
"MODULE_DIRECTORY found before MODULE_NAME");
// If we've already loaded a module map file covering this module, we may
// have a better path for it (relative to the current build).
Module *M = PP.getHeaderSearchInfo().lookupModule(
F.ModuleName, /*AllowSearch*/ true,
/*AllowExtraModuleMapSearch*/ true);
if (M && M->Directory) {
// If we're implicitly loading a module, the base directory can't
// change between the build and use.
// Don't emit module relocation error if we have -fno-validate-pch
if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation &
DisableValidationForModuleKind::Module) &&
F.Kind != MK_ExplicitModule && F.Kind != MK_PrebuiltModule) {
auto BuildDir = PP.getFileManager().getDirectory(Blob);
if (!BuildDir || *BuildDir != M->Directory) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_imported_module_relocated)
<< F.ModuleName << Blob << M->Directory->getName();
return OutOfDate;
}
}
F.BaseDirectory = std::string(M->Directory->getName());
} else {
F.BaseDirectory = std::string(Blob);
}
break;
}
case MODULE_MAP_FILE:
if (ASTReadResult Result =
ReadModuleMapFileBlock(Record, F, ImportedBy, ClientLoadCapabilities))
return Result;
break;
case INPUT_FILE_OFFSETS:
NumInputs = Record[0];
NumUserInputs = Record[1];
F.InputFileOffsets =
(const llvm::support::unaligned_uint64_t *)Blob.data();
F.InputFilesLoaded.resize(NumInputs);
F.NumUserInputFiles = NumUserInputs;
break;
}
}
}
ASTReader::ASTReadResult
ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
BitstreamCursor &Stream = F.Stream;
if (llvm::Error Err = Stream.EnterSubBlock(AST_BLOCK_ID)) {
Error(std::move(Err));
return Failure;
}
F.ASTBlockStartOffset = Stream.GetCurrentBitNo();
// Read all of the records and blocks for the AST file.
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
Error("error at end of module block in AST file");
return Failure;
case llvm::BitstreamEntry::EndBlock:
// Outside of C++, we do not store a lookup map for the translation unit.
// Instead, mark it as needing a lookup map to be built if this module
// contains any declarations lexically within it (which it always does!).
// This usually has no cost, since we very rarely need the lookup map for
// the translation unit outside C++.
if (ASTContext *Ctx = ContextObj) {
DeclContext *DC = Ctx->getTranslationUnitDecl();
if (DC->hasExternalLexicalStorage() && !Ctx->getLangOpts().CPlusPlus)
DC->setMustBuildLookupTable();
}
return Success;
case llvm::BitstreamEntry::SubBlock:
switch (Entry.ID) {
case DECLTYPES_BLOCK_ID:
// We lazily load the decls block, but we want to set up the
// DeclsCursor cursor to point into it. Clone our current bitcode
// cursor to it, enter the block and read the abbrevs in that block.
// With the main cursor, we just skip over it.
F.DeclsCursor = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
if (ReadBlockAbbrevs(F.DeclsCursor, DECLTYPES_BLOCK_ID,
&F.DeclsBlockStartOffset)) {
Error("malformed block record in AST file");
return Failure;
}
break;
case PREPROCESSOR_BLOCK_ID:
F.MacroCursor = Stream;
if (!PP.getExternalSource())
PP.setExternalSource(this);
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
if (ReadBlockAbbrevs(F.MacroCursor, PREPROCESSOR_BLOCK_ID)) {
Error("malformed block record in AST file");
return Failure;
}
F.MacroStartOffset = F.MacroCursor.GetCurrentBitNo();
break;
case PREPROCESSOR_DETAIL_BLOCK_ID:
F.PreprocessorDetailCursor = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
if (ReadBlockAbbrevs(F.PreprocessorDetailCursor,
PREPROCESSOR_DETAIL_BLOCK_ID)) {
Error("malformed preprocessor detail record in AST file");
return Failure;
}
F.PreprocessorDetailStartOffset
= F.PreprocessorDetailCursor.GetCurrentBitNo();
if (!PP.getPreprocessingRecord())
PP.createPreprocessingRecord();
if (!PP.getPreprocessingRecord()->getExternalSource())
PP.getPreprocessingRecord()->SetExternalSource(*this);
break;
case SOURCE_MANAGER_BLOCK_ID:
if (ReadSourceManagerBlock(F))
return Failure;
break;
case SUBMODULE_BLOCK_ID:
if (ASTReadResult Result =
ReadSubmoduleBlock(F, ClientLoadCapabilities))
return Result;
break;
case COMMENTS_BLOCK_ID: {
BitstreamCursor C = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
if (ReadBlockAbbrevs(C, COMMENTS_BLOCK_ID)) {
Error("malformed comments block in AST file");
return Failure;
}
CommentsCursors.push_back(std::make_pair(C, &F));
break;
}
default:
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
break;
}
continue;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read and process a record.
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecordType =
Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecordType) {
Error(MaybeRecordType.takeError());
return Failure;
}
ASTRecordTypes RecordType = (ASTRecordTypes)MaybeRecordType.get();
// If we're not loading an AST context, we don't care about most records.
if (!ContextObj) {
switch (RecordType) {
case IDENTIFIER_TABLE:
case IDENTIFIER_OFFSET:
case INTERESTING_IDENTIFIERS:
case STATISTICS:
case PP_CONDITIONAL_STACK:
case PP_COUNTER_VALUE:
case SOURCE_LOCATION_OFFSETS:
case MODULE_OFFSET_MAP:
case SOURCE_MANAGER_LINE_TABLE:
case SOURCE_LOCATION_PRELOADS:
case PPD_ENTITIES_OFFSETS:
case HEADER_SEARCH_TABLE:
case IMPORTED_MODULES:
case MACRO_OFFSET:
break;
default:
continue;
}
}
switch (RecordType) {
default: // Default behavior: ignore.
break;
case TYPE_OFFSET: {
if (F.LocalNumTypes != 0) {
Error("duplicate TYPE_OFFSET record in AST file");
return Failure;
}
F.TypeOffsets = reinterpret_cast<const UnderalignedInt64 *>(Blob.data());
F.LocalNumTypes = Record[0];
unsigned LocalBaseTypeIndex = Record[1];
F.BaseTypeIndex = getTotalNumTypes();
if (F.LocalNumTypes > 0) {
// Introduce the global -> local mapping for types within this module.
GlobalTypeMap.insert(std::make_pair(getTotalNumTypes(), &F));
// Introduce the local -> global mapping for types within this module.
F.TypeRemap.insertOrReplace(
std::make_pair(LocalBaseTypeIndex,
F.BaseTypeIndex - LocalBaseTypeIndex));
TypesLoaded.resize(TypesLoaded.size() + F.LocalNumTypes);
}
break;
}
case DECL_OFFSET: {
if (F.LocalNumDecls != 0) {
Error("duplicate DECL_OFFSET record in AST file");
return Failure;
}
F.DeclOffsets = (const DeclOffset *)Blob.data();
F.LocalNumDecls = Record[0];
unsigned LocalBaseDeclID = Record[1];
F.BaseDeclID = getTotalNumDecls();
if (F.LocalNumDecls > 0) {
// Introduce the global -> local mapping for declarations within this
// module.
GlobalDeclMap.insert(
std::make_pair(getTotalNumDecls() + NUM_PREDEF_DECL_IDS, &F));
// Introduce the local -> global mapping for declarations within this
// module.
F.DeclRemap.insertOrReplace(
std::make_pair(LocalBaseDeclID, F.BaseDeclID - LocalBaseDeclID));
// Introduce the global -> local mapping for declarations within this
// module.
F.GlobalToLocalDeclIDs[&F] = LocalBaseDeclID;
DeclsLoaded.resize(DeclsLoaded.size() + F.LocalNumDecls);
}
break;
}
case TU_UPDATE_LEXICAL: {
DeclContext *TU = ContextObj->getTranslationUnitDecl();
LexicalContents Contents(
reinterpret_cast<const llvm::support::unaligned_uint32_t *>(
Blob.data()),
static_cast<unsigned int>(Blob.size() / 4));
TULexicalDecls.push_back(std::make_pair(&F, Contents));
TU->setHasExternalLexicalStorage(true);
break;
}
case UPDATE_VISIBLE: {
unsigned Idx = 0;
serialization::DeclID ID = ReadDeclID(F, Record, Idx);
auto *Data = (const unsigned char*)Blob.data();
PendingVisibleUpdates[ID].push_back(PendingVisibleUpdate{&F, Data});
// If we've already loaded the decl, perform the updates when we finish
// loading this block.
if (Decl *D = GetExistingDecl(ID))
PendingUpdateRecords.push_back(
PendingUpdateRecord(ID, D, /*JustLoaded=*/false));
break;
}
case IDENTIFIER_TABLE:
F.IdentifierTableData =
reinterpret_cast<const unsigned char *>(Blob.data());
if (Record[0]) {
F.IdentifierLookupTable = ASTIdentifierLookupTable::Create(
F.IdentifierTableData + Record[0],
F.IdentifierTableData + sizeof(uint32_t),
F.IdentifierTableData,
ASTIdentifierLookupTrait(*this, F));
PP.getIdentifierTable().setExternalIdentifierLookup(this);
}
break;
case IDENTIFIER_OFFSET: {
if (F.LocalNumIdentifiers != 0) {
Error("duplicate IDENTIFIER_OFFSET record in AST file");
return Failure;
}
F.IdentifierOffsets = (const uint32_t *)Blob.data();
F.LocalNumIdentifiers = Record[0];
unsigned LocalBaseIdentifierID = Record[1];
F.BaseIdentifierID = getTotalNumIdentifiers();
if (F.LocalNumIdentifiers > 0) {
// Introduce the global -> local mapping for identifiers within this
// module.
GlobalIdentifierMap.insert(std::make_pair(getTotalNumIdentifiers() + 1,
&F));
// Introduce the local -> global mapping for identifiers within this
// module.
F.IdentifierRemap.insertOrReplace(
std::make_pair(LocalBaseIdentifierID,
F.BaseIdentifierID - LocalBaseIdentifierID));
IdentifiersLoaded.resize(IdentifiersLoaded.size()
+ F.LocalNumIdentifiers);
}
break;
}
case INTERESTING_IDENTIFIERS:
F.PreloadIdentifierOffsets.assign(Record.begin(), Record.end());
break;
case EAGERLY_DESERIALIZED_DECLS:
// FIXME: Skip reading this record if our ASTConsumer doesn't care
// about "interesting" decls (for instance, if we're building a module).
for (unsigned I = 0, N = Record.size(); I != N; ++I)
EagerlyDeserializedDecls.push_back(getGlobalDeclID(F, Record[I]));
break;
case MODULAR_CODEGEN_DECLS:
// FIXME: Skip reading this record if our ASTConsumer doesn't care about
// them (ie: if we're not codegenerating this module).
if (F.Kind == MK_MainFile ||
getContext().getLangOpts().BuildingPCHWithObjectFile)
for (unsigned I = 0, N = Record.size(); I != N; ++I)
EagerlyDeserializedDecls.push_back(getGlobalDeclID(F, Record[I]));
break;
case SPECIAL_TYPES:
if (SpecialTypes.empty()) {
for (unsigned I = 0, N = Record.size(); I != N; ++I)
SpecialTypes.push_back(getGlobalTypeID(F, Record[I]));
break;
}
if (SpecialTypes.size() != Record.size()) {
Error("invalid special-types record");
return Failure;
}
for (unsigned I = 0, N = Record.size(); I != N; ++I) {
serialization::TypeID ID = getGlobalTypeID(F, Record[I]);
if (!SpecialTypes[I])
SpecialTypes[I] = ID;
// FIXME: If ID && SpecialTypes[I] != ID, do we need a separate
// merge step?
}
break;
case STATISTICS:
TotalNumStatements += Record[0];
TotalNumMacros += Record[1];
TotalLexicalDeclContexts += Record[2];
TotalVisibleDeclContexts += Record[3];
break;
case UNUSED_FILESCOPED_DECLS:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
UnusedFileScopedDecls.push_back(getGlobalDeclID(F, Record[I]));
break;
case DELEGATING_CTORS:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
DelegatingCtorDecls.push_back(getGlobalDeclID(F, Record[I]));
break;
case WEAK_UNDECLARED_IDENTIFIERS:
if (Record.size() % 4 != 0) {
Error("invalid weak identifiers record");
return Failure;
}
// FIXME: Ignore weak undeclared identifiers from non-original PCH
// files. This isn't the way to do it :)
WeakUndeclaredIdentifiers.clear();
// Translate the weak, undeclared identifiers into global IDs.
for (unsigned I = 0, N = Record.size(); I < N; /* in loop */) {
WeakUndeclaredIdentifiers.push_back(
getGlobalIdentifierID(F, Record[I++]));
WeakUndeclaredIdentifiers.push_back(
getGlobalIdentifierID(F, Record[I++]));
WeakUndeclaredIdentifiers.push_back(
ReadSourceLocation(F, Record, I).getRawEncoding());
WeakUndeclaredIdentifiers.push_back(Record[I++]);
}
break;
case SELECTOR_OFFSETS: {
F.SelectorOffsets = (const uint32_t *)Blob.data();
F.LocalNumSelectors = Record[0];
unsigned LocalBaseSelectorID = Record[1];
F.BaseSelectorID = getTotalNumSelectors();
if (F.LocalNumSelectors > 0) {
// Introduce the global -> local mapping for selectors within this
// module.
GlobalSelectorMap.insert(std::make_pair(getTotalNumSelectors()+1, &F));
// Introduce the local -> global mapping for selectors within this
// module.
F.SelectorRemap.insertOrReplace(
std::make_pair(LocalBaseSelectorID,
F.BaseSelectorID - LocalBaseSelectorID));
SelectorsLoaded.resize(SelectorsLoaded.size() + F.LocalNumSelectors);
}
break;
}
case METHOD_POOL:
F.SelectorLookupTableData = (const unsigned char *)Blob.data();
if (Record[0])
F.SelectorLookupTable
= ASTSelectorLookupTable::Create(
F.SelectorLookupTableData + Record[0],
F.SelectorLookupTableData,
ASTSelectorLookupTrait(*this, F));
TotalNumMethodPoolEntries += Record[1];
break;
case REFERENCED_SELECTOR_POOL:
if (!Record.empty()) {
for (unsigned Idx = 0, N = Record.size() - 1; Idx < N; /* in loop */) {
ReferencedSelectorsData.push_back(getGlobalSelectorID(F,
Record[Idx++]));
ReferencedSelectorsData.push_back(ReadSourceLocation(F, Record, Idx).
getRawEncoding());
}
}
break;
case PP_CONDITIONAL_STACK:
if (!Record.empty()) {
unsigned Idx = 0, End = Record.size() - 1;
bool ReachedEOFWhileSkipping = Record[Idx++];
llvm::Optional<Preprocessor::PreambleSkipInfo> SkipInfo;
if (ReachedEOFWhileSkipping) {
SourceLocation HashToken = ReadSourceLocation(F, Record, Idx);
SourceLocation IfTokenLoc = ReadSourceLocation(F, Record, Idx);
bool FoundNonSkipPortion = Record[Idx++];
bool FoundElse = Record[Idx++];
SourceLocation ElseLoc = ReadSourceLocation(F, Record, Idx);
SkipInfo.emplace(HashToken, IfTokenLoc, FoundNonSkipPortion,
FoundElse, ElseLoc);
}
SmallVector<PPConditionalInfo, 4> ConditionalStack;
while (Idx < End) {
auto Loc = ReadSourceLocation(F, Record, Idx);
bool WasSkipping = Record[Idx++];
bool FoundNonSkip = Record[Idx++];
bool FoundElse = Record[Idx++];
ConditionalStack.push_back(
{Loc, WasSkipping, FoundNonSkip, FoundElse});
}
PP.setReplayablePreambleConditionalStack(ConditionalStack, SkipInfo);
}
break;
case PP_COUNTER_VALUE:
if (!Record.empty() && Listener)
Listener->ReadCounter(F, Record[0]);
break;
case FILE_SORTED_DECLS:
F.FileSortedDecls = (const DeclID *)Blob.data();
F.NumFileSortedDecls = Record[0];
break;
case SOURCE_LOCATION_OFFSETS: {
F.SLocEntryOffsets = (const uint32_t *)Blob.data();
F.LocalNumSLocEntries = Record[0];
SourceLocation::UIntTy SLocSpaceSize = Record[1];
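// Record[2] holds the base of the entry offsets, stored relative to the
// start of the source-manager block.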
F.SLocEntryOffsetsBase = Record[2] + F.SourceManagerBlockStartOffset;
std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) =
SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries,
SLocSpaceSize);
if (!F.SLocEntryBaseID) {
Error("ran out of source locations");
break;
}
// Make our entry in the range map. BaseID is negative and growing, so
// we invert it. Because we invert it, though, we need the other end of
// the range.
unsigned RangeStart =
unsigned(-F.SLocEntryBaseID) - F.LocalNumSLocEntries + 1;
GlobalSLocEntryMap.insert(std::make_pair(RangeStart, &F));
F.FirstLoc = SourceLocation::getFromRawEncoding(F.SLocEntryBaseOffset);
// SLocEntryBaseOffset is lower than MaxLoadedOffset and decreasing.
assert((F.SLocEntryBaseOffset & SourceLocation::MacroIDBit) == 0);
GlobalSLocOffsetMap.insert(
std::make_pair(SourceManager::MaxLoadedOffset - F.SLocEntryBaseOffset
- SLocSpaceSize,&F));
// Initialize the remapping table.
// Invalid stays invalid.
F.SLocRemap.insertOrReplace(std::make_pair(0U, 0));
// This module. Base was 2 when being compiled.
F.SLocRemap.insertOrReplace(std::make_pair(
2U, static_cast<SourceLocation::IntTy>(F.SLocEntryBaseOffset - 2)));
TotalNumSLocEntries += F.LocalNumSLocEntries;
break;
}
case MODULE_OFFSET_MAP:
F.ModuleOffsetMap = Blob;
break;
case SOURCE_MANAGER_LINE_TABLE:
if (ParseLineTable(F, Record)) {
Error("malformed SOURCE_MANAGER_LINE_TABLE in AST file");
return Failure;
}
break;
case SOURCE_LOCATION_PRELOADS: {
// Need to transform from the local view (1-based IDs) to the global view,
// which is based off F.SLocEntryBaseID.
if (!F.PreloadSLocEntries.empty()) {
Error("Multiple SOURCE_LOCATION_PRELOADS records in AST file");
return Failure;
}
F.PreloadSLocEntries.swap(Record);
break;
}
case EXT_VECTOR_DECLS:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
ExtVectorDecls.push_back(getGlobalDeclID(F, Record[I]));
break;
case VTABLE_USES:
if (Record.size() % 3 != 0) {
Error("Invalid VTABLE_USES record");
return Failure;
}
// Later tables overwrite earlier ones.
// FIXME: Modules will have some trouble with this. This is clearly not
// the right way to do this.
VTableUses.clear();
for (unsigned Idx = 0, N = Record.size(); Idx != N; /* In loop */) {
VTableUses.push_back(getGlobalDeclID(F, Record[Idx++]));
VTableUses.push_back(
ReadSourceLocation(F, Record, Idx).getRawEncoding());
VTableUses.push_back(Record[Idx++]);
}
break;
case PENDING_IMPLICIT_INSTANTIATIONS:
if (PendingInstantiations.size() % 2 != 0) {
Error("Invalid existing PendingInstantiations");
return Failure;
}
if (Record.size() % 2 != 0) {
Error("Invalid PENDING_IMPLICIT_INSTANTIATIONS block");
return Failure;
}
for (unsigned I = 0, N = Record.size(); I != N; /* in loop */) {
PendingInstantiations.push_back(getGlobalDeclID(F, Record[I++]));
PendingInstantiations.push_back(
ReadSourceLocation(F, Record, I).getRawEncoding());
}
break;
case SEMA_DECL_REFS:
if (Record.size() != 3) {
Error("Invalid SEMA_DECL_REFS block");
return Failure;
}
for (unsigned I = 0, N = Record.size(); I != N; ++I)
SemaDeclRefs.push_back(getGlobalDeclID(F, Record[I]));
break;
case PPD_ENTITIES_OFFSETS: {
F.PreprocessedEntityOffsets = (const PPEntityOffset *)Blob.data();
assert(Blob.size() % sizeof(PPEntityOffset) == 0);
F.NumPreprocessedEntities = Blob.size() / sizeof(PPEntityOffset);
unsigned LocalBasePreprocessedEntityID = Record[0];
unsigned StartingID;
if (!PP.getPreprocessingRecord())
PP.createPreprocessingRecord();
if (!PP.getPreprocessingRecord()->getExternalSource())
PP.getPreprocessingRecord()->SetExternalSource(*this);
StartingID
= PP.getPreprocessingRecord()
->allocateLoadedEntities(F.NumPreprocessedEntities);
F.BasePreprocessedEntityID = StartingID;
if (F.NumPreprocessedEntities > 0) {
// Introduce the global -> local mapping for preprocessed entities in
// this module.
GlobalPreprocessedEntityMap.insert(std::make_pair(StartingID, &F));
// Introduce the local -> global mapping for preprocessed entities in
// this module.
F.PreprocessedEntityRemap.insertOrReplace(
std::make_pair(LocalBasePreprocessedEntityID,
F.BasePreprocessedEntityID - LocalBasePreprocessedEntityID));
}
break;
}
case PPD_SKIPPED_RANGES: {
F.PreprocessedSkippedRangeOffsets = (const PPSkippedRange*)Blob.data();
assert(Blob.size() % sizeof(PPSkippedRange) == 0);
F.NumPreprocessedSkippedRanges = Blob.size() / sizeof(PPSkippedRange);
if (!PP.getPreprocessingRecord())
PP.createPreprocessingRecord();
if (!PP.getPreprocessingRecord()->getExternalSource())
PP.getPreprocessingRecord()->SetExternalSource(*this);
F.BasePreprocessedSkippedRangeID = PP.getPreprocessingRecord()
->allocateSkippedRanges(F.NumPreprocessedSkippedRanges);
if (F.NumPreprocessedSkippedRanges > 0)
GlobalSkippedRangeMap.insert(
std::make_pair(F.BasePreprocessedSkippedRangeID, &F));
break;
}
case DECL_UPDATE_OFFSETS:
if (Record.size() % 2 != 0) {
Error("invalid DECL_UPDATE_OFFSETS block in AST file");
return Failure;
}
for (unsigned I = 0, N = Record.size(); I != N; I += 2) {
GlobalDeclID ID = getGlobalDeclID(F, Record[I]);
DeclUpdateOffsets[ID].push_back(std::make_pair(&F, Record[I + 1]));
// If we've already loaded the decl, perform the updates when we finish
// loading this block.
if (Decl *D = GetExistingDecl(ID))
PendingUpdateRecords.push_back(
PendingUpdateRecord(ID, D, /*JustLoaded=*/false));
}
break;
case OBJC_CATEGORIES_MAP:
if (F.LocalNumObjCCategoriesInMap != 0) {
Error("duplicate OBJC_CATEGORIES_MAP record in AST file");
return Failure;
}
F.LocalNumObjCCategoriesInMap = Record[0];
F.ObjCCategoriesMap = (const ObjCCategoriesInfo *)Blob.data();
break;
case OBJC_CATEGORIES:
F.ObjCCategories.swap(Record);
break;
case CUDA_SPECIAL_DECL_REFS:
// Later tables overwrite earlier ones.
// FIXME: Modules will have trouble with this.
CUDASpecialDeclRefs.clear();
for (unsigned I = 0, N = Record.size(); I != N; ++I)
CUDASpecialDeclRefs.push_back(getGlobalDeclID(F, Record[I]));
break;
case HEADER_SEARCH_TABLE:
F.HeaderFileInfoTableData = Blob.data();
F.LocalNumHeaderFileInfos = Record[1];
if (Record[0]) {
F.HeaderFileInfoTable
= HeaderFileInfoLookupTable::Create(
(const unsigned char *)F.HeaderFileInfoTableData + Record[0],
(const unsigned char *)F.HeaderFileInfoTableData,
HeaderFileInfoTrait(*this, F,
&PP.getHeaderSearchInfo(),
Blob.data() + Record[2]));
PP.getHeaderSearchInfo().SetExternalSource(this);
if (!PP.getHeaderSearchInfo().getExternalLookup())
PP.getHeaderSearchInfo().SetExternalLookup(this);
}
break;
case FP_PRAGMA_OPTIONS:
// Later tables overwrite earlier ones.
FPPragmaOptions.swap(Record);
break;
case OPENCL_EXTENSIONS:
for (unsigned I = 0, E = Record.size(); I != E; ) {
auto Name = ReadString(Record, I);
auto &OptInfo = OpenCLExtensions.OptMap[Name];
OptInfo.Supported = Record[I++] != 0;
OptInfo.Enabled = Record[I++] != 0;
OptInfo.WithPragma = Record[I++] != 0;
OptInfo.Avail = Record[I++];
OptInfo.Core = Record[I++];
OptInfo.Opt = Record[I++];
}
break;
case TENTATIVE_DEFINITIONS:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
TentativeDefinitions.push_back(getGlobalDeclID(F, Record[I]));
break;
case KNOWN_NAMESPACES:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
KnownNamespaces.push_back(getGlobalDeclID(F, Record[I]));
break;
case UNDEFINED_BUT_USED:
if (UndefinedButUsed.size() % 2 != 0) {
Error("Invalid existing UndefinedButUsed");
return Failure;
}
if (Record.size() % 2 != 0) {
Error("invalid undefined-but-used record");
return Failure;
}
for (unsigned I = 0, N = Record.size(); I != N; /* in loop */) {
UndefinedButUsed.push_back(getGlobalDeclID(F, Record[I++]));
UndefinedButUsed.push_back(
ReadSourceLocation(F, Record, I).getRawEncoding());
}
break;
case DELETE_EXPRS_TO_ANALYZE:
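// Record layout: a declaration ID, a count, then <count>
// (SourceLocation, IsArrayForm) pairs; IDs and locations are translated
// to the global space as they are copied into DelayedDeleteExprs.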
for (unsigned I = 0, N = Record.size(); I != N;) {
DelayedDeleteExprs.push_back(getGlobalDeclID(F, Record[I++]));
const uint64_t Count = Record[I++];
DelayedDeleteExprs.push_back(Count);
for (uint64_t C = 0; C < Count; ++C) {
DelayedDeleteExprs.push_back(ReadSourceLocation(F, Record, I).getRawEncoding());
bool IsArrayForm = Record[I++] == 1;
DelayedDeleteExprs.push_back(IsArrayForm);
}
}
break;
case IMPORTED_MODULES:
if (!F.isModule()) {
// If we aren't loading a module (which has its own exports), make
// all of the imported modules visible.
// FIXME: Deal with macros-only imports.
for (unsigned I = 0, N = Record.size(); I != N; /**/) {
unsigned GlobalID = getGlobalSubmoduleID(F, Record[I++]);
SourceLocation Loc = ReadSourceLocation(F, Record, I);
if (GlobalID) {
ImportedModules.push_back(ImportedSubmodule(GlobalID, Loc));
if (DeserializationListener)
DeserializationListener->ModuleImportRead(GlobalID, Loc);
}
}
}
break;
case MACRO_OFFSET: {
if (F.LocalNumMacros != 0) {
Error("duplicate MACRO_OFFSET record in AST file");
return Failure;
}
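// Record layout: [0] number of macros local to this module, [1] the
// module's first local macro ID, [2] the offset of the macro table
// relative to the start of the AST block; the blob holds the per-macro
// offsets themselves.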
F.MacroOffsets = (const uint32_t *)Blob.data();
F.LocalNumMacros = Record[0];
unsigned LocalBaseMacroID = Record[1];
F.MacroOffsetsBase = Record[2] + F.ASTBlockStartOffset;
F.BaseMacroID = getTotalNumMacros();
if (F.LocalNumMacros > 0) {
// Introduce the global -> local mapping for macros within this module.
GlobalMacroMap.insert(std::make_pair(getTotalNumMacros() + 1, &F));
// Introduce the local -> global mapping for macros within this module.
F.MacroRemap.insertOrReplace(
std::make_pair(LocalBaseMacroID,
F.BaseMacroID - LocalBaseMacroID));
MacrosLoaded.resize(MacrosLoaded.size() + F.LocalNumMacros);
}
break;
}
case LATE_PARSED_TEMPLATE:
LateParsedTemplates.emplace_back(
std::piecewise_construct, std::forward_as_tuple(&F),
std::forward_as_tuple(Record.begin(), Record.end()));
break;
case OPTIMIZE_PRAGMA_OPTIONS:
if (Record.size() != 1) {
Error("invalid pragma optimize record");
return Failure;
}
OptimizeOffPragmaLocation = ReadSourceLocation(F, Record[0]);
break;
case MSSTRUCT_PRAGMA_OPTIONS:
if (Record.size() != 1) {
Error("invalid pragma ms_struct record");
return Failure;
}
PragmaMSStructState = Record[0];
break;
case POINTERS_TO_MEMBERS_PRAGMA_OPTIONS:
if (Record.size() != 2) {
Error("invalid pragma ms_struct record");
return Failure;
}
PragmaMSPointersToMembersState = Record[0];
PointersToMembersPragmaLocation = ReadSourceLocation(F, Record[1]);
break;
case UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
UnusedLocalTypedefNameCandidates.push_back(
getGlobalDeclID(F, Record[I]));
break;
case CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH:
if (Record.size() != 1) {
Error("invalid cuda pragma options record");
return Failure;
}
ForceCUDAHostDeviceDepth = Record[0];
break;
case ALIGN_PACK_PRAGMA_OPTIONS: {
if (Record.size() < 3) {
Error("invalid pragma pack record");
return Failure;
}
PragmaAlignPackCurrentValue = ReadAlignPackInfo(Record[0]);
PragmaAlignPackCurrentLocation = ReadSourceLocation(F, Record[1]);
unsigned NumStackEntries = Record[2];
unsigned Idx = 3;
// Reset the stack when importing a new module.
PragmaAlignPackStack.clear();
for (unsigned I = 0; I < NumStackEntries; ++I) {
PragmaAlignPackStackEntry Entry;
Entry.Value = ReadAlignPackInfo(Record[Idx++]);
Entry.Location = ReadSourceLocation(F, Record[Idx++]);
Entry.PushLocation = ReadSourceLocation(F, Record[Idx++]);
PragmaAlignPackStrings.push_back(ReadString(Record, Idx));
Entry.SlotLabel = PragmaAlignPackStrings.back();
PragmaAlignPackStack.push_back(Entry);
}
break;
}
case FLOAT_CONTROL_PRAGMA_OPTIONS: {
if (Record.size() < 3) {
Error("invalid pragma pack record");
return Failure;
}
FpPragmaCurrentValue = FPOptionsOverride::getFromOpaqueInt(Record[0]);
FpPragmaCurrentLocation = ReadSourceLocation(F, Record[1]);
unsigned NumStackEntries = Record[2];
unsigned Idx = 3;
// Reset the stack when importing a new module.
FpPragmaStack.clear();
for (unsigned I = 0; I < NumStackEntries; ++I) {
FpPragmaStackEntry Entry;
Entry.Value = FPOptionsOverride::getFromOpaqueInt(Record[Idx++]);
Entry.Location = ReadSourceLocation(F, Record[Idx++]);
Entry.PushLocation = ReadSourceLocation(F, Record[Idx++]);
FpPragmaStrings.push_back(ReadString(Record, Idx));
Entry.SlotLabel = FpPragmaStrings.back();
FpPragmaStack.push_back(Entry);
}
break;
}
case DECLS_TO_CHECK_FOR_DEFERRED_DIAGS:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
DeclsToCheckForDeferredDiags.insert(getGlobalDeclID(F, Record[I]));
break;
}
}
}
void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
assert(!F.ModuleOffsetMap.empty() && "no module offset map to read");
// Additional remapping information.
const unsigned char *Data = (const unsigned char*)F.ModuleOffsetMap.data();
const unsigned char *DataEnd = Data + F.ModuleOffsetMap.size();
F.ModuleOffsetMap = StringRef();
// If we see this entry before SOURCE_LOCATION_OFFSETS, add placeholders.
if (F.SLocRemap.find(0) == F.SLocRemap.end()) {
F.SLocRemap.insert(std::make_pair(0U, 0));
F.SLocRemap.insert(std::make_pair(2U, 1));
}
// Continuous range maps we may be updating in our module.
using SLocRemapBuilder =
ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy,
2>::Builder;
using RemapBuilder = ContinuousRangeMap<uint32_t, int, 2>::Builder;
SLocRemapBuilder SLocRemap(F.SLocRemap);
RemapBuilder IdentifierRemap(F.IdentifierRemap);
RemapBuilder MacroRemap(F.MacroRemap);
RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap);
RemapBuilder SubmoduleRemap(F.SubmoduleRemap);
RemapBuilder SelectorRemap(F.SelectorRemap);
RemapBuilder DeclRemap(F.DeclRemap);
RemapBuilder TypeRemap(F.TypeRemap);
while (Data < DataEnd) {
// FIXME: Looking up dependency modules by filename is horrible. Let's
// start fixing this with prebuilt, explicit and implicit modules and see
// how it goes...
using namespace llvm::support;
ModuleKind Kind = static_cast<ModuleKind>(
endian::readNext<uint8_t, little, unaligned>(Data));
uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Data);
StringRef Name = StringRef((const char*)Data, Len);
Data += Len;
ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule ||
Kind == MK_ImplicitModule
? ModuleMgr.lookupByModuleName(Name)
: ModuleMgr.lookupByFileName(Name));
if (!OM) {
std::string Msg =
"SourceLocation remap refers to unknown module, cannot find ";
Msg.append(std::string(Name));
Error(Msg);
return;
}
SourceLocation::UIntTy SLocOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t IdentifierIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t MacroIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t PreprocessedEntityIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t SubmoduleIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t SelectorIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t DeclIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t TypeIndexOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
auto mapOffset = [&](uint32_t Offset, uint32_t BaseOffset,
RemapBuilder &Remap) {
constexpr uint32_t None = std::numeric_limits<uint32_t>::max();
if (Offset != None)
Remap.insert(std::make_pair(Offset,
static_cast<int>(BaseOffset - Offset)));
};
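// The remap stores a delta keyed by the first local offset it applies to:
// a stored local ID K with K >= Offset resolves to K + (BaseOffset - Offset).
// Illustration with hypothetical numbers: with Offset = 100 and
// BaseOffset = 5000, the entry (100, +4900) maps a stored ID of 103 to
// global ID 5003.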
constexpr SourceLocation::UIntTy SLocNone =
std::numeric_limits<SourceLocation::UIntTy>::max();
if (SLocOffset != SLocNone)
SLocRemap.insert(std::make_pair(
SLocOffset, static_cast<SourceLocation::IntTy>(
OM->SLocEntryBaseOffset - SLocOffset)));
mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap);
mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap);
mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID,
PreprocessedEntityRemap);
mapOffset(SubmoduleIDOffset, OM->BaseSubmoduleID, SubmoduleRemap);
mapOffset(SelectorIDOffset, OM->BaseSelectorID, SelectorRemap);
mapOffset(DeclIDOffset, OM->BaseDeclID, DeclRemap);
mapOffset(TypeIndexOffset, OM->BaseTypeIndex, TypeRemap);
// Global -> local mappings.
F.GlobalToLocalDeclIDs[OM] = DeclIDOffset;
}
}
ASTReader::ASTReadResult
ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
const ModuleFile *ImportedBy,
unsigned ClientLoadCapabilities) {
unsigned Idx = 0;
F.ModuleMapPath = ReadPath(F, Record, Idx);
// Try to resolve ModuleName in the current header search context and
// verify that it is found in the same module map file as we saved. If the
// top-level AST file is a main file, skip this check because there is no
// usable header search context.
assert(!F.ModuleName.empty() &&
"MODULE_NAME should come before MODULE_MAP_FILE");
if (F.Kind == MK_ImplicitModule && ModuleMgr.begin()->Kind != MK_MainFile) {
// An implicitly-loaded module file should have its module listed in some
// module map file that we've already loaded.
Module *M = PP.getHeaderSearchInfo().lookupModule(F.ModuleName);
auto &Map = PP.getHeaderSearchInfo().getModuleMap();
const FileEntry *ModMap = M ? Map.getModuleMapFileForUniquing(M) : nullptr;
// Don't emit module relocation error if we have -fno-validate-pch
if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation &
DisableValidationForModuleKind::Module) &&
!ModMap) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) {
if (auto ASTFE = M ? M->getASTFile() : None) {
// This module was defined by an imported (explicit) module.
Diag(diag::err_module_file_conflict) << F.ModuleName << F.FileName
<< ASTFE->getName();
} else {
// This module was built with a different module map.
Diag(diag::err_imported_module_not_found)
<< F.ModuleName << F.FileName
<< (ImportedBy ? ImportedBy->FileName : "") << F.ModuleMapPath
<< !ImportedBy;
// In case it was imported by a PCH, there's a chance the user is
// simply missing the search path to the directory containing
// the modulemap.
if (ImportedBy && ImportedBy->Kind == MK_PCH)
Diag(diag::note_imported_by_pch_module_not_found)
<< llvm::sys::path::parent_path(F.ModuleMapPath);
}
}
return OutOfDate;
}
assert(M && M->Name == F.ModuleName && "found module with different name");
// Check the primary module map file.
auto StoredModMap = FileMgr.getFile(F.ModuleMapPath);
if (!StoredModMap || *StoredModMap != ModMap) {
assert(ModMap && "found module is missing module map file");
assert((ImportedBy || F.Kind == MK_ImplicitModule) &&
"top-level import should be verified");
bool NotImported = F.Kind == MK_ImplicitModule && !ImportedBy;
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_imported_module_modmap_changed)
<< F.ModuleName << (NotImported ? F.FileName : ImportedBy->FileName)
<< ModMap->getName() << F.ModuleMapPath << NotImported;
return OutOfDate;
}
llvm::SmallPtrSet<const FileEntry *, 1> AdditionalStoredMaps;
for (unsigned I = 0, N = Record[Idx++]; I < N; ++I) {
// FIXME: we should use input files rather than storing names.
std::string Filename = ReadPath(F, Record, Idx);
auto SF = FileMgr.getFile(Filename, false, false);
if (!SF) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Error("could not find file '" + Filename +"' referenced by AST file");
return OutOfDate;
}
AdditionalStoredMaps.insert(*SF);
}
// Check any additional module map files (e.g. module.private.modulemap)
// that are not in the pcm.
if (auto *AdditionalModuleMaps = Map.getAdditionalModuleMapFiles(M)) {
for (const FileEntry *ModMap : *AdditionalModuleMaps) {
// Remove files that match
// Note: SmallPtrSet::erase is really remove
if (!AdditionalStoredMaps.erase(ModMap)) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_module_different_modmap)
<< F.ModuleName << /*new*/0 << ModMap->getName();
return OutOfDate;
}
}
}
// Check any additional module map files that are in the pcm, but not
// found in header search. Cases that match are already removed.
for (const FileEntry *ModMap : AdditionalStoredMaps) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_module_different_modmap)
<< F.ModuleName << /*not new*/1 << ModMap->getName();
return OutOfDate;
}
}
if (Listener)
Listener->ReadModuleMapFile(F.ModuleMapPath);
return Success;
}
/// Move the given method to the back of the global list of methods.
static void moveMethodToBackOfGlobalList(Sema &S, ObjCMethodDecl *Method) {
// Find the entry for this selector in the method pool.
Sema::GlobalMethodPool::iterator Known
= S.MethodPool.find(Method->getSelector());
if (Known == S.MethodPool.end())
return;
// Retrieve the appropriate method list.
ObjCMethodList &Start = Method->isInstanceMethod()? Known->second.first
: Known->second.second;
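// The loop below rotates Method to the back of the list: once Method is
// found, each node takes over its successor's method and the final node is
// set to Method. Illustration with a hypothetical list [A, Method, B, C]:
// afterwards the list holds [A, B, C, Method].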
bool Found = false;
for (ObjCMethodList *List = &Start; List; List = List->getNext()) {
if (!Found) {
if (List->getMethod() == Method) {
Found = true;
} else {
// Keep searching.
continue;
}
}
if (List->getNext())
List->setMethod(List->getNext()->getMethod());
else
List->setMethod(Method);
}
}
void ASTReader::makeNamesVisible(const HiddenNames &Names, Module *Owner) {
assert(Owner->NameVisibility != Module::Hidden && "nothing to make visible?");
for (Decl *D : Names) {
bool wasHidden = !D->isUnconditionallyVisible();
D->setVisibleDespiteOwningModule();
if (wasHidden && SemaObj) {
if (ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(D)) {
moveMethodToBackOfGlobalList(*SemaObj, Method);
}
}
}
}
void ASTReader::makeModuleVisible(Module *Mod,
Module::NameVisibilityKind NameVisibility,
SourceLocation ImportLoc) {
llvm::SmallPtrSet<Module *, 4> Visited;
SmallVector<Module *, 4> Stack;
Stack.push_back(Mod);
while (!Stack.empty()) {
Mod = Stack.pop_back_val();
if (NameVisibility <= Mod->NameVisibility) {
// This module already has this level of visibility (or greater), so
// there is nothing more to do.
continue;
}
if (Mod->isUnimportable()) {
// Modules that aren't importable cannot be made visible.
continue;
}
// Update the module's name visibility.
Mod->NameVisibility = NameVisibility;
// If we've already deserialized any names from this module,
// mark them as visible.
HiddenNamesMapType::iterator Hidden = HiddenNamesMap.find(Mod);
if (Hidden != HiddenNamesMap.end()) {
auto HiddenNames = std::move(*Hidden);
HiddenNamesMap.erase(Hidden);
makeNamesVisible(HiddenNames.second, HiddenNames.first);
assert(HiddenNamesMap.find(Mod) == HiddenNamesMap.end() &&
"making names visible added hidden names");
}
// Push any exported modules onto the stack to be marked as visible.
SmallVector<Module *, 16> Exports;
Mod->getExportedModules(Exports);
for (SmallVectorImpl<Module *>::iterator
I = Exports.begin(), E = Exports.end(); I != E; ++I) {
Module *Exported = *I;
if (Visited.insert(Exported).second)
Stack.push_back(Exported);
}
}
}
/// We've merged the definition \p MergedDef into the existing definition
/// \p Def. Ensure that \p Def is made visible whenever \p MergedDef is made
/// visible.
void ASTReader::mergeDefinitionVisibility(NamedDecl *Def,
NamedDecl *MergedDef) {
if (!Def->isUnconditionallyVisible()) {
// If MergedDef is visible or becomes visible, make the definition visible.
if (MergedDef->isUnconditionallyVisible())
Def->setVisibleDespiteOwningModule();
else {
getContext().mergeDefinitionIntoModule(
Def, MergedDef->getImportedOwningModule(),
/*NotifyListeners*/ false);
PendingMergedDefinitionsToDeduplicate.insert(Def);
}
}
}
bool ASTReader::loadGlobalIndex() {
if (GlobalIndex)
return false;
if (TriedLoadingGlobalIndex || !UseGlobalIndex ||
!PP.getLangOpts().Modules)
return true;
// Try to load the global index.
TriedLoadingGlobalIndex = true;
StringRef ModuleCachePath
= getPreprocessor().getHeaderSearchInfo().getModuleCachePath();
std::pair<GlobalModuleIndex *, llvm::Error> Result =
GlobalModuleIndex::readIndex(ModuleCachePath);
if (llvm::Error Err = std::move(Result.second)) {
assert(!Result.first);
consumeError(std::move(Err)); // FIXME this drops errors on the floor.
return true;
}
GlobalIndex.reset(Result.first);
ModuleMgr.setGlobalIndex(GlobalIndex.get());
return false;
}
bool ASTReader::isGlobalIndexUnavailable() const {
return PP.getLangOpts().Modules && UseGlobalIndex &&
!hasGlobalIndex() && TriedLoadingGlobalIndex;
}
static void updateModuleTimestamp(ModuleFile &MF) {
// Overwrite the timestamp file contents so that file's mtime changes.
std::string TimestampFilename = MF.getTimestampFilename();
std::error_code EC;
llvm::raw_fd_ostream OS(TimestampFilename, EC,
llvm::sys::fs::OF_TextWithCRLF);
if (EC)
return;
OS << "Timestamp file\n";
OS.close();
OS.clear_error(); // Avoid triggering a fatal error.
}
/// Given a cursor at the start of an AST file, scan ahead and drop the
/// cursor into the start of the given block ID, returning false on success and
/// true on failure.
static bool SkipCursorToBlock(BitstreamCursor &Cursor, unsigned BlockID) {
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
if (!MaybeEntry) {
// FIXME this drops errors on the floor.
consumeError(MaybeEntry.takeError());
return true;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
case llvm::BitstreamEntry::EndBlock:
return true;
case llvm::BitstreamEntry::Record:
// Ignore top-level records.
if (Expected<unsigned> Skipped = Cursor.skipRecord(Entry.ID))
break;
else {
// FIXME this drops errors on the floor.
consumeError(Skipped.takeError());
return true;
}
case llvm::BitstreamEntry::SubBlock:
if (Entry.ID == BlockID) {
if (llvm::Error Err = Cursor.EnterSubBlock(BlockID)) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return true;
}
// Found it!
return false;
}
if (llvm::Error Err = Cursor.SkipBlock()) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return true;
}
}
}
}
ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
ModuleKind Type,
SourceLocation ImportLoc,
unsigned ClientLoadCapabilities,
SmallVectorImpl<ImportedSubmodule> *Imported) {
llvm::SaveAndRestore<SourceLocation>
SetCurImportLocRAII(CurrentImportLoc, ImportLoc);
llvm::SaveAndRestore<Optional<ModuleKind>> SetCurModuleKindRAII(
CurrentDeserializingModuleKind, Type);
// Defer any pending actions until we get to the end of reading the AST file.
Deserializing AnASTFile(this);
// Bump the generation number.
unsigned PreviousGeneration = 0;
if (ContextObj)
PreviousGeneration = incrementGeneration(*ContextObj);
unsigned NumModules = ModuleMgr.size();
auto removeModulesAndReturn = [&](ASTReadResult ReadResult) {
assert(ReadResult && "expected to return error");
ModuleMgr.removeModules(ModuleMgr.begin() + NumModules,
PP.getLangOpts().Modules
? &PP.getHeaderSearchInfo().getModuleMap()
: nullptr);
// If we find that any modules are unusable, the global index is going
// to be out-of-date. Just remove it.
GlobalIndex.reset();
ModuleMgr.setGlobalIndex(nullptr);
return ReadResult;
};
SmallVector<ImportedModule, 4> Loaded;
switch (ASTReadResult ReadResult =
ReadASTCore(FileName, Type, ImportLoc,
/*ImportedBy=*/nullptr, Loaded, 0, 0,
ASTFileSignature(), ClientLoadCapabilities)) {
case Failure:
case Missing:
case OutOfDate:
case VersionMismatch:
case ConfigurationMismatch:
case HadErrors:
return removeModulesAndReturn(ReadResult);
case Success:
break;
}
// Here comes stuff that we only do once the entire chain is loaded.
// Load the AST blocks of all of the modules that we loaded. We can still
// hit errors parsing the ASTs at this point.
for (ImportedModule &M : Loaded) {
ModuleFile &F = *M.Mod;
// Read the AST block.
if (ASTReadResult Result = ReadASTBlock(F, ClientLoadCapabilities))
return removeModulesAndReturn(Result);
// The AST block should always have a definition for the main module.
if (F.isModule() && !F.DidReadTopLevelSubmodule) {
Error(diag::err_module_file_missing_top_level_submodule, F.FileName);
return removeModulesAndReturn(Failure);
}
// Read the extension blocks.
while (!SkipCursorToBlock(F.Stream, EXTENSION_BLOCK_ID)) {
if (ASTReadResult Result = ReadExtensionBlock(F))
return removeModulesAndReturn(Result);
}
// Once read, set the ModuleFile bit base offset and update the size in
// bits of all files we've seen.
F.GlobalBitOffset = TotalModulesSizeInBits;
TotalModulesSizeInBits += F.SizeInBits;
GlobalBitOffsetsMap.insert(std::make_pair(F.GlobalBitOffset, &F));
}
// Preload source locations and interesting identifiers.
for (ImportedModule &M : Loaded) {
ModuleFile &F = *M.Mod;
// Preload SLocEntries.
for (unsigned I = 0, N = F.PreloadSLocEntries.size(); I != N; ++I) {
int Index = int(F.PreloadSLocEntries[I] - 1) + F.SLocEntryBaseID;
// Load it through the SourceManager and don't call ReadSLocEntry()
// directly because the entry may have already been loaded in which case
// calling ReadSLocEntry() directly would trigger an assertion in
// SourceManager.
SourceMgr.getLoadedSLocEntryByID(Index);
}
// Map the original source file ID into the ID space of the current
// compilation.
if (F.OriginalSourceFileID.isValid()) {
F.OriginalSourceFileID = FileID::get(
F.SLocEntryBaseID + F.OriginalSourceFileID.getOpaqueValue() - 1);
}
// Preload all the pending interesting identifiers by marking them out of
// date.
for (auto Offset : F.PreloadIdentifierOffsets) {
const unsigned char *Data = F.IdentifierTableData + Offset;
ASTIdentifierLookupTrait Trait(*this, F);
auto KeyDataLen = Trait.ReadKeyDataLength(Data);
auto Key = Trait.ReadKey(Data, KeyDataLen.first);
auto &II = PP.getIdentifierTable().getOwn(Key);
II.setOutOfDate(true);
// Mark this identifier as being from an AST file so that we can track
// whether we need to serialize it.
markIdentifierFromAST(*this, II);
// Associate the ID with the identifier so that the writer can reuse it.
auto ID = Trait.ReadIdentifierID(Data + KeyDataLen.first);
SetIdentifierInfo(ID, &II);
}
}
// Set up the import locations and notify the module manager that we've
// committed to these module files.
for (ImportedModule &M : Loaded) {
ModuleFile &F = *M.Mod;
ModuleMgr.moduleFileAccepted(&F);
// Set the import location.
F.DirectImportLoc = ImportLoc;
// FIXME: We assume that locations from PCH / preamble do not need
// any translation.
if (!M.ImportedBy)
F.ImportLoc = M.ImportLoc;
else
F.ImportLoc = TranslateSourceLocation(*M.ImportedBy, M.ImportLoc);
}
if (!PP.getLangOpts().CPlusPlus ||
(Type != MK_ImplicitModule && Type != MK_ExplicitModule &&
Type != MK_PrebuiltModule)) {
// Mark all of the identifiers in the identifier table as being out of date,
// so that various accessors know to check the loaded modules when the
// identifier is used.
//
// For C++ modules, we don't need information on many identifiers (just
// those that provide macros or are poisoned), so we mark all of
// the interesting ones via PreloadIdentifierOffsets.
for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
IdEnd = PP.getIdentifierTable().end();
Id != IdEnd; ++Id)
Id->second->setOutOfDate(true);
}
// Mark selectors as out of date.
for (auto Sel : SelectorGeneration)
SelectorOutOfDate[Sel.first] = true;
// Resolve any unresolved module exports.
for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I];
SubmoduleID GlobalID = getGlobalSubmoduleID(*Unresolved.File,Unresolved.ID);
Module *ResolvedMod = getSubmodule(GlobalID);
switch (Unresolved.Kind) {
case UnresolvedModuleRef::Conflict:
if (ResolvedMod) {
Module::Conflict Conflict;
Conflict.Other = ResolvedMod;
Conflict.Message = Unresolved.String.str();
Unresolved.Mod->Conflicts.push_back(Conflict);
}
continue;
case UnresolvedModuleRef::Import:
if (ResolvedMod)
Unresolved.Mod->Imports.insert(ResolvedMod);
continue;
case UnresolvedModuleRef::Export:
if (ResolvedMod || Unresolved.IsWildcard)
Unresolved.Mod->Exports.push_back(
Module::ExportDecl(ResolvedMod, Unresolved.IsWildcard));
continue;
}
}
UnresolvedModuleRefs.clear();
if (Imported)
Imported->append(ImportedModules.begin(),
ImportedModules.end());
// FIXME: How do we load the 'use'd modules? They may not be submodules.
// Might be unnecessary as use declarations are only used to build the
// module itself.
if (ContextObj)
InitializeContext();
if (SemaObj)
UpdateSema();
if (DeserializationListener)
DeserializationListener->ReaderInitialized(this);
ModuleFile &PrimaryModule = ModuleMgr.getPrimaryModule();
if (PrimaryModule.OriginalSourceFileID.isValid()) {
// If this AST file is a precompiled preamble, then set the
// preamble file ID of the source manager to the source file
// from which the preamble was built.
if (Type == MK_Preamble) {
SourceMgr.setPreambleFileID(PrimaryModule.OriginalSourceFileID);
} else if (Type == MK_MainFile) {
SourceMgr.setMainFileID(PrimaryModule.OriginalSourceFileID);
}
}
// For any Objective-C class definitions we have already loaded, make sure
// that we load any additional categories.
if (ContextObj) {
for (unsigned I = 0, N = ObjCClassesLoaded.size(); I != N; ++I) {
loadObjCCategories(ObjCClassesLoaded[I]->getGlobalID(),
ObjCClassesLoaded[I],
PreviousGeneration);
}
}
if (PP.getHeaderSearchInfo()
.getHeaderSearchOpts()
.ModulesValidateOncePerBuildSession) {
// Now we are certain that the module and all modules it depends on are
// up to date. Create or update timestamp files for modules that are
// located in the module cache (not for PCH files that could be anywhere
// in the filesystem).
for (unsigned I = 0, N = Loaded.size(); I != N; ++I) {
ImportedModule &M = Loaded[I];
if (M.Mod->Kind == MK_ImplicitModule) {
updateModuleTimestamp(*M.Mod);
}
}
}
return Success;
}
static ASTFileSignature readASTFileSignature(StringRef PCH);
/// Returns an error if \p Stream does not start with the AST/PCH file
/// magic number 'CPCH'.
static llvm::Error doesntStartWithASTFileMagic(BitstreamCursor &Stream) {
// FIXME checking magic headers is done in other places such as
// SerializedDiagnosticReader and GlobalModuleIndex, but error handling isn't
// always done the same. Unify it all with a helper.
if (!Stream.canSkipToPos(4))
return llvm::createStringError(std::errc::illegal_byte_sequence,
"file too small to contain AST file magic");
for (unsigned C : {'C', 'P', 'C', 'H'})
if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = Stream.Read(8)) {
if (Res.get() != C)
return llvm::createStringError(
std::errc::illegal_byte_sequence,
"file doesn't start with AST file magic");
} else
return Res.takeError();
return llvm::Error::success();
}
static unsigned moduleKindForDiagnostic(ModuleKind Kind) {
switch (Kind) {
case MK_PCH:
return 0; // PCH
case MK_ImplicitModule:
case MK_ExplicitModule:
case MK_PrebuiltModule:
return 1; // module
case MK_MainFile:
case MK_Preamble:
return 2; // main source file
}
llvm_unreachable("unknown module kind");
}
ASTReader::ASTReadResult
ASTReader::ReadASTCore(StringRef FileName,
ModuleKind Type,
SourceLocation ImportLoc,
ModuleFile *ImportedBy,
SmallVectorImpl<ImportedModule> &Loaded,
off_t ExpectedSize, time_t ExpectedModTime,
ASTFileSignature ExpectedSignature,
unsigned ClientLoadCapabilities) {
ModuleFile *M;
std::string ErrorStr;
ModuleManager::AddModuleResult AddResult
= ModuleMgr.addModule(FileName, Type, ImportLoc, ImportedBy,
getGeneration(), ExpectedSize, ExpectedModTime,
ExpectedSignature, readASTFileSignature,
M, ErrorStr);
switch (AddResult) {
case ModuleManager::AlreadyLoaded:
Diag(diag::remark_module_import)
<< M->ModuleName << M->FileName << (ImportedBy ? true : false)
<< (ImportedBy ? StringRef(ImportedBy->ModuleName) : StringRef());
return Success;
case ModuleManager::NewlyLoaded:
// Load module file below.
break;
case ModuleManager::Missing:
// The module file was missing; if the client can handle that, return
// it.
if (ClientLoadCapabilities & ARR_Missing)
return Missing;
// Otherwise, return an error.
Diag(diag::err_ast_file_not_found)
<< moduleKindForDiagnostic(Type) << FileName << !ErrorStr.empty()
<< ErrorStr;
return Failure;
case ModuleManager::OutOfDate:
// We couldn't load the module file because it is out-of-date. If the
// client can handle out-of-date, return it.
if (ClientLoadCapabilities & ARR_OutOfDate)
return OutOfDate;
// Otherwise, return an error.
Diag(diag::err_ast_file_out_of_date)
<< moduleKindForDiagnostic(Type) << FileName << !ErrorStr.empty()
<< ErrorStr;
return Failure;
}
assert(M && "Missing module file");
bool ShouldFinalizePCM = false;
auto FinalizeOrDropPCM = llvm::make_scope_exit([&]() {
auto &MC = getModuleManager().getModuleCache();
if (ShouldFinalizePCM)
MC.finalizePCM(FileName);
else
MC.tryToDropPCM(FileName);
});
ModuleFile &F = *M;
BitstreamCursor &Stream = F.Stream;
Stream = BitstreamCursor(PCHContainerRdr.ExtractPCH(*F.Buffer));
F.SizeInBits = F.Buffer->getBufferSize() * 8;
// Sniff for the signature.
if (llvm::Error Err = doesntStartWithASTFileMagic(Stream)) {
Diag(diag::err_ast_file_invalid)
<< moduleKindForDiagnostic(Type) << FileName << std::move(Err);
return Failure;
}
// This is used for compatibility with older PCH formats.
bool HaveReadControlBlock = false;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
case llvm::BitstreamEntry::Record:
case llvm::BitstreamEntry::EndBlock:
Error("invalid record at top-level of AST file");
return Failure;
case llvm::BitstreamEntry::SubBlock:
break;
}
switch (Entry.ID) {
case CONTROL_BLOCK_ID:
HaveReadControlBlock = true;
switch (ReadControlBlock(F, Loaded, ImportedBy, ClientLoadCapabilities)) {
case Success:
// Check that we didn't try to load a non-module AST file as a module.
//
// FIXME: Should we also perform the converse check? Loading a module as
// a PCH file sort of works, but it's a bit wonky.
if ((Type == MK_ImplicitModule || Type == MK_ExplicitModule ||
Type == MK_PrebuiltModule) &&
F.ModuleName.empty()) {
auto Result = (Type == MK_ImplicitModule) ? OutOfDate : Failure;
if (Result != OutOfDate ||
(ClientLoadCapabilities & ARR_OutOfDate) == 0)
Diag(diag::err_module_file_not_module) << FileName;
return Result;
}
break;
case Failure: return Failure;
case Missing: return Missing;
case OutOfDate: return OutOfDate;
case VersionMismatch: return VersionMismatch;
case ConfigurationMismatch: return ConfigurationMismatch;
case HadErrors: return HadErrors;
}
break;
case AST_BLOCK_ID:
if (!HaveReadControlBlock) {
if ((ClientLoadCapabilities & ARR_VersionMismatch) == 0)
Diag(diag::err_pch_version_too_old);
return VersionMismatch;
}
// Record that we've loaded this module.
Loaded.push_back(ImportedModule(M, ImportedBy, ImportLoc));
ShouldFinalizePCM = true;
return Success;
case UNHASHED_CONTROL_BLOCK_ID:
// This block is handled using look-ahead during ReadControlBlock. We
// shouldn't get here!
Error("malformed block record in AST file");
return Failure;
default:
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
break;
}
}
llvm_unreachable("unexpected break; expected return");
}
ASTReader::ASTReadResult
ASTReader::readUnhashedControlBlock(ModuleFile &F, bool WasImportedBy,
unsigned ClientLoadCapabilities) {
const HeaderSearchOptions &HSOpts =
PP.getHeaderSearchInfo().getHeaderSearchOpts();
bool AllowCompatibleConfigurationMismatch =
F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule;
bool DisableValidation = shouldDisableValidationForFile(F);
ASTReadResult Result = readUnhashedControlBlockImpl(
&F, F.Data, ClientLoadCapabilities, AllowCompatibleConfigurationMismatch,
Listener.get(),
WasImportedBy ? false : HSOpts.ModulesValidateDiagnosticOptions);
// If F was directly imported by another module, it's implicitly validated by
// the importing module.
if (DisableValidation || WasImportedBy ||
(AllowConfigurationMismatch && Result == ConfigurationMismatch))
return Success;
if (Result == Failure) {
Error("malformed block record in AST file");
return Failure;
}
if (Result == OutOfDate && F.Kind == MK_ImplicitModule) {
// If this module has already been finalized in the ModuleCache, we're stuck
// with it; we can only load a single version of each module.
//
// This can happen when a module is imported in two contexts: in one, as a
// user module; in another, as a system module (due to an import from
// another module marked with the [system] flag). It usually indicates a
// bug in the module map: this module should also be marked with [system].
//
// If -Wno-system-headers (the default), and the first import is as a
// system module, then validation will fail during the as-user import,
// since -Werror flags won't have been validated. However, it's reasonable
// to treat this consistently as a system module.
//
// If -Wsystem-headers, the PCM on disk was built with
// -Wno-system-headers, and the first import is as a user module, then
// validation will fail during the as-system import since the PCM on disk
// doesn't guarantee that -Werror was respected. However, the -Werror
// flags were checked during the initial as-user import.
if (getModuleManager().getModuleCache().isPCMFinal(F.FileName)) {
Diag(diag::warn_module_system_bit_conflict) << F.FileName;
return Success;
}
}
return Result;
}
ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
ModuleFile *F, llvm::StringRef StreamData, unsigned ClientLoadCapabilities,
bool AllowCompatibleConfigurationMismatch, ASTReaderListener *Listener,
bool ValidateDiagnosticOptions) {
// Initialize a stream.
BitstreamCursor Stream(StreamData);
// Sniff for the signature.
if (llvm::Error Err = doesntStartWithASTFileMagic(Stream)) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return Failure;
}
// Scan for the UNHASHED_CONTROL_BLOCK_ID block.
if (SkipCursorToBlock(Stream, UNHASHED_CONTROL_BLOCK_ID))
return Failure;
// Read all of the records in the options block.
RecordData Record;
ASTReadResult Result = Success;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
// FIXME this drops the error on the floor.
consumeError(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
case llvm::BitstreamEntry::SubBlock:
return Failure;
case llvm::BitstreamEntry::EndBlock:
return Result;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read and process a record.
Record.clear();
Expected<unsigned> MaybeRecordType = Stream.readRecord(Entry.ID, Record);
if (!MaybeRecordType) {
// FIXME this drops the error.
return Failure;
}
switch ((UnhashedControlBlockRecordTypes)MaybeRecordType.get()) {
case SIGNATURE:
if (F)
F->Signature = ASTFileSignature::create(Record.begin(), Record.end());
break;
case AST_BLOCK_HASH:
if (F)
F->ASTBlockHash =
ASTFileSignature::create(Record.begin(), Record.end());
break;
case DIAGNOSTIC_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
if (Listener && ValidateDiagnosticOptions &&
!AllowCompatibleConfigurationMismatch &&
ParseDiagnosticOptions(Record, Complain, *Listener))
Result = OutOfDate; // Don't return early. Read the signature.
break;
}
case DIAG_PRAGMA_MAPPINGS:
if (!F)
break;
if (F->PragmaDiagMappings.empty())
F->PragmaDiagMappings.swap(Record);
else
F->PragmaDiagMappings.insert(F->PragmaDiagMappings.end(),
Record.begin(), Record.end());
break;
}
}
}
/// Parse a record and blob containing module file extension metadata.
static bool parseModuleFileExtensionMetadata(
const SmallVectorImpl<uint64_t> &Record,
StringRef Blob,
ModuleFileExtensionMetadata &Metadata) {
if (Record.size() < 4) return true;
Metadata.MajorVersion = Record[0];
Metadata.MinorVersion = Record[1];
unsigned BlockNameLen = Record[2];
unsigned UserInfoLen = Record[3];
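// The blob packs the block name immediately followed by the user info
// string; BlockNameLen and UserInfoLen give the split point.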
if (BlockNameLen + UserInfoLen > Blob.size()) return true;
Metadata.BlockName = std::string(Blob.data(), Blob.data() + BlockNameLen);
Metadata.UserInfo = std::string(Blob.data() + BlockNameLen,
Blob.data() + BlockNameLen + UserInfoLen);
return false;
}
ASTReader::ASTReadResult ASTReader::ReadExtensionBlock(ModuleFile &F) {
BitstreamCursor &Stream = F.Stream;
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock:
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
continue;
case llvm::BitstreamEntry::EndBlock:
return Success;
case llvm::BitstreamEntry::Error:
return HadErrors;
case llvm::BitstreamEntry::Record:
break;
}
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecCode =
Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return Failure;
}
switch (MaybeRecCode.get()) {
case EXTENSION_METADATA: {
ModuleFileExtensionMetadata Metadata;
if (parseModuleFileExtensionMetadata(Record, Blob, Metadata)) {
Error("malformed EXTENSION_METADATA in AST file");
return Failure;
}
// Find a module file extension with this block name.
auto Known = ModuleFileExtensions.find(Metadata.BlockName);
if (Known == ModuleFileExtensions.end()) break;
// Form a reader.
if (auto Reader = Known->second->createExtensionReader(Metadata, *this,
F, Stream)) {
F.ExtensionReaders.push_back(std::move(Reader));
}
break;
}
}
}
return Success;
}
void ASTReader::InitializeContext() {
assert(ContextObj && "no context to initialize");
ASTContext &Context = *ContextObj;
// If there's a listener, notify them that we "read" the translation unit.
if (DeserializationListener)
DeserializationListener->DeclRead(PREDEF_DECL_TRANSLATION_UNIT_ID,
Context.getTranslationUnitDecl());
// FIXME: Find a better way to deal with collisions between these
// built-in types. Right now, we just ignore the problem.
// Load the special types.
if (SpecialTypes.size() >= NumSpecialTypeIDs) {
if (unsigned String = SpecialTypes[SPECIAL_TYPE_CF_CONSTANT_STRING]) {
if (!Context.CFConstantStringTypeDecl)
Context.setCFConstantStringType(GetType(String));
}
if (unsigned File = SpecialTypes[SPECIAL_TYPE_FILE]) {
QualType FileType = GetType(File);
if (FileType.isNull()) {
Error("FILE type is NULL");
return;
}
if (!Context.FILEDecl) {
if (const TypedefType *Typedef = FileType->getAs<TypedefType>())
Context.setFILEDecl(Typedef->getDecl());
else {
const TagType *Tag = FileType->getAs<TagType>();
if (!Tag) {
Error("Invalid FILE type in AST file");
return;
}
Context.setFILEDecl(Tag->getDecl());
}
}
}
if (unsigned Jmp_buf = SpecialTypes[SPECIAL_TYPE_JMP_BUF]) {
QualType Jmp_bufType = GetType(Jmp_buf);
if (Jmp_bufType.isNull()) {
Error("jmp_buf type is NULL");
return;
}
if (!Context.jmp_bufDecl) {
if (const TypedefType *Typedef = Jmp_bufType->getAs<TypedefType>())
Context.setjmp_bufDecl(Typedef->getDecl());
else {
const TagType *Tag = Jmp_bufType->getAs<TagType>();
if (!Tag) {
Error("Invalid jmp_buf type in AST file");
return;
}
Context.setjmp_bufDecl(Tag->getDecl());
}
}
}
if (unsigned Sigjmp_buf = SpecialTypes[SPECIAL_TYPE_SIGJMP_BUF]) {
QualType Sigjmp_bufType = GetType(Sigjmp_buf);
if (Sigjmp_bufType.isNull()) {
Error("sigjmp_buf type is NULL");
return;
}
if (!Context.sigjmp_bufDecl) {
if (const TypedefType *Typedef = Sigjmp_bufType->getAs<TypedefType>())
Context.setsigjmp_bufDecl(Typedef->getDecl());
else {
const TagType *Tag = Sigjmp_bufType->getAs<TagType>();
assert(Tag && "Invalid sigjmp_buf type in AST file");
Context.setsigjmp_bufDecl(Tag->getDecl());
}
}
}
if (unsigned ObjCIdRedef
= SpecialTypes[SPECIAL_TYPE_OBJC_ID_REDEFINITION]) {
if (Context.ObjCIdRedefinitionType.isNull())
Context.ObjCIdRedefinitionType = GetType(ObjCIdRedef);
}
if (unsigned ObjCClassRedef
= SpecialTypes[SPECIAL_TYPE_OBJC_CLASS_REDEFINITION]) {
if (Context.ObjCClassRedefinitionType.isNull())
Context.ObjCClassRedefinitionType = GetType(ObjCClassRedef);
}
if (unsigned ObjCSelRedef
= SpecialTypes[SPECIAL_TYPE_OBJC_SEL_REDEFINITION]) {
if (Context.ObjCSelRedefinitionType.isNull())
Context.ObjCSelRedefinitionType = GetType(ObjCSelRedef);
}
if (unsigned Ucontext_t = SpecialTypes[SPECIAL_TYPE_UCONTEXT_T]) {
QualType Ucontext_tType = GetType(Ucontext_t);
if (Ucontext_tType.isNull()) {
Error("ucontext_t type is NULL");
return;
}
if (!Context.ucontext_tDecl) {
if (const TypedefType *Typedef = Ucontext_tType->getAs<TypedefType>())
Context.setucontext_tDecl(Typedef->getDecl());
else {
const TagType *Tag = Ucontext_tType->getAs<TagType>();
assert(Tag && "Invalid ucontext_t type in AST file");
Context.setucontext_tDecl(Tag->getDecl());
}
}
}
}
ReadPragmaDiagnosticMappings(Context.getDiagnostics());
// If there were any CUDA special declarations, deserialize them.
if (!CUDASpecialDeclRefs.empty()) {
assert(CUDASpecialDeclRefs.size() == 1 && "More decl refs than expected!");
Context.setcudaConfigureCallDecl(
cast<FunctionDecl>(GetDecl(CUDASpecialDeclRefs[0])));
}
// Re-export any modules that were imported by a non-module AST file.
// FIXME: This does not make macro-only imports visible again.
for (auto &Import : ImportedModules) {
if (Module *Imported = getSubmodule(Import.ID)) {
makeModuleVisible(Imported, Module::AllVisible,
/*ImportLoc=*/Import.ImportLoc);
if (Import.ImportLoc.isValid())
PP.makeModuleVisible(Imported, Import.ImportLoc);
// This updates visibility for Preprocessor only. For Sema, which can be
// nullptr here, we do the same later, in UpdateSema().
}
}
}
void ASTReader::finalizeForWriting() {
// Nothing to do for now.
}
/// Reads and returns the signature record from \p PCH's control block, or
/// else returns an empty signature.
static ASTFileSignature readASTFileSignature(StringRef PCH) {
BitstreamCursor Stream(PCH);
if (llvm::Error Err = doesntStartWithASTFileMagic(Stream)) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return ASTFileSignature();
}
// Scan for the UNHASHED_CONTROL_BLOCK_ID block.
if (SkipCursorToBlock(Stream, UNHASHED_CONTROL_BLOCK_ID))
return ASTFileSignature();
// Scan for SIGNATURE inside the diagnostic options block.
ASTReader::RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
Stream.advanceSkippingSubblocks();
if (!MaybeEntry) {
// FIXME this drops the error on the floor.
consumeError(MaybeEntry.takeError());
return ASTFileSignature();
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind != llvm::BitstreamEntry::Record)
return ASTFileSignature();
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecord) {
// FIXME this drops the error on the floor.
consumeError(MaybeRecord.takeError());
return ASTFileSignature();
}
if (SIGNATURE == MaybeRecord.get())
return ASTFileSignature::create(Record.begin(),
Record.begin() + ASTFileSignature::size);
}
}
/// Retrieve the name of the original source file directly from the AST
/// file, without actually loading the AST file.
std::string ASTReader::getOriginalSourceFile(
const std::string &ASTFileName, FileManager &FileMgr,
const PCHContainerReader &PCHContainerRdr, DiagnosticsEngine &Diags) {
// Open the AST file.
auto Buffer = FileMgr.getBufferForFile(ASTFileName);
if (!Buffer) {
Diags.Report(diag::err_fe_unable_to_read_pch_file)
<< ASTFileName << Buffer.getError().message();
return std::string();
}
// Initialize the stream
BitstreamCursor Stream(PCHContainerRdr.ExtractPCH(**Buffer));
// Sniff for the signature.
if (llvm::Error Err = doesntStartWithASTFileMagic(Stream)) {
Diags.Report(diag::err_fe_not_a_pch_file) << ASTFileName << std::move(Err);
return std::string();
}
// Scan for the CONTROL_BLOCK_ID block.
if (SkipCursorToBlock(Stream, CONTROL_BLOCK_ID)) {
Diags.Report(diag::err_fe_pch_malformed_block) << ASTFileName;
return std::string();
}
// Scan for ORIGINAL_FILE inside the control block.
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
Stream.advanceSkippingSubblocks();
if (!MaybeEntry) {
// FIXME this drops errors on the floor.
consumeError(MaybeEntry.takeError());
return std::string();
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind == llvm::BitstreamEntry::EndBlock)
return std::string();
if (Entry.Kind != llvm::BitstreamEntry::Record) {
Diags.Report(diag::err_fe_pch_malformed_block) << ASTFileName;
return std::string();
}
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecord) {
// FIXME this drops the errors on the floor.
consumeError(MaybeRecord.takeError());
return std::string();
}
if (ORIGINAL_FILE == MaybeRecord.get())
return Blob.str();
}
}
namespace {
class SimplePCHValidator : public ASTReaderListener {
const LangOptions &ExistingLangOpts;
const TargetOptions &ExistingTargetOpts;
const PreprocessorOptions &ExistingPPOpts;
std::string ExistingModuleCachePath;
FileManager &FileMgr;
public:
SimplePCHValidator(const LangOptions &ExistingLangOpts,
const TargetOptions &ExistingTargetOpts,
const PreprocessorOptions &ExistingPPOpts,
StringRef ExistingModuleCachePath, FileManager &FileMgr)
: ExistingLangOpts(ExistingLangOpts),
ExistingTargetOpts(ExistingTargetOpts),
ExistingPPOpts(ExistingPPOpts),
ExistingModuleCachePath(ExistingModuleCachePath), FileMgr(FileMgr) {}
bool ReadLanguageOptions(const LangOptions &LangOpts, bool Complain,
bool AllowCompatibleDifferences) override {
return checkLanguageOptions(ExistingLangOpts, LangOpts, nullptr,
AllowCompatibleDifferences);
}
bool ReadTargetOptions(const TargetOptions &TargetOpts, bool Complain,
bool AllowCompatibleDifferences) override {
return checkTargetOptions(ExistingTargetOpts, TargetOpts, nullptr,
AllowCompatibleDifferences);
}
bool ReadHeaderSearchOptions(const HeaderSearchOptions &HSOpts,
StringRef SpecificModuleCachePath,
bool Complain) override {
return checkHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
ExistingModuleCachePath, nullptr,
ExistingLangOpts, ExistingPPOpts);
}
bool ReadPreprocessorOptions(const PreprocessorOptions &PPOpts,
bool Complain,
std::string &SuggestedPredefines) override {
return checkPreprocessorOptions(ExistingPPOpts, PPOpts, nullptr, FileMgr,
SuggestedPredefines, ExistingLangOpts);
}
};
} // namespace
bool ASTReader::readASTFileControlBlock(
StringRef Filename, FileManager &FileMgr,
const PCHContainerReader &PCHContainerRdr,
bool FindModuleFileExtensions,
ASTReaderListener &Listener, bool ValidateDiagnosticOptions) {
// Open the AST file.
// FIXME: This allows use of the VFS; we do not allow use of the
// VFS when actually loading a module.
auto Buffer = FileMgr.getBufferForFile(Filename);
if (!Buffer) {
return true;
}
// Initialize the stream
StringRef Bytes = PCHContainerRdr.ExtractPCH(**Buffer);
BitstreamCursor Stream(Bytes);
// Sniff for the signature.
if (llvm::Error Err = doesntStartWithASTFileMagic(Stream)) {
consumeError(std::move(Err)); // FIXME this drops errors on the floor.
return true;
}
// Scan for the CONTROL_BLOCK_ID block.
if (SkipCursorToBlock(Stream, CONTROL_BLOCK_ID))
return true;
bool NeedsInputFiles = Listener.needsInputFileVisitation();
bool NeedsSystemInputFiles = Listener.needsSystemInputFileVisitation();
bool NeedsImports = Listener.needsImportVisitation();
BitstreamCursor InputFilesCursor;
RecordData Record;
std::string ModuleDir;
bool DoneWithControlBlock = false;
while (!DoneWithControlBlock) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
// FIXME this drops the error on the floor.
consumeError(MaybeEntry.takeError());
return true;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: {
switch (Entry.ID) {
case OPTIONS_BLOCK_ID: {
std::string IgnoredSuggestedPredefines;
if (ReadOptionsBlock(Stream, ARR_ConfigurationMismatch | ARR_OutOfDate,
/*AllowCompatibleConfigurationMismatch*/ false,
Listener, IgnoredSuggestedPredefines) != Success)
return true;
break;
}
case INPUT_FILES_BLOCK_ID:
InputFilesCursor = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return true;
}
if (NeedsInputFiles &&
ReadBlockAbbrevs(InputFilesCursor, INPUT_FILES_BLOCK_ID))
return true;
break;
default:
if (llvm::Error Err = Stream.SkipBlock()) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return true;
}
break;
}
continue;
}
case llvm::BitstreamEntry::EndBlock:
DoneWithControlBlock = true;
break;
case llvm::BitstreamEntry::Error:
return true;
case llvm::BitstreamEntry::Record:
break;
}
if (DoneWithControlBlock) break;
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecCode =
Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecCode) {
// FIXME this drops the error.
return true;
}
switch ((ControlRecordTypes)MaybeRecCode.get()) {
case METADATA:
if (Record[0] != VERSION_MAJOR)
return true;
if (Listener.ReadFullVersionInformation(Blob))
return true;
break;
case MODULE_NAME:
Listener.ReadModuleName(Blob);
break;
case MODULE_DIRECTORY:
ModuleDir = std::string(Blob);
break;
case MODULE_MAP_FILE: {
unsigned Idx = 0;
auto Path = ReadString(Record, Idx);
ResolveImportedPath(Path, ModuleDir);
Listener.ReadModuleMapFile(Path);
break;
}
case INPUT_FILE_OFFSETS: {
if (!NeedsInputFiles)
break;
unsigned NumInputFiles = Record[0];
unsigned NumUserFiles = Record[1];
const llvm::support::unaligned_uint64_t *InputFileOffs =
(const llvm::support::unaligned_uint64_t *)Blob.data();
for (unsigned I = 0; I != NumInputFiles; ++I) {
// Go find this input file.
bool isSystemFile = I >= NumUserFiles;
if (isSystemFile && !NeedsSystemInputFiles)
break; // the rest are system input files
BitstreamCursor &Cursor = InputFilesCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(InputFileOffs[I])) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
}
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
// FIXME this drops errors on the floor.
consumeError(MaybeCode.takeError());
}
unsigned Code = MaybeCode.get();
RecordData Record;
StringRef Blob;
bool shouldContinue = false;
Expected<unsigned> MaybeRecordType =
Cursor.readRecord(Code, Record, &Blob);
if (!MaybeRecordType) {
// FIXME this drops errors on the floor.
consumeError(MaybeRecordType.takeError());
}
switch ((InputFileRecordTypes)MaybeRecordType.get()) {
case INPUT_FILE_HASH:
break;
case INPUT_FILE:
bool Overridden = static_cast<bool>(Record[3]);
std::string Filename = std::string(Blob);
ResolveImportedPath(Filename, ModuleDir);
shouldContinue = Listener.visitInputFile(
Filename, isSystemFile, Overridden, /*IsExplicitModule*/false);
break;
}
if (!shouldContinue)
break;
}
break;
}
case IMPORTS: {
if (!NeedsImports)
break;
unsigned Idx = 0, N = Record.size();
while (Idx < N) {
// Read information about the AST file.
Idx +=
1 + 1 + 1 + 1 +
ASTFileSignature::size; // Kind, ImportLoc, Size, ModTime, Signature
std::string ModuleName = ReadString(Record, Idx);
std::string Filename = ReadString(Record, Idx);
ResolveImportedPath(Filename, ModuleDir);
Listener.visitImport(ModuleName, Filename);
}
break;
}
default:
// No other validation to perform.
break;
}
}
// Look for module file extension blocks, if requested.
if (FindModuleFileExtensions) {
BitstreamCursor SavedStream = Stream;
while (!SkipCursorToBlock(Stream, EXTENSION_BLOCK_ID)) {
bool DoneWithExtensionBlock = false;
while (!DoneWithExtensionBlock) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
// FIXME this drops the error.
return true;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock:
if (llvm::Error Err = Stream.SkipBlock()) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return true;
}
continue;
case llvm::BitstreamEntry::EndBlock:
DoneWithExtensionBlock = true;
continue;
case llvm::BitstreamEntry::Error:
return true;
case llvm::BitstreamEntry::Record:
break;
}
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecCode =
Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecCode) {
// FIXME this drops the error.
return true;
}
switch (MaybeRecCode.get()) {
case EXTENSION_METADATA: {
ModuleFileExtensionMetadata Metadata;
if (parseModuleFileExtensionMetadata(Record, Blob, Metadata))
return true;
Listener.readModuleFileExtension(Metadata);
break;
}
}
}
}
Stream = SavedStream;
}
// Scan for the UNHASHED_CONTROL_BLOCK_ID block.
if (readUnhashedControlBlockImpl(
nullptr, Bytes, ARR_ConfigurationMismatch | ARR_OutOfDate,
/*AllowCompatibleConfigurationMismatch*/ false, &Listener,
ValidateDiagnosticOptions) != Success)
return true;
return false;
}
bool ASTReader::isAcceptableASTFile(StringRef Filename, FileManager &FileMgr,
const PCHContainerReader &PCHContainerRdr,
const LangOptions &LangOpts,
const TargetOptions &TargetOpts,
const PreprocessorOptions &PPOpts,
StringRef ExistingModuleCachePath) {
SimplePCHValidator validator(LangOpts, TargetOpts, PPOpts,
ExistingModuleCachePath, FileMgr);
return !readASTFileControlBlock(Filename, FileMgr, PCHContainerRdr,
/*FindModuleFileExtensions=*/false,
validator,
/*ValidateDiagnosticOptions=*/true);
}
ASTReader::ASTReadResult
ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
// Enter the submodule block.
if (llvm::Error Err = F.Stream.EnterSubBlock(SUBMODULE_BLOCK_ID)) {
Error(std::move(Err));
return Failure;
}
ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
bool First = true;
Module *CurrentModule = nullptr;
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
F.Stream.advanceSkippingSubblocks();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return Failure;
case llvm::BitstreamEntry::EndBlock:
return Success;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read a record.
StringRef Blob;
Record.clear();
Expected<unsigned> MaybeKind = F.Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeKind) {
Error(MaybeKind.takeError());
return Failure;
}
unsigned Kind = MaybeKind.get();
if ((Kind == SUBMODULE_METADATA) != First) {
Error("submodule metadata record should be at beginning of block");
return Failure;
}
First = false;
// Submodule information is only valid if we have a current module.
// FIXME: Should we error on these cases?
if (!CurrentModule && Kind != SUBMODULE_METADATA &&
Kind != SUBMODULE_DEFINITION)
continue;
switch (Kind) {
default: // Default behavior: ignore.
break;
case SUBMODULE_DEFINITION: {
if (Record.size() < 12) {
Error("malformed module definition");
return Failure;
}
StringRef Name = Blob;
unsigned Idx = 0;
SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx++]);
SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]);
Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++];
bool IsFramework = Record[Idx++];
bool IsExplicit = Record[Idx++];
bool IsSystem = Record[Idx++];
bool IsExternC = Record[Idx++];
bool InferSubmodules = Record[Idx++];
bool InferExplicitSubmodules = Record[Idx++];
bool InferExportWildcard = Record[Idx++];
bool ConfigMacrosExhaustive = Record[Idx++];
bool ModuleMapIsPrivate = Record[Idx++];
Module *ParentModule = nullptr;
if (Parent)
ParentModule = getSubmodule(Parent);
// Retrieve this (sub)module from the module map, creating it if
// necessary.
CurrentModule =
ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit)
.first;
// FIXME: set the definition loc for CurrentModule, or call
// ModMap.setInferredModuleAllowedBy()
SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
if (GlobalIndex >= SubmodulesLoaded.size() ||
SubmodulesLoaded[GlobalIndex]) {
Error("too many submodules");
return Failure;
}
if (!ParentModule) {
if (const FileEntry *CurFile = CurrentModule->getASTFile()) {
// Don't emit module relocation error if we have -fno-validate-pch
if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation &
DisableValidationForModuleKind::Module) &&
CurFile != F.File) {
Error(diag::err_module_file_conflict,
CurrentModule->getTopLevelModuleName(), CurFile->getName(),
F.File->getName());
return Failure;
}
}
F.DidReadTopLevelSubmodule = true;
CurrentModule->setASTFile(F.File);
CurrentModule->PresumedModuleMapFile = F.ModuleMapPath;
}
CurrentModule->Kind = Kind;
CurrentModule->Signature = F.Signature;
CurrentModule->IsFromModuleFile = true;
CurrentModule->IsSystem = IsSystem || CurrentModule->IsSystem;
CurrentModule->IsExternC = IsExternC;
CurrentModule->InferSubmodules = InferSubmodules;
CurrentModule->InferExplicitSubmodules = InferExplicitSubmodules;
CurrentModule->InferExportWildcard = InferExportWildcard;
CurrentModule->ConfigMacrosExhaustive = ConfigMacrosExhaustive;
CurrentModule->ModuleMapIsPrivate = ModuleMapIsPrivate;
if (DeserializationListener)
DeserializationListener->ModuleRead(GlobalID, CurrentModule);
SubmodulesLoaded[GlobalIndex] = CurrentModule;
// Clear out data that will be replaced by what is in the module file.
CurrentModule->LinkLibraries.clear();
CurrentModule->ConfigMacros.clear();
CurrentModule->UnresolvedConflicts.clear();
CurrentModule->Conflicts.clear();
// The module is available unless it's missing a requirement; relevant
// requirements will be (re-)added by SUBMODULE_REQUIRES records.
// Missing headers that were present when the module was built do not
// make it unavailable -- if we got this far, this must be an explicitly
// imported module file.
CurrentModule->Requirements.clear();
CurrentModule->MissingHeaders.clear();
CurrentModule->IsUnimportable =
ParentModule && ParentModule->IsUnimportable;
CurrentModule->IsAvailable = !CurrentModule->IsUnimportable;
break;
}
case SUBMODULE_UMBRELLA_HEADER: {
std::string Filename = std::string(Blob);
ResolveImportedPath(F, Filename);
if (auto Umbrella = PP.getFileManager().getFile(Filename)) {
if (!CurrentModule->getUmbrellaHeader())
// FIXME: NameAsWritten
ModMap.setUmbrellaHeader(CurrentModule, *Umbrella, Blob, "");
else if (CurrentModule->getUmbrellaHeader().Entry != *Umbrella) {
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
Error("mismatched umbrella headers in submodule");
return OutOfDate;
}
}
break;
}
case SUBMODULE_HEADER:
case SUBMODULE_EXCLUDED_HEADER:
case SUBMODULE_PRIVATE_HEADER:
// We lazily associate headers with their modules via the HeaderInfo table.
// FIXME: Re-evaluate this section; maybe only store InputFile IDs instead
// of complete filenames or remove it entirely.
break;
case SUBMODULE_TEXTUAL_HEADER:
case SUBMODULE_PRIVATE_TEXTUAL_HEADER:
// FIXME: Textual headers are not marked in the HeaderInfo table. Load
// them here.
break;
case SUBMODULE_TOPHEADER:
CurrentModule->addTopHeaderFilename(Blob);
break;
case SUBMODULE_UMBRELLA_DIR: {
std::string Dirname = std::string(Blob);
ResolveImportedPath(F, Dirname);
if (auto Umbrella = PP.getFileManager().getDirectory(Dirname)) {
if (!CurrentModule->getUmbrellaDir())
// FIXME: NameAsWritten
ModMap.setUmbrellaDir(CurrentModule, *Umbrella, Blob, "");
else if (CurrentModule->getUmbrellaDir().Entry != *Umbrella) {
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
Error("mismatched umbrella directories in submodule");
return OutOfDate;
}
}
break;
}
case SUBMODULE_METADATA: {
F.BaseSubmoduleID = getTotalNumSubmodules();
F.LocalNumSubmodules = Record[0];
unsigned LocalBaseSubmoduleID = Record[1];
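// Record the offset between this file's local submodule ID space and the
// global ID space so local IDs can be remapped on the fly.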
if (F.LocalNumSubmodules > 0) {
// Introduce the global -> local mapping for submodules within this
// module.
GlobalSubmoduleMap.insert(std::make_pair(getTotalNumSubmodules()+1,&F));
// Introduce the local -> global mapping for submodules within this
// module.
F.SubmoduleRemap.insertOrReplace(
std::make_pair(LocalBaseSubmoduleID,
F.BaseSubmoduleID - LocalBaseSubmoduleID));
SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules);
}
break;
}
case SUBMODULE_IMPORTS:
for (unsigned Idx = 0; Idx != Record.size(); ++Idx) {
UnresolvedModuleRef Unresolved;
Unresolved.File = &F;
Unresolved.Mod = CurrentModule;
Unresolved.ID = Record[Idx];
Unresolved.Kind = UnresolvedModuleRef::Import;
Unresolved.IsWildcard = false;
UnresolvedModuleRefs.push_back(Unresolved);
}
break;
case SUBMODULE_EXPORTS:
for (unsigned Idx = 0; Idx + 1 < Record.size(); Idx += 2) {
UnresolvedModuleRef Unresolved;
Unresolved.File = &F;
Unresolved.Mod = CurrentModule;
Unresolved.ID = Record[Idx];
Unresolved.Kind = UnresolvedModuleRef::Export;
Unresolved.IsWildcard = Record[Idx + 1];
UnresolvedModuleRefs.push_back(Unresolved);
}
// Once we've loaded the set of exports, there's no reason to keep
// the parsed, unresolved exports around.
CurrentModule->UnresolvedExports.clear();
break;
case SUBMODULE_REQUIRES:
CurrentModule->addRequirement(Blob, Record[0], PP.getLangOpts(),
PP.getTargetInfo());
break;
case SUBMODULE_LINK_LIBRARY:
ModMap.resolveLinkAsDependencies(CurrentModule);
CurrentModule->LinkLibraries.push_back(
Module::LinkLibrary(std::string(Blob), Record[0]));
break;
case SUBMODULE_CONFIG_MACRO:
CurrentModule->ConfigMacros.push_back(Blob.str());
break;
case SUBMODULE_CONFLICT: {
UnresolvedModuleRef Unresolved;
Unresolved.File = &F;
Unresolved.Mod = CurrentModule;
Unresolved.ID = Record[0];
Unresolved.Kind = UnresolvedModuleRef::Conflict;
Unresolved.IsWildcard = false;
Unresolved.String = Blob;
UnresolvedModuleRefs.push_back(Unresolved);
break;
}
case SUBMODULE_INITIALIZERS: {
if (!ContextObj)
break;
SmallVector<uint32_t, 16> Inits;
for (auto &ID : Record)
Inits.push_back(getGlobalDeclID(F, ID));
ContextObj->addLazyModuleInitializers(CurrentModule, Inits);
break;
}
case SUBMODULE_EXPORT_AS:
CurrentModule->ExportAsModule = Blob.str();
ModMap.addLinkAsDependency(CurrentModule);
break;
}
}
}
/// Parse the record that corresponds to a LangOptions data
/// structure.
///
/// This routine parses the language options from the AST file and then gives
/// them to the AST listener if one is set.
///
/// \returns true if the listener deems the file unacceptable, false otherwise.
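///
/// The record is consumed in a fixed order: one value per LANGOPT and
/// ENUM_LANGOPT entry, the sanitizer flags, the module features, the
/// Objective-C runtime kind and version, the current module name, the
/// comment options, and the OpenMP offloading options.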
bool ASTReader::ParseLanguageOptions(const RecordData &Record,
bool Complain,
ASTReaderListener &Listener,
bool AllowCompatibleDifferences) {
LangOptions LangOpts;
unsigned Idx = 0;
#define LANGOPT(Name, Bits, Default, Description) \
LangOpts.Name = Record[Idx++];
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
LangOpts.set##Name(static_cast<LangOptions::Type>(Record[Idx++]));
#include "clang/Basic/LangOptions.def"
#define SANITIZER(NAME, ID) \
LangOpts.Sanitize.set(SanitizerKind::ID, Record[Idx++]);
#include "clang/Basic/Sanitizers.def"
for (unsigned N = Record[Idx++]; N; --N)
LangOpts.ModuleFeatures.push_back(ReadString(Record, Idx));
ObjCRuntime::Kind runtimeKind = (ObjCRuntime::Kind) Record[Idx++];
VersionTuple runtimeVersion = ReadVersionTuple(Record, Idx);
LangOpts.ObjCRuntime = ObjCRuntime(runtimeKind, runtimeVersion);
LangOpts.CurrentModule = ReadString(Record, Idx);
// Comment options.
for (unsigned N = Record[Idx++]; N; --N) {
LangOpts.CommentOpts.BlockCommandNames.push_back(
ReadString(Record, Idx));
}
LangOpts.CommentOpts.ParseAllComments = Record[Idx++];
// OpenMP offloading options.
for (unsigned N = Record[Idx++]; N; --N) {
LangOpts.OMPTargetTriples.push_back(llvm::Triple(ReadString(Record, Idx)));
}
LangOpts.OMPHostIRFile = ReadString(Record, Idx);
return Listener.ReadLanguageOptions(LangOpts, Complain,
AllowCompatibleDifferences);
}
bool ASTReader::ParseTargetOptions(const RecordData &Record, bool Complain,
ASTReaderListener &Listener,
bool AllowCompatibleDifferences) {
unsigned Idx = 0;
TargetOptions TargetOpts;
TargetOpts.Triple = ReadString(Record, Idx);
TargetOpts.CPU = ReadString(Record, Idx);
TargetOpts.TuneCPU = ReadString(Record, Idx);
TargetOpts.ABI = ReadString(Record, Idx);
for (unsigned N = Record[Idx++]; N; --N) {
TargetOpts.FeaturesAsWritten.push_back(ReadString(Record, Idx));
}
for (unsigned N = Record[Idx++]; N; --N) {
TargetOpts.Features.push_back(ReadString(Record, Idx));
}
return Listener.ReadTargetOptions(TargetOpts, Complain,
AllowCompatibleDifferences);
}
bool ASTReader::ParseDiagnosticOptions(const RecordData &Record, bool Complain,
ASTReaderListener &Listener) {
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions);
unsigned Idx = 0;
#define DIAGOPT(Name, Bits, Default) DiagOpts->Name = Record[Idx++];
#define ENUM_DIAGOPT(Name, Type, Bits, Default) \
DiagOpts->set##Name(static_cast<Type>(Record[Idx++]));
#include "clang/Basic/DiagnosticOptions.def"
for (unsigned N = Record[Idx++]; N; --N)
DiagOpts->Warnings.push_back(ReadString(Record, Idx));
for (unsigned N = Record[Idx++]; N; --N)
DiagOpts->Remarks.push_back(ReadString(Record, Idx));
return Listener.ReadDiagnosticOptions(DiagOpts, Complain);
}
bool ASTReader::ParseFileSystemOptions(const RecordData &Record, bool Complain,
ASTReaderListener &Listener) {
FileSystemOptions FSOpts;
unsigned Idx = 0;
FSOpts.WorkingDir = ReadString(Record, Idx);
return Listener.ReadFileSystemOptions(FSOpts, Complain);
}
bool ASTReader::ParseHeaderSearchOptions(const RecordData &Record,
bool Complain,
ASTReaderListener &Listener) {
HeaderSearchOptions HSOpts;
unsigned Idx = 0;
HSOpts.Sysroot = ReadString(Record, Idx);
// Include entries.
for (unsigned N = Record[Idx++]; N; --N) {
std::string Path = ReadString(Record, Idx);
frontend::IncludeDirGroup Group
= static_cast<frontend::IncludeDirGroup>(Record[Idx++]);
bool IsFramework = Record[Idx++];
bool IgnoreSysRoot = Record[Idx++];
HSOpts.UserEntries.emplace_back(std::move(Path), Group, IsFramework,
IgnoreSysRoot);
}
// System header prefixes.
for (unsigned N = Record[Idx++]; N; --N) {
std::string Prefix = ReadString(Record, Idx);
bool IsSystemHeader = Record[Idx++];
HSOpts.SystemHeaderPrefixes.emplace_back(std::move(Prefix), IsSystemHeader);
}
HSOpts.ResourceDir = ReadString(Record, Idx);
HSOpts.ModuleCachePath = ReadString(Record, Idx);
HSOpts.ModuleUserBuildPath = ReadString(Record, Idx);
HSOpts.DisableModuleHash = Record[Idx++];
HSOpts.ImplicitModuleMaps = Record[Idx++];
HSOpts.ModuleMapFileHomeIsCwd = Record[Idx++];
HSOpts.EnablePrebuiltImplicitModules = Record[Idx++];
HSOpts.UseBuiltinIncludes = Record[Idx++];
HSOpts.UseStandardSystemIncludes = Record[Idx++];
HSOpts.UseStandardCXXIncludes = Record[Idx++];
HSOpts.UseLibcxx = Record[Idx++];
std::string SpecificModuleCachePath = ReadString(Record, Idx);
return Listener.ReadHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
Complain);
}
bool ASTReader::ParsePreprocessorOptions(const RecordData &Record,
bool Complain,
ASTReaderListener &Listener,
std::string &SuggestedPredefines) {
PreprocessorOptions PPOpts;
unsigned Idx = 0;
// Macro definitions/undefs
for (unsigned N = Record[Idx++]; N; --N) {
std::string Macro = ReadString(Record, Idx);
bool IsUndef = Record[Idx++];
PPOpts.Macros.push_back(std::make_pair(Macro, IsUndef));
}
// Includes
for (unsigned N = Record[Idx++]; N; --N) {
PPOpts.Includes.push_back(ReadString(Record, Idx));
}
// Macro Includes
for (unsigned N = Record[Idx++]; N; --N) {
PPOpts.MacroIncludes.push_back(ReadString(Record, Idx));
}
PPOpts.UsePredefines = Record[Idx++];
PPOpts.DetailedRecord = Record[Idx++];
PPOpts.ImplicitPCHInclude = ReadString(Record, Idx);
PPOpts.ObjCXXARCStandardLibrary =
static_cast<ObjCXXARCStandardLibraryKind>(Record[Idx++]);
SuggestedPredefines.clear();
return Listener.ReadPreprocessorOptions(PPOpts, Complain,
SuggestedPredefines);
}
std::pair<ModuleFile *, unsigned>
ASTReader::getModulePreprocessedEntity(unsigned GlobalIndex) {
GlobalPreprocessedEntityMapType::iterator
I = GlobalPreprocessedEntityMap.find(GlobalIndex);
assert(I != GlobalPreprocessedEntityMap.end() &&
"Corrupted global preprocessed entity map");
ModuleFile *M = I->second;
unsigned LocalIndex = GlobalIndex - M->BasePreprocessedEntityID;
return std::make_pair(M, LocalIndex);
}
llvm::iterator_range<PreprocessingRecord::iterator>
ASTReader::getModulePreprocessedEntities(ModuleFile &Mod) const {
if (PreprocessingRecord *PPRec = PP.getPreprocessingRecord())
return PPRec->getIteratorsForLoadedRange(Mod.BasePreprocessedEntityID,
Mod.NumPreprocessedEntities);
return llvm::make_range(PreprocessingRecord::iterator(),
PreprocessingRecord::iterator());
}
bool ASTReader::canRecoverFromOutOfDate(StringRef ModuleFileName,
unsigned int ClientLoadCapabilities) {
return ClientLoadCapabilities & ARR_OutOfDate &&
!getModuleManager().getModuleCache().isPCMFinal(ModuleFileName);
}
llvm::iterator_range<ASTReader::ModuleDeclIterator>
ASTReader::getModuleFileLevelDecls(ModuleFile &Mod) {
return llvm::make_range(
ModuleDeclIterator(this, &Mod, Mod.FileSortedDecls),
ModuleDeclIterator(this, &Mod,
Mod.FileSortedDecls + Mod.NumFileSortedDecls));
}
SourceRange ASTReader::ReadSkippedRange(unsigned GlobalIndex) {
auto I = GlobalSkippedRangeMap.find(GlobalIndex);
assert(I != GlobalSkippedRangeMap.end() &&
"Corrupted global skipped range map");
ModuleFile *M = I->second;
unsigned LocalIndex = GlobalIndex - M->BasePreprocessedSkippedRangeID;
assert(LocalIndex < M->NumPreprocessedSkippedRanges);
PPSkippedRange RawRange = M->PreprocessedSkippedRangeOffsets[LocalIndex];
SourceRange Range(TranslateSourceLocation(*M, RawRange.getBegin()),
TranslateSourceLocation(*M, RawRange.getEnd()));
assert(Range.isValid());
return Range;
}
PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) {
PreprocessedEntityID PPID = Index+1;
std::pair<ModuleFile *, unsigned> PPInfo = getModulePreprocessedEntity(Index);
ModuleFile &M = *PPInfo.first;
unsigned LocalIndex = PPInfo.second;
const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex];
if (!PP.getPreprocessingRecord()) {
Error("no preprocessing record");
return nullptr;
}
SavedStreamPosition SavedPosition(M.PreprocessorDetailCursor);
if (llvm::Error Err = M.PreprocessorDetailCursor.JumpToBit(
M.MacroOffsetsBase + PPOffs.BitOffset)) {
Error(std::move(Err));
return nullptr;
}
Expected<llvm::BitstreamEntry> MaybeEntry =
M.PreprocessorDetailCursor.advance(BitstreamCursor::AF_DontPopBlockAtEnd);
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return nullptr;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind != llvm::BitstreamEntry::Record)
return nullptr;
// Read the record.
SourceRange Range(TranslateSourceLocation(M, PPOffs.getBegin()),
TranslateSourceLocation(M, PPOffs.getEnd()));
PreprocessingRecord &PPRec = *PP.getPreprocessingRecord();
StringRef Blob;
RecordData Record;
Expected<unsigned> MaybeRecType =
M.PreprocessorDetailCursor.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecType) {
Error(MaybeRecType.takeError());
return nullptr;
}
switch ((PreprocessorDetailRecordTypes)MaybeRecType.get()) {
case PPD_MACRO_EXPANSION: {
bool isBuiltin = Record[0];
IdentifierInfo *Name = nullptr;
MacroDefinitionRecord *Def = nullptr;
if (isBuiltin)
Name = getLocalIdentifier(M, Record[1]);
else {
PreprocessedEntityID GlobalID =
getGlobalPreprocessedEntityID(M, Record[1]);
Def = cast<MacroDefinitionRecord>(
PPRec.getLoadedPreprocessedEntity(GlobalID - 1));
}
MacroExpansion *ME;
if (isBuiltin)
ME = new (PPRec) MacroExpansion(Name, Range);
else
ME = new (PPRec) MacroExpansion(Def, Range);
return ME;
}
case PPD_MACRO_DEFINITION: {
// Decode the identifier info and then check again; if the macro is
// still defined and associated with the identifier, record its definition.
IdentifierInfo *II = getLocalIdentifier(M, Record[0]);
MacroDefinitionRecord *MD = new (PPRec) MacroDefinitionRecord(II, Range);
if (DeserializationListener)
DeserializationListener->MacroDefinitionRead(PPID, MD);
return MD;
}
case PPD_INCLUSION_DIRECTIVE: {
const char *FullFileNameStart = Blob.data() + Record[0];
StringRef FullFileName(FullFileNameStart, Blob.size() - Record[0]);
const FileEntry *File = nullptr;
if (!FullFileName.empty())
if (auto FE = PP.getFileManager().getFile(FullFileName))
File = *FE;
// FIXME: Stable encoding
InclusionDirective::InclusionKind Kind
= static_cast<InclusionDirective::InclusionKind>(Record[2]);
InclusionDirective *ID
= new (PPRec) InclusionDirective(PPRec, Kind,
StringRef(Blob.data(), Record[0]),
Record[1], Record[3],
File,
Range);
return ID;
}
}
llvm_unreachable("Invalid PreprocessorDetailRecordTypes");
}
/// Find the next module that contains entities and return the ID
/// of the first entry.
///
/// \param SLocMapI points at a chunk of a module that contains no
/// preprocessed entities or the entities it contains are not the ones we are
/// looking for.
PreprocessedEntityID ASTReader::findNextPreprocessedEntity(
GlobalSLocOffsetMapType::const_iterator SLocMapI) const {
++SLocMapI;
for (GlobalSLocOffsetMapType::const_iterator
EndI = GlobalSLocOffsetMap.end(); SLocMapI != EndI; ++SLocMapI) {
ModuleFile &M = *SLocMapI->second;
if (M.NumPreprocessedEntities)
return M.BasePreprocessedEntityID;
}
return getTotalNumPreprocessedEntities();
}
namespace {
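/// Comparator that orders preprocessed-entity offsets by their translated
/// begin location, for the searches in findPreprocessedEntity().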
struct PPEntityComp {
const ASTReader &Reader;
ModuleFile &M;
PPEntityComp(const ASTReader &Reader, ModuleFile &M) : Reader(Reader), M(M) {}
bool operator()(const PPEntityOffset &L, const PPEntityOffset &R) const {
SourceLocation LHS = getLoc(L);
SourceLocation RHS = getLoc(R);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
bool operator()(const PPEntityOffset &L, SourceLocation RHS) const {
SourceLocation LHS = getLoc(L);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
bool operator()(SourceLocation LHS, const PPEntityOffset &R) const {
SourceLocation RHS = getLoc(R);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
SourceLocation getLoc(const PPEntityOffset &PPE) const {
return Reader.TranslateSourceLocation(M, PPE.getBegin());
}
};
} // namespace
PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc,
bool EndsAfter) const {
if (SourceMgr.isLocalSourceLocation(Loc))
return getTotalNumPreprocessedEntities();
GlobalSLocOffsetMapType::const_iterator SLocMapI = GlobalSLocOffsetMap.find(
SourceManager::MaxLoadedOffset - Loc.getOffset() - 1);
assert(SLocMapI != GlobalSLocOffsetMap.end() &&
"Corrupted global sloc offset map");
if (SLocMapI->second->NumPreprocessedEntities == 0)
return findNextPreprocessedEntity(SLocMapI);
ModuleFile &M = *SLocMapI->second;
using pp_iterator = const PPEntityOffset *;
pp_iterator pp_begin = M.PreprocessedEntityOffsets;
pp_iterator pp_end = pp_begin + M.NumPreprocessedEntities;
size_t Count = M.NumPreprocessedEntities;
size_t Half;
pp_iterator First = pp_begin;
pp_iterator PPI;
if (EndsAfter) {
PPI = std::upper_bound(pp_begin, pp_end, Loc,
PPEntityComp(*this, M));
} else {
// Do a binary search manually instead of using std::lower_bound because
// the end locations of entities may be unordered (when a macro expansion
// is inside another macro argument), but for this case it is not important
// whether we get the first macro expansion or its containing macro.
while (Count > 0) {
Half = Count / 2;
PPI = First;
std::advance(PPI, Half);
if (SourceMgr.isBeforeInTranslationUnit(
TranslateSourceLocation(M, PPI->getEnd()), Loc)) {
First = PPI;
++First;
Count = Count - Half - 1;
} else
Count = Half;
}
}
if (PPI == pp_end)
return findNextPreprocessedEntity(SLocMapI);
return M.BasePreprocessedEntityID + (PPI - pp_begin);
}
/// Returns a pair of [Begin, End) indices of preallocated
/// preprocessed entities that \arg Range encompasses.
std::pair<unsigned, unsigned>
ASTReader::findPreprocessedEntitiesInRange(SourceRange Range) {
if (Range.isInvalid())
return std::make_pair(0,0);
assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin()));
PreprocessedEntityID BeginID =
findPreprocessedEntity(Range.getBegin(), false);
PreprocessedEntityID EndID = findPreprocessedEntity(Range.getEnd(), true);
return std::make_pair(BeginID, EndID);
}
/// Returns whether the preallocated preprocessed entity with index
/// \arg Index came from file \arg FID, if that can be determined.
Optional<bool> ASTReader::isPreprocessedEntityInFileID(unsigned Index,
FileID FID) {
if (FID.isInvalid())
return false;
std::pair<ModuleFile *, unsigned> PPInfo = getModulePreprocessedEntity(Index);
ModuleFile &M = *PPInfo.first;
unsigned LocalIndex = PPInfo.second;
const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex];
SourceLocation Loc = TranslateSourceLocation(M, PPOffs.getBegin());
if (Loc.isInvalid())
return false;
if (SourceMgr.isInFileID(SourceMgr.getFileLoc(Loc), FID))
return true;
else
return false;
}
namespace {
/// Visitor used to search for information about a header file.
class HeaderFileInfoVisitor {
const FileEntry *FE;
Optional<HeaderFileInfo> HFI;
public:
explicit HeaderFileInfoVisitor(const FileEntry *FE) : FE(FE) {}
bool operator()(ModuleFile &M) {
HeaderFileInfoLookupTable *Table
= static_cast<HeaderFileInfoLookupTable *>(M.HeaderFileInfoTable);
if (!Table)
return false;
// Look in the on-disk hash table for an entry for this file name.
HeaderFileInfoLookupTable::iterator Pos = Table->find(FE);
if (Pos == Table->end())
return false;
HFI = *Pos;
return true;
}
Optional<HeaderFileInfo> getHeaderFileInfo() const { return HFI; }
};
} // namespace
HeaderFileInfo ASTReader::GetHeaderFileInfo(const FileEntry *FE) {
HeaderFileInfoVisitor Visitor(FE);
ModuleMgr.visit(Visitor);
if (Optional<HeaderFileInfo> HFI = Visitor.getHeaderFileInfo())
return *HFI;
return HeaderFileInfo();
}
void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) {
using DiagState = DiagnosticsEngine::DiagState;
SmallVector<DiagState *, 32> DiagStates;
for (ModuleFile &F : ModuleMgr) {
unsigned Idx = 0;
auto &Record = F.PragmaDiagMappings;
if (Record.empty())
continue;
DiagStates.clear();
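// Each serialized DiagState is either a back-reference (a non-zero index
// into DiagStates) or a new state derived from BasedOn: a count followed
// by that many (diagnostic ID, serialized mapping) pairs.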
auto ReadDiagState =
[&](const DiagState &BasedOn, SourceLocation Loc,
bool IncludeNonPragmaStates) -> DiagnosticsEngine::DiagState * {
unsigned BackrefID = Record[Idx++];
if (BackrefID != 0)
return DiagStates[BackrefID - 1];
// A new DiagState was created here.
Diag.DiagStates.push_back(BasedOn);
DiagState *NewState = &Diag.DiagStates.back();
DiagStates.push_back(NewState);
unsigned Size = Record[Idx++];
assert(Idx + Size * 2 <= Record.size() &&
"Invalid data, not enough diag/map pairs");
while (Size--) {
unsigned DiagID = Record[Idx++];
DiagnosticMapping NewMapping =
DiagnosticMapping::deserialize(Record[Idx++]);
if (!NewMapping.isPragma() && !IncludeNonPragmaStates)
continue;
DiagnosticMapping &Mapping = NewState->getOrAddMapping(DiagID);
// If this mapping was specified as a warning but the severity was
// upgraded due to diagnostic settings, simulate the current diagnostic
// settings (and use a warning).
if (NewMapping.wasUpgradedFromWarning() && !Mapping.isErrorOrFatal()) {
NewMapping.setSeverity(diag::Severity::Warning);
NewMapping.setUpgradedFromWarning(false);
}
Mapping = NewMapping;
}
return NewState;
};
// Read the first state.
DiagState *FirstState;
if (F.Kind == MK_ImplicitModule) {
// Implicitly-built modules are reused with different diagnostic
// settings. Use the initial diagnostic state from Diag to simulate this
// compilation's diagnostic settings.
FirstState = Diag.DiagStatesByLoc.FirstDiagState;
DiagStates.push_back(FirstState);
// Skip the initial diagnostic state from the serialized module.
assert(Record[1] == 0 &&
"Invalid data, unexpected backref in initial state");
Idx = 3 + Record[2] * 2;
assert(Idx < Record.size() &&
"Invalid data, not enough state change pairs in initial state");
} else if (F.isModule()) {
// For an explicit module, preserve the flags from the module build
// command line (-w, -Weverything, -Werror, ...) along with any explicit
// -Wblah flags.
unsigned Flags = Record[Idx++];
DiagState Initial;
Initial.SuppressSystemWarnings = Flags & 1; Flags >>= 1;
Initial.ErrorsAsFatal = Flags & 1; Flags >>= 1;
Initial.WarningsAsErrors = Flags & 1; Flags >>= 1;
Initial.EnableAllWarnings = Flags & 1; Flags >>= 1;
Initial.IgnoreAllWarnings = Flags & 1; Flags >>= 1;
Initial.ExtBehavior = (diag::Severity)Flags;
FirstState = ReadDiagState(Initial, SourceLocation(), true);
assert(F.OriginalSourceFileID.isValid());
// Set up the root buffer of the module to start with the initial
// diagnostic state of the module itself, to cover files that contain no
// explicit transitions (for which we did not serialize anything).
Diag.DiagStatesByLoc.Files[F.OriginalSourceFileID]
.StateTransitions.push_back({FirstState, 0});
} else {
// For prefix ASTs, start with whatever the user configured on the
// command line.
Idx++; // Skip flags.
FirstState = ReadDiagState(*Diag.DiagStatesByLoc.CurDiagState,
SourceLocation(), false);
}
// Read the state transitions.
unsigned NumLocations = Record[Idx++];
while (NumLocations--) {
assert(Idx < Record.size() &&
"Invalid data, missing pragma diagnostic states");
SourceLocation Loc = ReadSourceLocation(F, Record[Idx++]);
auto IDAndOffset = SourceMgr.getDecomposedLoc(Loc);
assert(IDAndOffset.first.isValid() && "invalid FileID for transition");
assert(IDAndOffset.second == 0 && "not a start location for a FileID");
unsigned Transitions = Record[Idx++];
// Note that we don't need to set up Parent/ParentOffset here, because
// we won't be changing the diagnostic state within imported FileIDs
// (other than perhaps appending to the main source file, which has no
// parent).
auto &F = Diag.DiagStatesByLoc.Files[IDAndOffset.first];
F.StateTransitions.reserve(F.StateTransitions.size() + Transitions);
for (unsigned I = 0; I != Transitions; ++I) {
unsigned Offset = Record[Idx++];
auto *State =
ReadDiagState(*FirstState, Loc.getLocWithOffset(Offset), false);
F.StateTransitions.push_back({State, Offset});
}
}
// Read the final state.
assert(Idx < Record.size() &&
"Invalid data, missing final pragma diagnostic state");
SourceLocation CurStateLoc =
ReadSourceLocation(F, F.PragmaDiagMappings[Idx++]);
auto *CurState = ReadDiagState(*FirstState, CurStateLoc, false);
if (!F.isModule()) {
Diag.DiagStatesByLoc.CurDiagState = CurState;
Diag.DiagStatesByLoc.CurDiagStateLoc = CurStateLoc;
// Preserve the property that the imaginary root file describes the
// current state.
FileID NullFile;
auto &T = Diag.DiagStatesByLoc.Files[NullFile].StateTransitions;
if (T.empty())
T.push_back({CurState, 0});
else
T[0].State = CurState;
}
// Don't try to read these mappings again.
Record.clear();
}
}
/// Get the correct cursor and offset for loading a type.
ASTReader::RecordLocation ASTReader::TypeCursorForIndex(unsigned Index) {
GlobalTypeMapType::iterator I = GlobalTypeMap.find(Index);
assert(I != GlobalTypeMap.end() && "Corrupted global type map");
ModuleFile *M = I->second;
return RecordLocation(
M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset() +
M->DeclsBlockStartOffset);
}
static llvm::Optional<Type::TypeClass> getTypeClassForCode(TypeCode code) {
switch (code) {
#define TYPE_BIT_CODE(CLASS_ID, CODE_ID, CODE_VALUE) \
case TYPE_##CODE_ID: return Type::CLASS_ID;
#include "clang/Serialization/TypeBitCodes.def"
default: return llvm::None;
}
}
/// Read and return the type with the given index.
///
/// The index is the type ID, shifted and minus the number of predefs. This
/// routine actually reads the record corresponding to the type at the given
/// location. It is a helper routine for GetType, which deals with reading type
/// IDs.
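///
/// TYPE_EXT_QUAL records are handled directly (a base type plus qualifiers);
/// all other type codes are mapped through getTypeClassForCode() and
/// dispatched to the generated AbstractTypeReader.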
QualType ASTReader::readTypeRecord(unsigned Index) {
assert(ContextObj && "reading type with no AST context");
ASTContext &Context = *ContextObj;
RecordLocation Loc = TypeCursorForIndex(Index);
BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor;
// Keep track of where we are in the stream, then jump back there
// after reading this type.
SavedStreamPosition SavedPosition(DeclsCursor);
ReadingKindTracker ReadingKind(Read_Type, *this);
// Note that we are loading a type record.
Deserializing AType(this);
if (llvm::Error Err = DeclsCursor.JumpToBit(Loc.Offset)) {
Error(std::move(Err));
return QualType();
}
Expected<unsigned> RawCode = DeclsCursor.ReadCode();
if (!RawCode) {
Error(RawCode.takeError());
return QualType();
}
ASTRecordReader Record(*this, *Loc.F);
Expected<unsigned> Code = Record.readRecord(DeclsCursor, RawCode.get());
if (!Code) {
Error(Code.takeError());
return QualType();
}
if (Code.get() == TYPE_EXT_QUAL) {
QualType baseType = Record.readQualType();
Qualifiers quals = Record.readQualifiers();
return Context.getQualifiedType(baseType, quals);
}
auto maybeClass = getTypeClassForCode((TypeCode) Code.get());
if (!maybeClass) {
Error("Unexpected code for type");
return QualType();
}
serialization::AbstractTypeReader<ASTRecordReader> TypeReader(Record);
return TypeReader.read(*maybeClass);
}
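// Reads the source-location information for every node in a TypeLoc chain,
// in the order the corresponding fields were serialized.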
namespace clang {
class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
ASTRecordReader &Reader;
SourceLocation readSourceLocation() {
return Reader.readSourceLocation();
}
TypeSourceInfo *GetTypeSourceInfo() {
return Reader.readTypeSourceInfo();
}
NestedNameSpecifierLoc ReadNestedNameSpecifierLoc() {
return Reader.readNestedNameSpecifierLoc();
}
Attr *ReadAttr() {
return Reader.readAttr();
}
public:
TypeLocReader(ASTRecordReader &Reader) : Reader(Reader) {}
// We want compile-time assurance that we've enumerated all of
// these, so unfortunately we have to declare them first, then
// define them out-of-line.
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
void Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc);
#include "clang/AST/TypeLocNodes.def"
void VisitFunctionTypeLoc(FunctionTypeLoc);
void VisitArrayTypeLoc(ArrayTypeLoc);
};
} // namespace clang
void TypeLocReader::VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
// nothing to do
}
void TypeLocReader::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) {
TL.setBuiltinLoc(readSourceLocation());
if (TL.needsExtraLocalData()) {
TL.setWrittenTypeSpec(static_cast<DeclSpec::TST>(Reader.readInt()));
TL.setWrittenSignSpec(static_cast<TypeSpecifierSign>(Reader.readInt()));
TL.setWrittenWidthSpec(static_cast<TypeSpecifierWidth>(Reader.readInt()));
TL.setModeAttr(Reader.readInt());
}
}
void TypeLocReader::VisitComplexTypeLoc(ComplexTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitPointerTypeLoc(PointerTypeLoc TL) {
TL.setStarLoc(readSourceLocation());
}
void TypeLocReader::VisitDecayedTypeLoc(DecayedTypeLoc TL) {
// nothing to do
}
void TypeLocReader::VisitAdjustedTypeLoc(AdjustedTypeLoc TL) {
// nothing to do
}
void TypeLocReader::VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) {
TL.setExpansionLoc(readSourceLocation());
}
void TypeLocReader::VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) {
TL.setCaretLoc(readSourceLocation());
}
void TypeLocReader::VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) {
TL.setAmpLoc(readSourceLocation());
}
void TypeLocReader::VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) {
TL.setAmpAmpLoc(readSourceLocation());
}
void TypeLocReader::VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) {
TL.setStarLoc(readSourceLocation());
TL.setClassTInfo(GetTypeSourceInfo());
}
void TypeLocReader::VisitArrayTypeLoc(ArrayTypeLoc TL) {
TL.setLBracketLoc(readSourceLocation());
TL.setRBracketLoc(readSourceLocation());
if (Reader.readBool())
TL.setSizeExpr(Reader.readExpr());
else
TL.setSizeExpr(nullptr);
}
void TypeLocReader::VisitConstantArrayTypeLoc(ConstantArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocReader::VisitIncompleteArrayTypeLoc(IncompleteArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocReader::VisitVariableArrayTypeLoc(VariableArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocReader::VisitDependentSizedArrayTypeLoc(
DependentSizedArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocReader::VisitDependentAddressSpaceTypeLoc(
DependentAddressSpaceTypeLoc TL) {
TL.setAttrNameLoc(readSourceLocation());
TL.setAttrOperandParensRange(Reader.readSourceRange());
TL.setAttrExprOperand(Reader.readExpr());
}
void TypeLocReader::VisitDependentSizedExtVectorTypeLoc(
DependentSizedExtVectorTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitVectorTypeLoc(VectorTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitDependentVectorTypeLoc(
DependentVectorTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitConstantMatrixTypeLoc(ConstantMatrixTypeLoc TL) {
TL.setAttrNameLoc(readSourceLocation());
TL.setAttrOperandParensRange(Reader.readSourceRange());
TL.setAttrRowOperand(Reader.readExpr());
TL.setAttrColumnOperand(Reader.readExpr());
}
void TypeLocReader::VisitDependentSizedMatrixTypeLoc(
DependentSizedMatrixTypeLoc TL) {
TL.setAttrNameLoc(readSourceLocation());
TL.setAttrOperandParensRange(Reader.readSourceRange());
TL.setAttrRowOperand(Reader.readExpr());
TL.setAttrColumnOperand(Reader.readExpr());
}
void TypeLocReader::VisitFunctionTypeLoc(FunctionTypeLoc TL) {
TL.setLocalRangeBegin(readSourceLocation());
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
TL.setExceptionSpecRange(Reader.readSourceRange());
TL.setLocalRangeEnd(readSourceLocation());
for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i) {
TL.setParam(i, Reader.readDeclAs<ParmVarDecl>());
}
}
void TypeLocReader::VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc TL) {
VisitFunctionTypeLoc(TL);
}
void TypeLocReader::VisitFunctionNoProtoTypeLoc(FunctionNoProtoTypeLoc TL) {
VisitFunctionTypeLoc(TL);
}
void TypeLocReader::VisitUnresolvedUsingTypeLoc(UnresolvedUsingTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitTypedefTypeLoc(TypedefTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) {
TL.setTypeofLoc(readSourceLocation());
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
}
void TypeLocReader::VisitTypeOfTypeLoc(TypeOfTypeLoc TL) {
TL.setTypeofLoc(readSourceLocation());
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
TL.setUnderlyingTInfo(GetTypeSourceInfo());
}
void TypeLocReader::VisitDecltypeTypeLoc(DecltypeTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) {
TL.setKWLoc(readSourceLocation());
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
TL.setUnderlyingTInfo(GetTypeSourceInfo());
}
void TypeLocReader::VisitAutoTypeLoc(AutoTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
if (Reader.readBool()) {
TL.setNestedNameSpecifierLoc(ReadNestedNameSpecifierLoc());
TL.setTemplateKWLoc(readSourceLocation());
TL.setConceptNameLoc(readSourceLocation());
TL.setFoundDecl(Reader.readDeclAs<NamedDecl>());
TL.setLAngleLoc(readSourceLocation());
TL.setRAngleLoc(readSourceLocation());
for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i)
TL.setArgLocInfo(i, Reader.readTemplateArgumentLocInfo(
TL.getTypePtr()->getArg(i).getKind()));
}
}
void TypeLocReader::VisitDeducedTemplateSpecializationTypeLoc(
DeducedTemplateSpecializationTypeLoc TL) {
TL.setTemplateNameLoc(readSourceLocation());
}
void TypeLocReader::VisitRecordTypeLoc(RecordTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitEnumTypeLoc(EnumTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitAttributedTypeLoc(AttributedTypeLoc TL) {
TL.setAttr(ReadAttr());
}
void TypeLocReader::VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitSubstTemplateTypeParmTypeLoc(
SubstTemplateTypeParmTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitSubstTemplateTypeParmPackTypeLoc(
SubstTemplateTypeParmPackTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitTemplateSpecializationTypeLoc(
TemplateSpecializationTypeLoc TL) {
TL.setTemplateKeywordLoc(readSourceLocation());
TL.setTemplateNameLoc(readSourceLocation());
TL.setLAngleLoc(readSourceLocation());
TL.setRAngleLoc(readSourceLocation());
for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i)
TL.setArgLocInfo(
i,
Reader.readTemplateArgumentLocInfo(
TL.getTypePtr()->getArg(i).getKind()));
}
void TypeLocReader::VisitParenTypeLoc(ParenTypeLoc TL) {
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
}
void TypeLocReader::VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) {
TL.setElaboratedKeywordLoc(readSourceLocation());
TL.setQualifierLoc(ReadNestedNameSpecifierLoc());
}
void TypeLocReader::VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
TL.setElaboratedKeywordLoc(readSourceLocation());
TL.setQualifierLoc(ReadNestedNameSpecifierLoc());
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitDependentTemplateSpecializationTypeLoc(
DependentTemplateSpecializationTypeLoc TL) {
TL.setElaboratedKeywordLoc(readSourceLocation());
TL.setQualifierLoc(ReadNestedNameSpecifierLoc());
TL.setTemplateKeywordLoc(readSourceLocation());
TL.setTemplateNameLoc(readSourceLocation());
TL.setLAngleLoc(readSourceLocation());
TL.setRAngleLoc(readSourceLocation());
for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I)
TL.setArgLocInfo(
I,
Reader.readTemplateArgumentLocInfo(
TL.getTypePtr()->getArg(I).getKind()));
}
void TypeLocReader::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) {
TL.setEllipsisLoc(readSourceLocation());
}
void TypeLocReader::VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitObjCTypeParamTypeLoc(ObjCTypeParamTypeLoc TL) {
if (TL.getNumProtocols()) {
TL.setProtocolLAngleLoc(readSourceLocation());
TL.setProtocolRAngleLoc(readSourceLocation());
}
for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i)
TL.setProtocolLoc(i, readSourceLocation());
}
void TypeLocReader::VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) {
TL.setHasBaseTypeAsWritten(Reader.readBool());
TL.setTypeArgsLAngleLoc(readSourceLocation());
TL.setTypeArgsRAngleLoc(readSourceLocation());
for (unsigned i = 0, e = TL.getNumTypeArgs(); i != e; ++i)
TL.setTypeArgTInfo(i, GetTypeSourceInfo());
TL.setProtocolLAngleLoc(readSourceLocation());
TL.setProtocolRAngleLoc(readSourceLocation());
for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i)
TL.setProtocolLoc(i, readSourceLocation());
}
void TypeLocReader::VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) {
TL.setStarLoc(readSourceLocation());
}
void TypeLocReader::VisitAtomicTypeLoc(AtomicTypeLoc TL) {
TL.setKWLoc(readSourceLocation());
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
}
void TypeLocReader::VisitPipeTypeLoc(PipeTypeLoc TL) {
TL.setKWLoc(readSourceLocation());
}
void TypeLocReader::VisitExtIntTypeLoc(clang::ExtIntTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitDependentExtIntTypeLoc(
clang::DependentExtIntTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void ASTRecordReader::readTypeLoc(TypeLoc TL) {
TypeLocReader TLR(*this);
for (; !TL.isNull(); TL = TL.getNextTypeLoc())
TLR.Visit(TL);
}
TypeSourceInfo *ASTRecordReader::readTypeSourceInfo() {
QualType InfoTy = readType();
if (InfoTy.isNull())
return nullptr;
TypeSourceInfo *TInfo = getContext().CreateTypeSourceInfo(InfoTy);
readTypeLoc(TInfo->getTypeLoc());
return TInfo;
}
QualType ASTReader::GetType(TypeID ID) {
assert(ContextObj && "reading type with no AST context");
ASTContext &Context = *ContextObj;
unsigned FastQuals = ID & Qualifiers::FastMask;
unsigned Index = ID >> Qualifiers::FastWidth;
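// Type IDs pack the fast qualifiers into the low bits; the remaining bits
// index either a predefined type or an entry in TypesLoaded.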
if (Index < NUM_PREDEF_TYPE_IDS) {
QualType T;
switch ((PredefinedTypeIDs)Index) {
case PREDEF_TYPE_NULL_ID:
return QualType();
case PREDEF_TYPE_VOID_ID:
T = Context.VoidTy;
break;
case PREDEF_TYPE_BOOL_ID:
T = Context.BoolTy;
break;
case PREDEF_TYPE_CHAR_U_ID:
case PREDEF_TYPE_CHAR_S_ID:
// FIXME: Check that the signedness of CharTy is correct!
T = Context.CharTy;
break;
case PREDEF_TYPE_UCHAR_ID:
T = Context.UnsignedCharTy;
break;
case PREDEF_TYPE_USHORT_ID:
T = Context.UnsignedShortTy;
break;
case PREDEF_TYPE_UINT_ID:
T = Context.UnsignedIntTy;
break;
case PREDEF_TYPE_ULONG_ID:
T = Context.UnsignedLongTy;
break;
case PREDEF_TYPE_ULONGLONG_ID:
T = Context.UnsignedLongLongTy;
break;
case PREDEF_TYPE_UINT128_ID:
T = Context.UnsignedInt128Ty;
break;
case PREDEF_TYPE_SCHAR_ID:
T = Context.SignedCharTy;
break;
case PREDEF_TYPE_WCHAR_ID:
T = Context.WCharTy;
break;
case PREDEF_TYPE_SHORT_ID:
T = Context.ShortTy;
break;
case PREDEF_TYPE_INT_ID:
T = Context.IntTy;
break;
case PREDEF_TYPE_LONG_ID:
T = Context.LongTy;
break;
case PREDEF_TYPE_LONGLONG_ID:
T = Context.LongLongTy;
break;
case PREDEF_TYPE_INT128_ID:
T = Context.Int128Ty;
break;
case PREDEF_TYPE_BFLOAT16_ID:
T = Context.BFloat16Ty;
break;
case PREDEF_TYPE_HALF_ID:
T = Context.HalfTy;
break;
case PREDEF_TYPE_FLOAT_ID:
T = Context.FloatTy;
break;
case PREDEF_TYPE_DOUBLE_ID:
T = Context.DoubleTy;
break;
case PREDEF_TYPE_LONGDOUBLE_ID:
T = Context.LongDoubleTy;
break;
case PREDEF_TYPE_SHORT_ACCUM_ID:
T = Context.ShortAccumTy;
break;
case PREDEF_TYPE_ACCUM_ID:
T = Context.AccumTy;
break;
case PREDEF_TYPE_LONG_ACCUM_ID:
T = Context.LongAccumTy;
break;
case PREDEF_TYPE_USHORT_ACCUM_ID:
T = Context.UnsignedShortAccumTy;
break;
case PREDEF_TYPE_UACCUM_ID:
T = Context.UnsignedAccumTy;
break;
case PREDEF_TYPE_ULONG_ACCUM_ID:
T = Context.UnsignedLongAccumTy;
break;
case PREDEF_TYPE_SHORT_FRACT_ID:
T = Context.ShortFractTy;
break;
case PREDEF_TYPE_FRACT_ID:
T = Context.FractTy;
break;
case PREDEF_TYPE_LONG_FRACT_ID:
T = Context.LongFractTy;
break;
case PREDEF_TYPE_USHORT_FRACT_ID:
T = Context.UnsignedShortFractTy;
break;
case PREDEF_TYPE_UFRACT_ID:
T = Context.UnsignedFractTy;
break;
case PREDEF_TYPE_ULONG_FRACT_ID:
T = Context.UnsignedLongFractTy;
break;
case PREDEF_TYPE_SAT_SHORT_ACCUM_ID:
T = Context.SatShortAccumTy;
break;
case PREDEF_TYPE_SAT_ACCUM_ID:
T = Context.SatAccumTy;
break;
case PREDEF_TYPE_SAT_LONG_ACCUM_ID:
T = Context.SatLongAccumTy;
break;
case PREDEF_TYPE_SAT_USHORT_ACCUM_ID:
T = Context.SatUnsignedShortAccumTy;
break;
case PREDEF_TYPE_SAT_UACCUM_ID:
T = Context.SatUnsignedAccumTy;
break;
case PREDEF_TYPE_SAT_ULONG_ACCUM_ID:
T = Context.SatUnsignedLongAccumTy;
break;
case PREDEF_TYPE_SAT_SHORT_FRACT_ID:
T = Context.SatShortFractTy;
break;
case PREDEF_TYPE_SAT_FRACT_ID:
T = Context.SatFractTy;
break;
case PREDEF_TYPE_SAT_LONG_FRACT_ID:
T = Context.SatLongFractTy;
break;
case PREDEF_TYPE_SAT_USHORT_FRACT_ID:
T = Context.SatUnsignedShortFractTy;
break;
case PREDEF_TYPE_SAT_UFRACT_ID:
T = Context.SatUnsignedFractTy;
break;
case PREDEF_TYPE_SAT_ULONG_FRACT_ID:
T = Context.SatUnsignedLongFractTy;
break;
case PREDEF_TYPE_FLOAT16_ID:
T = Context.Float16Ty;
break;
case PREDEF_TYPE_FLOAT128_ID:
T = Context.Float128Ty;
break;
case PREDEF_TYPE_OVERLOAD_ID:
T = Context.OverloadTy;
break;
case PREDEF_TYPE_BOUND_MEMBER:
T = Context.BoundMemberTy;
break;
case PREDEF_TYPE_PSEUDO_OBJECT:
T = Context.PseudoObjectTy;
break;
case PREDEF_TYPE_DEPENDENT_ID:
T = Context.DependentTy;
break;
case PREDEF_TYPE_UNKNOWN_ANY:
T = Context.UnknownAnyTy;
break;
case PREDEF_TYPE_NULLPTR_ID:
T = Context.NullPtrTy;
break;
case PREDEF_TYPE_CHAR8_ID:
T = Context.Char8Ty;
break;
case PREDEF_TYPE_CHAR16_ID:
T = Context.Char16Ty;
break;
case PREDEF_TYPE_CHAR32_ID:
T = Context.Char32Ty;
break;
case PREDEF_TYPE_OBJC_ID:
T = Context.ObjCBuiltinIdTy;
break;
case PREDEF_TYPE_OBJC_CLASS:
T = Context.ObjCBuiltinClassTy;
break;
case PREDEF_TYPE_OBJC_SEL:
T = Context.ObjCBuiltinSelTy;
break;
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case PREDEF_TYPE_##Id##_ID: \
T = Context.SingletonId; \
break;
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case PREDEF_TYPE_##Id##_ID: \
T = Context.Id##Ty; \
break;
#include "clang/Basic/OpenCLExtensionTypes.def"
case PREDEF_TYPE_SAMPLER_ID:
T = Context.OCLSamplerTy;
break;
case PREDEF_TYPE_EVENT_ID:
T = Context.OCLEventTy;
break;
case PREDEF_TYPE_CLK_EVENT_ID:
T = Context.OCLClkEventTy;
break;
case PREDEF_TYPE_QUEUE_ID:
T = Context.OCLQueueTy;
break;
case PREDEF_TYPE_RESERVE_ID_ID:
T = Context.OCLReserveIDTy;
break;
case PREDEF_TYPE_AUTO_DEDUCT:
T = Context.getAutoDeductType();
break;
case PREDEF_TYPE_AUTO_RREF_DEDUCT:
T = Context.getAutoRRefDeductType();
break;
case PREDEF_TYPE_ARC_UNBRIDGED_CAST:
T = Context.ARCUnbridgedCastTy;
break;
case PREDEF_TYPE_BUILTIN_FN:
T = Context.BuiltinFnTy;
break;
case PREDEF_TYPE_INCOMPLETE_MATRIX_IDX:
T = Context.IncompleteMatrixIdxTy;
break;
case PREDEF_TYPE_OMP_ARRAY_SECTION:
T = Context.OMPArraySectionTy;
break;
case PREDEF_TYPE_OMP_ARRAY_SHAPING:
T = Context.OMPArrayShapingTy;
break;
case PREDEF_TYPE_OMP_ITERATOR:
T = Context.OMPIteratorTy;
break;
#define SVE_TYPE(Name, Id, SingletonId) \
case PREDEF_TYPE_##Id##_ID: \
T = Context.SingletonId; \
break;
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case PREDEF_TYPE_##Id##_ID: \
T = Context.Id##Ty; \
break;
#include "clang/Basic/PPCTypes.def"
#define RVV_TYPE(Name, Id, SingletonId) \
case PREDEF_TYPE_##Id##_ID: \
T = Context.SingletonId; \
break;
#include "clang/Basic/RISCVVTypes.def"
}
assert(!T.isNull() && "Unknown predefined type");
return T.withFastQualifiers(FastQuals);
}
Index -= NUM_PREDEF_TYPE_IDS;
assert(Index < TypesLoaded.size() && "Type index out-of-range");
if (TypesLoaded[Index].isNull()) {
TypesLoaded[Index] = readTypeRecord(Index);
if (TypesLoaded[Index].isNull())
return QualType();
TypesLoaded[Index]->setFromAST();
if (DeserializationListener)
DeserializationListener->TypeRead(TypeIdx::fromTypeID(ID),
TypesLoaded[Index]);
}
return TypesLoaded[Index].withFastQualifiers(FastQuals);
}
QualType ASTReader::getLocalType(ModuleFile &F, unsigned LocalID) {
return GetType(getGlobalTypeID(F, LocalID));
}
serialization::TypeID
ASTReader::getGlobalTypeID(ModuleFile &F, unsigned LocalID) const {
unsigned FastQuals = LocalID & Qualifiers::FastMask;
unsigned LocalIndex = LocalID >> Qualifiers::FastWidth;
if (LocalIndex < NUM_PREDEF_TYPE_IDS)
return LocalID;
if (!F.ModuleOffsetMap.empty())
ReadModuleOffsetMap(F);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= F.TypeRemap.find(LocalIndex - NUM_PREDEF_TYPE_IDS);
assert(I != F.TypeRemap.end() && "Invalid index into type index remap");
unsigned GlobalIndex = LocalIndex + I->second;
return (GlobalIndex << Qualifiers::FastWidth) | FastQuals;
}
TemplateArgumentLocInfo
ASTRecordReader::readTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind) {
switch (Kind) {
case TemplateArgument::Expression:
return readExpr();
case TemplateArgument::Type:
return readTypeSourceInfo();
case TemplateArgument::Template: {
NestedNameSpecifierLoc QualifierLoc =
readNestedNameSpecifierLoc();
SourceLocation TemplateNameLoc = readSourceLocation();
return TemplateArgumentLocInfo(getASTContext(), QualifierLoc,
TemplateNameLoc, SourceLocation());
}
case TemplateArgument::TemplateExpansion: {
NestedNameSpecifierLoc QualifierLoc = readNestedNameSpecifierLoc();
SourceLocation TemplateNameLoc = readSourceLocation();
SourceLocation EllipsisLoc = readSourceLocation();
return TemplateArgumentLocInfo(getASTContext(), QualifierLoc,
TemplateNameLoc, EllipsisLoc);
}
case TemplateArgument::Null:
case TemplateArgument::Integral:
case TemplateArgument::Declaration:
case TemplateArgument::NullPtr:
case TemplateArgument::Pack:
// FIXME: Is this right?
return TemplateArgumentLocInfo();
}
llvm_unreachable("unexpected template argument loc");
}
TemplateArgumentLoc ASTRecordReader::readTemplateArgumentLoc() {
TemplateArgument Arg = readTemplateArgument();
if (Arg.getKind() == TemplateArgument::Expression) {
if (readBool()) // bool InfoHasSameExpr.
return TemplateArgumentLoc(Arg, TemplateArgumentLocInfo(Arg.getAsExpr()));
}
return TemplateArgumentLoc(Arg, readTemplateArgumentLocInfo(Arg.getKind()));
}
const ASTTemplateArgumentListInfo *
ASTRecordReader::readASTTemplateArgumentListInfo() {
SourceLocation LAngleLoc = readSourceLocation();
SourceLocation RAngleLoc = readSourceLocation();
unsigned NumArgsAsWritten = readInt();
TemplateArgumentListInfo TemplArgsInfo(LAngleLoc, RAngleLoc);
for (unsigned i = 0; i != NumArgsAsWritten; ++i)
TemplArgsInfo.addArgument(readTemplateArgumentLoc());
return ASTTemplateArgumentListInfo::Create(getContext(), TemplArgsInfo);
}
Decl *ASTReader::GetExternalDecl(uint32_t ID) {
return GetDecl(ID);
}
void ASTReader::CompleteRedeclChain(const Decl *D) {
if (NumCurrentElementsDeserializing) {
// We arrange to not care about the complete redeclaration chain while we're
// deserializing. Just remember that the AST has marked this one as complete
// but that it's not actually complete yet, so we know we still need to
// complete it later.
PendingIncompleteDeclChains.push_back(const_cast<Decl*>(D));
return;
}
if (!D->getDeclContext()) {
assert(isa<TranslationUnitDecl>(D) && "Not a TU?");
return;
}
const DeclContext *DC = D->getDeclContext()->getRedeclContext();
// If this is a named declaration, complete it by looking it up
// within its context.
//
// FIXME: Merging a function definition should merge
// all mergeable entities within it.
if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) ||
isa<CXXRecordDecl>(DC) || isa<EnumDecl>(DC)) {
if (DeclarationName Name = cast<NamedDecl>(D)->getDeclName()) {
if (!getContext().getLangOpts().CPlusPlus &&
isa<TranslationUnitDecl>(DC)) {
// Outside of C++, we don't have a lookup table for the TU, so update
// the identifier instead. (For C++ modules, we don't store decls
// in the serialized identifier table, so we do the lookup in the TU.)
auto *II = Name.getAsIdentifierInfo();
assert(II && "non-identifier name in C?");
if (II->isOutOfDate())
updateOutOfDateIdentifier(*II);
} else
DC->lookup(Name);
} else if (needsAnonymousDeclarationNumber(cast<NamedDecl>(D))) {
// Find all declarations of this kind from the relevant context.
for (auto *DCDecl : cast<Decl>(D->getLexicalDeclContext())->redecls()) {
auto *DC = cast<DeclContext>(DCDecl);
SmallVector<Decl*, 8> Decls;
FindExternalLexicalDecls(
DC, [&](Decl::Kind K) { return K == D->getKind(); }, Decls);
}
}
}
if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(D))
CTSD->getSpecializedTemplate()->LoadLazySpecializations();
if (auto *VTSD = dyn_cast<VarTemplateSpecializationDecl>(D))
VTSD->getSpecializedTemplate()->LoadLazySpecializations();
if (auto *FD = dyn_cast<FunctionDecl>(D)) {
if (auto *Template = FD->getPrimaryTemplate())
Template->LoadLazySpecializations();
}
}
CXXCtorInitializer **
ASTReader::GetExternalCXXCtorInitializers(uint64_t Offset) {
RecordLocation Loc = getLocalBitOffset(Offset);
BitstreamCursor &Cursor = Loc.F->DeclsCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(Loc.Offset)) {
Error(std::move(Err));
return nullptr;
}
ReadingKindTracker ReadingKind(Read_Decl, *this);
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
Error(MaybeCode.takeError());
return nullptr;
}
unsigned Code = MaybeCode.get();
ASTRecordReader Record(*this, *Loc.F);
Expected<unsigned> MaybeRecCode = Record.readRecord(Cursor, Code);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return nullptr;
}
if (MaybeRecCode.get() != DECL_CXX_CTOR_INITIALIZERS) {
Error("malformed AST file: missing C++ ctor initializers");
return nullptr;
}
return Record.readCXXCtorInitializers();
}
CXXBaseSpecifier *ASTReader::GetExternalCXXBaseSpecifiers(uint64_t Offset) {
assert(ContextObj && "reading base specifiers with no AST context");
ASTContext &Context = *ContextObj;
RecordLocation Loc = getLocalBitOffset(Offset);
BitstreamCursor &Cursor = Loc.F->DeclsCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(Loc.Offset)) {
Error(std::move(Err));
return nullptr;
}
ReadingKindTracker ReadingKind(Read_Decl, *this);
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
Error(MaybeCode.takeError());
return nullptr;
}
unsigned Code = MaybeCode.get();
ASTRecordReader Record(*this, *Loc.F);
Expected<unsigned> MaybeRecCode = Record.readRecord(Cursor, Code);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return nullptr;
}
unsigned RecCode = MaybeRecCode.get();
if (RecCode != DECL_CXX_BASE_SPECIFIERS) {
Error("malformed AST file: missing C++ base specifiers");
return nullptr;
}
unsigned NumBases = Record.readInt();
void *Mem = Context.Allocate(sizeof(CXXBaseSpecifier) * NumBases);
CXXBaseSpecifier *Bases = new (Mem) CXXBaseSpecifier [NumBases];
for (unsigned I = 0; I != NumBases; ++I)
Bases[I] = Record.readCXXBaseSpecifier();
return Bases;
}
serialization::DeclID
ASTReader::getGlobalDeclID(ModuleFile &F, LocalDeclID LocalID) const {
if (LocalID < NUM_PREDEF_DECL_IDS)
return LocalID;
if (!F.ModuleOffsetMap.empty())
ReadModuleOffsetMap(F);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= F.DeclRemap.find(LocalID - NUM_PREDEF_DECL_IDS);
assert(I != F.DeclRemap.end() && "Invalid index into decl index remap");
return LocalID + I->second;
}
bool ASTReader::isDeclIDFromModule(serialization::GlobalDeclID ID,
ModuleFile &M) const {
// Predefined decls aren't from any module.
if (ID < NUM_PREDEF_DECL_IDS)
return false;
return ID - NUM_PREDEF_DECL_IDS >= M.BaseDeclID &&
ID - NUM_PREDEF_DECL_IDS < M.BaseDeclID + M.LocalNumDecls;
}
ModuleFile *ASTReader::getOwningModuleFile(const Decl *D) {
if (!D->isFromASTFile())
return nullptr;
GlobalDeclMapType::const_iterator I = GlobalDeclMap.find(D->getGlobalID());
assert(I != GlobalDeclMap.end() && "Corrupted global declaration map");
return I->second;
}
SourceLocation ASTReader::getSourceLocationForDeclID(GlobalDeclID ID) {
if (ID < NUM_PREDEF_DECL_IDS)
return SourceLocation();
unsigned Index = ID - NUM_PREDEF_DECL_IDS;
if (Index >= DeclsLoaded.size()) {
Error("declaration ID out-of-range for AST file");
return SourceLocation();
}
if (Decl *D = DeclsLoaded[Index])
return D->getLocation();
SourceLocation Loc;
DeclCursorForID(ID, Loc);
return Loc;
}
static Decl *getPredefinedDecl(ASTContext &Context, PredefinedDeclIDs ID) {
switch (ID) {
case PREDEF_DECL_NULL_ID:
return nullptr;
case PREDEF_DECL_TRANSLATION_UNIT_ID:
return Context.getTranslationUnitDecl();
case PREDEF_DECL_OBJC_ID_ID:
return Context.getObjCIdDecl();
case PREDEF_DECL_OBJC_SEL_ID:
return Context.getObjCSelDecl();
case PREDEF_DECL_OBJC_CLASS_ID:
return Context.getObjCClassDecl();
case PREDEF_DECL_OBJC_PROTOCOL_ID:
return Context.getObjCProtocolDecl();
case PREDEF_DECL_INT_128_ID:
return Context.getInt128Decl();
case PREDEF_DECL_UNSIGNED_INT_128_ID:
return Context.getUInt128Decl();
case PREDEF_DECL_OBJC_INSTANCETYPE_ID:
return Context.getObjCInstanceTypeDecl();
case PREDEF_DECL_BUILTIN_VA_LIST_ID:
return Context.getBuiltinVaListDecl();
case PREDEF_DECL_VA_LIST_TAG:
return Context.getVaListTagDecl();
case PREDEF_DECL_BUILTIN_MS_VA_LIST_ID:
return Context.getBuiltinMSVaListDecl();
case PREDEF_DECL_BUILTIN_MS_GUID_ID:
return Context.getMSGuidTagDecl();
case PREDEF_DECL_EXTERN_C_CONTEXT_ID:
return Context.getExternCContextDecl();
case PREDEF_DECL_MAKE_INTEGER_SEQ_ID:
return Context.getMakeIntegerSeqDecl();
case PREDEF_DECL_CF_CONSTANT_STRING_ID:
return Context.getCFConstantStringDecl();
case PREDEF_DECL_CF_CONSTANT_STRING_TAG_ID:
return Context.getCFConstantStringTagDecl();
case PREDEF_DECL_TYPE_PACK_ELEMENT_ID:
return Context.getTypePackElementDecl();
}
llvm_unreachable("PredefinedDeclIDs unknown enum value");
}
Decl *ASTReader::GetExistingDecl(DeclID ID) {
assert(ContextObj && "reading decl with no AST context");
if (ID < NUM_PREDEF_DECL_IDS) {
Decl *D = getPredefinedDecl(*ContextObj, (PredefinedDeclIDs)ID);
if (D) {
// Track that we have merged the declaration with ID \p ID into the
// pre-existing predefined declaration \p D.
auto &Merged = KeyDecls[D->getCanonicalDecl()];
if (Merged.empty())
Merged.push_back(ID);
}
return D;
}
unsigned Index = ID - NUM_PREDEF_DECL_IDS;
if (Index >= DeclsLoaded.size()) {
assert(0 && "declaration ID out-of-range for AST file");
Error("declaration ID out-of-range for AST file");
return nullptr;
}
return DeclsLoaded[Index];
}
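// Unlike GetExistingDecl(), GetDecl() deserializes on demand: a null slot in
// DeclsLoaded triggers ReadDeclRecord() and a listener notification.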
Decl *ASTReader::GetDecl(DeclID ID) {
if (ID < NUM_PREDEF_DECL_IDS)
return GetExistingDecl(ID);
unsigned Index = ID - NUM_PREDEF_DECL_IDS;
if (Index >= DeclsLoaded.size()) {
assert(0 && "declaration ID out-of-range for AST file");
Error("declaration ID out-of-range for AST file");
return nullptr;
}
if (!DeclsLoaded[Index]) {
ReadDeclRecord(ID);
if (DeserializationListener)
DeserializationListener->DeclRead(ID, DeclsLoaded[Index]);
}
return DeclsLoaded[Index];
}
DeclID ASTReader::mapGlobalIDToModuleFileGlobalID(ModuleFile &M,
DeclID GlobalID) {
if (GlobalID < NUM_PREDEF_DECL_IDS)
return GlobalID;
GlobalDeclMapType::const_iterator I = GlobalDeclMap.find(GlobalID);
assert(I != GlobalDeclMap.end() && "Corrupted global declaration map");
ModuleFile *Owner = I->second;
llvm::DenseMap<ModuleFile *, serialization::DeclID>::iterator Pos
= M.GlobalToLocalDeclIDs.find(Owner);
if (Pos == M.GlobalToLocalDeclIDs.end())
return 0;
return GlobalID - Owner->BaseDeclID + Pos->second;
}
serialization::DeclID ASTReader::ReadDeclID(ModuleFile &F,
const RecordData &Record,
unsigned &Idx) {
if (Idx >= Record.size()) {
Error("Corrupted AST file");
return 0;
}
return getGlobalDeclID(F, Record[Idx++]);
}
/// Resolve the offset of a statement into a statement.
///
/// This operation will read a new statement from the external
/// source each time it is called, and is meant to be used via a
/// LazyOffsetPtr (which is used by Decls for the body of functions, etc).
Stmt *ASTReader::GetExternalDeclStmt(uint64_t Offset) {
// Switch case IDs are per Decl.
ClearSwitchCaseIDs();
// Offset here is a global offset across the entire chain.
RecordLocation Loc = getLocalBitOffset(Offset);
if (llvm::Error Err = Loc.F->DeclsCursor.JumpToBit(Loc.Offset)) {
Error(std::move(Err));
return nullptr;
}
assert(NumCurrentElementsDeserializing == 0 &&
"should not be called while already deserializing");
Deserializing D(this);
return ReadStmtFromStream(*Loc.F);
}
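// Lexical contents are stored as a flat array of (Decl::Kind, DeclID) pairs,
// so the visitor below walks the array two entries at a time.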
void ASTReader::FindExternalLexicalDecls(
const DeclContext *DC, llvm::function_ref<bool(Decl::Kind)> IsKindWeWant,
SmallVectorImpl<Decl *> &Decls) {
bool PredefsVisited[NUM_PREDEF_DECL_IDS] = {};
auto Visit = [&] (ModuleFile *M, LexicalContents LexicalDecls) {
assert(LexicalDecls.size() % 2 == 0 && "expected an even number of entries");
for (int I = 0, N = LexicalDecls.size(); I != N; I += 2) {
auto K = (Decl::Kind)+LexicalDecls[I];
if (!IsKindWeWant(K))
continue;
auto ID = (serialization::DeclID)+LexicalDecls[I + 1];
// Don't add predefined declarations to the lexical context more
// than once.
if (ID < NUM_PREDEF_DECL_IDS) {
if (PredefsVisited[ID])
continue;
PredefsVisited[ID] = true;
}
if (Decl *D = GetLocalDecl(*M, ID)) {
assert(D->getKind() == K && "wrong kind for lexical decl");
if (!DC->isDeclInLexicalTraversal(D))
Decls.push_back(D);
}
}
};
if (isa<TranslationUnitDecl>(DC)) {
for (auto Lexical : TULexicalDecls)
Visit(Lexical.first, Lexical.second);
} else {
auto I = LexicalDecls.find(DC);
if (I != LexicalDecls.end())
Visit(I->second.first, I->second.second);
}
++NumLexicalDeclContextsRead;
}
namespace {
class DeclIDComp {
ASTReader &Reader;
ModuleFile &Mod;
public:
DeclIDComp(ASTReader &Reader, ModuleFile &M) : Reader(Reader), Mod(M) {}
bool operator()(LocalDeclID L, LocalDeclID R) const {
SourceLocation LHS = getLocation(L);
SourceLocation RHS = getLocation(R);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
bool operator()(SourceLocation LHS, LocalDeclID R) const {
SourceLocation RHS = getLocation(R);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
bool operator()(LocalDeclID L, SourceLocation RHS) const {
SourceLocation LHS = getLocation(L);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
SourceLocation getLocation(LocalDeclID ID) const {
return Reader.getSourceManager().getFileLoc(
Reader.getSourceLocationForDeclID(Reader.getGlobalDeclID(Mod, ID)));
}
};
} // namespace
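// FindFileRegionDecls relies on the decls of a file being recorded in source
// order: DeclIDComp orders local decl IDs by their file location, which lets
// us binary-search (lower_bound/upper_bound) DInfo.Decls for the IDs that
// overlap [Offset, Offset+Length).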
void ASTReader::FindFileRegionDecls(FileID File,
unsigned Offset, unsigned Length,
SmallVectorImpl<Decl *> &Decls) {
SourceManager &SM = getSourceManager();
llvm::DenseMap<FileID, FileDeclsInfo>::iterator I = FileDeclIDs.find(File);
if (I == FileDeclIDs.end())
return;
FileDeclsInfo &DInfo = I->second;
if (DInfo.Decls.empty())
return;
SourceLocation
BeginLoc = SM.getLocForStartOfFile(File).getLocWithOffset(Offset);
SourceLocation EndLoc = BeginLoc.getLocWithOffset(Length);
DeclIDComp DIDComp(*this, *DInfo.Mod);
ArrayRef<serialization::LocalDeclID>::iterator BeginIt =
llvm::lower_bound(DInfo.Decls, BeginLoc, DIDComp);
if (BeginIt != DInfo.Decls.begin())
--BeginIt;
// If we are pointing at a top-level decl inside an objc container, we need
// to backtrack until we find it; otherwise we will fail to report that the
// region overlaps with an objc container.
while (BeginIt != DInfo.Decls.begin() &&
GetDecl(getGlobalDeclID(*DInfo.Mod, *BeginIt))
->isTopLevelDeclInObjCContainer())
--BeginIt;
ArrayRef<serialization::LocalDeclID>::iterator EndIt =
llvm::upper_bound(DInfo.Decls, EndLoc, DIDComp);
if (EndIt != DInfo.Decls.end())
++EndIt;
for (ArrayRef<serialization::LocalDeclID>::iterator
DIt = BeginIt; DIt != EndIt; ++DIt)
Decls.push_back(GetDecl(getGlobalDeclID(*DInfo.Mod, *DIt)));
}
bool
ASTReader::FindExternalVisibleDeclsByName(const DeclContext *DC,
DeclarationName Name) {
assert(DC->hasExternalVisibleStorage() && DC == DC->getPrimaryContext() &&
"DeclContext has no visible decls in storage");
if (!Name)
return false;
auto It = Lookups.find(DC);
if (It == Lookups.end())
return false;
Deserializing LookupResults(this);
// Load the list of declarations.
SmallVector<NamedDecl *, 64> Decls;
llvm::SmallPtrSet<NamedDecl *, 8> Found;
for (DeclID ID : It->second.Table.find(Name)) {
NamedDecl *ND = cast<NamedDecl>(GetDecl(ID));
if (ND->getDeclName() == Name && Found.insert(ND).second)
Decls.push_back(ND);
}
++NumVisibleDeclContextsRead;
SetExternalVisibleDeclsForName(DC, Name, Decls);
return !Decls.empty();
}
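// Unlike the single-name lookup above, completeVisibleDeclsMap deserializes
// every name in the on-disk lookup table for this context, after which the
// context no longer needs external visible storage.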
void ASTReader::completeVisibleDeclsMap(const DeclContext *DC) {
if (!DC->hasExternalVisibleStorage())
return;
auto It = Lookups.find(DC);
assert(It != Lookups.end() &&
"have external visible storage but no lookup tables");
DeclsMap Decls;
for (DeclID ID : It->second.Table.findAll()) {
NamedDecl *ND = cast<NamedDecl>(GetDecl(ID));
Decls[ND->getDeclName()].push_back(ND);
}
++NumVisibleDeclContextsRead;
for (DeclsMap::iterator I = Decls.begin(), E = Decls.end(); I != E; ++I) {
SetExternalVisibleDeclsForName(DC, I->first, I->second);
}
const_cast<DeclContext *>(DC)->setHasExternalVisibleStorage(false);
}
const serialization::reader::DeclContextLookupTable *
ASTReader::getLoadedLookupTables(DeclContext *Primary) const {
auto I = Lookups.find(Primary);
return I == Lookups.end() ? nullptr : &I->second;
}
/// Under non-PCH compilation the consumer receives the ObjC methods
/// before receiving the implementation, and codegen depends on this.
/// We simulate this by deserializing the methods of the implementation
/// and passing them to the consumer before passing the deserialized
/// implementation decl itself.
static void PassObjCImplDeclToConsumer(ObjCImplDecl *ImplD,
ASTConsumer *Consumer) {
assert(ImplD && Consumer);
for (auto *I : ImplD->methods())
Consumer->HandleInterestingDecl(DeclGroupRef(I));
Consumer->HandleInterestingDecl(DeclGroupRef(ImplD));
}
void ASTReader::PassInterestingDeclToConsumer(Decl *D) {
if (ObjCImplDecl *ImplD = dyn_cast<ObjCImplDecl>(D))
PassObjCImplDeclToConsumer(ImplD, Consumer);
else
Consumer->HandleInterestingDecl(DeclGroupRef(D));
}
void ASTReader::StartTranslationUnit(ASTConsumer *Consumer) {
this->Consumer = Consumer;
if (Consumer)
PassInterestingDeclsToConsumer();
if (DeserializationListener)
DeserializationListener->ReaderInitialized(this);
}
void ASTReader::PrintStats() {
std::fprintf(stderr, "*** AST File Statistics:\n");
unsigned NumTypesLoaded
= TypesLoaded.size() - std::count(TypesLoaded.begin(), TypesLoaded.end(),
QualType());
unsigned NumDeclsLoaded
= DeclsLoaded.size() - std::count(DeclsLoaded.begin(), DeclsLoaded.end(),
(Decl *)nullptr);
unsigned NumIdentifiersLoaded
= IdentifiersLoaded.size() - std::count(IdentifiersLoaded.begin(),
IdentifiersLoaded.end(),
(IdentifierInfo *)nullptr);
unsigned NumMacrosLoaded
= MacrosLoaded.size() - std::count(MacrosLoaded.begin(),
MacrosLoaded.end(),
(MacroInfo *)nullptr);
unsigned NumSelectorsLoaded
= SelectorsLoaded.size() - std::count(SelectorsLoaded.begin(),
SelectorsLoaded.end(),
Selector());
if (unsigned TotalNumSLocEntries = getTotalNumSLocs())
std::fprintf(stderr, " %u/%u source location entries read (%f%%)\n",
NumSLocEntriesRead, TotalNumSLocEntries,
((float)NumSLocEntriesRead/TotalNumSLocEntries * 100));
if (!TypesLoaded.empty())
std::fprintf(stderr, " %u/%u types read (%f%%)\n",
NumTypesLoaded, (unsigned)TypesLoaded.size(),
((float)NumTypesLoaded/TypesLoaded.size() * 100));
if (!DeclsLoaded.empty())
std::fprintf(stderr, " %u/%u declarations read (%f%%)\n",
NumDeclsLoaded, (unsigned)DeclsLoaded.size(),
((float)NumDeclsLoaded/DeclsLoaded.size() * 100));
if (!IdentifiersLoaded.empty())
std::fprintf(stderr, " %u/%u identifiers read (%f%%)\n",
NumIdentifiersLoaded, (unsigned)IdentifiersLoaded.size(),
((float)NumIdentifiersLoaded/IdentifiersLoaded.size() * 100));
if (!MacrosLoaded.empty())
std::fprintf(stderr, " %u/%u macros read (%f%%)\n",
NumMacrosLoaded, (unsigned)MacrosLoaded.size(),
((float)NumMacrosLoaded/MacrosLoaded.size() * 100));
if (!SelectorsLoaded.empty())
std::fprintf(stderr, " %u/%u selectors read (%f%%)\n",
NumSelectorsLoaded, (unsigned)SelectorsLoaded.size(),
((float)NumSelectorsLoaded/SelectorsLoaded.size() * 100));
if (TotalNumStatements)
std::fprintf(stderr, " %u/%u statements read (%f%%)\n",
NumStatementsRead, TotalNumStatements,
((float)NumStatementsRead/TotalNumStatements * 100));
if (TotalNumMacros)
std::fprintf(stderr, " %u/%u macros read (%f%%)\n",
NumMacrosRead, TotalNumMacros,
((float)NumMacrosRead/TotalNumMacros * 100));
if (TotalLexicalDeclContexts)
std::fprintf(stderr, " %u/%u lexical declcontexts read (%f%%)\n",
NumLexicalDeclContextsRead, TotalLexicalDeclContexts,
((float)NumLexicalDeclContextsRead/TotalLexicalDeclContexts
* 100));
if (TotalVisibleDeclContexts)
std::fprintf(stderr, " %u/%u visible declcontexts read (%f%%)\n",
NumVisibleDeclContextsRead, TotalVisibleDeclContexts,
((float)NumVisibleDeclContextsRead/TotalVisibleDeclContexts
* 100));
if (TotalNumMethodPoolEntries)
std::fprintf(stderr, " %u/%u method pool entries read (%f%%)\n",
NumMethodPoolEntriesRead, TotalNumMethodPoolEntries,
((float)NumMethodPoolEntriesRead/TotalNumMethodPoolEntries
* 100));
if (NumMethodPoolLookups)
std::fprintf(stderr, " %u/%u method pool lookups succeeded (%f%%)\n",
NumMethodPoolHits, NumMethodPoolLookups,
((float)NumMethodPoolHits/NumMethodPoolLookups * 100.0));
if (NumMethodPoolTableLookups)
std::fprintf(stderr, " %u/%u method pool table lookups succeeded (%f%%)\n",
NumMethodPoolTableHits, NumMethodPoolTableLookups,
((float)NumMethodPoolTableHits/NumMethodPoolTableLookups
* 100.0));
if (NumIdentifierLookupHits)
std::fprintf(stderr,
" %u / %u identifier table lookups succeeded (%f%%)\n",
NumIdentifierLookupHits, NumIdentifierLookups,
(double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
if (GlobalIndex) {
std::fprintf(stderr, "\n");
GlobalIndex->printStats();
}
std::fprintf(stderr, "\n");
dump();
std::fprintf(stderr, "\n");
}
template<typename Key, typename ModuleFile, unsigned InitialCapacity>
LLVM_DUMP_METHOD static void
dumpModuleIDMap(StringRef Name,
const ContinuousRangeMap<Key, ModuleFile *,
InitialCapacity> &Map) {
if (Map.begin() == Map.end())
return;
using MapType = ContinuousRangeMap<Key, ModuleFile *, InitialCapacity>;
llvm::errs() << Name << ":\n";
for (typename MapType::const_iterator I = Map.begin(), IEnd = Map.end();
I != IEnd; ++I) {
llvm::errs() << " " << I->first << " -> " << I->second->FileName
<< "\n";
}
}
LLVM_DUMP_METHOD void ASTReader::dump() {
llvm::errs() << "*** PCH/ModuleFile Remappings:\n";
dumpModuleIDMap("Global bit offset map", GlobalBitOffsetsMap);
dumpModuleIDMap("Global source location entry map", GlobalSLocEntryMap);
dumpModuleIDMap("Global type map", GlobalTypeMap);
dumpModuleIDMap("Global declaration map", GlobalDeclMap);
dumpModuleIDMap("Global identifier map", GlobalIdentifierMap);
dumpModuleIDMap("Global macro map", GlobalMacroMap);
dumpModuleIDMap("Global submodule map", GlobalSubmoduleMap);
dumpModuleIDMap("Global selector map", GlobalSelectorMap);
dumpModuleIDMap("Global preprocessed entity map",
GlobalPreprocessedEntityMap);
llvm::errs() << "\n*** PCH/Modules Loaded:";
for (ModuleFile &M : ModuleMgr)
M.dump();
}
/// Return the amount of memory used by memory buffers, breaking down
/// by heap-backed versus mmap'ed memory.
void ASTReader::getMemoryBufferSizes(MemoryBufferSizes &sizes) const {
for (ModuleFile &I : ModuleMgr) {
if (llvm::MemoryBuffer *buf = I.Buffer) {
size_t bytes = buf->getBufferSize();
switch (buf->getBufferKind()) {
case llvm::MemoryBuffer::MemoryBuffer_Malloc:
sizes.malloc_bytes += bytes;
break;
case llvm::MemoryBuffer::MemoryBuffer_MMap:
sizes.mmap_bytes += bytes;
break;
}
}
}
}
void ASTReader::InitializeSema(Sema &S) {
SemaObj = &S;
S.addExternalSource(this);
// Makes sure any declarations that were deserialized "too early"
// still get added to the identifier's declaration chains.
for (uint64_t ID : PreloadedDeclIDs) {
NamedDecl *D = cast<NamedDecl>(GetDecl(ID));
pushExternalDeclIntoScope(D, D->getDeclName());
}
PreloadedDeclIDs.clear();
// FIXME: What happens if these are changed by a module import?
if (!FPPragmaOptions.empty()) {
assert(FPPragmaOptions.size() == 1 && "Wrong number of FP_PRAGMA_OPTIONS");
FPOptionsOverride NewOverrides =
FPOptionsOverride::getFromOpaqueInt(FPPragmaOptions[0]);
SemaObj->CurFPFeatures =
NewOverrides.applyOverrides(SemaObj->getLangOpts());
}
SemaObj->OpenCLFeatures = OpenCLExtensions;
UpdateSema();
}
void ASTReader::UpdateSema() {
assert(SemaObj && "no Sema to update");
// Load the offsets of the declarations that Sema references.
// They will be lazily deserialized when needed.
if (!SemaDeclRefs.empty()) {
assert(SemaDeclRefs.size() % 3 == 0);
for (unsigned I = 0; I != SemaDeclRefs.size(); I += 3) {
if (!SemaObj->StdNamespace)
SemaObj->StdNamespace = SemaDeclRefs[I];
if (!SemaObj->StdBadAlloc)
SemaObj->StdBadAlloc = SemaDeclRefs[I+1];
if (!SemaObj->StdAlignValT)
SemaObj->StdAlignValT = SemaDeclRefs[I+2];
}
SemaDeclRefs.clear();
}
// Update the state of pragmas. Use the same API as if we had encountered the
// pragma in the source.
if (OptimizeOffPragmaLocation.isValid())
SemaObj->ActOnPragmaOptimize(/* On = */ false, OptimizeOffPragmaLocation);
if (PragmaMSStructState != -1)
SemaObj->ActOnPragmaMSStruct((PragmaMSStructKind)PragmaMSStructState);
if (PointersToMembersPragmaLocation.isValid()) {
SemaObj->ActOnPragmaMSPointersToMembers(
(LangOptions::PragmaMSPointersToMembersKind)
PragmaMSPointersToMembersState,
PointersToMembersPragmaLocation);
}
SemaObj->ForceCUDAHostDeviceDepth = ForceCUDAHostDeviceDepth;
if (PragmaAlignPackCurrentValue) {
// The bottom of the stack might have a default value. It must be adjusted
// to the current value to ensure that the packing state is preserved after
// popping entries that were included/imported from a PCH/module.
bool DropFirst = false;
if (!PragmaAlignPackStack.empty() &&
PragmaAlignPackStack.front().Location.isInvalid()) {
assert(PragmaAlignPackStack.front().Value ==
SemaObj->AlignPackStack.DefaultValue &&
"Expected a default alignment value");
SemaObj->AlignPackStack.Stack.emplace_back(
PragmaAlignPackStack.front().SlotLabel,
SemaObj->AlignPackStack.CurrentValue,
SemaObj->AlignPackStack.CurrentPragmaLocation,
PragmaAlignPackStack.front().PushLocation);
DropFirst = true;
}
for (const auto &Entry : llvm::makeArrayRef(PragmaAlignPackStack)
.drop_front(DropFirst ? 1 : 0)) {
SemaObj->AlignPackStack.Stack.emplace_back(
Entry.SlotLabel, Entry.Value, Entry.Location, Entry.PushLocation);
}
if (PragmaAlignPackCurrentLocation.isInvalid()) {
assert(*PragmaAlignPackCurrentValue ==
SemaObj->AlignPackStack.DefaultValue &&
"Expected a default align and pack value");
// Keep the current values.
} else {
SemaObj->AlignPackStack.CurrentValue = *PragmaAlignPackCurrentValue;
SemaObj->AlignPackStack.CurrentPragmaLocation =
PragmaAlignPackCurrentLocation;
}
}
if (FpPragmaCurrentValue) {
// The bottom of the stack might have a default value. It must be adjusted
// to the current value to ensure that fp-pragma state is preserved after
// popping entries that were included/imported from a PCH/module.
bool DropFirst = false;
if (!FpPragmaStack.empty() && FpPragmaStack.front().Location.isInvalid()) {
assert(FpPragmaStack.front().Value ==
SemaObj->FpPragmaStack.DefaultValue &&
"Expected a default pragma float_control value");
SemaObj->FpPragmaStack.Stack.emplace_back(
FpPragmaStack.front().SlotLabel, SemaObj->FpPragmaStack.CurrentValue,
SemaObj->FpPragmaStack.CurrentPragmaLocation,
FpPragmaStack.front().PushLocation);
DropFirst = true;
}
for (const auto &Entry :
llvm::makeArrayRef(FpPragmaStack).drop_front(DropFirst ? 1 : 0))
SemaObj->FpPragmaStack.Stack.emplace_back(
Entry.SlotLabel, Entry.Value, Entry.Location, Entry.PushLocation);
if (FpPragmaCurrentLocation.isInvalid()) {
assert(*FpPragmaCurrentValue == SemaObj->FpPragmaStack.DefaultValue &&
"Expected a default pragma float_control value");
// Keep the current values.
} else {
SemaObj->FpPragmaStack.CurrentValue = *FpPragmaCurrentValue;
SemaObj->FpPragmaStack.CurrentPragmaLocation = FpPragmaCurrentLocation;
}
}
// For non-modular AST files, restore visibility of modules.
for (auto &Import : ImportedModules) {
if (Import.ImportLoc.isInvalid())
continue;
if (Module *Imported = getSubmodule(Import.ID)) {
SemaObj->makeModuleVisible(Imported, Import.ImportLoc);
}
}
}
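// Look up an identifier by name in the loaded AST files, deserializing it on
// demand. Illustrative (hypothetical) use:
//   if (IdentifierInfo *II = Reader.get("printf"))
//     ; // II has been loaded and marked up to date.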
IdentifierInfo *ASTReader::get(StringRef Name) {
// Note that we are loading an identifier.
Deserializing AnIdentifier(this);
IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0,
NumIdentifierLookups,
NumIdentifierLookupHits);
// We don't need to do identifier table lookups in C++ modules (we preload
// all interesting declarations, and don't need to use the scope for name
// lookups). Perform the lookup in PCH files, though, since we don't build
// a complete initial identifier table if we're carrying on from a PCH.
if (PP.getLangOpts().CPlusPlus) {
for (auto F : ModuleMgr.pch_modules())
if (Visitor(*F))
break;
} else {
// If there is a global index, look there first to determine which modules
// provably do not have any results for this identifier.
GlobalModuleIndex::HitSet Hits;
GlobalModuleIndex::HitSet *HitsPtr = nullptr;
if (!loadGlobalIndex()) {
if (GlobalIndex->lookupIdentifier(Name, Hits)) {
HitsPtr = &Hits;
}
}
ModuleMgr.visit(Visitor, HitsPtr);
}
IdentifierInfo *II = Visitor.getIdentifierInfo();
markIdentifierUpToDate(II);
return II;
}
namespace clang {
/// An identifier-lookup iterator that enumerates all of the
/// identifiers stored within a set of AST files.
class ASTIdentifierIterator : public IdentifierIterator {
/// The AST reader whose identifiers are being enumerated.
const ASTReader &Reader;
/// The current index into the chain of AST files stored in
/// the AST reader.
unsigned Index;
/// The current position within the identifier lookup table
/// of the current AST file.
ASTIdentifierLookupTable::key_iterator Current;
/// The end position within the identifier lookup table of
/// the current AST file.
ASTIdentifierLookupTable::key_iterator End;
/// Whether to skip any modules in the ASTReader.
bool SkipModules;
public:
explicit ASTIdentifierIterator(const ASTReader &Reader,
bool SkipModules = false);
StringRef Next() override;
};
} // namespace clang
ASTIdentifierIterator::ASTIdentifierIterator(const ASTReader &Reader,
bool SkipModules)
: Reader(Reader), Index(Reader.ModuleMgr.size()), SkipModules(SkipModules) {
}
StringRef ASTIdentifierIterator::Next() {
while (Current == End) {
// If we have exhausted all of our AST files, we're done.
if (Index == 0)
return StringRef();
--Index;
ModuleFile &F = Reader.ModuleMgr[Index];
if (SkipModules && F.isModule())
continue;
ASTIdentifierLookupTable *IdTable =
(ASTIdentifierLookupTable *)F.IdentifierLookupTable;
Current = IdTable->key_begin();
End = IdTable->key_end();
}
// We still have identifiers remaining in the current AST file; return
// the next one.
StringRef Result = *Current;
++Current;
return Result;
}
namespace {
/// A utility for appending two IdentifierIterators.
class ChainedIdentifierIterator : public IdentifierIterator {
std::unique_ptr<IdentifierIterator> Current;
std::unique_ptr<IdentifierIterator> Queued;
public:
ChainedIdentifierIterator(std::unique_ptr<IdentifierIterator> First,
std::unique_ptr<IdentifierIterator> Second)
: Current(std::move(First)), Queued(std::move(Second)) {}
StringRef Next() override {
if (!Current)
return StringRef();
StringRef result = Current->Next();
if (!result.empty())
return result;
// Try the queued iterator, which may itself be empty.
Current.reset();
std::swap(Current, Queued);
return Next();
}
};
} // namespace
IdentifierIterator *ASTReader::getIdentifiers() {
if (!loadGlobalIndex()) {
std::unique_ptr<IdentifierIterator> ReaderIter(
new ASTIdentifierIterator(*this, /*SkipModules=*/true));
std::unique_ptr<IdentifierIterator> ModulesIter(
GlobalIndex->createIdentifierIterator());
return new ChainedIdentifierIterator(std::move(ReaderIter),
std::move(ModulesIter));
}
return new ASTIdentifierIterator(*this);
}
namespace clang {
namespace serialization {
class ReadMethodPoolVisitor {
ASTReader &Reader;
Selector Sel;
unsigned PriorGeneration;
unsigned InstanceBits = 0;
unsigned FactoryBits = 0;
bool InstanceHasMoreThanOneDecl = false;
bool FactoryHasMoreThanOneDecl = false;
SmallVector<ObjCMethodDecl *, 4> InstanceMethods;
SmallVector<ObjCMethodDecl *, 4> FactoryMethods;
public:
ReadMethodPoolVisitor(ASTReader &Reader, Selector Sel,
unsigned PriorGeneration)
: Reader(Reader), Sel(Sel), PriorGeneration(PriorGeneration) {}
bool operator()(ModuleFile &M) {
if (!M.SelectorLookupTable)
return false;
// If we've already searched this module file, skip it now.
if (M.Generation <= PriorGeneration)
return true;
++Reader.NumMethodPoolTableLookups;
ASTSelectorLookupTable *PoolTable
= (ASTSelectorLookupTable*)M.SelectorLookupTable;
ASTSelectorLookupTable::iterator Pos = PoolTable->find(Sel);
if (Pos == PoolTable->end())
return false;
++Reader.NumMethodPoolTableHits;
++Reader.NumSelectorsRead;
// FIXME: Not quite happy with the statistics here. We probably should
// disable this tracking when called via LoadSelector.
// Also, should entries without methods count as misses?
++Reader.NumMethodPoolEntriesRead;
ASTSelectorLookupTrait::data_type Data = *Pos;
if (Reader.DeserializationListener)
Reader.DeserializationListener->SelectorRead(Data.ID, Sel);
InstanceMethods.append(Data.Instance.begin(), Data.Instance.end());
FactoryMethods.append(Data.Factory.begin(), Data.Factory.end());
InstanceBits = Data.InstanceBits;
FactoryBits = Data.FactoryBits;
InstanceHasMoreThanOneDecl = Data.InstanceHasMoreThanOneDecl;
FactoryHasMoreThanOneDecl = Data.FactoryHasMoreThanOneDecl;
return true;
}
/// Retrieve the instance methods found by this visitor.
ArrayRef<ObjCMethodDecl *> getInstanceMethods() const {
return InstanceMethods;
}
/// Retrieve the factory methods found by this visitor.
ArrayRef<ObjCMethodDecl *> getFactoryMethods() const {
return FactoryMethods;
}
unsigned getInstanceBits() const { return InstanceBits; }
unsigned getFactoryBits() const { return FactoryBits; }
bool instanceHasMoreThanOneDecl() const {
return InstanceHasMoreThanOneDecl;
}
bool factoryHasMoreThanOneDecl() const { return FactoryHasMoreThanOneDecl; }
};
} // namespace serialization
} // namespace clang
/// Add the given set of methods to the method list.
static void addMethodsToPool(Sema &S, ArrayRef<ObjCMethodDecl *> Methods,
ObjCMethodList &List) {
for (unsigned I = 0, N = Methods.size(); I != N; ++I) {
S.addMethodToGlobalList(&List, Methods[I]);
}
}
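// Method-pool lookups are generation based: we remember the reader generation
// at which a selector was last searched and only revisit module files loaded
// after that point (see the Generation check in ReadMethodPoolVisitor).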
void ASTReader::ReadMethodPool(Selector Sel) {
// Get the selector generation and update it to the current generation.
unsigned &Generation = SelectorGeneration[Sel];
unsigned PriorGeneration = Generation;
Generation = getGeneration();
SelectorOutOfDate[Sel] = false;
// Search for methods defined with this selector.
++NumMethodPoolLookups;
ReadMethodPoolVisitor Visitor(*this, Sel, PriorGeneration);
ModuleMgr.visit(Visitor);
if (Visitor.getInstanceMethods().empty() &&
Visitor.getFactoryMethods().empty())
return;
++NumMethodPoolHits;
if (!getSema())
return;
Sema &S = *getSema();
Sema::GlobalMethodPool::iterator Pos
= S.MethodPool.insert(std::make_pair(Sel, Sema::GlobalMethods())).first;
Pos->second.first.setBits(Visitor.getInstanceBits());
Pos->second.first.setHasMoreThanOneDecl(Visitor.instanceHasMoreThanOneDecl());
Pos->second.second.setBits(Visitor.getFactoryBits());
Pos->second.second.setHasMoreThanOneDecl(Visitor.factoryHasMoreThanOneDecl());
// Add methods to the global pool *after* setting hasMoreThanOneDecl, since
// when building a module we keep every method individually and may need to
// update hasMoreThanOneDecl as we add the methods.
addMethodsToPool(S, Visitor.getInstanceMethods(), Pos->second.first);
addMethodsToPool(S, Visitor.getFactoryMethods(), Pos->second.second);
}
void ASTReader::updateOutOfDateSelector(Selector Sel) {
if (SelectorOutOfDate[Sel])
ReadMethodPool(Sel);
}
void ASTReader::ReadKnownNamespaces(
SmallVectorImpl<NamespaceDecl *> &Namespaces) {
Namespaces.clear();
for (unsigned I = 0, N = KnownNamespaces.size(); I != N; ++I) {
if (NamespaceDecl *Namespace
= dyn_cast_or_null<NamespaceDecl>(GetDecl(KnownNamespaces[I])))
Namespaces.push_back(Namespace);
}
}
void ASTReader::ReadUndefinedButUsed(
llvm::MapVector<NamedDecl *, SourceLocation> &Undefined) {
for (unsigned Idx = 0, N = UndefinedButUsed.size(); Idx != N;) {
NamedDecl *D = cast<NamedDecl>(GetDecl(UndefinedButUsed[Idx++]));
SourceLocation Loc =
SourceLocation::getFromRawEncoding(UndefinedButUsed[Idx++]);
Undefined.insert(std::make_pair(D, Loc));
}
}
void ASTReader::ReadMismatchingDeleteExpressions(llvm::MapVector<
FieldDecl *, llvm::SmallVector<std::pair<SourceLocation, bool>, 4>> &
Exprs) {
for (unsigned Idx = 0, N = DelayedDeleteExprs.size(); Idx != N;) {
FieldDecl *FD = cast<FieldDecl>(GetDecl(DelayedDeleteExprs[Idx++]));
uint64_t Count = DelayedDeleteExprs[Idx++];
for (uint64_t C = 0; C < Count; ++C) {
SourceLocation DeleteLoc =
SourceLocation::getFromRawEncoding(DelayedDeleteExprs[Idx++]);
const bool IsArrayForm = DelayedDeleteExprs[Idx++];
Exprs[FD].push_back(std::make_pair(DeleteLoc, IsArrayForm));
}
}
}
void ASTReader::ReadTentativeDefinitions(
SmallVectorImpl<VarDecl *> &TentativeDefs) {
for (unsigned I = 0, N = TentativeDefinitions.size(); I != N; ++I) {
VarDecl *Var = dyn_cast_or_null<VarDecl>(GetDecl(TentativeDefinitions[I]));
if (Var)
TentativeDefs.push_back(Var);
}
TentativeDefinitions.clear();
}
void ASTReader::ReadUnusedFileScopedDecls(
SmallVectorImpl<const DeclaratorDecl *> &Decls) {
for (unsigned I = 0, N = UnusedFileScopedDecls.size(); I != N; ++I) {
DeclaratorDecl *D
= dyn_cast_or_null<DeclaratorDecl>(GetDecl(UnusedFileScopedDecls[I]));
if (D)
Decls.push_back(D);
}
UnusedFileScopedDecls.clear();
}
void ASTReader::ReadDelegatingConstructors(
SmallVectorImpl<CXXConstructorDecl *> &Decls) {
for (unsigned I = 0, N = DelegatingCtorDecls.size(); I != N; ++I) {
CXXConstructorDecl *D
= dyn_cast_or_null<CXXConstructorDecl>(GetDecl(DelegatingCtorDecls[I]));
if (D)
Decls.push_back(D);
}
DelegatingCtorDecls.clear();
}
void ASTReader::ReadExtVectorDecls(SmallVectorImpl<TypedefNameDecl *> &Decls) {
for (unsigned I = 0, N = ExtVectorDecls.size(); I != N; ++I) {
TypedefNameDecl *D
= dyn_cast_or_null<TypedefNameDecl>(GetDecl(ExtVectorDecls[I]));
if (D)
Decls.push_back(D);
}
ExtVectorDecls.clear();
}
void ASTReader::ReadUnusedLocalTypedefNameCandidates(
llvm::SmallSetVector<const TypedefNameDecl *, 4> &Decls) {
for (unsigned I = 0, N = UnusedLocalTypedefNameCandidates.size(); I != N;
++I) {
TypedefNameDecl *D = dyn_cast_or_null<TypedefNameDecl>(
GetDecl(UnusedLocalTypedefNameCandidates[I]));
if (D)
Decls.insert(D);
}
UnusedLocalTypedefNameCandidates.clear();
}
void ASTReader::ReadDeclsToCheckForDeferredDiags(
llvm::SmallSetVector<Decl *, 4> &Decls) {
for (auto I : DeclsToCheckForDeferredDiags) {
auto *D = dyn_cast_or_null<Decl>(GetDecl(I));
if (D)
Decls.insert(D);
}
DeclsToCheckForDeferredDiags.clear();
}
void ASTReader::ReadReferencedSelectors(
SmallVectorImpl<std::pair<Selector, SourceLocation>> &Sels) {
if (ReferencedSelectorsData.empty())
return;
// If there are @selector references, add them to the pool of referenced
// selectors. This is for the implementation of -Wselector.
unsigned int DataSize = ReferencedSelectorsData.size()-1;
unsigned I = 0;
while (I < DataSize) {
Selector Sel = DecodeSelector(ReferencedSelectorsData[I++]);
SourceLocation SelLoc
= SourceLocation::getFromRawEncoding(ReferencedSelectorsData[I++]);
Sels.push_back(std::make_pair(Sel, SelLoc));
}
ReferencedSelectorsData.clear();
}
void ASTReader::ReadWeakUndeclaredIdentifiers(
SmallVectorImpl<std::pair<IdentifierInfo *, WeakInfo>> &WeakIDs) {
if (WeakUndeclaredIdentifiers.empty())
return;
for (unsigned I = 0, N = WeakUndeclaredIdentifiers.size(); I < N; /*none*/) {
IdentifierInfo *WeakId
= DecodeIdentifierInfo(WeakUndeclaredIdentifiers[I++]);
IdentifierInfo *AliasId
= DecodeIdentifierInfo(WeakUndeclaredIdentifiers[I++]);
SourceLocation Loc
= SourceLocation::getFromRawEncoding(WeakUndeclaredIdentifiers[I++]);
bool Used = WeakUndeclaredIdentifiers[I++];
WeakInfo WI(AliasId, Loc);
WI.setUsed(Used);
WeakIDs.push_back(std::make_pair(WeakId, WI));
}
WeakUndeclaredIdentifiers.clear();
}
void ASTReader::ReadUsedVTables(SmallVectorImpl<ExternalVTableUse> &VTables) {
for (unsigned Idx = 0, N = VTableUses.size(); Idx < N; /* In loop */) {
ExternalVTableUse VT;
VT.Record = dyn_cast_or_null<CXXRecordDecl>(GetDecl(VTableUses[Idx++]));
VT.Location = SourceLocation::getFromRawEncoding(VTableUses[Idx++]);
VT.DefinitionRequired = VTableUses[Idx++];
VTables.push_back(VT);
}
VTableUses.clear();
}
void ASTReader::ReadPendingInstantiations(
SmallVectorImpl<std::pair<ValueDecl *, SourceLocation>> &Pending) {
for (unsigned Idx = 0, N = PendingInstantiations.size(); Idx < N;) {
ValueDecl *D = cast<ValueDecl>(GetDecl(PendingInstantiations[Idx++]));
SourceLocation Loc
= SourceLocation::getFromRawEncoding(PendingInstantiations[Idx++]);
Pending.push_back(std::make_pair(D, Loc));
}
PendingInstantiations.clear();
}
void ASTReader::ReadLateParsedTemplates(
llvm::MapVector<const FunctionDecl *, std::unique_ptr<LateParsedTemplate>>
&LPTMap) {
for (auto &LPT : LateParsedTemplates) {
ModuleFile *FMod = LPT.first;
RecordDataImpl &LateParsed = LPT.second;
for (unsigned Idx = 0, N = LateParsed.size(); Idx < N;
/* In loop */) {
FunctionDecl *FD =
cast<FunctionDecl>(GetLocalDecl(*FMod, LateParsed[Idx++]));
auto LT = std::make_unique<LateParsedTemplate>();
LT->D = GetLocalDecl(*FMod, LateParsed[Idx++]);
ModuleFile *F = getOwningModuleFile(LT->D);
assert(F && "No module");
unsigned TokN = LateParsed[Idx++];
LT->Toks.reserve(TokN);
for (unsigned T = 0; T < TokN; ++T)
LT->Toks.push_back(ReadToken(*F, LateParsed, Idx));
LPTMap.insert(std::make_pair(FD, std::move(LT)));
}
}
+
+ LateParsedTemplates.clear();
}
void ASTReader::LoadSelector(Selector Sel) {
// It would be complicated to avoid reading the methods anyway. So don't.
ReadMethodPool(Sel);
}
void ASTReader::SetIdentifierInfo(IdentifierID ID, IdentifierInfo *II) {
assert(ID && "Non-zero identifier ID required");
assert(ID <= IdentifiersLoaded.size() && "identifier ID out of range");
IdentifiersLoaded[ID - 1] = II;
if (DeserializationListener)
DeserializationListener->IdentifierRead(ID, II);
}
/// Set the globally-visible declarations associated with the given
/// identifier.
///
/// If the AST reader is currently in a state where the given declaration IDs
/// cannot safely be resolved, they are queued until it is safe to resolve
/// them.
///
/// \param II an IdentifierInfo that refers to one or more globally-visible
/// declarations.
///
/// \param DeclIDs the set of declaration IDs with the name @p II that are
/// visible at global scope.
///
/// \param Decls if non-null, this vector will be populated with the set of
/// deserialized declarations. These declarations will not be pushed into
/// scope.
void
ASTReader::SetGloballyVisibleDecls(IdentifierInfo *II,
const SmallVectorImpl<uint32_t> &DeclIDs,
SmallVectorImpl<Decl *> *Decls) {
if (NumCurrentElementsDeserializing && !Decls) {
PendingIdentifierInfos[II].append(DeclIDs.begin(), DeclIDs.end());
return;
}
for (unsigned I = 0, N = DeclIDs.size(); I != N; ++I) {
if (!SemaObj) {
// Queue this declaration so that it will be added to the
// translation unit scope and identifier's declaration chain
// once a Sema object is known.
PreloadedDeclIDs.push_back(DeclIDs[I]);
continue;
}
NamedDecl *D = cast<NamedDecl>(GetDecl(DeclIDs[I]));
// If we're simply supposed to record the declarations, do so now.
if (Decls) {
Decls->push_back(D);
continue;
}
// Introduce this declaration into the translation-unit scope
// and add it to the declaration chain for this identifier, so
// that (unqualified) name lookup will find it.
pushExternalDeclIntoScope(D, II);
}
}
IdentifierInfo *ASTReader::DecodeIdentifierInfo(IdentifierID ID) {
if (ID == 0)
return nullptr;
if (IdentifiersLoaded.empty()) {
Error("no identifier table in AST file");
return nullptr;
}
ID -= 1;
if (!IdentifiersLoaded[ID]) {
GlobalIdentifierMapType::iterator I = GlobalIdentifierMap.find(ID + 1);
assert(I != GlobalIdentifierMap.end() && "Corrupted global identifier map");
ModuleFile *M = I->second;
unsigned Index = ID - M->BaseIdentifierID;
const unsigned char *Data =
M->IdentifierTableData + M->IdentifierOffsets[Index];
ASTIdentifierLookupTrait Trait(*this, *M);
auto KeyDataLen = Trait.ReadKeyDataLength(Data);
auto Key = Trait.ReadKey(Data, KeyDataLen.first);
auto &II = PP.getIdentifierTable().get(Key);
IdentifiersLoaded[ID] = &II;
markIdentifierFromAST(*this, II);
if (DeserializationListener)
DeserializationListener->IdentifierRead(ID + 1, &II);
}
return IdentifiersLoaded[ID];
}
IdentifierInfo *ASTReader::getLocalIdentifier(ModuleFile &M, unsigned LocalID) {
return DecodeIdentifierInfo(getGlobalIdentifierID(M, LocalID));
}
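// Local-to-global ID translation: IDs below the predefined range are the same
// in every module file; all others are remapped by adding the offset recorded
// for this module in its (lazily loaded) module offset map. The macro,
// submodule and selector variants below follow the same pattern.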
IdentifierID ASTReader::getGlobalIdentifierID(ModuleFile &M, unsigned LocalID) {
if (LocalID < NUM_PREDEF_IDENT_IDS)
return LocalID;
if (!M.ModuleOffsetMap.empty())
ReadModuleOffsetMap(M);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= M.IdentifierRemap.find(LocalID - NUM_PREDEF_IDENT_IDS);
assert(I != M.IdentifierRemap.end()
&& "Invalid index into identifier index remap");
return LocalID + I->second;
}
MacroInfo *ASTReader::getMacro(MacroID ID) {
if (ID == 0)
return nullptr;
if (MacrosLoaded.empty()) {
Error("no macro table in AST file");
return nullptr;
}
ID -= NUM_PREDEF_MACRO_IDS;
if (!MacrosLoaded[ID]) {
GlobalMacroMapType::iterator I
= GlobalMacroMap.find(ID + NUM_PREDEF_MACRO_IDS);
assert(I != GlobalMacroMap.end() && "Corrupted global macro map");
ModuleFile *M = I->second;
unsigned Index = ID - M->BaseMacroID;
MacrosLoaded[ID] =
ReadMacroRecord(*M, M->MacroOffsetsBase + M->MacroOffsets[Index]);
if (DeserializationListener)
DeserializationListener->MacroRead(ID + NUM_PREDEF_MACRO_IDS,
MacrosLoaded[ID]);
}
return MacrosLoaded[ID];
}
MacroID ASTReader::getGlobalMacroID(ModuleFile &M, unsigned LocalID) {
if (LocalID < NUM_PREDEF_MACRO_IDS)
return LocalID;
if (!M.ModuleOffsetMap.empty())
ReadModuleOffsetMap(M);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= M.MacroRemap.find(LocalID - NUM_PREDEF_MACRO_IDS);
assert(I != M.MacroRemap.end() && "Invalid index into macro index remap");
return LocalID + I->second;
}
serialization::SubmoduleID
ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) {
if (LocalID < NUM_PREDEF_SUBMODULE_IDS)
return LocalID;
if (!M.ModuleOffsetMap.empty())
ReadModuleOffsetMap(M);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= M.SubmoduleRemap.find(LocalID - NUM_PREDEF_SUBMODULE_IDS);
assert(I != M.SubmoduleRemap.end()
&& "Invalid index into submodule index remap");
return LocalID + I->second;
}
Module *ASTReader::getSubmodule(SubmoduleID GlobalID) {
if (GlobalID < NUM_PREDEF_SUBMODULE_IDS) {
assert(GlobalID == 0 && "Unhandled global submodule ID");
return nullptr;
}
if (GlobalID > SubmodulesLoaded.size()) {
Error("submodule ID out of range in AST file");
return nullptr;
}
return SubmodulesLoaded[GlobalID - NUM_PREDEF_SUBMODULE_IDS];
}
Module *ASTReader::getModule(unsigned ID) {
return getSubmodule(ID);
}
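// Module file references use a 1-bit tag (mirrored by getModuleFileID below):
// odd values carry a submodule ID in the upper bits, while even values carry
// an index counted from the end of the PCH chain.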
ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &F, unsigned ID) {
if (ID & 1) {
// It's a module, look it up by submodule ID.
auto I = GlobalSubmoduleMap.find(getGlobalSubmoduleID(F, ID >> 1));
return I == GlobalSubmoduleMap.end() ? nullptr : I->second;
} else {
// It's a prefix (preamble, PCH, ...). Look it up by index.
unsigned IndexFromEnd = ID >> 1;
assert(IndexFromEnd && "got reference to unknown module file");
return getModuleManager().pch_modules().end()[-IndexFromEnd];
}
}
unsigned ASTReader::getModuleFileID(ModuleFile *F) {
if (!F)
return 1;
// For a file representing a module, use the submodule ID of the top-level
// module as the file ID. For any other kind of file, the number of such
// files loaded beforehand will be the same on reload.
// FIXME: Is this true even if we have an explicit module file and a PCH?
if (F->isModule())
return ((F->BaseSubmoduleID + NUM_PREDEF_SUBMODULE_IDS) << 1) | 1;
auto PCHModules = getModuleManager().pch_modules();
auto I = llvm::find(PCHModules, F);
assert(I != PCHModules.end() && "emitting reference to unknown file");
return (I - PCHModules.end()) << 1;
}
llvm::Optional<ASTSourceDescriptor>
ASTReader::getSourceDescriptor(unsigned ID) {
if (Module *M = getSubmodule(ID))
return ASTSourceDescriptor(*M);
// If there is only a single PCH, return it instead.
// Chained PCHs are not supported.
const auto &PCHChain = ModuleMgr.pch_modules();
if (std::distance(std::begin(PCHChain), std::end(PCHChain))) {
ModuleFile &MF = ModuleMgr.getPrimaryModule();
StringRef ModuleName = llvm::sys::path::filename(MF.OriginalSourceFileName);
StringRef FileName = llvm::sys::path::filename(MF.FileName);
return ASTSourceDescriptor(ModuleName, MF.OriginalDir, FileName,
MF.Signature);
}
return None;
}
ExternalASTSource::ExtKind ASTReader::hasExternalDefinitions(const Decl *FD) {
auto I = DefinitionSource.find(FD);
if (I == DefinitionSource.end())
return EK_ReplyHazy;
return I->second ? EK_Never : EK_Always;
}
Selector ASTReader::getLocalSelector(ModuleFile &M, unsigned LocalID) {
return DecodeSelector(getGlobalSelectorID(M, LocalID));
}
Selector ASTReader::DecodeSelector(serialization::SelectorID ID) {
if (ID == 0)
return Selector();
if (ID > SelectorsLoaded.size()) {
Error("selector ID out of range in AST file");
return Selector();
}
if (SelectorsLoaded[ID - 1].getAsOpaquePtr() == nullptr) {
// Load this selector from the selector table.
GlobalSelectorMapType::iterator I = GlobalSelectorMap.find(ID);
assert(I != GlobalSelectorMap.end() && "Corrupted global selector map");
ModuleFile &M = *I->second;
ASTSelectorLookupTrait Trait(*this, M);
unsigned Idx = ID - M.BaseSelectorID - NUM_PREDEF_SELECTOR_IDS;
SelectorsLoaded[ID - 1] =
Trait.ReadKey(M.SelectorLookupTableData + M.SelectorOffsets[Idx], 0);
if (DeserializationListener)
DeserializationListener->SelectorRead(ID, SelectorsLoaded[ID - 1]);
}
return SelectorsLoaded[ID - 1];
}
Selector ASTReader::GetExternalSelector(serialization::SelectorID ID) {
return DecodeSelector(ID);
}
uint32_t ASTReader::GetNumExternalSelectors() {
// ID 0 (the null selector) is considered an external selector.
return getTotalNumSelectors() + 1;
}
serialization::SelectorID
ASTReader::getGlobalSelectorID(ModuleFile &M, unsigned LocalID) const {
if (LocalID < NUM_PREDEF_SELECTOR_IDS)
return LocalID;
if (!M.ModuleOffsetMap.empty())
ReadModuleOffsetMap(M);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= M.SelectorRemap.find(LocalID - NUM_PREDEF_SELECTOR_IDS);
assert(I != M.SelectorRemap.end()
&& "Invalid index into selector index remap");
return LocalID + I->second;
}
DeclarationNameLoc
ASTRecordReader::readDeclarationNameLoc(DeclarationName Name) {
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
return DeclarationNameLoc::makeNamedTypeLoc(readTypeSourceInfo());
case DeclarationName::CXXOperatorName:
return DeclarationNameLoc::makeCXXOperatorNameLoc(readSourceRange());
case DeclarationName::CXXLiteralOperatorName:
return DeclarationNameLoc::makeCXXLiteralOperatorNameLoc(
readSourceLocation());
case DeclarationName::Identifier:
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
case DeclarationName::CXXUsingDirective:
case DeclarationName::CXXDeductionGuideName:
break;
}
return DeclarationNameLoc();
}
DeclarationNameInfo ASTRecordReader::readDeclarationNameInfo() {
DeclarationNameInfo NameInfo;
NameInfo.setName(readDeclarationName());
NameInfo.setLoc(readSourceLocation());
NameInfo.setInfo(readDeclarationNameLoc(NameInfo.getName()));
return NameInfo;
}
void ASTRecordReader::readQualifierInfo(QualifierInfo &Info) {
Info.QualifierLoc = readNestedNameSpecifierLoc();
unsigned NumTPLists = readInt();
Info.NumTemplParamLists = NumTPLists;
if (NumTPLists) {
Info.TemplParamLists =
new (getContext()) TemplateParameterList *[NumTPLists];
for (unsigned i = 0; i != NumTPLists; ++i)
Info.TemplParamLists[i] = readTemplateParameterList();
}
}
TemplateParameterList *
ASTRecordReader::readTemplateParameterList() {
SourceLocation TemplateLoc = readSourceLocation();
SourceLocation LAngleLoc = readSourceLocation();
SourceLocation RAngleLoc = readSourceLocation();
unsigned NumParams = readInt();
SmallVector<NamedDecl *, 16> Params;
Params.reserve(NumParams);
while (NumParams--)
Params.push_back(readDeclAs<NamedDecl>());
bool HasRequiresClause = readBool();
Expr *RequiresClause = HasRequiresClause ? readExpr() : nullptr;
TemplateParameterList *TemplateParams = TemplateParameterList::Create(
getContext(), TemplateLoc, LAngleLoc, Params, RAngleLoc, RequiresClause);
return TemplateParams;
}
void ASTRecordReader::readTemplateArgumentList(
SmallVectorImpl<TemplateArgument> &TemplArgs,
bool Canonicalize) {
unsigned NumTemplateArgs = readInt();
TemplArgs.reserve(NumTemplateArgs);
while (NumTemplateArgs--)
TemplArgs.push_back(readTemplateArgument(Canonicalize));
}
/// Read a UnresolvedSet structure.
void ASTRecordReader::readUnresolvedSet(LazyASTUnresolvedSet &Set) {
unsigned NumDecls = readInt();
Set.reserve(getContext(), NumDecls);
while (NumDecls--) {
DeclID ID = readDeclID();
AccessSpecifier AS = (AccessSpecifier) readInt();
Set.addLazyDecl(getContext(), ID, AS);
}
}
CXXBaseSpecifier
ASTRecordReader::readCXXBaseSpecifier() {
bool isVirtual = readBool();
bool isBaseOfClass = readBool();
AccessSpecifier AS = static_cast<AccessSpecifier>(readInt());
bool inheritConstructors = readBool();
TypeSourceInfo *TInfo = readTypeSourceInfo();
SourceRange Range = readSourceRange();
SourceLocation EllipsisLoc = readSourceLocation();
CXXBaseSpecifier Result(Range, isVirtual, isBaseOfClass, AS, TInfo,
EllipsisLoc);
Result.setInheritConstructors(inheritConstructors);
return Result;
}
CXXCtorInitializer **
ASTRecordReader::readCXXCtorInitializers() {
ASTContext &Context = getContext();
unsigned NumInitializers = readInt();
assert(NumInitializers && "wrote ctor initializers but have no inits");
auto **CtorInitializers = new (Context) CXXCtorInitializer*[NumInitializers];
for (unsigned i = 0; i != NumInitializers; ++i) {
TypeSourceInfo *TInfo = nullptr;
bool IsBaseVirtual = false;
FieldDecl *Member = nullptr;
IndirectFieldDecl *IndirectMember = nullptr;
CtorInitializerType Type = (CtorInitializerType) readInt();
switch (Type) {
case CTOR_INITIALIZER_BASE:
TInfo = readTypeSourceInfo();
IsBaseVirtual = readBool();
break;
case CTOR_INITIALIZER_DELEGATING:
TInfo = readTypeSourceInfo();
break;
case CTOR_INITIALIZER_MEMBER:
Member = readDeclAs<FieldDecl>();
break;
case CTOR_INITIALIZER_INDIRECT_MEMBER:
IndirectMember = readDeclAs<IndirectFieldDecl>();
break;
}
SourceLocation MemberOrEllipsisLoc = readSourceLocation();
Expr *Init = readExpr();
SourceLocation LParenLoc = readSourceLocation();
SourceLocation RParenLoc = readSourceLocation();
CXXCtorInitializer *BOMInit;
if (Type == CTOR_INITIALIZER_BASE)
BOMInit = new (Context)
CXXCtorInitializer(Context, TInfo, IsBaseVirtual, LParenLoc, Init,
RParenLoc, MemberOrEllipsisLoc);
else if (Type == CTOR_INITIALIZER_DELEGATING)
BOMInit = new (Context)
CXXCtorInitializer(Context, TInfo, LParenLoc, Init, RParenLoc);
else if (Member)
BOMInit = new (Context)
CXXCtorInitializer(Context, Member, MemberOrEllipsisLoc, LParenLoc,
Init, RParenLoc);
else
BOMInit = new (Context)
CXXCtorInitializer(Context, IndirectMember, MemberOrEllipsisLoc,
LParenLoc, Init, RParenLoc);
if (/*IsWritten*/readBool()) {
unsigned SourceOrder = readInt();
BOMInit->setSourceOrder(SourceOrder);
}
CtorInitializers[i] = BOMInit;
}
return CtorInitializers;
}
NestedNameSpecifierLoc
ASTRecordReader::readNestedNameSpecifierLoc() {
ASTContext &Context = getContext();
unsigned N = readInt();
NestedNameSpecifierLocBuilder Builder;
for (unsigned I = 0; I != N; ++I) {
auto Kind = readNestedNameSpecifierKind();
switch (Kind) {
case NestedNameSpecifier::Identifier: {
IdentifierInfo *II = readIdentifier();
SourceRange Range = readSourceRange();
Builder.Extend(Context, II, Range.getBegin(), Range.getEnd());
break;
}
case NestedNameSpecifier::Namespace: {
NamespaceDecl *NS = readDeclAs<NamespaceDecl>();
SourceRange Range = readSourceRange();
Builder.Extend(Context, NS, Range.getBegin(), Range.getEnd());
break;
}
case NestedNameSpecifier::NamespaceAlias: {
NamespaceAliasDecl *Alias = readDeclAs<NamespaceAliasDecl>();
SourceRange Range = readSourceRange();
Builder.Extend(Context, Alias, Range.getBegin(), Range.getEnd());
break;
}
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
bool Template = readBool();
TypeSourceInfo *T = readTypeSourceInfo();
if (!T)
return NestedNameSpecifierLoc();
SourceLocation ColonColonLoc = readSourceLocation();
// FIXME: 'template' keyword location not saved anywhere, so we fake it.
Builder.Extend(Context,
Template? T->getTypeLoc().getBeginLoc() : SourceLocation(),
T->getTypeLoc(), ColonColonLoc);
break;
}
case NestedNameSpecifier::Global: {
SourceLocation ColonColonLoc = readSourceLocation();
Builder.MakeGlobal(Context, ColonColonLoc);
break;
}
case NestedNameSpecifier::Super: {
CXXRecordDecl *RD = readDeclAs<CXXRecordDecl>();
SourceRange Range = readSourceRange();
Builder.MakeSuper(Context, RD, Range.getBegin(), Range.getEnd());
break;
}
}
}
return Builder.getWithLocInContext(Context);
}
SourceRange
ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record,
unsigned &Idx) {
SourceLocation beg = ReadSourceLocation(F, Record, Idx);
SourceLocation end = ReadSourceLocation(F, Record, Idx);
return SourceRange(beg, end);
}
/// Read a floating-point value
llvm::APFloat ASTRecordReader::readAPFloat(const llvm::fltSemantics &Sem) {
return llvm::APFloat(Sem, readAPInt());
}
// Read a string
std::string ASTReader::ReadString(const RecordData &Record, unsigned &Idx) {
unsigned Len = Record[Idx++];
std::string Result(Record.data() + Idx, Record.data() + Idx + Len);
Idx += Len;
return Result;
}
std::string ASTReader::ReadPath(ModuleFile &F, const RecordData &Record,
unsigned &Idx) {
std::string Filename = ReadString(Record, Idx);
ResolveImportedPath(F, Filename);
return Filename;
}
std::string ASTReader::ReadPath(StringRef BaseDirectory,
const RecordData &Record, unsigned &Idx) {
std::string Filename = ReadString(Record, Idx);
if (!BaseDirectory.empty())
ResolveImportedPath(Filename, BaseDirectory);
return Filename;
}
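// Version tuples are serialized with the minor and subminor components biased
// by one so that zero can mean "not present"; e.g. a stored {10, 5, 0}
// decodes to version 10.4.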
VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record,
unsigned &Idx) {
unsigned Major = Record[Idx++];
unsigned Minor = Record[Idx++];
unsigned Subminor = Record[Idx++];
if (Minor == 0)
return VersionTuple(Major);
if (Subminor == 0)
return VersionTuple(Major, Minor - 1);
return VersionTuple(Major, Minor - 1, Subminor - 1);
}
CXXTemporary *ASTReader::ReadCXXTemporary(ModuleFile &F,
const RecordData &Record,
unsigned &Idx) {
CXXDestructorDecl *Decl = ReadDeclAs<CXXDestructorDecl>(F, Record, Idx);
return CXXTemporary::Create(getContext(), Decl);
}
DiagnosticBuilder ASTReader::Diag(unsigned DiagID) const {
return Diag(CurrentImportLoc, DiagID);
}
DiagnosticBuilder ASTReader::Diag(SourceLocation Loc, unsigned DiagID) const {
return Diags.Report(Loc, DiagID);
}
/// Retrieve the identifier table associated with the
/// preprocessor.
IdentifierTable &ASTReader::getIdentifierTable() {
return PP.getIdentifierTable();
}
/// Record that the given ID maps to the given switch-case
/// statement.
void ASTReader::RecordSwitchCaseID(SwitchCase *SC, unsigned ID) {
assert((*CurrSwitchCaseStmts)[ID] == nullptr &&
"Already have a SwitchCase with this ID");
(*CurrSwitchCaseStmts)[ID] = SC;
}
/// Retrieve the switch-case statement with the given ID.
SwitchCase *ASTReader::getSwitchCaseWithID(unsigned ID) {
assert((*CurrSwitchCaseStmts)[ID] != nullptr && "No SwitchCase with this ID");
return (*CurrSwitchCaseStmts)[ID];
}
void ASTReader::ClearSwitchCaseIDs() {
CurrSwitchCaseStmts->clear();
}
void ASTReader::ReadComments() {
ASTContext &Context = getContext();
std::vector<RawComment *> Comments;
for (SmallVectorImpl<std::pair<BitstreamCursor,
serialization::ModuleFile *>>::iterator
I = CommentsCursors.begin(),
E = CommentsCursors.end();
I != E; ++I) {
Comments.clear();
BitstreamCursor &Cursor = I->first;
serialization::ModuleFile &F = *I->second;
SavedStreamPosition SavedPosition(Cursor);
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
Cursor.advanceSkippingSubblocks(
BitstreamCursor::AF_DontPopBlockAtEnd);
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return;
case llvm::BitstreamEntry::EndBlock:
goto NextCursor;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read a record.
Record.clear();
Expected<unsigned> MaybeComment = Cursor.readRecord(Entry.ID, Record);
if (!MaybeComment) {
Error(MaybeComment.takeError());
return;
}
switch ((CommentRecordTypes)MaybeComment.get()) {
case COMMENTS_RAW_COMMENT: {
unsigned Idx = 0;
SourceRange SR = ReadSourceRange(F, Record, Idx);
RawComment::CommentKind Kind =
(RawComment::CommentKind) Record[Idx++];
bool IsTrailingComment = Record[Idx++];
bool IsAlmostTrailingComment = Record[Idx++];
Comments.push_back(new (Context) RawComment(
SR, Kind, IsTrailingComment, IsAlmostTrailingComment));
break;
}
}
}
NextCursor:
llvm::DenseMap<FileID, std::map<unsigned, RawComment *>>
FileToOffsetToComment;
for (RawComment *C : Comments) {
SourceLocation CommentLoc = C->getBeginLoc();
if (CommentLoc.isValid()) {
std::pair<FileID, unsigned> Loc =
SourceMgr.getDecomposedLoc(CommentLoc);
if (Loc.first.isValid())
Context.Comments.OrderedComments[Loc.first].emplace(Loc.second, C);
}
}
}
}
void ASTReader::visitInputFiles(serialization::ModuleFile &MF,
bool IncludeSystem, bool Complain,
llvm::function_ref<void(const serialization::InputFile &IF,
bool isSystem)> Visitor) {
unsigned NumUserInputs = MF.NumUserInputFiles;
unsigned NumInputs = MF.InputFilesLoaded.size();
assert(NumUserInputs <= NumInputs);
unsigned N = IncludeSystem ? NumInputs : NumUserInputs;
for (unsigned I = 0; I < N; ++I) {
bool IsSystem = I >= NumUserInputs;
InputFile IF = getInputFile(MF, I+1, Complain);
Visitor(IF, IsSystem);
}
}
void ASTReader::visitTopLevelModuleMaps(
serialization::ModuleFile &MF,
llvm::function_ref<void(const FileEntry *FE)> Visitor) {
unsigned NumInputs = MF.InputFilesLoaded.size();
for (unsigned I = 0; I < NumInputs; ++I) {
InputFileInfo IFI = readInputFileInfo(MF, I + 1);
if (IFI.TopLevelModuleMap)
// FIXME: This unnecessarily re-reads the InputFileInfo.
if (auto FE = getInputFile(MF, I + 1).getFile())
Visitor(FE);
}
}
std::string ASTReader::getOwningModuleNameForDiagnostic(const Decl *D) {
// If we know the owning module, use it.
if (Module *M = D->getImportedOwningModule())
return M->getFullModuleName();
// Otherwise, use the name of the top-level module the decl is within.
if (ModuleFile *M = getOwningModuleFile(D))
return M->ModuleName;
// Not from a module.
return {};
}
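// Drain the reader's pending-work queues. The outer loop below re-checks all
// of the queues because resolving one kind of pending work (say, a decl
// update) can enqueue more work of another kind.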
void ASTReader::finishPendingActions() {
while (!PendingIdentifierInfos.empty() || !PendingFunctionTypes.empty() ||
!PendingIncompleteDeclChains.empty() || !PendingDeclChains.empty() ||
!PendingMacroIDs.empty() || !PendingDeclContextInfos.empty() ||
!PendingUpdateRecords.empty()) {
// If any identifiers with corresponding top-level declarations have
// been loaded, load those declarations now.
using TopLevelDeclsMap =
llvm::DenseMap<IdentifierInfo *, SmallVector<Decl *, 2>>;
TopLevelDeclsMap TopLevelDecls;
while (!PendingIdentifierInfos.empty()) {
IdentifierInfo *II = PendingIdentifierInfos.back().first;
SmallVector<uint32_t, 4> DeclIDs =
std::move(PendingIdentifierInfos.back().second);
PendingIdentifierInfos.pop_back();
SetGloballyVisibleDecls(II, DeclIDs, &TopLevelDecls[II]);
}
// Load each function type that we deferred loading because it was a
// deduced type that might refer to a local type declared within itself.
for (unsigned I = 0; I != PendingFunctionTypes.size(); ++I) {
auto *FD = PendingFunctionTypes[I].first;
FD->setType(GetType(PendingFunctionTypes[I].second));
// If we gave a function a deduced return type, remember that we need to
// propagate that along the redeclaration chain.
auto *DT = FD->getReturnType()->getContainedDeducedType();
if (DT && DT->isDeduced())
PendingDeducedTypeUpdates.insert(
{FD->getCanonicalDecl(), FD->getReturnType()});
}
PendingFunctionTypes.clear();
// For each decl chain that we wanted to complete while deserializing, mark
// it as "still needs to be completed".
for (unsigned I = 0; I != PendingIncompleteDeclChains.size(); ++I) {
markIncompleteDeclChain(PendingIncompleteDeclChains[I]);
}
PendingIncompleteDeclChains.clear();
// Load pending declaration chains.
for (unsigned I = 0; I != PendingDeclChains.size(); ++I)
loadPendingDeclChain(PendingDeclChains[I].first,
PendingDeclChains[I].second);
PendingDeclChains.clear();
// Make the most recent of the top-level declarations visible.
for (TopLevelDeclsMap::iterator TLD = TopLevelDecls.begin(),
TLDEnd = TopLevelDecls.end(); TLD != TLDEnd; ++TLD) {
IdentifierInfo *II = TLD->first;
for (unsigned I = 0, N = TLD->second.size(); I != N; ++I) {
pushExternalDeclIntoScope(cast<NamedDecl>(TLD->second[I]), II);
}
}
// Load any pending macro definitions.
for (unsigned I = 0; I != PendingMacroIDs.size(); ++I) {
IdentifierInfo *II = PendingMacroIDs.begin()[I].first;
SmallVector<PendingMacroInfo, 2> GlobalIDs;
GlobalIDs.swap(PendingMacroIDs.begin()[I].second);
// Initialize the macro history from chained-PCHs ahead of module imports.
for (unsigned IDIdx = 0, NumIDs = GlobalIDs.size(); IDIdx != NumIDs;
++IDIdx) {
const PendingMacroInfo &Info = GlobalIDs[IDIdx];
if (!Info.M->isModule())
resolvePendingMacro(II, Info);
}
// Handle module imports.
for (unsigned IDIdx = 0, NumIDs = GlobalIDs.size(); IDIdx != NumIDs;
++IDIdx) {
const PendingMacroInfo &Info = GlobalIDs[IDIdx];
if (Info.M->isModule())
resolvePendingMacro(II, Info);
}
}
PendingMacroIDs.clear();
// Wire up the DeclContexts for Decls that we delayed setting until
// recursive loading is completed.
while (!PendingDeclContextInfos.empty()) {
PendingDeclContextInfo Info = PendingDeclContextInfos.front();
PendingDeclContextInfos.pop_front();
DeclContext *SemaDC = cast<DeclContext>(GetDecl(Info.SemaDC));
DeclContext *LexicalDC = cast<DeclContext>(GetDecl(Info.LexicalDC));
Info.D->setDeclContextsImpl(SemaDC, LexicalDC, getContext());
}
// Perform any pending declaration updates.
while (!PendingUpdateRecords.empty()) {
auto Update = PendingUpdateRecords.pop_back_val();
ReadingKindTracker ReadingKind(Read_Decl, *this);
loadDeclUpdateRecords(Update);
}
}
// At this point, all update records for loaded decls are in place, so any
// fake class definitions should have become real.
assert(PendingFakeDefinitionData.empty() &&
"faked up a class definition but never saw the real one");
// If we deserialized any C++ or Objective-C class definitions, any
// Objective-C protocol definitions, or any redeclarable templates, make sure
// that all redeclarations point to the definitions. Note that this can only
// happen now, after the redeclaration chains have been fully wired.
for (Decl *D : PendingDefinitions) {
if (TagDecl *TD = dyn_cast<TagDecl>(D)) {
if (const TagType *TagT = dyn_cast<TagType>(TD->getTypeForDecl())) {
// Make sure that the TagType points at the definition.
const_cast<TagType*>(TagT)->decl = TD;
}
if (auto RD = dyn_cast<CXXRecordDecl>(D)) {
for (auto *R = getMostRecentExistingDecl(RD); R;
R = R->getPreviousDecl()) {
assert((R == D) ==
cast<CXXRecordDecl>(R)->isThisDeclarationADefinition() &&
"declaration thinks it's the definition but it isn't");
cast<CXXRecordDecl>(R)->DefinitionData = RD->DefinitionData;
}
}
continue;
}
if (auto ID = dyn_cast<ObjCInterfaceDecl>(D)) {
// Make sure that the ObjCInterfaceType points at the definition.
const_cast<ObjCInterfaceType *>(cast<ObjCInterfaceType>(ID->TypeForDecl))
->Decl = ID;
for (auto *R = getMostRecentExistingDecl(ID); R; R = R->getPreviousDecl())
cast<ObjCInterfaceDecl>(R)->Data = ID->Data;
continue;
}
if (auto PD = dyn_cast<ObjCProtocolDecl>(D)) {
for (auto *R = getMostRecentExistingDecl(PD); R; R = R->getPreviousDecl())
cast<ObjCProtocolDecl>(R)->Data = PD->Data;
continue;
}
auto RTD = cast<RedeclarableTemplateDecl>(D)->getCanonicalDecl();
for (auto *R = getMostRecentExistingDecl(RTD); R; R = R->getPreviousDecl())
cast<RedeclarableTemplateDecl>(R)->Common = RTD->Common;
}
PendingDefinitions.clear();
// Load the bodies of any functions or methods we've encountered. We do
// this now (delayed) so that we can be sure that the declaration chains
// have been fully wired up (hasBody relies on this).
// FIXME: We shouldn't require complete redeclaration chains here.
for (PendingBodiesMap::iterator PB = PendingBodies.begin(),
PBEnd = PendingBodies.end();
PB != PBEnd; ++PB) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(PB->first)) {
// For a function defined inline within a class template, force the
// canonical definition to be the one inside the canonical definition of
// the template. This ensures that we instantiate from a correct view
// of the template.
//
// Sadly we can't do this more generally: we can't be sure that all
// copies of an arbitrary class definition will have the same members
// defined (e.g., some member functions may not be instantiated, and some
// special members may or may not have been implicitly defined).
if (auto *RD = dyn_cast<CXXRecordDecl>(FD->getLexicalParent()))
if (RD->isDependentContext() && !RD->isThisDeclarationADefinition())
continue;
// FIXME: Check for =delete/=default?
// FIXME: Complain about ODR violations here?
const FunctionDecl *Defn = nullptr;
if (!getContext().getLangOpts().Modules || !FD->hasBody(Defn)) {
FD->setLazyBody(PB->second);
} else {
auto *NonConstDefn = const_cast<FunctionDecl*>(Defn);
mergeDefinitionVisibility(NonConstDefn, FD);
if (!FD->isLateTemplateParsed() &&
!NonConstDefn->isLateTemplateParsed() &&
FD->getODRHash() != NonConstDefn->getODRHash()) {
if (!isa<CXXMethodDecl>(FD)) {
PendingFunctionOdrMergeFailures[FD].push_back(NonConstDefn);
} else if (FD->getLexicalParent()->isFileContext() &&
NonConstDefn->getLexicalParent()->isFileContext()) {
// Only diagnose out-of-line method definitions. If they are
// in class definitions, then an error will be generated when
// processing the class bodies.
PendingFunctionOdrMergeFailures[FD].push_back(NonConstDefn);
}
}
}
continue;
}
ObjCMethodDecl *MD = cast<ObjCMethodDecl>(PB->first);
if (!getContext().getLangOpts().Modules || !MD->hasBody())
MD->setLazyBody(PB->second);
}
PendingBodies.clear();
// Do some cleanup.
for (auto *ND : PendingMergedDefinitionsToDeduplicate)
getContext().deduplicateMergedDefinitonsFor(ND);
PendingMergedDefinitionsToDeduplicate.clear();
}
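// Diagnose ODR violations detected while merging declarations loaded from
// different modules, describing the first difference found in each
// mismatched definition.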
void ASTReader::diagnoseOdrViolations() {
if (PendingOdrMergeFailures.empty() && PendingOdrMergeChecks.empty() &&
PendingFunctionOdrMergeFailures.empty() &&
PendingEnumOdrMergeFailures.empty())
return;
// Trigger the import of the full definition of each class that had any
// odr-merging problems, so we can produce better diagnostics for them.
// These updates may in turn find and diagnose some ODR failures, so take
// ownership of the set first.
auto OdrMergeFailures = std::move(PendingOdrMergeFailures);
PendingOdrMergeFailures.clear();
for (auto &Merge : OdrMergeFailures) {
Merge.first->buildLookup();
Merge.first->decls_begin();
Merge.first->bases_begin();
Merge.first->vbases_begin();
for (auto &RecordPair : Merge.second) {
auto *RD = RecordPair.first;
RD->decls_begin();
RD->bases_begin();
RD->vbases_begin();
}
}
// Trigger the import of functions.
auto FunctionOdrMergeFailures = std::move(PendingFunctionOdrMergeFailures);
PendingFunctionOdrMergeFailures.clear();
for (auto &Merge : FunctionOdrMergeFailures) {
Merge.first->buildLookup();
Merge.first->decls_begin();
Merge.first->getBody();
for (auto &FD : Merge.second) {
FD->buildLookup();
FD->decls_begin();
FD->getBody();
}
}
// Trigger the import of enums.
auto EnumOdrMergeFailures = std::move(PendingEnumOdrMergeFailures);
PendingEnumOdrMergeFailures.clear();
for (auto &Merge : EnumOdrMergeFailures) {
Merge.first->decls_begin();
for (auto &Enum : Merge.second) {
Enum->decls_begin();
}
}
// For each declaration from a merged context, check that the canonical
// definition of that context also contains a declaration of the same
// entity.
//
// Caution: this loop does things that might invalidate iterators into
// PendingOdrMergeChecks. Don't turn this into a range-based for loop!
while (!PendingOdrMergeChecks.empty()) {
NamedDecl *D = PendingOdrMergeChecks.pop_back_val();
// FIXME: Skip over implicit declarations for now. This matters for things
// like implicitly-declared special member functions. This isn't entirely
// correct; we can end up with multiple unmerged declarations of the same
// implicit entity.
if (D->isImplicit())
continue;
DeclContext *CanonDef = D->getDeclContext();
bool Found = false;
const Decl *DCanon = D->getCanonicalDecl();
for (auto RI : D->redecls()) {
if (RI->getLexicalDeclContext() == CanonDef) {
Found = true;
break;
}
}
if (Found)
continue;
// Quick check failed, time to do the slow thing. Note, we can't just
// look up the name of D in CanonDef here, because the member that is
// in CanonDef might not be found by name lookup (it might have been
// replaced by a more recent declaration in the lookup table), and we
// can't necessarily find it in the redeclaration chain because it might
// be merely mergeable, not redeclarable.
llvm::SmallVector<const NamedDecl*, 4> Candidates;
for (auto *CanonMember : CanonDef->decls()) {
if (CanonMember->getCanonicalDecl() == DCanon) {
// This can happen if the declaration is merely mergeable and not
// actually redeclarable (we looked for redeclarations earlier).
//
// FIXME: We should be able to detect this more efficiently, without
// pulling in all of the members of CanonDef.
Found = true;
break;
}
if (auto *ND = dyn_cast<NamedDecl>(CanonMember))
if (ND->getDeclName() == D->getDeclName())
Candidates.push_back(ND);
}
if (!Found) {
// The AST doesn't like TagDecls becoming invalid after they've been
// completed. We only really need to mark FieldDecls as invalid here.
if (!isa<TagDecl>(D))
D->setInvalidDecl();
// Ensure we don't accidentally recursively enter deserialization while
// we're producing our diagnostic.
Deserializing RecursionGuard(this);
std::string CanonDefModule =
getOwningModuleNameForDiagnostic(cast<Decl>(CanonDef));
Diag(D->getLocation(), diag::err_module_odr_violation_missing_decl)
<< D << getOwningModuleNameForDiagnostic(D)
<< CanonDef << CanonDefModule.empty() << CanonDefModule;
if (Candidates.empty())
Diag(cast<Decl>(CanonDef)->getLocation(),
diag::note_module_odr_violation_no_possible_decls) << D;
else {
for (unsigned I = 0, N = Candidates.size(); I != N; ++I)
Diag(Candidates[I]->getLocation(),
diag::note_module_odr_violation_possible_decl)
<< Candidates[I];
}
DiagnosedOdrMergeFailures.insert(CanonDef);
}
}
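// Nothing more to diagnose if no mismatched definitions were recorded.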
if (OdrMergeFailures.empty() && FunctionOdrMergeFailures.empty() &&
EnumOdrMergeFailures.empty())
return;
// Ensure we don't accidentally recursively enter deserialization while
// we're producing our diagnostics.
Deserializing RecursionGuard(this);
// Common code for hashing helpers.
ODRHash Hash;
auto ComputeQualTypeODRHash = [&Hash](QualType Ty) {
Hash.clear();
Hash.AddQualType(Ty);
return Hash.CalculateHash();
};
auto ComputeODRHash = [&Hash](const Stmt *S) {
assert(S);
Hash.clear();
Hash.AddStmt(S);
return Hash.CalculateHash();
};
auto ComputeSubDeclODRHash = [&Hash](const Decl *D) {
assert(D);
Hash.clear();
Hash.AddSubDecl(D);
return Hash.CalculateHash();
};
auto ComputeTemplateArgumentODRHash = [&Hash](const TemplateArgument &TA) {
Hash.clear();
Hash.AddTemplateArgument(TA);
return Hash.CalculateHash();
};
auto ComputeTemplateParameterListODRHash =
[&Hash](const TemplateParameterList *TPL) {
assert(TPL);
Hash.clear();
Hash.AddTemplateParameterList(TPL);
return Hash.CalculateHash();
};
// Used with err_module_odr_violation_mismatch_decl and
// note_module_odr_violation_mismatch_decl.
// This list should contain the same Decls as in ODRHash::isDeclToBeProcessed.
enum ODRMismatchDecl {
EndOfClass,
PublicSpecifer,
PrivateSpecifer,
ProtectedSpecifer,
StaticAssert,
Field,
CXXMethod,
TypeAlias,
TypeDef,
Var,
Friend,
FunctionTemplate,
Other
};
// Used with err_module_odr_violation_mismatch_decl_diff and
// note_module_odr_violation_mismatch_decl_diff
enum ODRMismatchDeclDifference {
StaticAssertCondition,
StaticAssertMessage,
StaticAssertOnlyMessage,
FieldName,
FieldTypeName,
FieldSingleBitField,
FieldDifferentWidthBitField,
FieldSingleMutable,
FieldSingleInitializer,
FieldDifferentInitializers,
MethodName,
MethodDeleted,
MethodDefaulted,
MethodVirtual,
MethodStatic,
MethodVolatile,
MethodConst,
MethodInline,
MethodNumberParameters,
MethodParameterType,
MethodParameterName,
MethodParameterSingleDefaultArgument,
MethodParameterDifferentDefaultArgument,
MethodNoTemplateArguments,
MethodDifferentNumberTemplateArguments,
MethodDifferentTemplateArgument,
MethodSingleBody,
MethodDifferentBody,
TypedefName,
TypedefType,
VarName,
VarType,
VarSingleInitializer,
VarDifferentInitializer,
VarConstexpr,
FriendTypeFunction,
FriendType,
FriendFunction,
FunctionTemplateDifferentNumberParameters,
FunctionTemplateParameterDifferentKind,
FunctionTemplateParameterName,
FunctionTemplateParameterSingleDefaultArgument,
FunctionTemplateParameterDifferentDefaultArgument,
FunctionTemplateParameterDifferentType,
FunctionTemplatePackParameter,
};
// These lambdas have the common portions of the ODR diagnostics. They
// have the same return type as Diag(), so additional parameters can be
// passed in with operator<<.
auto ODRDiagDeclError = [this](NamedDecl *FirstRecord, StringRef FirstModule,
SourceLocation Loc, SourceRange Range,
ODRMismatchDeclDifference DiffType) {
return Diag(Loc, diag::err_module_odr_violation_mismatch_decl_diff)
<< FirstRecord << FirstModule.empty() << FirstModule << Range
<< DiffType;
};
auto ODRDiagDeclNote = [this](StringRef SecondModule, SourceLocation Loc,
SourceRange Range, ODRMismatchDeclDifference DiffType) {
return Diag(Loc, diag::note_module_odr_violation_mismatch_decl_diff)
<< SecondModule << Range << DiffType;
};
auto ODRDiagField = [this, &ODRDiagDeclError, &ODRDiagDeclNote,
&ComputeQualTypeODRHash, &ComputeODRHash](
NamedDecl *FirstRecord, StringRef FirstModule,
StringRef SecondModule, FieldDecl *FirstField,
FieldDecl *SecondField) {
IdentifierInfo *FirstII = FirstField->getIdentifier();
IdentifierInfo *SecondII = SecondField->getIdentifier();
if (FirstII->getName() != SecondII->getName()) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(), FieldName)
<< FirstII;
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(), FieldName)
<< SecondII;
return true;
}
assert(getContext().hasSameType(FirstField->getType(),
SecondField->getType()));
QualType FirstType = FirstField->getType();
QualType SecondType = SecondField->getType();
if (ComputeQualTypeODRHash(FirstType) !=
ComputeQualTypeODRHash(SecondType)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(), FieldTypeName)
<< FirstII << FirstType;
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(), FieldTypeName)
<< SecondII << SecondType;
return true;
}
const bool IsFirstBitField = FirstField->isBitField();
const bool IsSecondBitField = SecondField->isBitField();
if (IsFirstBitField != IsSecondBitField) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(), FieldSingleBitField)
<< FirstII << IsFirstBitField;
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(), FieldSingleBitField)
<< SecondII << IsSecondBitField;
return true;
}
if (IsFirstBitField && IsSecondBitField) {
unsigned FirstBitWidthHash =
ComputeODRHash(FirstField->getBitWidth());
unsigned SecondBitWidthHash =
ComputeODRHash(SecondField->getBitWidth());
if (FirstBitWidthHash != SecondBitWidthHash) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(),
FieldDifferentWidthBitField)
<< FirstII << FirstField->getBitWidth()->getSourceRange();
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(),
FieldDifferentWidthBitField)
<< SecondII << SecondField->getBitWidth()->getSourceRange();
return true;
}
}
if (!PP.getLangOpts().CPlusPlus)
return false;
const bool IsFirstMutable = FirstField->isMutable();
const bool IsSecondMutable = SecondField->isMutable();
if (IsFirstMutable != IsSecondMutable) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(), FieldSingleMutable)
<< FirstII << IsFirstMutable;
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(), FieldSingleMutable)
<< SecondII << IsSecondMutable;
return true;
}
const Expr *FirstInitializer = FirstField->getInClassInitializer();
const Expr *SecondInitializer = SecondField->getInClassInitializer();
if ((!FirstInitializer && SecondInitializer) ||
(FirstInitializer && !SecondInitializer)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(), FieldSingleInitializer)
<< FirstII << (FirstInitializer != nullptr);
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(), FieldSingleInitializer)
<< SecondII << (SecondInitializer != nullptr);
return true;
}
if (FirstInitializer && SecondInitializer) {
unsigned FirstInitHash = ComputeODRHash(FirstInitializer);
unsigned SecondInitHash = ComputeODRHash(SecondInitializer);
if (FirstInitHash != SecondInitHash) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(),
FieldDifferentInitializers)
<< FirstII << FirstInitializer->getSourceRange();
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(),
FieldDifferentInitializers)
<< SecondII << SecondInitializer->getSourceRange();
return true;
}
}
return false;
};
auto ODRDiagTypeDefOrAlias =
[&ODRDiagDeclError, &ODRDiagDeclNote, &ComputeQualTypeODRHash](
NamedDecl *FirstRecord, StringRef FirstModule, StringRef SecondModule,
TypedefNameDecl *FirstTD, TypedefNameDecl *SecondTD,
bool IsTypeAlias) {
auto FirstName = FirstTD->getDeclName();
auto SecondName = SecondTD->getDeclName();
if (FirstName != SecondName) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstTD->getLocation(),
FirstTD->getSourceRange(), TypedefName)
<< IsTypeAlias << FirstName;
ODRDiagDeclNote(SecondModule, SecondTD->getLocation(),
SecondTD->getSourceRange(), TypedefName)
<< IsTypeAlias << SecondName;
return true;
}
QualType FirstType = FirstTD->getUnderlyingType();
QualType SecondType = SecondTD->getUnderlyingType();
if (ComputeQualTypeODRHash(FirstType) !=
ComputeQualTypeODRHash(SecondType)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstTD->getLocation(),
FirstTD->getSourceRange(), TypedefType)
<< IsTypeAlias << FirstName << FirstType;
ODRDiagDeclNote(SecondModule, SecondTD->getLocation(),
SecondTD->getSourceRange(), TypedefType)
<< IsTypeAlias << SecondName << SecondType;
return true;
}
return false;
};
auto ODRDiagVar = [&ODRDiagDeclError, &ODRDiagDeclNote,
&ComputeQualTypeODRHash, &ComputeODRHash,
this](NamedDecl *FirstRecord, StringRef FirstModule,
StringRef SecondModule, VarDecl *FirstVD,
VarDecl *SecondVD) {
auto FirstName = FirstVD->getDeclName();
auto SecondName = SecondVD->getDeclName();
if (FirstName != SecondName) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstVD->getLocation(),
FirstVD->getSourceRange(), VarName)
<< FirstName;
ODRDiagDeclNote(SecondModule, SecondVD->getLocation(),
SecondVD->getSourceRange(), VarName)
<< SecondName;
return true;
}
QualType FirstType = FirstVD->getType();
QualType SecondType = SecondVD->getType();
if (ComputeQualTypeODRHash(FirstType) !=
ComputeQualTypeODRHash(SecondType)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstVD->getLocation(),
FirstVD->getSourceRange(), VarType)
<< FirstName << FirstType;
ODRDiagDeclNote(SecondModule, SecondVD->getLocation(),
SecondVD->getSourceRange(), VarType)
<< SecondName << SecondType;
return true;
}
if (!PP.getLangOpts().CPlusPlus)
return false;
const Expr *FirstInit = FirstVD->getInit();
const Expr *SecondInit = SecondVD->getInit();
if ((FirstInit == nullptr) != (SecondInit == nullptr)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstVD->getLocation(),
FirstVD->getSourceRange(), VarSingleInitializer)
<< FirstName << (FirstInit == nullptr)
<< (FirstInit ? FirstInit->getSourceRange() : SourceRange());
ODRDiagDeclNote(SecondModule, SecondVD->getLocation(),
SecondVD->getSourceRange(), VarSingleInitializer)
<< SecondName << (SecondInit == nullptr)
<< (SecondInit ? SecondInit->getSourceRange() : SourceRange());
return true;
}
if (FirstInit && SecondInit &&
ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstVD->getLocation(),
FirstVD->getSourceRange(), VarDifferentInitializer)
<< FirstName << FirstInit->getSourceRange();
ODRDiagDeclNote(SecondModule, SecondVD->getLocation(),
SecondVD->getSourceRange(), VarDifferentInitializer)
<< SecondName << SecondInit->getSourceRange();
return true;
}
const bool FirstIsConstexpr = FirstVD->isConstexpr();
const bool SecondIsConstexpr = SecondVD->isConstexpr();
if (FirstIsConstexpr != SecondIsConstexpr) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstVD->getLocation(),
FirstVD->getSourceRange(), VarConstexpr)
<< FirstName << FirstIsConstexpr;
ODRDiagDeclNote(SecondModule, SecondVD->getLocation(),
SecondVD->getSourceRange(), VarConstexpr)
<< SecondName << SecondIsConstexpr;
return true;
}
return false;
};
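// Map a member Decl to the ODRMismatchDecl kind used in the mismatch
// diagnostics below.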
auto DifferenceSelector = [](Decl *D) {
assert(D && "valid Decl required");
switch (D->getKind()) {
default:
return Other;
case Decl::AccessSpec:
switch (D->getAccess()) {
case AS_public:
return PublicSpecifer;
case AS_private:
return PrivateSpecifer;
case AS_protected:
return ProtectedSpecifer;
case AS_none:
break;
}
llvm_unreachable("Invalid access specifier");
case Decl::StaticAssert:
return StaticAssert;
case Decl::Field:
return Field;
case Decl::CXXMethod:
case Decl::CXXConstructor:
case Decl::CXXDestructor:
return CXXMethod;
case Decl::TypeAlias:
return TypeAlias;
case Decl::Typedef:
return TypeDef;
case Decl::Var:
return Var;
case Decl::Friend:
return Friend;
case Decl::FunctionTemplate:
return FunctionTemplate;
}
};
using DeclHashes = llvm::SmallVector<std::pair<Decl *, unsigned>, 4>;
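// Collect the ODR hash of every hashable declaration in a record, in
// declaration order.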
auto PopulateHashes = [&ComputeSubDeclODRHash](DeclHashes &Hashes,
RecordDecl *Record,
const DeclContext *DC) {
for (auto *D : Record->decls()) {
if (!ODRHash::isDeclToBeProcessed(D, DC))
continue;
Hashes.emplace_back(D, ComputeSubDeclODRHash(D));
}
};
struct DiffResult {
Decl *FirstDecl = nullptr, *SecondDecl = nullptr;
ODRMismatchDecl FirstDiffType = Other, SecondDiffType = Other;
};
// If there is a diagnosable difference, FirstDiffType and
// SecondDiffType will not be Other, and FirstDecl and SecondDecl will be
// filled in unless the difference is EndOfClass.
auto FindTypeDiffs = [&DifferenceSelector](DeclHashes &FirstHashes,
DeclHashes &SecondHashes) {
DiffResult DR;
auto FirstIt = FirstHashes.begin();
auto SecondIt = SecondHashes.begin();
while (FirstIt != FirstHashes.end() || SecondIt != SecondHashes.end()) {
if (FirstIt != FirstHashes.end() && SecondIt != SecondHashes.end() &&
FirstIt->second == SecondIt->second) {
++FirstIt;
++SecondIt;
continue;
}
DR.FirstDecl = FirstIt == FirstHashes.end() ? nullptr : FirstIt->first;
DR.SecondDecl =
SecondIt == SecondHashes.end() ? nullptr : SecondIt->first;
DR.FirstDiffType =
DR.FirstDecl ? DifferenceSelector(DR.FirstDecl) : EndOfClass;
DR.SecondDiffType =
DR.SecondDecl ? DifferenceSelector(DR.SecondDecl) : EndOfClass;
return DR;
}
return DR;
};
// Use this to diagnose that an unexpected Decl was encountered
// or no difference was detected. This causes a generic error
// message to be emitted.
auto DiagnoseODRUnexpected = [this](DiffResult &DR, NamedDecl *FirstRecord,
StringRef FirstModule,
NamedDecl *SecondRecord,
StringRef SecondModule) {
Diag(FirstRecord->getLocation(),
diag::err_module_odr_violation_different_definitions)
<< FirstRecord << FirstModule.empty() << FirstModule;
if (DR.FirstDecl) {
Diag(DR.FirstDecl->getLocation(), diag::note_first_module_difference)
<< FirstRecord << DR.FirstDecl->getSourceRange();
}
Diag(SecondRecord->getLocation(),
diag::note_module_odr_violation_different_definitions)
<< SecondModule;
if (DR.SecondDecl) {
Diag(DR.SecondDecl->getLocation(), diag::note_second_module_difference)
<< DR.SecondDecl->getSourceRange();
}
};
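// Diagnose that the two definitions diverge at declarations of different
// kinds (or that one definition ends before the other).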
auto DiagnoseODRMismatch =
[this](DiffResult &DR, NamedDecl *FirstRecord, StringRef FirstModule,
NamedDecl *SecondRecord, StringRef SecondModule) {
SourceLocation FirstLoc;
SourceRange FirstRange;
auto *FirstTag = dyn_cast<TagDecl>(FirstRecord);
if (DR.FirstDiffType == EndOfClass && FirstTag) {
FirstLoc = FirstTag->getBraceRange().getEnd();
} else {
FirstLoc = DR.FirstDecl->getLocation();
FirstRange = DR.FirstDecl->getSourceRange();
}
Diag(FirstLoc, diag::err_module_odr_violation_mismatch_decl)
<< FirstRecord << FirstModule.empty() << FirstModule << FirstRange
<< DR.FirstDiffType;
SourceLocation SecondLoc;
SourceRange SecondRange;
auto *SecondTag = dyn_cast<TagDecl>(SecondRecord);
if (DR.SecondDiffType == EndOfClass && SecondTag) {
SecondLoc = SecondTag->getBraceRange().getEnd();
} else {
SecondLoc = DR.SecondDecl->getLocation();
SecondRange = DR.SecondDecl->getSourceRange();
}
Diag(SecondLoc, diag::note_module_odr_violation_mismatch_decl)
<< SecondModule << SecondRange << DR.SecondDiffType;
};
// Issue any pending ODR-failure diagnostics.
for (auto &Merge : OdrMergeFailures) {
// If we've already pointed out a specific problem with this class, don't
// bother issuing a general "something's different" diagnostic.
if (!DiagnosedOdrMergeFailures.insert(Merge.first).second)
continue;
bool Diagnosed = false;
CXXRecordDecl *FirstRecord = Merge.first;
std::string FirstModule = getOwningModuleNameForDiagnostic(FirstRecord);
for (auto &RecordPair : Merge.second) {
CXXRecordDecl *SecondRecord = RecordPair.first;
// Multiple different declarations got merged together; tell the user
// where they came from.
if (FirstRecord == SecondRecord)
continue;
std::string SecondModule = getOwningModuleNameForDiagnostic(SecondRecord);
auto *FirstDD = FirstRecord->DefinitionData;
auto *SecondDD = RecordPair.second;
assert(FirstDD && SecondDD && "Definitions without DefinitionData");
// Diagnostics from DefinitionData are emitted here.
if (FirstDD != SecondDD) {
enum ODRDefinitionDataDifference {
NumBases,
NumVBases,
BaseType,
BaseVirtual,
BaseAccess,
};
auto ODRDiagBaseError = [FirstRecord, &FirstModule,
this](SourceLocation Loc, SourceRange Range,
ODRDefinitionDataDifference DiffType) {
return Diag(Loc, diag::err_module_odr_violation_definition_data)
<< FirstRecord << FirstModule.empty() << FirstModule << Range
<< DiffType;
};
auto ODRDiagBaseNote = [&SecondModule,
this](SourceLocation Loc, SourceRange Range,
ODRDefinitionDataDifference DiffType) {
return Diag(Loc, diag::note_module_odr_violation_definition_data)
<< SecondModule << Range << DiffType;
};
unsigned FirstNumBases = FirstDD->NumBases;
unsigned FirstNumVBases = FirstDD->NumVBases;
unsigned SecondNumBases = SecondDD->NumBases;
unsigned SecondNumVBases = SecondDD->NumVBases;
auto GetSourceRange = [](struct CXXRecordDecl::DefinitionData *DD) {
unsigned NumBases = DD->NumBases;
if (NumBases == 0) return SourceRange();
auto bases = DD->bases();
return SourceRange(bases[0].getBeginLoc(),
bases[NumBases - 1].getEndLoc());
};
if (FirstNumBases != SecondNumBases) {
ODRDiagBaseError(FirstRecord->getLocation(), GetSourceRange(FirstDD),
NumBases)
<< FirstNumBases;
ODRDiagBaseNote(SecondRecord->getLocation(), GetSourceRange(SecondDD),
NumBases)
<< SecondNumBases;
Diagnosed = true;
break;
}
if (FirstNumVBases != SecondNumVBases) {
ODRDiagBaseError(FirstRecord->getLocation(), GetSourceRange(FirstDD),
NumVBases)
<< FirstNumVBases;
ODRDiagBaseNote(SecondRecord->getLocation(), GetSourceRange(SecondDD),
NumVBases)
<< SecondNumVBases;
Diagnosed = true;
break;
}
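// Compare the base classes pairwise: type, virtualness, and the access
// specifier as written.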
auto FirstBases = FirstDD->bases();
auto SecondBases = SecondDD->bases();
unsigned i = 0;
for (i = 0; i < FirstNumBases; ++i) {
auto FirstBase = FirstBases[i];
auto SecondBase = SecondBases[i];
if (ComputeQualTypeODRHash(FirstBase.getType()) !=
ComputeQualTypeODRHash(SecondBase.getType())) {
ODRDiagBaseError(FirstRecord->getLocation(),
FirstBase.getSourceRange(), BaseType)
<< (i + 1) << FirstBase.getType();
ODRDiagBaseNote(SecondRecord->getLocation(),
SecondBase.getSourceRange(), BaseType)
<< (i + 1) << SecondBase.getType();
break;
}
if (FirstBase.isVirtual() != SecondBase.isVirtual()) {
ODRDiagBaseError(FirstRecord->getLocation(),
FirstBase.getSourceRange(), BaseVirtual)
<< (i + 1) << FirstBase.isVirtual() << FirstBase.getType();
ODRDiagBaseNote(SecondRecord->getLocation(),
SecondBase.getSourceRange(), BaseVirtual)
<< (i + 1) << SecondBase.isVirtual() << SecondBase.getType();
break;
}
if (FirstBase.getAccessSpecifierAsWritten() !=
SecondBase.getAccessSpecifierAsWritten()) {
ODRDiagBaseError(FirstRecord->getLocation(),
FirstBase.getSourceRange(), BaseAccess)
<< (i + 1) << FirstBase.getType()
<< (int)FirstBase.getAccessSpecifierAsWritten();
ODRDiagBaseNote(SecondRecord->getLocation(),
SecondBase.getSourceRange(), BaseAccess)
<< (i + 1) << SecondBase.getType()
<< (int)SecondBase.getAccessSpecifierAsWritten();
break;
}
}
if (i != FirstNumBases) {
Diagnosed = true;
break;
}
}
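// Next, compare the template parameter lists of the described class
// templates, if this record is a class template.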
const ClassTemplateDecl *FirstTemplate =
FirstRecord->getDescribedClassTemplate();
const ClassTemplateDecl *SecondTemplate =
SecondRecord->getDescribedClassTemplate();
assert(!FirstTemplate == !SecondTemplate &&
"Both pointers should be null or non-null");
enum ODRTemplateDifference {
ParamEmptyName,
ParamName,
ParamSingleDefaultArgument,
ParamDifferentDefaultArgument,
};
if (FirstTemplate && SecondTemplate) {
DeclHashes FirstTemplateHashes;
DeclHashes SecondTemplateHashes;
auto PopulateTemplateParameterHashs =
[&ComputeSubDeclODRHash](DeclHashes &Hashes,
const ClassTemplateDecl *TD) {
for (auto *D : TD->getTemplateParameters()->asArray()) {
Hashes.emplace_back(D, ComputeSubDeclODRHash(D));
}
};
PopulateTemplateParameterHashs(FirstTemplateHashes, FirstTemplate);
PopulateTemplateParameterHashs(SecondTemplateHashes, SecondTemplate);
assert(FirstTemplateHashes.size() == SecondTemplateHashes.size() &&
"Number of template parameters should be equal.");
auto FirstIt = FirstTemplateHashes.begin();
auto FirstEnd = FirstTemplateHashes.end();
auto SecondIt = SecondTemplateHashes.begin();
for (; FirstIt != FirstEnd; ++FirstIt, ++SecondIt) {
if (FirstIt->second == SecondIt->second)
continue;
auto ODRDiagTemplateError = [FirstRecord, &FirstModule, this](
SourceLocation Loc, SourceRange Range,
ODRTemplateDifference DiffType) {
return Diag(Loc, diag::err_module_odr_violation_template_parameter)
<< FirstRecord << FirstModule.empty() << FirstModule << Range
<< DiffType;
};
auto ODRDiagTemplateNote = [&SecondModule, this](
SourceLocation Loc, SourceRange Range,
ODRTemplateDifference DiffType) {
return Diag(Loc, diag::note_module_odr_violation_template_parameter)
<< SecondModule << Range << DiffType;
};
const NamedDecl* FirstDecl = cast<NamedDecl>(FirstIt->first);
const NamedDecl* SecondDecl = cast<NamedDecl>(SecondIt->first);
assert(FirstDecl->getKind() == SecondDecl->getKind() &&
"Parameter Decl's should be the same kind.");
DeclarationName FirstName = FirstDecl->getDeclName();
DeclarationName SecondName = SecondDecl->getDeclName();
if (FirstName != SecondName) {
const bool FirstNameEmpty =
FirstName.isIdentifier() && !FirstName.getAsIdentifierInfo();
const bool SecondNameEmpty =
SecondName.isIdentifier() && !SecondName.getAsIdentifierInfo();
assert((!FirstNameEmpty || !SecondNameEmpty) &&
"Both template parameters cannot be unnamed.");
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
FirstNameEmpty ? ParamEmptyName : ParamName)
<< FirstName;
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
SecondNameEmpty ? ParamEmptyName : ParamName)
<< SecondName;
break;
}
switch (FirstDecl->getKind()) {
default:
llvm_unreachable("Invalid template parameter type.");
case Decl::TemplateTypeParm: {
const auto *FirstParam = cast<TemplateTypeParmDecl>(FirstDecl);
const auto *SecondParam = cast<TemplateTypeParmDecl>(SecondDecl);
const bool HasFirstDefaultArgument =
FirstParam->hasDefaultArgument() &&
!FirstParam->defaultArgumentWasInherited();
const bool HasSecondDefaultArgument =
SecondParam->hasDefaultArgument() &&
!SecondParam->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasFirstDefaultArgument;
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasSecondDefaultArgument;
break;
}
assert(HasFirstDefaultArgument && HasSecondDefaultArgument &&
"Expecting default arguments.");
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamDifferentDefaultArgument);
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamDifferentDefaultArgument);
break;
}
case Decl::NonTypeTemplateParm: {
const auto *FirstParam = cast<NonTypeTemplateParmDecl>(FirstDecl);
const auto *SecondParam = cast<NonTypeTemplateParmDecl>(SecondDecl);
const bool HasFirstDefaultArgument =
FirstParam->hasDefaultArgument() &&
!FirstParam->defaultArgumentWasInherited();
const bool HasSecondDefaultArgument =
SecondParam->hasDefaultArgument() &&
!SecondParam->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasFirstDefaultArgument;
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasSecondDefaultArgument;
break;
}
assert(HasFirstDefaultArgument && HasSecondDefaultArgument &&
"Expecting default arguments.");
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamDifferentDefaultArgument);
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamDifferentDefaultArgument);
break;
}
case Decl::TemplateTemplateParm: {
const auto *FirstParam = cast<TemplateTemplateParmDecl>(FirstDecl);
const auto *SecondParam =
cast<TemplateTemplateParmDecl>(SecondDecl);
const bool HasFirstDefaultArgument =
FirstParam->hasDefaultArgument() &&
!FirstParam->defaultArgumentWasInherited();
const bool HasSecondDefaultArgument =
SecondParam->hasDefaultArgument() &&
!SecondParam->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasFirstDefaultArgument;
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasSecondDefaultArgument;
break;
}
assert(HasFirstDefaultArgument && HasSecondDefaultArgument &&
"Expecting default arguments.");
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamDifferentDefaultArgument);
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamDifferentDefaultArgument);
break;
}
}
break;
}
if (FirstIt != FirstEnd) {
Diagnosed = true;
break;
}
}
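// Hash the member declarations of both definitions and walk the two lists
// in lockstep to find the first mismatching declaration.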
DeclHashes FirstHashes;
DeclHashes SecondHashes;
const DeclContext *DC = FirstRecord;
PopulateHashes(FirstHashes, FirstRecord, DC);
PopulateHashes(SecondHashes, SecondRecord, DC);
auto DR = FindTypeDiffs(FirstHashes, SecondHashes);
ODRMismatchDecl FirstDiffType = DR.FirstDiffType;
ODRMismatchDecl SecondDiffType = DR.SecondDiffType;
Decl *FirstDecl = DR.FirstDecl;
Decl *SecondDecl = DR.SecondDecl;
if (FirstDiffType == Other || SecondDiffType == Other) {
DiagnoseODRUnexpected(DR, FirstRecord, FirstModule, SecondRecord,
SecondModule);
Diagnosed = true;
break;
}
if (FirstDiffType != SecondDiffType) {
DiagnoseODRMismatch(DR, FirstRecord, FirstModule, SecondRecord,
SecondModule);
Diagnosed = true;
break;
}
assert(FirstDiffType == SecondDiffType);
switch (FirstDiffType) {
case Other:
case EndOfClass:
case PublicSpecifer:
case PrivateSpecifer:
case ProtectedSpecifer:
llvm_unreachable("Invalid diff type");
case StaticAssert: {
StaticAssertDecl *FirstSA = cast<StaticAssertDecl>(FirstDecl);
StaticAssertDecl *SecondSA = cast<StaticAssertDecl>(SecondDecl);
Expr *FirstExpr = FirstSA->getAssertExpr();
Expr *SecondExpr = SecondSA->getAssertExpr();
unsigned FirstODRHash = ComputeODRHash(FirstExpr);
unsigned SecondODRHash = ComputeODRHash(SecondExpr);
if (FirstODRHash != SecondODRHash) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstExpr->getBeginLoc(),
FirstExpr->getSourceRange(), StaticAssertCondition);
ODRDiagDeclNote(SecondModule, SecondExpr->getBeginLoc(),
SecondExpr->getSourceRange(), StaticAssertCondition);
Diagnosed = true;
break;
}
StringLiteral *FirstStr = FirstSA->getMessage();
StringLiteral *SecondStr = SecondSA->getMessage();
assert((FirstStr || SecondStr) && "Both messages cannot be empty");
if ((FirstStr && !SecondStr) || (!FirstStr && SecondStr)) {
SourceLocation FirstLoc, SecondLoc;
SourceRange FirstRange, SecondRange;
if (FirstStr) {
FirstLoc = FirstStr->getBeginLoc();
FirstRange = FirstStr->getSourceRange();
} else {
FirstLoc = FirstSA->getBeginLoc();
FirstRange = FirstSA->getSourceRange();
}
if (SecondStr) {
SecondLoc = SecondStr->getBeginLoc();
SecondRange = SecondStr->getSourceRange();
} else {
SecondLoc = SecondSA->getBeginLoc();
SecondRange = SecondSA->getSourceRange();
}
ODRDiagDeclError(FirstRecord, FirstModule, FirstLoc, FirstRange,
StaticAssertOnlyMessage)
<< (FirstStr == nullptr);
ODRDiagDeclNote(SecondModule, SecondLoc, SecondRange,
StaticAssertOnlyMessage)
<< (SecondStr == nullptr);
Diagnosed = true;
break;
}
if (FirstStr && SecondStr &&
FirstStr->getString() != SecondStr->getString()) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstStr->getBeginLoc(),
FirstStr->getSourceRange(), StaticAssertMessage);
ODRDiagDeclNote(SecondModule, SecondStr->getBeginLoc(),
SecondStr->getSourceRange(), StaticAssertMessage);
Diagnosed = true;
break;
}
break;
}
case Field: {
Diagnosed = ODRDiagField(FirstRecord, FirstModule, SecondModule,
cast<FieldDecl>(FirstDecl),
cast<FieldDecl>(SecondDecl));
break;
}
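// Methods are compared piecewise: kind and name, deleted/defaulted,
// virtual/pure, storage class, volatile/const/inline, parameters,
// template arguments, and finally body presence and body hash.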
case CXXMethod: {
enum {
DiagMethod,
DiagConstructor,
DiagDestructor,
} FirstMethodType,
SecondMethodType;
auto GetMethodTypeForDiagnostics = [](const CXXMethodDecl* D) {
if (isa<CXXConstructorDecl>(D)) return DiagConstructor;
if (isa<CXXDestructorDecl>(D)) return DiagDestructor;
return DiagMethod;
};
const CXXMethodDecl *FirstMethod = cast<CXXMethodDecl>(FirstDecl);
const CXXMethodDecl *SecondMethod = cast<CXXMethodDecl>(SecondDecl);
FirstMethodType = GetMethodTypeForDiagnostics(FirstMethod);
SecondMethodType = GetMethodTypeForDiagnostics(SecondMethod);
auto FirstName = FirstMethod->getDeclName();
auto SecondName = SecondMethod->getDeclName();
if (FirstMethodType != SecondMethodType || FirstName != SecondName) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodName)
<< FirstMethodType << FirstName;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodName)
<< SecondMethodType << SecondName;
Diagnosed = true;
break;
}
const bool FirstDeleted = FirstMethod->isDeletedAsWritten();
const bool SecondDeleted = SecondMethod->isDeletedAsWritten();
if (FirstDeleted != SecondDeleted) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodDeleted)
<< FirstMethodType << FirstName << FirstDeleted;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodDeleted)
<< SecondMethodType << SecondName << SecondDeleted;
Diagnosed = true;
break;
}
const bool FirstDefaulted = FirstMethod->isExplicitlyDefaulted();
const bool SecondDefaulted = SecondMethod->isExplicitlyDefaulted();
if (FirstDefaulted != SecondDefaulted) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodDefaulted)
<< FirstMethodType << FirstName << FirstDefaulted;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodDefaulted)
<< SecondMethodType << SecondName << SecondDefaulted;
Diagnosed = true;
break;
}
const bool FirstVirtual = FirstMethod->isVirtualAsWritten();
const bool SecondVirtual = SecondMethod->isVirtualAsWritten();
const bool FirstPure = FirstMethod->isPure();
const bool SecondPure = SecondMethod->isPure();
if ((FirstVirtual || SecondVirtual) &&
(FirstVirtual != SecondVirtual || FirstPure != SecondPure)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodVirtual)
<< FirstMethodType << FirstName << FirstPure << FirstVirtual;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodVirtual)
<< SecondMethodType << SecondName << SecondPure << SecondVirtual;
Diagnosed = true;
break;
}
// CXXMethodDecl::isStatic uses the canonical Decl. With Decl merging,
// FirstDecl is the canonical Decl of SecondDecl, so the storage
// class needs to be checked instead.
const auto FirstStorage = FirstMethod->getStorageClass();
const auto SecondStorage = SecondMethod->getStorageClass();
const bool FirstStatic = FirstStorage == SC_Static;
const bool SecondStatic = SecondStorage == SC_Static;
if (FirstStatic != SecondStatic) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodStatic)
<< FirstMethodType << FirstName << FirstStatic;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodStatic)
<< SecondMethodType << SecondName << SecondStatic;
Diagnosed = true;
break;
}
const bool FirstVolatile = FirstMethod->isVolatile();
const bool SecondVolatile = SecondMethod->isVolatile();
if (FirstVolatile != SecondVolatile) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodVolatile)
<< FirstMethodType << FirstName << FirstVolatile;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodVolatile)
<< SecondMethodType << SecondName << SecondVolatile;
Diagnosed = true;
break;
}
const bool FirstConst = FirstMethod->isConst();
const bool SecondConst = SecondMethod->isConst();
if (FirstConst != SecondConst) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodConst)
<< FirstMethodType << FirstName << FirstConst;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodConst)
<< SecondMethodType << SecondName << SecondConst;
Diagnosed = true;
break;
}
const bool FirstInline = FirstMethod->isInlineSpecified();
const bool SecondInline = SecondMethod->isInlineSpecified();
if (FirstInline != SecondInline) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodInline)
<< FirstMethodType << FirstName << FirstInline;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodInline)
<< SecondMethodType << SecondName << SecondInline;
Diagnosed = true;
break;
}
const unsigned FirstNumParameters = FirstMethod->param_size();
const unsigned SecondNumParameters = SecondMethod->param_size();
if (FirstNumParameters != SecondNumParameters) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(),
MethodNumberParameters)
<< FirstMethodType << FirstName << FirstNumParameters;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodNumberParameters)
<< SecondMethodType << SecondName << SecondNumParameters;
Diagnosed = true;
break;
}
// Need this status boolean to know when to break out of the switch.
bool ParameterMismatch = false;
for (unsigned I = 0; I < FirstNumParameters; ++I) {
const ParmVarDecl *FirstParam = FirstMethod->getParamDecl(I);
const ParmVarDecl *SecondParam = SecondMethod->getParamDecl(I);
QualType FirstParamType = FirstParam->getType();
QualType SecondParamType = SecondParam->getType();
if (FirstParamType != SecondParamType &&
ComputeQualTypeODRHash(FirstParamType) !=
ComputeQualTypeODRHash(SecondParamType)) {
if (const DecayedType *ParamDecayedType =
FirstParamType->getAs<DecayedType>()) {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodParameterType)
<< FirstMethodType << FirstName << (I + 1) << FirstParamType
<< true << ParamDecayedType->getOriginalType();
} else {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodParameterType)
<< FirstMethodType << FirstName << (I + 1) << FirstParamType
<< false;
}
if (const DecayedType *ParamDecayedType =
SecondParamType->getAs<DecayedType>()) {
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodParameterType)
<< SecondMethodType << SecondName << (I + 1)
<< SecondParamType << true
<< ParamDecayedType->getOriginalType();
} else {
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodParameterType)
<< SecondMethodType << SecondName << (I + 1)
<< SecondParamType << false;
}
ParameterMismatch = true;
break;
}
DeclarationName FirstParamName = FirstParam->getDeclName();
DeclarationName SecondParamName = SecondParam->getDeclName();
if (FirstParamName != SecondParamName) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodParameterName)
<< FirstMethodType << FirstName << (I + 1) << FirstParamName;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodParameterName)
<< SecondMethodType << SecondName << (I + 1) << SecondParamName;
ParameterMismatch = true;
break;
}
const Expr *FirstInit = FirstParam->getInit();
const Expr *SecondInit = SecondParam->getInit();
if ((FirstInit == nullptr) != (SecondInit == nullptr)) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstMethod->getLocation(),
FirstMethod->getSourceRange(),
MethodParameterSingleDefaultArgument)
<< FirstMethodType << FirstName << (I + 1)
<< (FirstInit == nullptr)
<< (FirstInit ? FirstInit->getSourceRange() : SourceRange());
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodParameterSingleDefaultArgument)
<< SecondMethodType << SecondName << (I + 1)
<< (SecondInit == nullptr)
<< (SecondInit ? SecondInit->getSourceRange() : SourceRange());
ParameterMismatch = true;
break;
}
if (FirstInit && SecondInit &&
ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstMethod->getLocation(),
FirstMethod->getSourceRange(),
MethodParameterDifferentDefaultArgument)
<< FirstMethodType << FirstName << (I + 1)
<< FirstInit->getSourceRange();
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodParameterDifferentDefaultArgument)
<< SecondMethodType << SecondName << (I + 1)
<< SecondInit->getSourceRange();
ParameterMismatch = true;
break;
}
}
if (ParameterMismatch) {
Diagnosed = true;
break;
}
const auto *FirstTemplateArgs =
FirstMethod->getTemplateSpecializationArgs();
const auto *SecondTemplateArgs =
SecondMethod->getTemplateSpecializationArgs();
if ((FirstTemplateArgs && !SecondTemplateArgs) ||
(!FirstTemplateArgs && SecondTemplateArgs)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(),
MethodNoTemplateArguments)
<< FirstMethodType << FirstName << (FirstTemplateArgs != nullptr);
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodNoTemplateArguments)
<< SecondMethodType << SecondName
<< (SecondTemplateArgs != nullptr);
Diagnosed = true;
break;
}
if (FirstTemplateArgs && SecondTemplateArgs) {
// Remove pack expansions from the argument list.
auto ExpandTemplateArgumentList =
[](const TemplateArgumentList *TAL) {
llvm::SmallVector<const TemplateArgument *, 8> ExpandedList;
for (const TemplateArgument &TA : TAL->asArray()) {
if (TA.getKind() != TemplateArgument::Pack) {
ExpandedList.push_back(&TA);
continue;
}
for (const TemplateArgument &PackTA : TA.getPackAsArray()) {
ExpandedList.push_back(&PackTA);
}
}
return ExpandedList;
};
llvm::SmallVector<const TemplateArgument *, 8> FirstExpandedList =
ExpandTemplateArgumentList(FirstTemplateArgs);
llvm::SmallVector<const TemplateArgument *, 8> SecondExpandedList =
ExpandTemplateArgumentList(SecondTemplateArgs);
if (FirstExpandedList.size() != SecondExpandedList.size()) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstMethod->getLocation(),
FirstMethod->getSourceRange(),
MethodDifferentNumberTemplateArguments)
<< FirstMethodType << FirstName
<< (unsigned)FirstExpandedList.size();
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodDifferentNumberTemplateArguments)
<< SecondMethodType << SecondName
<< (unsigned)SecondExpandedList.size();
Diagnosed = true;
break;
}
bool TemplateArgumentMismatch = false;
for (unsigned i = 0, e = FirstExpandedList.size(); i != e; ++i) {
const TemplateArgument &FirstTA = *FirstExpandedList[i],
&SecondTA = *SecondExpandedList[i];
if (ComputeTemplateArgumentODRHash(FirstTA) ==
ComputeTemplateArgumentODRHash(SecondTA)) {
continue;
}
ODRDiagDeclError(
FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodDifferentTemplateArgument)
<< FirstMethodType << FirstName << FirstTA << i + 1;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodDifferentTemplateArgument)
<< SecondMethodType << SecondName << SecondTA << i + 1;
TemplateArgumentMismatch = true;
break;
}
if (TemplateArgumentMismatch) {
Diagnosed = true;
break;
}
}
// Compute the hash of the method as if it had no body.
auto ComputeCXXMethodODRHash = [&Hash](const CXXMethodDecl *D) {
Hash.clear();
Hash.AddFunctionDecl(D, true /*SkipBody*/);
return Hash.CalculateHash();
};
// Compare the generated hash to the stored hash. A difference means
// that a body was present in the original source. Due to merging,
// the standard way of detecting a body will not work.
const bool HasFirstBody =
ComputeCXXMethodODRHash(FirstMethod) != FirstMethod->getODRHash();
const bool HasSecondBody =
ComputeCXXMethodODRHash(SecondMethod) != SecondMethod->getODRHash();
if (HasFirstBody != HasSecondBody) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodSingleBody)
<< FirstMethodType << FirstName << HasFirstBody;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodSingleBody)
<< SecondMethodType << SecondName << HasSecondBody;
Diagnosed = true;
break;
}
if (HasFirstBody && HasSecondBody) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodDifferentBody)
<< FirstMethodType << FirstName;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodDifferentBody)
<< SecondMethodType << SecondName;
Diagnosed = true;
break;
}
break;
}
case TypeAlias:
case TypeDef: {
Diagnosed = ODRDiagTypeDefOrAlias(
FirstRecord, FirstModule, SecondModule,
cast<TypedefNameDecl>(FirstDecl), cast<TypedefNameDecl>(SecondDecl),
FirstDiffType == TypeAlias);
break;
}
case Var: {
Diagnosed =
ODRDiagVar(FirstRecord, FirstModule, SecondModule,
cast<VarDecl>(FirstDecl), cast<VarDecl>(SecondDecl));
break;
}
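// A friend is either a named declaration (friend function/class) or a
// friend type; diagnose whichever form differs between the definitions.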
case Friend: {
FriendDecl *FirstFriend = cast<FriendDecl>(FirstDecl);
FriendDecl *SecondFriend = cast<FriendDecl>(SecondDecl);
NamedDecl *FirstND = FirstFriend->getFriendDecl();
NamedDecl *SecondND = SecondFriend->getFriendDecl();
TypeSourceInfo *FirstTSI = FirstFriend->getFriendType();
TypeSourceInfo *SecondTSI = SecondFriend->getFriendType();
if (FirstND && SecondND) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstFriend->getFriendLoc(),
FirstFriend->getSourceRange(), FriendFunction)
<< FirstND;
ODRDiagDeclNote(SecondModule, SecondFriend->getFriendLoc(),
SecondFriend->getSourceRange(), FriendFunction)
<< SecondND;
Diagnosed = true;
break;
}
if (FirstTSI && SecondTSI) {
QualType FirstFriendType = FirstTSI->getType();
QualType SecondFriendType = SecondTSI->getType();
assert(ComputeQualTypeODRHash(FirstFriendType) !=
ComputeQualTypeODRHash(SecondFriendType));
ODRDiagDeclError(FirstRecord, FirstModule,
FirstFriend->getFriendLoc(),
FirstFriend->getSourceRange(), FriendType)
<< FirstFriendType;
ODRDiagDeclNote(SecondModule, SecondFriend->getFriendLoc(),
SecondFriend->getSourceRange(), FriendType)
<< SecondFriendType;
Diagnosed = true;
break;
}
ODRDiagDeclError(FirstRecord, FirstModule, FirstFriend->getFriendLoc(),
FirstFriend->getSourceRange(), FriendTypeFunction)
<< (FirstTSI == nullptr);
ODRDiagDeclNote(SecondModule, SecondFriend->getFriendLoc(),
SecondFriend->getSourceRange(), FriendTypeFunction)
<< (SecondTSI == nullptr);
Diagnosed = true;
break;
}
case FunctionTemplate: {
FunctionTemplateDecl *FirstTemplate =
cast<FunctionTemplateDecl>(FirstDecl);
FunctionTemplateDecl *SecondTemplate =
cast<FunctionTemplateDecl>(SecondDecl);
TemplateParameterList *FirstTPL =
FirstTemplate->getTemplateParameters();
TemplateParameterList *SecondTPL =
SecondTemplate->getTemplateParameters();
if (FirstTPL->size() != SecondTPL->size()) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateDifferentNumberParameters)
<< FirstTemplate << FirstTPL->size();
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateDifferentNumberParameters)
<< SecondTemplate << SecondTPL->size();
Diagnosed = true;
break;
}
bool ParameterMismatch = false;
for (unsigned i = 0, e = FirstTPL->size(); i != e; ++i) {
NamedDecl *FirstParam = FirstTPL->getParam(i);
NamedDecl *SecondParam = SecondTPL->getParam(i);
if (FirstParam->getKind() != SecondParam->getKind()) {
enum {
TemplateTypeParameter,
NonTypeTemplateParameter,
TemplateTemplateParameter,
};
auto GetParamType = [](NamedDecl *D) {
switch (D->getKind()) {
default:
llvm_unreachable("Unexpected template parameter type");
case Decl::TemplateTypeParm:
return TemplateTypeParameter;
case Decl::NonTypeTemplateParm:
return NonTypeTemplateParameter;
case Decl::TemplateTemplateParm:
return TemplateTemplateParameter;
}
};
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentKind)
<< FirstTemplate << (i + 1) << GetParamType(FirstParam);
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentKind)
<< SecondTemplate << (i + 1) << GetParamType(SecondParam);
ParameterMismatch = true;
break;
}
if (FirstParam->getName() != SecondParam->getName()) {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(), FunctionTemplateParameterName)
<< FirstTemplate << (i + 1) << (bool)FirstParam->getIdentifier()
<< FirstParam;
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterName)
<< SecondTemplate << (i + 1)
<< (bool)SecondParam->getIdentifier() << SecondParam;
ParameterMismatch = true;
break;
}
if (isa<TemplateTypeParmDecl>(FirstParam) &&
isa<TemplateTypeParmDecl>(SecondParam)) {
TemplateTypeParmDecl *FirstTTPD =
cast<TemplateTypeParmDecl>(FirstParam);
TemplateTypeParmDecl *SecondTTPD =
cast<TemplateTypeParmDecl>(SecondParam);
bool HasFirstDefaultArgument =
FirstTTPD->hasDefaultArgument() &&
!FirstTTPD->defaultArgumentWasInherited();
bool HasSecondDefaultArgument =
SecondTTPD->hasDefaultArgument() &&
!SecondTTPD->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< FirstTemplate << (i + 1) << HasFirstDefaultArgument;
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< SecondTemplate << (i + 1) << HasSecondDefaultArgument;
ParameterMismatch = true;
break;
}
if (HasFirstDefaultArgument && HasSecondDefaultArgument) {
QualType FirstType = FirstTTPD->getDefaultArgument();
QualType SecondType = SecondTTPD->getDefaultArgument();
if (ComputeQualTypeODRHash(FirstType) !=
ComputeQualTypeODRHash(SecondType)) {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< FirstTemplate << (i + 1) << FirstType;
ODRDiagDeclNote(
SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< SecondTemplate << (i + 1) << SecondType;
ParameterMismatch = true;
break;
}
}
if (FirstTTPD->isParameterPack() !=
SecondTTPD->isParameterPack()) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< FirstTemplate << (i + 1) << FirstTTPD->isParameterPack();
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< SecondTemplate << (i + 1) << SecondTTPD->isParameterPack();
ParameterMismatch = true;
break;
}
}
if (isa<TemplateTemplateParmDecl>(FirstParam) &&
isa<TemplateTemplateParmDecl>(SecondParam)) {
TemplateTemplateParmDecl *FirstTTPD =
cast<TemplateTemplateParmDecl>(FirstParam);
TemplateTemplateParmDecl *SecondTTPD =
cast<TemplateTemplateParmDecl>(SecondParam);
TemplateParameterList *FirstTPL =
FirstTTPD->getTemplateParameters();
TemplateParameterList *SecondTPL =
SecondTTPD->getTemplateParameters();
if (ComputeTemplateParameterListODRHash(FirstTPL) !=
ComputeTemplateParameterListODRHash(SecondTPL)) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentType)
<< FirstTemplate << (i + 1);
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentType)
<< SecondTemplate << (i + 1);
ParameterMismatch = true;
break;
}
bool HasFirstDefaultArgument =
FirstTTPD->hasDefaultArgument() &&
!FirstTTPD->defaultArgumentWasInherited();
bool HasSecondDefaultArgument =
SecondTTPD->hasDefaultArgument() &&
!SecondTTPD->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< FirstTemplate << (i + 1) << HasFirstDefaultArgument;
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< SecondTemplate << (i + 1) << HasSecondDefaultArgument;
ParameterMismatch = true;
break;
}
if (HasFirstDefaultArgument && HasSecondDefaultArgument) {
TemplateArgument FirstTA =
FirstTTPD->getDefaultArgument().getArgument();
TemplateArgument SecondTA =
SecondTTPD->getDefaultArgument().getArgument();
if (ComputeTemplateArgumentODRHash(FirstTA) !=
ComputeTemplateArgumentODRHash(SecondTA)) {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< FirstTemplate << (i + 1) << FirstTA;
ODRDiagDeclNote(
SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< SecondTemplate << (i + 1) << SecondTA;
ParameterMismatch = true;
break;
}
}
if (FirstTTPD->isParameterPack() !=
SecondTTPD->isParameterPack()) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< FirstTemplate << (i + 1) << FirstTTPD->isParameterPack();
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< SecondTemplate << (i + 1) << SecondTTPD->isParameterPack();
ParameterMismatch = true;
break;
}
}
if (isa<NonTypeTemplateParmDecl>(FirstParam) &&
isa<NonTypeTemplateParmDecl>(SecondParam)) {
NonTypeTemplateParmDecl *FirstNTTPD =
cast<NonTypeTemplateParmDecl>(FirstParam);
NonTypeTemplateParmDecl *SecondNTTPD =
cast<NonTypeTemplateParmDecl>(SecondParam);
QualType FirstType = FirstNTTPD->getType();
QualType SecondType = SecondNTTPD->getType();
if (ComputeQualTypeODRHash(FirstType) !=
ComputeQualTypeODRHash(SecondType)) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentType)
<< FirstTemplate << (i + 1);
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentType)
<< SecondTemplate << (i + 1);
ParameterMismatch = true;
break;
}
bool HasFirstDefaultArgument =
FirstNTTPD->hasDefaultArgument() &&
!FirstNTTPD->defaultArgumentWasInherited();
bool HasSecondDefaultArgument =
SecondNTTPD->hasDefaultArgument() &&
!SecondNTTPD->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< FirstTemplate << (i + 1) << HasFirstDefaultArgument;
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< SecondTemplate << (i + 1) << HasSecondDefaultArgument;
ParameterMismatch = true;
break;
}
if (HasFirstDefaultArgument && HasSecondDefaultArgument) {
Expr *FirstDefaultArgument = FirstNTTPD->getDefaultArgument();
Expr *SecondDefaultArgument = SecondNTTPD->getDefaultArgument();
if (ComputeODRHash(FirstDefaultArgument) !=
ComputeODRHash(SecondDefaultArgument)) {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< FirstTemplate << (i + 1) << FirstDefaultArgument;
ODRDiagDeclNote(
SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< SecondTemplate << (i + 1) << SecondDefaultArgument;
ParameterMismatch = true;
break;
}
}
if (FirstNTTPD->isParameterPack() !=
SecondNTTPD->isParameterPack()) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< FirstTemplate << (i + 1) << FirstNTTPD->isParameterPack();
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< SecondTemplate << (i + 1)
<< SecondNTTPD->isParameterPack();
ParameterMismatch = true;
break;
}
}
}
if (ParameterMismatch) {
Diagnosed = true;
break;
}
break;
}
}
if (Diagnosed)
continue;
Diag(FirstDecl->getLocation(),
diag::err_module_odr_violation_mismatch_decl_unknown)
<< FirstRecord << FirstModule.empty() << FirstModule << FirstDiffType
<< FirstDecl->getSourceRange();
Diag(SecondDecl->getLocation(),
diag::note_module_odr_violation_mismatch_decl_unknown)
<< SecondModule << FirstDiffType << SecondDecl->getSourceRange();
Diagnosed = true;
}
if (!Diagnosed) {
// All definitions are updates to the same declaration. This happens if a
// module instantiates the declaration of a class template specialization
// and two or more other modules instantiate its definition.
//
// FIXME: Indicate which modules had instantiations of this definition.
// FIXME: How can this even happen?
Diag(Merge.first->getLocation(),
diag::err_module_odr_violation_different_instantiations)
<< Merge.first;
}
}
// Issue ODR failures diagnostics for functions.
for (auto &Merge : FunctionOdrMergeFailures) {
enum ODRFunctionDifference {
ReturnType,
ParameterName,
ParameterType,
ParameterSingleDefaultArgument,
ParameterDifferentDefaultArgument,
FunctionBody,
};
FunctionDecl *FirstFunction = Merge.first;
std::string FirstModule = getOwningModuleNameForDiagnostic(FirstFunction);
bool Diagnosed = false;
for (auto &SecondFunction : Merge.second) {
if (FirstFunction == SecondFunction)
continue;
std::string SecondModule =
getOwningModuleNameForDiagnostic(SecondFunction);
auto ODRDiagError = [FirstFunction, &FirstModule,
this](SourceLocation Loc, SourceRange Range,
ODRFunctionDifference DiffType) {
return Diag(Loc, diag::err_module_odr_violation_function)
<< FirstFunction << FirstModule.empty() << FirstModule << Range
<< DiffType;
};
auto ODRDiagNote = [&SecondModule, this](SourceLocation Loc,
SourceRange Range,
ODRFunctionDifference DiffType) {
return Diag(Loc, diag::note_module_odr_violation_function)
<< SecondModule << Range << DiffType;
};
if (ComputeQualTypeODRHash(FirstFunction->getReturnType()) !=
ComputeQualTypeODRHash(SecondFunction->getReturnType())) {
ODRDiagError(FirstFunction->getReturnTypeSourceRange().getBegin(),
FirstFunction->getReturnTypeSourceRange(), ReturnType)
<< FirstFunction->getReturnType();
ODRDiagNote(SecondFunction->getReturnTypeSourceRange().getBegin(),
SecondFunction->getReturnTypeSourceRange(), ReturnType)
<< SecondFunction->getReturnType();
Diagnosed = true;
break;
}
assert(FirstFunction->param_size() == SecondFunction->param_size() &&
"Merged functions with different number of parameters");
auto ParamSize = FirstFunction->param_size();
bool ParameterMismatch = false;
for (unsigned I = 0; I < ParamSize; ++I) {
auto *FirstParam = FirstFunction->getParamDecl(I);
auto *SecondParam = SecondFunction->getParamDecl(I);
assert(getContext().hasSameType(FirstParam->getType(),
SecondParam->getType()) &&
"Merged function has different parameter types.");
if (FirstParam->getDeclName() != SecondParam->getDeclName()) {
ODRDiagError(FirstParam->getLocation(), FirstParam->getSourceRange(),
ParameterName)
<< I + 1 << FirstParam->getDeclName();
ODRDiagNote(SecondParam->getLocation(), SecondParam->getSourceRange(),
ParameterName)
<< I + 1 << SecondParam->getDeclName();
ParameterMismatch = true;
break;
}
QualType FirstParamType = FirstParam->getType();
QualType SecondParamType = SecondParam->getType();
if (FirstParamType != SecondParamType &&
ComputeQualTypeODRHash(FirstParamType) !=
ComputeQualTypeODRHash(SecondParamType)) {
if (const DecayedType *ParamDecayedType =
FirstParamType->getAs<DecayedType>()) {
ODRDiagError(FirstParam->getLocation(),
FirstParam->getSourceRange(), ParameterType)
<< (I + 1) << FirstParamType << true
<< ParamDecayedType->getOriginalType();
} else {
ODRDiagError(FirstParam->getLocation(),
FirstParam->getSourceRange(), ParameterType)
<< (I + 1) << FirstParamType << false;
}
if (const DecayedType *ParamDecayedType =
SecondParamType->getAs<DecayedType>()) {
ODRDiagNote(SecondParam->getLocation(),
SecondParam->getSourceRange(), ParameterType)
<< (I + 1) << SecondParamType << true
<< ParamDecayedType->getOriginalType();
} else {
ODRDiagNote(SecondParam->getLocation(),
SecondParam->getSourceRange(), ParameterType)
<< (I + 1) << SecondParamType << false;
}
ParameterMismatch = true;
break;
}
const Expr *FirstInit = FirstParam->getInit();
const Expr *SecondInit = SecondParam->getInit();
if ((FirstInit == nullptr) != (SecondInit == nullptr)) {
ODRDiagError(FirstParam->getLocation(), FirstParam->getSourceRange(),
ParameterSingleDefaultArgument)
<< (I + 1) << (FirstInit == nullptr)
<< (FirstInit ? FirstInit->getSourceRange() : SourceRange());
ODRDiagNote(SecondParam->getLocation(), SecondParam->getSourceRange(),
ParameterSingleDefaultArgument)
<< (I + 1) << (SecondInit == nullptr)
<< (SecondInit ? SecondInit->getSourceRange() : SourceRange());
ParameterMismatch = true;
break;
}
if (FirstInit && SecondInit &&
ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
ODRDiagError(FirstParam->getLocation(), FirstParam->getSourceRange(),
ParameterDifferentDefaultArgument)
<< (I + 1) << FirstInit->getSourceRange();
ODRDiagNote(SecondParam->getLocation(), SecondParam->getSourceRange(),
ParameterDifferentDefaultArgument)
<< (I + 1) << SecondInit->getSourceRange();
ParameterMismatch = true;
break;
}
assert(ComputeSubDeclODRHash(FirstParam) ==
ComputeSubDeclODRHash(SecondParam) &&
"Undiagnosed parameter difference.");
}
if (ParameterMismatch) {
Diagnosed = true;
break;
}
// If no error has been generated before now, assume the problem is in
// the body and generate a message.
ODRDiagError(FirstFunction->getLocation(),
FirstFunction->getSourceRange(), FunctionBody);
ODRDiagNote(SecondFunction->getLocation(),
SecondFunction->getSourceRange(), FunctionBody);
Diagnosed = true;
break;
}
(void)Diagnosed;
assert(Diagnosed && "Unable to emit ODR diagnostic.");
}
// Issue ODR failures diagnostics for enums.
for (auto &Merge : EnumOdrMergeFailures) {
enum ODREnumDifference {
SingleScopedEnum,
EnumTagKeywordMismatch,
SingleSpecifiedType,
DifferentSpecifiedTypes,
DifferentNumberEnumConstants,
EnumConstantName,
EnumConstantSingleInitilizer,
EnumConstantDifferentInitilizer,
};
// If we've already pointed out a specific problem with this enum, don't
// bother issuing a general "something's different" diagnostic.
if (!DiagnosedOdrMergeFailures.insert(Merge.first).second)
continue;
EnumDecl *FirstEnum = Merge.first;
std::string FirstModule = getOwningModuleNameForDiagnostic(FirstEnum);
using DeclHashes =
llvm::SmallVector<std::pair<EnumConstantDecl *, unsigned>, 4>;
auto PopulateHashes = [&ComputeSubDeclODRHash, FirstEnum](
DeclHashes &Hashes, EnumDecl *Enum) {
for (auto *D : Enum->decls()) {
// Due to decl merging, the first EnumDecl is the parent of
// Decls in both enums.
if (!ODRHash::isDeclToBeProcessed(D, FirstEnum))
continue;
assert(isa<EnumConstantDecl>(D) && "Unexpected Decl kind");
Hashes.emplace_back(cast<EnumConstantDecl>(D),
ComputeSubDeclODRHash(D));
}
};
DeclHashes FirstHashes;
PopulateHashes(FirstHashes, FirstEnum);
bool Diagnosed = false;
for (auto &SecondEnum : Merge.second) {
if (FirstEnum == SecondEnum)
continue;
std::string SecondModule =
getOwningModuleNameForDiagnostic(SecondEnum);
auto ODRDiagError = [FirstEnum, &FirstModule,
this](SourceLocation Loc, SourceRange Range,
ODREnumDifference DiffType) {
return Diag(Loc, diag::err_module_odr_violation_enum)
<< FirstEnum << FirstModule.empty() << FirstModule << Range
<< DiffType;
};
auto ODRDiagNote = [&SecondModule, this](SourceLocation Loc,
SourceRange Range,
ODREnumDifference DiffType) {
return Diag(Loc, diag::note_module_odr_violation_enum)
<< SecondModule << Range << DiffType;
};
if (FirstEnum->isScoped() != SecondEnum->isScoped()) {
ODRDiagError(FirstEnum->getLocation(), FirstEnum->getSourceRange(),
SingleScopedEnum)
<< FirstEnum->isScoped();
ODRDiagNote(SecondEnum->getLocation(), SecondEnum->getSourceRange(),
SingleScopedEnum)
<< SecondEnum->isScoped();
Diagnosed = true;
continue;
}
if (FirstEnum->isScoped() && SecondEnum->isScoped()) {
if (FirstEnum->isScopedUsingClassTag() !=
SecondEnum->isScopedUsingClassTag()) {
ODRDiagError(FirstEnum->getLocation(), FirstEnum->getSourceRange(),
EnumTagKeywordMismatch)
<< FirstEnum->isScopedUsingClassTag();
ODRDiagNote(SecondEnum->getLocation(), SecondEnum->getSourceRange(),
EnumTagKeywordMismatch)
<< SecondEnum->isScopedUsingClassTag();
Diagnosed = true;
continue;
}
}
QualType FirstUnderlyingType =
FirstEnum->getIntegerTypeSourceInfo()
? FirstEnum->getIntegerTypeSourceInfo()->getType()
: QualType();
QualType SecondUnderlyingType =
SecondEnum->getIntegerTypeSourceInfo()
? SecondEnum->getIntegerTypeSourceInfo()->getType()
: QualType();
if (FirstUnderlyingType.isNull() != SecondUnderlyingType.isNull()) {
ODRDiagError(FirstEnum->getLocation(), FirstEnum->getSourceRange(),
SingleSpecifiedType)
<< !FirstUnderlyingType.isNull();
ODRDiagNote(SecondEnum->getLocation(), SecondEnum->getSourceRange(),
SingleSpecifiedType)
<< !SecondUnderlyingType.isNull();
Diagnosed = true;
continue;
}
if (!FirstUnderlyingType.isNull() && !SecondUnderlyingType.isNull()) {
if (ComputeQualTypeODRHash(FirstUnderlyingType) !=
ComputeQualTypeODRHash(SecondUnderlyingType)) {
ODRDiagError(FirstEnum->getLocation(), FirstEnum->getSourceRange(),
DifferentSpecifiedTypes)
<< FirstUnderlyingType;
ODRDiagNote(SecondEnum->getLocation(), SecondEnum->getSourceRange(),
DifferentSpecifiedTypes)
<< SecondUnderlyingType;
Diagnosed = true;
continue;
}
}
DeclHashes SecondHashes;
PopulateHashes(SecondHashes, SecondEnum);
if (FirstHashes.size() != SecondHashes.size()) {
ODRDiagError(FirstEnum->getLocation(), FirstEnum->getSourceRange(),
DifferentNumberEnumConstants)
<< (int)FirstHashes.size();
ODRDiagNote(SecondEnum->getLocation(), SecondEnum->getSourceRange(),
DifferentNumberEnumConstants)
<< (int)SecondHashes.size();
Diagnosed = true;
continue;
}
for (unsigned I = 0; I < FirstHashes.size(); ++I) {
if (FirstHashes[I].second == SecondHashes[I].second)
continue;
const EnumConstantDecl *FirstEnumConstant = FirstHashes[I].first;
const EnumConstantDecl *SecondEnumConstant = SecondHashes[I].first;
if (FirstEnumConstant->getDeclName() !=
SecondEnumConstant->getDeclName()) {
ODRDiagError(FirstEnumConstant->getLocation(),
FirstEnumConstant->getSourceRange(), EnumConstantName)
<< I + 1 << FirstEnumConstant;
ODRDiagNote(SecondEnumConstant->getLocation(),
SecondEnumConstant->getSourceRange(), EnumConstantName)
<< I + 1 << SecondEnumConstant;
Diagnosed = true;
break;
}
const Expr *FirstInit = FirstEnumConstant->getInitExpr();
const Expr *SecondInit = SecondEnumConstant->getInitExpr();
if (!FirstInit && !SecondInit)
continue;
if (!FirstInit || !SecondInit) {
ODRDiagError(FirstEnumConstant->getLocation(),
FirstEnumConstant->getSourceRange(),
EnumConstantSingleInitilizer)
<< I + 1 << FirstEnumConstant << (FirstInit != nullptr);
ODRDiagNote(SecondEnumConstant->getLocation(),
SecondEnumConstant->getSourceRange(),
EnumConstantSingleInitilizer)
<< I + 1 << SecondEnumConstant << (SecondInit != nullptr);
Diagnosed = true;
break;
}
if (ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
ODRDiagError(FirstEnumConstant->getLocation(),
FirstEnumConstant->getSourceRange(),
EnumConstantDifferentInitilizer)
<< I + 1 << FirstEnumConstant;
ODRDiagNote(SecondEnumConstant->getLocation(),
SecondEnumConstant->getSourceRange(),
EnumConstantDifferentInitilizer)
<< I + 1 << SecondEnumConstant;
Diagnosed = true;
break;
}
}
}
(void)Diagnosed;
assert(Diagnosed && "Unable to emit ODR diagnostic.");
}
}
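// Editor's note: illustrative sketch, not part of this diff. The ODR
// diagnostics above all follow one pattern: hash corresponding pieces of two
// definitions and, on the first mismatch, emit an error on the first
// definition plus a note on the second, then stop comparing. The stand-alone
// sketch below uses only standard C++; FieldView, hashField and
// reportOdrMismatch are hypothetical names, not clang APIs.
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

struct FieldView {
  std::string Name;
  std::string Type;
};

static size_t hashField(const FieldView &F) {
  // Stand-in for ODRHash: combine the hashes of the name and the type.
  return (std::hash<std::string>()(F.Name) * 31) ^
         std::hash<std::string>()(F.Type);
}

// Returns true if a mismatch was diagnosed.
static bool reportOdrMismatch(const std::vector<FieldView> &First,
                              const std::vector<FieldView> &Second) {
  if (First.size() != Second.size()) {
    std::printf("error: definitions have %zu vs %zu fields\n", First.size(),
                Second.size());
    return true;
  }
  for (size_t I = 0; I != First.size(); ++I) {
    if (hashField(First[I]) == hashField(Second[I]))
      continue;
    std::printf("error: field %zu ('%s') differs between definitions\n",
                I + 1, First[I].Name.c_str());
    std::printf("note: the other definition declares '%s'\n",
                Second[I].Name.c_str());
    return true; // Diagnose only the first difference, as the code above does.
  }
  return false;
}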
void ASTReader::StartedDeserializing() {
if (++NumCurrentElementsDeserializing == 1 && ReadTimer.get())
ReadTimer->startTimer();
}
void ASTReader::FinishedDeserializing() {
assert(NumCurrentElementsDeserializing &&
"FinishedDeserializing not paired with StartedDeserializing");
if (NumCurrentElementsDeserializing == 1) {
// We decrease NumCurrentElementsDeserializing only after pending actions
// are finished, to avoid recursively re-calling finishPendingActions().
finishPendingActions();
}
--NumCurrentElementsDeserializing;
if (NumCurrentElementsDeserializing == 0) {
// Propagate exception specification and deduced type updates along
// redeclaration chains.
//
// We do this now rather than in finishPendingActions because we want to
// be able to walk the complete redeclaration chains of the updated decls.
while (!PendingExceptionSpecUpdates.empty() ||
!PendingDeducedTypeUpdates.empty()) {
auto ESUpdates = std::move(PendingExceptionSpecUpdates);
PendingExceptionSpecUpdates.clear();
for (auto Update : ESUpdates) {
ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
auto *FPT = Update.second->getType()->castAs<FunctionProtoType>();
auto ESI = FPT->getExtProtoInfo().ExceptionSpec;
if (auto *Listener = getContext().getASTMutationListener())
Listener->ResolvedExceptionSpec(cast<FunctionDecl>(Update.second));
for (auto *Redecl : Update.second->redecls())
getContext().adjustExceptionSpec(cast<FunctionDecl>(Redecl), ESI);
}
auto DTUpdates = std::move(PendingDeducedTypeUpdates);
PendingDeducedTypeUpdates.clear();
for (auto Update : DTUpdates) {
ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
// FIXME: If the return type is already deduced, check that it matches.
getContext().adjustDeducedFunctionResultType(Update.first,
Update.second);
}
}
if (ReadTimer)
ReadTimer->stopTimer();
diagnoseOdrViolations();
// We are not in recursive loading, so it's safe to pass the "interesting"
// decls to the consumer.
if (Consumer)
PassInterestingDeclsToConsumer();
}
}
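// Editor's note: illustrative sketch, not part of this diff. It models the
// Started/FinishedDeserializing pairing above: a nesting counter defers
// pending work until the outermost deserialization is about to finish, so
// nothing is flushed while the reader is still recursively loading. The class
// and member names are hypothetical, not clang APIs.
#include <functional>
#include <utility>
#include <vector>

class DeserializationGuard {
  unsigned Depth = 0;
  std::vector<std::function<void()>> Pending;

public:
  void started() { ++Depth; }

  void addPending(std::function<void()> Fn) {
    Pending.push_back(std::move(Fn));
  }

  void finished() {
    if (Depth == 1) {
      // Flush before decrementing; loop because a pending action may queue
      // more work (compare the loop over pending updates in
      // FinishedDeserializing above).
      while (!Pending.empty()) {
        auto Work = std::move(Pending);
        Pending.clear();
        for (auto &Fn : Work)
          Fn();
      }
    }
    --Depth;
  }
};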
void ASTReader::pushExternalDeclIntoScope(NamedDecl *D, DeclarationName Name) {
if (IdentifierInfo *II = Name.getAsIdentifierInfo()) {
// Remove any fake results before adding any real ones.
auto It = PendingFakeLookupResults.find(II);
if (It != PendingFakeLookupResults.end()) {
for (auto *ND : It->second)
SemaObj->IdResolver.RemoveDecl(ND);
// FIXME: this works around a module+PCH performance issue.
// Rather than erasing the result from the map, which is O(n), just clear
// the vector of NamedDecls.
It->second.clear();
}
}
if (SemaObj->IdResolver.tryAddTopLevelDecl(D, Name) && SemaObj->TUScope) {
SemaObj->TUScope->AddDecl(D);
} else if (SemaObj->TUScope) {
// Adding the decl to IdResolver may have failed because it was already
// there (even though it was not added to the scope). If it is already in
// IdResolver, make sure it also gets added to the scope.
if (std::find(SemaObj->IdResolver.begin(Name),
SemaObj->IdResolver.end(), D) != SemaObj->IdResolver.end())
SemaObj->TUScope->AddDecl(D);
}
}
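// Editor's note: illustrative sketch, not part of this diff. It shows the
// "clear the bucket instead of erasing the key" trick used for
// PendingFakeLookupResults above: clearing the stored vector is cheap, and
// the now-empty entry can simply stay in the map. Names and container choice
// here are hypothetical; the real code uses LLVM containers.
#include <map>
#include <string>
#include <vector>

struct FakeLookupTable {
  std::map<std::string, std::vector<int>> Results;

  // Drop the fake results for Name without erasing the map entry itself.
  void dropFakeResults(const std::string &Name) {
    auto It = Results.find(Name);
    if (It == Results.end())
      return;
    It->second.clear(); // Leave the (now empty) key in place.
  }
};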
ASTReader::ASTReader(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
ASTContext *Context,
const PCHContainerReader &PCHContainerRdr,
ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
StringRef isysroot,
DisableValidationForModuleKind DisableValidationKind,
bool AllowASTWithCompilerErrors,
bool AllowConfigurationMismatch, bool ValidateSystemInputs,
bool ValidateASTInputFilesContent, bool UseGlobalIndex,
std::unique_ptr<llvm::Timer> ReadTimer)
: Listener(bool(DisableValidationKind &DisableValidationForModuleKind::PCH)
? cast<ASTReaderListener>(new SimpleASTReaderListener(PP))
: cast<ASTReaderListener>(new PCHValidator(PP, *this))),
SourceMgr(PP.getSourceManager()), FileMgr(PP.getFileManager()),
PCHContainerRdr(PCHContainerRdr), Diags(PP.getDiagnostics()), PP(PP),
ContextObj(Context), ModuleMgr(PP.getFileManager(), ModuleCache,
PCHContainerRdr, PP.getHeaderSearchInfo()),
DummyIdResolver(PP), ReadTimer(std::move(ReadTimer)), isysroot(isysroot),
DisableValidationKind(DisableValidationKind),
AllowASTWithCompilerErrors(AllowASTWithCompilerErrors),
AllowConfigurationMismatch(AllowConfigurationMismatch),
ValidateSystemInputs(ValidateSystemInputs),
ValidateASTInputFilesContent(ValidateASTInputFilesContent),
UseGlobalIndex(UseGlobalIndex), CurrSwitchCaseStmts(&SwitchCaseStmts) {
SourceMgr.setExternalSLocEntrySource(this);
for (const auto &Ext : Extensions) {
auto BlockName = Ext->getExtensionMetadata().BlockName;
auto Known = ModuleFileExtensions.find(BlockName);
if (Known != ModuleFileExtensions.end()) {
Diags.Report(diag::warn_duplicate_module_file_extension)
<< BlockName;
continue;
}
ModuleFileExtensions.insert({BlockName, Ext});
}
}
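// Editor's note: illustrative sketch, not part of this diff. It mirrors the
// extension-registration loop in the ASTReader constructor above: extensions
// are keyed by their block name, the first registration wins, and duplicates
// are reported and skipped. The Extension type and function name below are
// hypothetical.
#include <cstdio>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Extension {
  std::string BlockName;
};

static void registerExtensions(
    const std::vector<std::shared_ptr<Extension>> &Extensions,
    std::map<std::string, std::shared_ptr<Extension>> &Registry) {
  for (const auto &Ext : Extensions) {
    const std::string &Name = Ext->BlockName;
    if (Registry.count(Name)) {
      std::fprintf(stderr, "warning: duplicate module file extension '%s'\n",
                   Name.c_str());
      continue; // Keep the first registration, skip the duplicate.
    }
    Registry.emplace(Name, Ext);
  }
}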
ASTReader::~ASTReader() {
if (OwnsDeserializationListener)
delete DeserializationListener;
}
IdentifierResolver &ASTReader::getIdResolver() {
return SemaObj ? SemaObj->IdResolver : DummyIdResolver;
}
Expected<unsigned> ASTRecordReader::readRecord(llvm::BitstreamCursor &Cursor,
unsigned AbbrevID) {
Idx = 0;
Record.clear();
return Cursor.readRecord(AbbrevID, Record);
}
//===----------------------------------------------------------------------===//
// OMPClauseReader implementation
//===----------------------------------------------------------------------===//
// This has to be in namespace clang because it's friended by all
// of the OMP clauses.
namespace clang {
class OMPClauseReader : public OMPClauseVisitor<OMPClauseReader> {
ASTRecordReader &Record;
ASTContext &Context;
public:
OMPClauseReader(ASTRecordReader &Record)
: Record(Record), Context(Record.getContext()) {}
#define GEN_CLANG_CLAUSE_CLASS
#define CLAUSE_CLASS(Enum, Str, Class) void Visit##Class(Class *C);
#include "llvm/Frontend/OpenMP/OMP.inc"
OMPClause *readClause();
void VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C);
void VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C);
};
} // end namespace clang
OMPClause *ASTRecordReader::readOMPClause() {
return OMPClauseReader(*this).readClause();
}
OMPClause *OMPClauseReader::readClause() {
OMPClause *C = nullptr;
switch (llvm::omp::Clause(Record.readInt())) {
case llvm::omp::OMPC_if:
C = new (Context) OMPIfClause();
break;
case llvm::omp::OMPC_final:
C = new (Context) OMPFinalClause();
break;
case llvm::omp::OMPC_num_threads:
C = new (Context) OMPNumThreadsClause();
break;
case llvm::omp::OMPC_safelen:
C = new (Context) OMPSafelenClause();
break;
case llvm::omp::OMPC_simdlen:
C = new (Context) OMPSimdlenClause();
break;
case llvm::omp::OMPC_sizes: {
unsigned NumSizes = Record.readInt();
C = OMPSizesClause::CreateEmpty(Context, NumSizes);
break;
}
case llvm::omp::OMPC_full:
C = OMPFullClause::CreateEmpty(Context);
break;
case llvm::omp::OMPC_partial:
C = OMPPartialClause::CreateEmpty(Context);
break;
case llvm::omp::OMPC_allocator:
C = new (Context) OMPAllocatorClause();
break;
case llvm::omp::OMPC_collapse:
C = new (Context) OMPCollapseClause();
break;
case llvm::omp::OMPC_default:
C = new (Context) OMPDefaultClause();
break;
case llvm::omp::OMPC_proc_bind:
C = new (Context) OMPProcBindClause();
break;
case llvm::omp::OMPC_schedule:
C = new (Context) OMPScheduleClause();
break;
case llvm::omp::OMPC_ordered:
C = OMPOrderedClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_nowait:
C = new (Context) OMPNowaitClause();
break;
case llvm::omp::OMPC_untied:
C = new (Context) OMPUntiedClause();
break;
case llvm::omp::OMPC_mergeable:
C = new (Context) OMPMergeableClause();
break;
case llvm::omp::OMPC_read:
C = new (Context) OMPReadClause();
break;
case llvm::omp::OMPC_write:
C = new (Context) OMPWriteClause();
break;
case llvm::omp::OMPC_update:
C = OMPUpdateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_capture:
C = new (Context) OMPCaptureClause();
break;
case llvm::omp::OMPC_seq_cst:
C = new (Context) OMPSeqCstClause();
break;
case llvm::omp::OMPC_acq_rel:
C = new (Context) OMPAcqRelClause();
break;
case llvm::omp::OMPC_acquire:
C = new (Context) OMPAcquireClause();
break;
case llvm::omp::OMPC_release:
C = new (Context) OMPReleaseClause();
break;
case llvm::omp::OMPC_relaxed:
C = new (Context) OMPRelaxedClause();
break;
case llvm::omp::OMPC_threads:
C = new (Context) OMPThreadsClause();
break;
case llvm::omp::OMPC_simd:
C = new (Context) OMPSIMDClause();
break;
case llvm::omp::OMPC_nogroup:
C = new (Context) OMPNogroupClause();
break;
case llvm::omp::OMPC_unified_address:
C = new (Context) OMPUnifiedAddressClause();
break;
case llvm::omp::OMPC_unified_shared_memory:
C = new (Context) OMPUnifiedSharedMemoryClause();
break;
case llvm::omp::OMPC_reverse_offload:
C = new (Context) OMPReverseOffloadClause();
break;
case llvm::omp::OMPC_dynamic_allocators:
C = new (Context) OMPDynamicAllocatorsClause();
break;
case llvm::omp::OMPC_atomic_default_mem_order:
C = new (Context) OMPAtomicDefaultMemOrderClause();
break;
case llvm::omp::OMPC_private:
C = OMPPrivateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_firstprivate:
C = OMPFirstprivateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_lastprivate:
C = OMPLastprivateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_shared:
C = OMPSharedClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_reduction: {
unsigned N = Record.readInt();
auto Modifier = Record.readEnum<OpenMPReductionClauseModifier>();
C = OMPReductionClause::CreateEmpty(Context, N, Modifier);
break;
}
case llvm::omp::OMPC_task_reduction:
C = OMPTaskReductionClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_in_reduction:
C = OMPInReductionClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_linear:
C = OMPLinearClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_aligned:
C = OMPAlignedClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_copyin:
C = OMPCopyinClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_copyprivate:
C = OMPCopyprivateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_flush:
C = OMPFlushClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_depobj:
C = OMPDepobjClause::CreateEmpty(Context);
break;
case llvm::omp::OMPC_depend: {
unsigned NumVars = Record.readInt();
unsigned NumLoops = Record.readInt();
C = OMPDependClause::CreateEmpty(Context, NumVars, NumLoops);
break;
}
case llvm::omp::OMPC_device:
C = new (Context) OMPDeviceClause();
break;
case llvm::omp::OMPC_map: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPMapClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_num_teams:
C = new (Context) OMPNumTeamsClause();
break;
case llvm::omp::OMPC_thread_limit:
C = new (Context) OMPThreadLimitClause();
break;
case llvm::omp::OMPC_priority:
C = new (Context) OMPPriorityClause();
break;
case llvm::omp::OMPC_grainsize:
C = new (Context) OMPGrainsizeClause();
break;
case llvm::omp::OMPC_num_tasks:
C = new (Context) OMPNumTasksClause();
break;
case llvm::omp::OMPC_hint:
C = new (Context) OMPHintClause();
break;
case llvm::omp::OMPC_dist_schedule:
C = new (Context) OMPDistScheduleClause();
break;
case llvm::omp::OMPC_defaultmap:
C = new (Context) OMPDefaultmapClause();
break;
case llvm::omp::OMPC_to: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPToClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_from: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPFromClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_use_device_ptr: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPUseDevicePtrClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_use_device_addr: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPUseDeviceAddrClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_is_device_ptr: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPIsDevicePtrClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_allocate:
C = OMPAllocateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_nontemporal:
C = OMPNontemporalClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_inclusive:
C = OMPInclusiveClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_exclusive:
C = OMPExclusiveClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_order:
C = new (Context) OMPOrderClause();
break;
case llvm::omp::OMPC_init:
C = OMPInitClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_use:
C = new (Context) OMPUseClause();
break;
case llvm::omp::OMPC_destroy:
C = new (Context) OMPDestroyClause();
break;
case llvm::omp::OMPC_novariants:
C = new (Context) OMPNovariantsClause();
break;
case llvm::omp::OMPC_nocontext:
C = new (Context) OMPNocontextClause();
break;
case llvm::omp::OMPC_detach:
C = new (Context) OMPDetachClause();
break;
case llvm::omp::OMPC_uses_allocators:
C = OMPUsesAllocatorsClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_affinity:
C = OMPAffinityClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_filter:
C = new (Context) OMPFilterClause();
break;
#define OMP_CLAUSE_NO_CLASS(Enum, Str) \
case llvm::omp::Enum: \
break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
default:
break;
}
assert(C && "Unknown OMPClause type");
Visit(C);
C->setLocStart(Record.readSourceLocation());
C->setLocEnd(Record.readSourceLocation());
return C;
}
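// Editor's note: illustrative sketch, not part of this diff. readClause()
// above follows a two-step deserialization pattern: a switch on the clause
// kind allocates an empty node of the right dynamic type, then a visitor
// reads that node's fields from the record. The miniature version below uses
// hypothetical names (NodeKind, Reader) and only standard C++.
#include <cassert>
#include <memory>
#include <vector>

enum class NodeKind { A, B };

struct Node {
  NodeKind Kind;
  int Payload = 0;
  explicit Node(NodeKind K) : Kind(K) {}
};

struct Reader {
  std::vector<int> Record;
  size_t Idx = 0;

  int readInt() { return Record[Idx++]; }

  std::unique_ptr<Node> readNode() {
    std::unique_ptr<Node> N;
    switch (static_cast<NodeKind>(readInt())) { // 1) pick the dynamic type
    case NodeKind::A:
      N = std::make_unique<Node>(NodeKind::A);
      break;
    case NodeKind::B:
      N = std::make_unique<Node>(NodeKind::B);
      break;
    }
    assert(N && "unknown node kind");
    N->Payload = readInt();                     // 2) fill in the fields
    return N;
  }
};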
void OMPClauseReader::VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C) {
C->setPreInitStmt(Record.readSubStmt(),
static_cast<OpenMPDirectiveKind>(Record.readInt()));
}
void OMPClauseReader::VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C) {
VisitOMPClauseWithPreInit(C);
C->setPostUpdateExpr(Record.readSubExpr());
}
void OMPClauseReader::VisitOMPIfClause(OMPIfClause *C) {
VisitOMPClauseWithPreInit(C);
C->setNameModifier(static_cast<OpenMPDirectiveKind>(Record.readInt()));
C->setNameModifierLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
C->setCondition(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPFinalClause(OMPFinalClause *C) {
VisitOMPClauseWithPreInit(C);
C->setCondition(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) {
VisitOMPClauseWithPreInit(C);
C->setNumThreads(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPSafelenClause(OMPSafelenClause *C) {
C->setSafelen(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPSimdlenClause(OMPSimdlenClause *C) {
C->setSimdlen(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPSizesClause(OMPSizesClause *C) {
for (Expr *&E : C->getSizesRefs())
E = Record.readSubExpr();
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPFullClause(OMPFullClause *C) {}
void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) {
C->setFactor(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
C->setAllocator(Record.readExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPCollapseClause(OMPCollapseClause *C) {
C->setNumForLoops(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) {
C->setDefaultKind(static_cast<llvm::omp::DefaultKind>(Record.readInt()));
C->setLParenLoc(Record.readSourceLocation());
C->setDefaultKindKwLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPProcBindClause(OMPProcBindClause *C) {
C->setProcBindKind(static_cast<llvm::omp::ProcBindKind>(Record.readInt()));
C->setLParenLoc(Record.readSourceLocation());
C->setProcBindKindKwLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPScheduleClause(OMPScheduleClause *C) {
VisitOMPClauseWithPreInit(C);
C->setScheduleKind(
static_cast<OpenMPScheduleClauseKind>(Record.readInt()));
C->setFirstScheduleModifier(
static_cast<OpenMPScheduleClauseModifier>(Record.readInt()));
C->setSecondScheduleModifier(
static_cast<OpenMPScheduleClauseModifier>(Record.readInt()));
C->setChunkSize(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
C->setFirstScheduleModifierLoc(Record.readSourceLocation());
C->setSecondScheduleModifierLoc(Record.readSourceLocation());
C->setScheduleKindLoc(Record.readSourceLocation());
C->setCommaLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPOrderedClause(OMPOrderedClause *C) {
C->setNumForLoops(Record.readSubExpr());
for (unsigned I = 0, E = C->NumberOfLoops; I < E; ++I)
C->setLoopNumIterations(I, Record.readSubExpr());
for (unsigned I = 0, E = C->NumberOfLoops; I < E; ++I)
C->setLoopCounter(I, Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDetachClause(OMPDetachClause *C) {
C->setEventHandler(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPNowaitClause(OMPNowaitClause *) {}
void OMPClauseReader::VisitOMPUntiedClause(OMPUntiedClause *) {}
void OMPClauseReader::VisitOMPMergeableClause(OMPMergeableClause *) {}
void OMPClauseReader::VisitOMPReadClause(OMPReadClause *) {}
void OMPClauseReader::VisitOMPWriteClause(OMPWriteClause *) {}
void OMPClauseReader::VisitOMPUpdateClause(OMPUpdateClause *C) {
if (C->isExtended()) {
C->setLParenLoc(Record.readSourceLocation());
C->setArgumentLoc(Record.readSourceLocation());
C->setDependencyKind(Record.readEnum<OpenMPDependClauseKind>());
}
}
void OMPClauseReader::VisitOMPCaptureClause(OMPCaptureClause *) {}
void OMPClauseReader::VisitOMPSeqCstClause(OMPSeqCstClause *) {}
void OMPClauseReader::VisitOMPAcqRelClause(OMPAcqRelClause *) {}
void OMPClauseReader::VisitOMPAcquireClause(OMPAcquireClause *) {}
void OMPClauseReader::VisitOMPReleaseClause(OMPReleaseClause *) {}
void OMPClauseReader::VisitOMPRelaxedClause(OMPRelaxedClause *) {}
void OMPClauseReader::VisitOMPThreadsClause(OMPThreadsClause *) {}
void OMPClauseReader::VisitOMPSIMDClause(OMPSIMDClause *) {}
void OMPClauseReader::VisitOMPNogroupClause(OMPNogroupClause *) {}
void OMPClauseReader::VisitOMPInitClause(OMPInitClause *C) {
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
C->setIsTarget(Record.readBool());
C->setIsTargetSync(Record.readBool());
C->setLParenLoc(Record.readSourceLocation());
C->setVarLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPUseClause(OMPUseClause *C) {
C->setInteropVar(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
C->setVarLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDestroyClause(OMPDestroyClause *C) {
C->setInteropVar(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
C->setVarLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPNovariantsClause(OMPNovariantsClause *C) {
VisitOMPClauseWithPreInit(C);
C->setCondition(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPNocontextClause(OMPNocontextClause *C) {
VisitOMPClauseWithPreInit(C);
C->setCondition(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPUnifiedAddressClause(OMPUnifiedAddressClause *) {}
void OMPClauseReader::VisitOMPUnifiedSharedMemoryClause(
OMPUnifiedSharedMemoryClause *) {}
void OMPClauseReader::VisitOMPReverseOffloadClause(OMPReverseOffloadClause *) {}
void
OMPClauseReader::VisitOMPDynamicAllocatorsClause(OMPDynamicAllocatorsClause *) {
}
void OMPClauseReader::VisitOMPAtomicDefaultMemOrderClause(
OMPAtomicDefaultMemOrderClause *C) {
C->setAtomicDefaultMemOrderKind(
static_cast<OpenMPAtomicDefaultMemOrderClauseKind>(Record.readInt()));
C->setLParenLoc(Record.readSourceLocation());
C->setAtomicDefaultMemOrderKindKwLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPPrivateClause(OMPPrivateClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateCopies(Vars);
}
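// Editor's note: illustrative sketch, not part of this diff. Many of the
// clause readers above repeat one idiom: reserve a vector of varlist_size()
// elements, read that many sub-expressions, pass the vector to a setter,
// then clear it and repeat for the next list. A generic helper for that
// idiom could look like the sketch below; readListInto is a hypothetical
// name, not a clang API.
#include <functional>
#include <vector>

template <typename T>
static std::vector<T> readListInto(unsigned N,
                                   const std::function<T()> &ReadOne) {
  std::vector<T> Items;
  Items.reserve(N);
  for (unsigned I = 0; I != N; ++I)
    Items.push_back(ReadOne());
  return Items;
}

// Hypothetical usage against the readers above:
//   C->setVarRefs(readListInto<Expr *>(NumVars,
//                                      [&] { return Record.readSubExpr(); }));
//   C->setPrivateCopies(readListInto<Expr *>(NumVars,
//                                            [&] { return Record.readSubExpr(); }));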
void OMPClauseReader::VisitOMPFirstprivateClause(OMPFirstprivateClause *C) {
VisitOMPClauseWithPreInit(C);
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateCopies(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInits(Vars);
}
void OMPClauseReader::VisitOMPLastprivateClause(OMPLastprivateClause *C) {
VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Record.readSourceLocation());
C->setKind(Record.readEnum<OpenMPLastprivateModifier>());
C->setKindLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateCopies(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setSourceExprs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setDestinationExprs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setAssignmentOps(Vars);
}
void OMPClauseReader::VisitOMPSharedClause(OMPSharedClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) {
VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Record.readSourceLocation());
C->setModifierLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
NestedNameSpecifierLoc NNSL = Record.readNestedNameSpecifierLoc();
DeclarationNameInfo DNI = Record.readDeclarationNameInfo();
C->setQualifierLoc(NNSL);
C->setNameInfo(DNI);
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivates(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setLHSExprs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setRHSExprs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setReductionOps(Vars);
if (C->getModifier() == OMPC_REDUCTION_inscan) {
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInscanCopyOps(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInscanCopyArrayTemps(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInscanCopyArrayElems(Vars);
}
}
void OMPClauseReader::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) {
VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
NestedNameSpecifierLoc NNSL = Record.readNestedNameSpecifierLoc();
DeclarationNameInfo DNI = Record.readDeclarationNameInfo();
C->setQualifierLoc(NNSL);
C->setNameInfo(DNI);
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setPrivates(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setLHSExprs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setRHSExprs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setReductionOps(Vars);
}
void OMPClauseReader::VisitOMPInReductionClause(OMPInReductionClause *C) {
VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
NestedNameSpecifierLoc NNSL = Record.readNestedNameSpecifierLoc();
DeclarationNameInfo DNI = Record.readDeclarationNameInfo();
C->setQualifierLoc(NNSL);
C->setNameInfo(DNI);
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setPrivates(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setLHSExprs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setRHSExprs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setReductionOps(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setTaskgroupDescriptors(Vars);
}
void OMPClauseReader::VisitOMPLinearClause(OMPLinearClause *C) {
VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
C->setModifier(static_cast<OpenMPLinearClauseKind>(Record.readInt()));
C->setModifierLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivates(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInits(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setUpdates(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setFinals(Vars);
C->setStep(Record.readSubExpr());
C->setCalcStep(Record.readSubExpr());
Vars.clear();
for (unsigned I = 0; I != NumVars + 1; ++I)
Vars.push_back(Record.readSubExpr());
C->setUsedExprs(Vars);
}
void OMPClauseReader::VisitOMPAlignedClause(OMPAlignedClause *C) {
C->setLParenLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
C->setAlignment(Record.readSubExpr());
}
void OMPClauseReader::VisitOMPCopyinClause(OMPCopyinClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Exprs;
Exprs.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setVarRefs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setSourceExprs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setDestinationExprs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setAssignmentOps(Exprs);
}
void OMPClauseReader::VisitOMPCopyprivateClause(OMPCopyprivateClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Exprs;
Exprs.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setVarRefs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setSourceExprs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setDestinationExprs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setAssignmentOps(Exprs);
}
void OMPClauseReader::VisitOMPFlushClause(OMPFlushClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPDepobjClause(OMPDepobjClause *C) {
C->setDepobj(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDependClause(OMPDependClause *C) {
C->setLParenLoc(Record.readSourceLocation());
C->setModifier(Record.readSubExpr());
C->setDependencyKind(
static_cast<OpenMPDependClauseKind>(Record.readInt()));
C->setDependencyLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I)
C->setLoopData(I, Record.readSubExpr());
}
void OMPClauseReader::VisitOMPDeviceClause(OMPDeviceClause *C) {
VisitOMPClauseWithPreInit(C);
C->setModifier(Record.readEnum<OpenMPDeviceClauseModifier>());
C->setDevice(Record.readSubExpr());
C->setModifierLoc(Record.readSourceLocation());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPMapClause(OMPMapClause *C) {
C->setLParenLoc(Record.readSourceLocation());
for (unsigned I = 0; I < NumberOfOMPMapClauseModifiers; ++I) {
C->setMapTypeModifier(
I, static_cast<OpenMPMapModifierKind>(Record.readInt()));
C->setMapTypeModifierLoc(I, Record.readSourceLocation());
}
C->setMapperQualifierLoc(Record.readNestedNameSpecifierLoc());
C->setMapperIdInfo(Record.readDeclarationNameInfo());
C->setMapType(
static_cast<OpenMPMapClauseKind>(Record.readInt()));
C->setMapLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readExpr());
C->setVarRefs(Vars);
SmallVector<Expr *, 16> UDMappers;
UDMappers.reserve(NumVars);
for (unsigned I = 0; I < NumVars; ++I)
UDMappers.push_back(Record.readExpr());
C->setUDMapperRefs(UDMappers);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExprPr = Record.readExpr();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExprPr, AssociatedDecl,
/*IsNonContiguous=*/false);
}
C->setComponents(Components, ListSizes);
}
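// Editor's note: illustrative sketch, not part of this diff. The mappable-
// expression clauses above (map, to, from, use_device_ptr, ...) serialize
// their component lists flattened: a vector of per-list sizes followed by all
// components in order. The sketch below rebuilds the nested structure from
// that flat layout; names are hypothetical and the component type is
// simplified to int.
#include <vector>

static std::vector<std::vector<int>>
rebuildLists(const std::vector<unsigned> &ListSizes,
             const std::vector<int> &FlatComponents) {
  std::vector<std::vector<int>> Lists;
  Lists.reserve(ListSizes.size());
  size_t Pos = 0;
  for (unsigned Size : ListSizes) {
    // Each list owns the next Size components of the flat array.
    Lists.emplace_back(FlatComponents.begin() + Pos,
                       FlatComponents.begin() + Pos + Size);
    Pos += Size;
  }
  return Lists;
}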
void OMPClauseReader::VisitOMPAllocateClause(OMPAllocateClause *C) {
C->setLParenLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
C->setAllocator(Record.readSubExpr());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPNumTeamsClause(OMPNumTeamsClause *C) {
VisitOMPClauseWithPreInit(C);
C->setNumTeams(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPThreadLimitClause(OMPThreadLimitClause *C) {
VisitOMPClauseWithPreInit(C);
C->setThreadLimit(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPPriorityClause(OMPPriorityClause *C) {
VisitOMPClauseWithPreInit(C);
C->setPriority(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPGrainsizeClause(OMPGrainsizeClause *C) {
VisitOMPClauseWithPreInit(C);
C->setGrainsize(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPNumTasksClause(OMPNumTasksClause *C) {
VisitOMPClauseWithPreInit(C);
C->setNumTasks(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPHintClause(OMPHintClause *C) {
C->setHint(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDistScheduleClause(OMPDistScheduleClause *C) {
VisitOMPClauseWithPreInit(C);
C->setDistScheduleKind(
static_cast<OpenMPDistScheduleClauseKind>(Record.readInt()));
C->setChunkSize(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
C->setDistScheduleKindLoc(Record.readSourceLocation());
C->setCommaLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDefaultmapClause(OMPDefaultmapClause *C) {
C->setDefaultmapKind(
static_cast<OpenMPDefaultmapClauseKind>(Record.readInt()));
C->setDefaultmapModifier(
static_cast<OpenMPDefaultmapClauseModifier>(Record.readInt()));
C->setLParenLoc(Record.readSourceLocation());
C->setDefaultmapModifierLoc(Record.readSourceLocation());
C->setDefaultmapKindLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPToClause(OMPToClause *C) {
C->setLParenLoc(Record.readSourceLocation());
for (unsigned I = 0; I < NumberOfOMPMotionModifiers; ++I) {
C->setMotionModifier(
I, static_cast<OpenMPMotionModifierKind>(Record.readInt()));
C->setMotionModifierLoc(I, Record.readSourceLocation());
}
C->setMapperQualifierLoc(Record.readNestedNameSpecifierLoc());
C->setMapperIdInfo(Record.readDeclarationNameInfo());
C->setColonLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
SmallVector<Expr *, 16> UDMappers;
UDMappers.reserve(NumVars);
for (unsigned I = 0; I < NumVars; ++I)
UDMappers.push_back(Record.readSubExpr());
C->setUDMapperRefs(UDMappers);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExprPr = Record.readSubExpr();
bool IsNonContiguous = Record.readBool();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExprPr, AssociatedDecl, IsNonContiguous);
}
C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPFromClause(OMPFromClause *C) {
C->setLParenLoc(Record.readSourceLocation());
for (unsigned I = 0; I < NumberOfOMPMotionModifiers; ++I) {
C->setMotionModifier(
I, static_cast<OpenMPMotionModifierKind>(Record.readInt()));
C->setMotionModifierLoc(I, Record.readSourceLocation());
}
C->setMapperQualifierLoc(Record.readNestedNameSpecifierLoc());
C->setMapperIdInfo(Record.readDeclarationNameInfo());
C->setColonLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
SmallVector<Expr *, 16> UDMappers;
UDMappers.reserve(NumVars);
for (unsigned I = 0; I < NumVars; ++I)
UDMappers.push_back(Record.readSubExpr());
C->setUDMapperRefs(UDMappers);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExprPr = Record.readSubExpr();
bool IsNonContiguous = Record.readBool();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExprPr, AssociatedDecl, IsNonContiguous);
}
C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) {
C->setLParenLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateCopies(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInits(Vars);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
auto *AssociatedExprPr = Record.readSubExpr();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExprPr, AssociatedDecl,
/*IsNonContiguous=*/false);
}
C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPUseDeviceAddrClause(OMPUseDeviceAddrClause *C) {
C->setLParenLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExpr = Record.readSubExpr();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExpr, AssociatedDecl,
/*IsNonContiguous*/ false);
}
C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) {
C->setLParenLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExpr = Record.readSubExpr();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExpr, AssociatedDecl,
/*IsNonContiguous=*/false);
}
C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPNontemporalClause(OMPNontemporalClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateRefs(Vars);
}
void OMPClauseReader::VisitOMPInclusiveClause(OMPInclusiveClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPExclusiveClause(OMPExclusiveClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPUsesAllocatorsClause(OMPUsesAllocatorsClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumOfAllocators = C->getNumberOfAllocators();
SmallVector<OMPUsesAllocatorsClause::Data, 4> Data;
Data.reserve(NumOfAllocators);
for (unsigned I = 0; I != NumOfAllocators; ++I) {
OMPUsesAllocatorsClause::Data &D = Data.emplace_back();
D.Allocator = Record.readSubExpr();
D.AllocatorTraits = Record.readSubExpr();
D.LParenLoc = Record.readSourceLocation();
D.RParenLoc = Record.readSourceLocation();
}
C->setAllocatorsData(Data);
}
void OMPClauseReader::VisitOMPAffinityClause(OMPAffinityClause *C) {
C->setLParenLoc(Record.readSourceLocation());
C->setModifier(Record.readSubExpr());
C->setColonLoc(Record.readSourceLocation());
unsigned NumOfLocators = C->varlist_size();
SmallVector<Expr *, 4> Locators;
Locators.reserve(NumOfLocators);
for (unsigned I = 0; I != NumOfLocators; ++I)
Locators.push_back(Record.readSubExpr());
C->setVarRefs(Locators);
}
void OMPClauseReader::VisitOMPOrderClause(OMPOrderClause *C) {
C->setKind(Record.readEnum<OpenMPOrderClauseKind>());
C->setLParenLoc(Record.readSourceLocation());
C->setKindKwLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPFilterClause(OMPFilterClause *C) {
VisitOMPClauseWithPreInit(C);
C->setThreadID(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
OMPTraitInfo *ASTRecordReader::readOMPTraitInfo() {
OMPTraitInfo &TI = getContext().getNewOMPTraitInfo();
TI.Sets.resize(readUInt32());
for (auto &Set : TI.Sets) {
Set.Kind = readEnum<llvm::omp::TraitSet>();
Set.Selectors.resize(readUInt32());
for (auto &Selector : Set.Selectors) {
Selector.Kind = readEnum<llvm::omp::TraitSelector>();
Selector.ScoreOrCondition = nullptr;
if (readBool())
Selector.ScoreOrCondition = readExprRef();
Selector.Properties.resize(readUInt32());
for (auto &Property : Selector.Properties)
Property.Kind = readEnum<llvm::omp::TraitProperty>();
}
}
return &TI;
}
void ASTRecordReader::readOMPChildren(OMPChildren *Data) {
if (!Data)
return;
if (Reader->ReadingKind == ASTReader::Read_Stmt) {
// Skip NumClauses, NumChildren and HasAssociatedStmt fields.
skipInts(3);
}
SmallVector<OMPClause *, 4> Clauses(Data->getNumClauses());
for (unsigned I = 0, E = Data->getNumClauses(); I < E; ++I)
Clauses[I] = readOMPClause();
Data->setClauses(Clauses);
if (Data->hasAssociatedStmt())
Data->setAssociatedStmt(readStmt());
for (unsigned I = 0, E = Data->getNumChildren(); I < E; ++I)
Data->getChildren()[I] = readStmt();
}
diff --git a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c
index 518447e3e422..2e91f16a2158 100644
--- a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -1,1208 +1,1215 @@
/*===- InstrProfilingFile.c - Write instrumentation to a file -------------===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/
#if !defined(__Fuchsia__)
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
/* For _alloca. */
#include <malloc.h>
#endif
#if defined(_WIN32)
#include "WindowsMMap.h"
/* For _chsize_s */
#include <io.h>
#include <process.h>
#else
#include <sys/file.h>
#include <sys/mman.h>
#include <unistd.h>
#if defined(__linux__)
#include <sys/types.h>
#endif
#endif
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
#include "InstrProfilingPort.h"
#include "InstrProfilingUtil.h"
/* Specifies where the profile name comes from.
* The order of the enumerators defines their
* precedence; reordering them may change
* runtime behavior. */
typedef enum ProfileNameSpecifier {
PNS_unknown = 0,
PNS_default,
PNS_command_line,
PNS_environment,
PNS_runtime_api
} ProfileNameSpecifier;
static const char *getPNSStr(ProfileNameSpecifier PNS) {
switch (PNS) {
case PNS_default:
return "default setting";
case PNS_command_line:
return "command line";
case PNS_environment:
return "environment variable";
case PNS_runtime_api:
return "runtime API";
default:
return "Unknown";
}
}
#define MAX_PID_SIZE 16
/* Data structure holding the result of parsed filename pattern. */
typedef struct lprofFilename {
/* File name string possibly with %p or %h specifiers. */
const char *FilenamePat;
/* A flag indicating if FilenamePat's memory is allocated
* by runtime. */
unsigned OwnsFilenamePat;
const char *ProfilePathPrefix;
char PidChars[MAX_PID_SIZE];
char *TmpDir;
char Hostname[COMPILER_RT_MAX_HOSTLEN];
unsigned NumPids;
unsigned NumHosts;
/* When in-process merging is enabled, this parameter specifies
* the total number of profile data files shared by all the processes
* spawned from the same binary. By default the value is 1. If merging
* is not enabled, its value should be 0. This parameter is specified
* by the %[0-9]m specifier. For instance %2m enables merging using
* 2 profile data files. %1m is equivalent to %m. The %m specifier
* may appear only once, at the end of the name pattern. */
unsigned MergePoolSize;
ProfileNameSpecifier PNS;
} lprofFilename;
static lprofFilename lprofCurFilename = {0, 0, 0, {0}, NULL,
{0}, 0, 0, 0, PNS_unknown};
static int ProfileMergeRequested = 0;
static int isProfileMergeRequested() { return ProfileMergeRequested; }
static void setProfileMergeRequested(int EnableMerge) {
ProfileMergeRequested = EnableMerge;
}
static FILE *ProfileFile = NULL;
static FILE *getProfileFile() { return ProfileFile; }
static void setProfileFile(FILE *File) { ProfileFile = File; }
COMPILER_RT_VISIBILITY void __llvm_profile_set_file_object(FILE *File,
int EnableMerge) {
if (__llvm_profile_is_continuous_mode_enabled()) {
PROF_WARN("__llvm_profile_set_file_object(fd=%d) not supported, because "
"continuous sync mode (%%c) is enabled",
fileno(File));
return;
}
setProfileFile(File);
setProfileMergeRequested(EnableMerge);
}
static int getCurFilenameLength();
static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf);
static unsigned doMerging() {
return lprofCurFilename.MergePoolSize || isProfileMergeRequested();
}
/* Return 1 if there is an error, otherwise return 0. */
static uint32_t fileWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs,
uint32_t NumIOVecs) {
uint32_t I;
FILE *File = (FILE *)This->WriterCtx;
char Zeroes[sizeof(uint64_t)] = {0};
for (I = 0; I < NumIOVecs; I++) {
if (IOVecs[I].Data) {
if (fwrite(IOVecs[I].Data, IOVecs[I].ElmSize, IOVecs[I].NumElm, File) !=
IOVecs[I].NumElm)
return 1;
} else if (IOVecs[I].UseZeroPadding) {
size_t BytesToWrite = IOVecs[I].ElmSize * IOVecs[I].NumElm;
while (BytesToWrite > 0) {
size_t PartialWriteLen =
(sizeof(uint64_t) > BytesToWrite) ? BytesToWrite : sizeof(uint64_t);
if (fwrite(Zeroes, sizeof(uint8_t), PartialWriteLen, File) !=
PartialWriteLen) {
return 1;
}
BytesToWrite -= PartialWriteLen;
}
} else {
if (fseek(File, IOVecs[I].ElmSize * IOVecs[I].NumElm, SEEK_CUR) == -1)
return 1;
}
}
return 0;
}
/* TODO: make the buffer size controllable by an internal option, and let the
compiler pass the size to the runtime via a variable. */
static uint32_t orderFileWriter(FILE *File, const uint32_t *DataStart) {
if (fwrite(DataStart, sizeof(uint32_t), INSTR_ORDER_FILE_BUFFER_SIZE, File) !=
INSTR_ORDER_FILE_BUFFER_SIZE)
return 1;
return 0;
}
static void initFileWriter(ProfDataWriter *This, FILE *File) {
This->Write = fileWriter;
This->WriterCtx = File;
}
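/* Illustrative sketch, not part of the upstream diff: lprofWriteData() only
 * needs a ProfDataWriter whose Write callback consumes ProfDataIOVec entries,
 * so alternative sinks can follow the same conventions as fileWriter and
 * initFileWriter above. The hypothetical writer below merely counts the bytes
 * it is asked to emit. */
typedef struct lprofByteCountCtx {
  uint64_t TotalBytes;
} lprofByteCountCtx;
static uint32_t byteCountWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs,
                                uint32_t NumIOVecs) {
  lprofByteCountCtx *Ctx = (lprofByteCountCtx *)This->WriterCtx;
  uint32_t I;
  for (I = 0; I < NumIOVecs; I++)
    /* Data payloads, zero padding and skipped regions all advance the output
     * by ElmSize * NumElm bytes. */
    Ctx->TotalBytes += IOVecs[I].ElmSize * IOVecs[I].NumElm;
  return 0; /* Zero signals success, matching fileWriter's convention. */
}
static void initByteCountWriter(ProfDataWriter *This, lprofByteCountCtx *Ctx) {
  This->Write = byteCountWriter;
  This->WriterCtx = Ctx;
}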
COMPILER_RT_VISIBILITY ProfBufferIO *
lprofCreateBufferIOInternal(void *File, uint32_t BufferSz) {
FreeHook = &free;
DynamicBufferIOBuffer = (uint8_t *)calloc(BufferSz, 1);
VPBufferSize = BufferSz;
ProfDataWriter *fileWriter =
(ProfDataWriter *)calloc(sizeof(ProfDataWriter), 1);
initFileWriter(fileWriter, File);
ProfBufferIO *IO = lprofCreateBufferIO(fileWriter);
IO->OwnFileWriter = 1;
return IO;
}
static void setupIOBuffer() {
const char *BufferSzStr = 0;
BufferSzStr = getenv("LLVM_VP_BUFFER_SIZE");
if (BufferSzStr && BufferSzStr[0]) {
VPBufferSize = atoi(BufferSzStr);
DynamicBufferIOBuffer = (uint8_t *)calloc(VPBufferSize, 1);
}
}
/* Get the size of the profile file. If there are any errors, print the
* message under the assumption that the profile is being read for merging
* purposes, and return -1. Otherwise return the file size in the inout param
* \p ProfileFileSize. */
static int getProfileFileSizeForMerging(FILE *ProfileFile,
uint64_t *ProfileFileSize) {
if (fseek(ProfileFile, 0L, SEEK_END) == -1) {
PROF_ERR("Unable to merge profile data, unable to get size: %s\n",
strerror(errno));
return -1;
}
*ProfileFileSize = ftell(ProfileFile);
/* Restore file offset. */
if (fseek(ProfileFile, 0L, SEEK_SET) == -1) {
PROF_ERR("Unable to merge profile data, unable to rewind: %s\n",
strerror(errno));
return -1;
}
if (*ProfileFileSize > 0 &&
*ProfileFileSize < sizeof(__llvm_profile_header)) {
PROF_WARN("Unable to merge profile data: %s\n",
"source profile file is too small.");
return -1;
}
return 0;
}
/* mmap() \p ProfileFile for profile merging purposes, assuming that an
* exclusive lock is held on the file and that \p ProfileFileSize is the
* length of the file. Return the mmap'd buffer in the inout variable
* \p ProfileBuffer. Returns -1 on failure. On success, the caller is
* responsible for unmapping the mmap'd buffer in \p ProfileBuffer. */
static int mmapProfileForMerging(FILE *ProfileFile, uint64_t ProfileFileSize,
char **ProfileBuffer) {
*ProfileBuffer = mmap(NULL, ProfileFileSize, PROT_READ, MAP_SHARED | MAP_FILE,
fileno(ProfileFile), 0);
if (*ProfileBuffer == MAP_FAILED) {
PROF_ERR("Unable to merge profile data, mmap failed: %s\n",
strerror(errno));
return -1;
}
if (__llvm_profile_check_compatibility(*ProfileBuffer, ProfileFileSize)) {
(void)munmap(*ProfileBuffer, ProfileFileSize);
PROF_WARN("Unable to merge profile data: %s\n",
"source profile file is not compatible.");
return -1;
}
return 0;
}
/* Read profile data in \c ProfileFile and merge with in-memory
profile counters. Returns -1 if there is a fatal error, otherwise
0 is returned. Returning 0 does not mean the merge was actually
performed; if it was, *MergeDone is set to 1.
*/
static int doProfileMerging(FILE *ProfileFile, int *MergeDone) {
uint64_t ProfileFileSize;
char *ProfileBuffer;
/* Get the size of the profile on disk. */
if (getProfileFileSizeForMerging(ProfileFile, &ProfileFileSize) == -1)
return -1;
/* Nothing to merge. */
if (!ProfileFileSize)
return 0;
/* mmap() the profile and check that it is compatible with the data in
* the current image. */
if (mmapProfileForMerging(ProfileFile, ProfileFileSize, &ProfileBuffer) == -1)
return -1;
/* Now start merging */
if (__llvm_profile_merge_from_buffer(ProfileBuffer, ProfileFileSize)) {
PROF_ERR("%s\n", "Invalid profile data to merge");
(void)munmap(ProfileBuffer, ProfileFileSize);
return -1;
}
// Truncate the file in case value profile merging did not happen, to
// avoid leaving garbage data at the end of the profile file.
(void)COMPILER_RT_FTRUNCATE(ProfileFile,
__llvm_profile_get_size_for_buffer());
(void)munmap(ProfileBuffer, ProfileFileSize);
*MergeDone = 1;
return 0;
}
/* Create the directory holding the file, if needed. */
static void createProfileDir(const char *Filename) {
size_t Length = strlen(Filename);
if (lprofFindFirstDirSeparator(Filename)) {
char *Copy = (char *)COMPILER_RT_ALLOCA(Length + 1);
strncpy(Copy, Filename, Length + 1);
__llvm_profile_recursive_mkdir(Copy);
}
}
/* Open the profile data for merging. It opens the file in r+b mode with
* file locking. If the file has content which is compatible with the
* current process, it also reads the profile data in the file and merges
* it with the in-memory counters. After the profile data is merged in memory,
* the original profile data is truncated and ready for the profile
* dumper. With profile merging enabled, each executable as well as any of
* its instrumented shared libraries dumps profile data into its own data file.
*/
static FILE *openFileForMerging(const char *ProfileFileName, int *MergeDone) {
FILE *ProfileFile = NULL;
int rc;
ProfileFile = getProfileFile();
if (ProfileFile) {
lprofLockFileHandle(ProfileFile);
} else {
createProfileDir(ProfileFileName);
ProfileFile = lprofOpenFileEx(ProfileFileName);
}
if (!ProfileFile)
return NULL;
rc = doProfileMerging(ProfileFile, MergeDone);
if (rc || (!*MergeDone && COMPILER_RT_FTRUNCATE(ProfileFile, 0L)) ||
fseek(ProfileFile, 0L, SEEK_SET) == -1) {
PROF_ERR("Profile Merging of file %s failed: %s\n", ProfileFileName,
strerror(errno));
fclose(ProfileFile);
return NULL;
}
return ProfileFile;
}
static FILE *getFileObject(const char *OutputName) {
FILE *File;
File = getProfileFile();
if (File != NULL) {
return File;
}
return fopen(OutputName, "ab");
}
/* Write profile data to file \c OutputName. */
static int writeFile(const char *OutputName) {
int RetVal;
FILE *OutputFile;
int MergeDone = 0;
VPMergeHook = &lprofMergeValueProfData;
if (doMerging())
OutputFile = openFileForMerging(OutputName, &MergeDone);
else
OutputFile = getFileObject(OutputName);
if (!OutputFile)
return -1;
FreeHook = &free;
setupIOBuffer();
ProfDataWriter fileWriter;
initFileWriter(&fileWriter, OutputFile);
RetVal = lprofWriteData(&fileWriter, lprofGetVPDataReader(), MergeDone);
if (OutputFile == getProfileFile()) {
fflush(OutputFile);
if (doMerging()) {
lprofUnlockFileHandle(OutputFile);
}
} else {
fclose(OutputFile);
}
return RetVal;
}
/* Write order data to file \c OutputName. */
static int writeOrderFile(const char *OutputName) {
int RetVal;
FILE *OutputFile;
OutputFile = fopen(OutputName, "w");
if (!OutputFile) {
PROF_WARN("can't open file with mode ab: %s\n", OutputName);
return -1;
}
FreeHook = &free;
setupIOBuffer();
const uint32_t *DataBegin = __llvm_profile_begin_orderfile();
RetVal = orderFileWriter(OutputFile, DataBegin);
fclose(OutputFile);
return RetVal;
}
#define LPROF_INIT_ONCE_ENV "__LLVM_PROFILE_RT_INIT_ONCE"
static void truncateCurrentFile(void) {
const char *Filename;
char *FilenameBuf;
FILE *File;
int Length;
Length = getCurFilenameLength();
FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
Filename = getCurFilename(FilenameBuf, 0);
if (!Filename)
return;
/* Only create the profile directory and truncate an existing profile once.
* In continuous mode, this is necessary, as the profile is written-to by the
* runtime initializer. */
int initialized = getenv(LPROF_INIT_ONCE_ENV) != NULL;
if (initialized)
return;
#if defined(_WIN32)
_putenv(LPROF_INIT_ONCE_ENV "=" LPROF_INIT_ONCE_ENV);
#else
setenv(LPROF_INIT_ONCE_ENV, LPROF_INIT_ONCE_ENV, 1);
#endif
/* Create the profile dir (even if online merging is enabled), so that
* the profile file can be set up if continuous mode is enabled. */
createProfileDir(Filename);
/* Bypass file truncation to allow online raw profile merging. */
if (lprofCurFilename.MergePoolSize)
return;
/* Truncate the file. Later we'll reopen and append. */
File = fopen(Filename, "w");
if (!File)
return;
fclose(File);
}
// TODO: Move these functions into InstrProfilingPlatform* files.
#if defined(__APPLE__)
static void assertIsZero(int *i) {
if (*i)
PROF_WARN("Expected flag to be 0, but got: %d\n", *i);
}
/* Write a partial profile to \p Filename, which is required to be backed by
* the open file object \p File. */
static int writeProfileWithFileObject(const char *Filename, FILE *File) {
setProfileFile(File);
int rc = writeFile(Filename);
if (rc)
PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
setProfileFile(NULL);
return rc;
}
/* Unlock the profile \p File and clear the unlock flag. */
static void unlockProfile(int *ProfileRequiresUnlock, FILE *File) {
if (!*ProfileRequiresUnlock) {
PROF_WARN("%s", "Expected to require profile unlock\n");
}
lprofUnlockFileHandle(File);
*ProfileRequiresUnlock = 0;
}
static void initializeProfileForContinuousMode(void) {
if (!__llvm_profile_is_continuous_mode_enabled())
return;
/* Get the sizes of various profile data sections. Taken from
* __llvm_profile_get_size_for_buffer(). */
const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
const uint64_t *CountersBegin = __llvm_profile_begin_counters();
const uint64_t *CountersEnd = __llvm_profile_end_counters();
const char *NamesBegin = __llvm_profile_begin_names();
const char *NamesEnd = __llvm_profile_end_names();
const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char);
uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
uint64_t CountersSize = CountersEnd - CountersBegin;
/* Check that the counter and data sections in this image are page-aligned. */
unsigned PageSize = getpagesize();
if ((intptr_t)CountersBegin % PageSize != 0) {
PROF_ERR("Counters section not page-aligned (start = %p, pagesz = %u).\n",
CountersBegin, PageSize);
return;
}
if ((intptr_t)DataBegin % PageSize != 0) {
PROF_ERR("Data section not page-aligned (start = %p, pagesz = %u).\n",
DataBegin, PageSize);
return;
}
int Length = getCurFilenameLength();
char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
const char *Filename = getCurFilename(FilenameBuf, 0);
if (!Filename)
return;
FILE *File = NULL;
off_t CurrentFileOffset = 0;
off_t OffsetModPage = 0;
/* Whether an exclusive lock on the profile must be dropped after init.
* Use a cleanup to warn if the unlock does not occur. */
COMPILER_RT_CLEANUP(assertIsZero) int ProfileRequiresUnlock = 0;
if (!doMerging()) {
/* We are not merging profiles, so open the raw profile in append mode. */
File = fopen(Filename, "a+b");
if (!File)
return;
/* Check that the offset within the file is page-aligned. */
CurrentFileOffset = ftello(File);
OffsetModPage = CurrentFileOffset % PageSize;
if (OffsetModPage != 0) {
PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not"
"page-aligned. CurrentFileOffset = %" PRIu64 ", pagesz = %u.\n",
(uint64_t)CurrentFileOffset, PageSize);
return;
}
/* Grow the profile so that mmap() can succeed. Leak the file handle, as
* the file should stay open. */
if (writeProfileWithFileObject(Filename, File) != 0)
return;
} else {
/* We are merging profiles. Map the counter section as shared memory into
* the profile, i.e. into each participating process. An increment in one
* process should be visible to every other process with the same counter
* section mapped. */
File = lprofOpenFileEx(Filename);
if (!File)
return;
ProfileRequiresUnlock = 1;
uint64_t ProfileFileSize;
if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1)
return unlockProfile(&ProfileRequiresUnlock, File);
if (ProfileFileSize == 0) {
/* Grow the profile so that mmap() can succeed. Leak the file handle, as
* the file should stay open. */
if (writeProfileWithFileObject(Filename, File) != 0)
return unlockProfile(&ProfileRequiresUnlock, File);
} else {
/* The merged profile has a non-zero length. Check that it is compatible
* with the data in this process. */
char *ProfileBuffer;
if (mmapProfileForMerging(File, ProfileFileSize, &ProfileBuffer) == -1 ||
munmap(ProfileBuffer, ProfileFileSize) == -1)
return unlockProfile(&ProfileRequiresUnlock, File);
}
}
/* mmap() the profile counters so long as there is at least one counter.
* If there aren't any counters, mmap() would fail with EINVAL. */
if (CountersSize > 0) {
int Fileno = fileno(File);
/* Determine how much padding is needed before/after the counters and after
* the names. */
uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
PaddingBytesAfterNames;
__llvm_profile_get_padding_sizes_for_counters(
DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters,
&PaddingBytesAfterCounters, &PaddingBytesAfterNames);
uint64_t PageAlignedCountersLength =
(CountersSize * sizeof(uint64_t)) + PaddingBytesAfterCounters;
uint64_t FileOffsetToCounters =
CurrentFileOffset + sizeof(__llvm_profile_header) +
(DataSize * sizeof(__llvm_profile_data)) + PaddingBytesBeforeCounters;
uint64_t *CounterMmap = (uint64_t *)mmap(
(void *)CountersBegin, PageAlignedCountersLength, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED, Fileno, FileOffsetToCounters);
if (CounterMmap != CountersBegin) {
PROF_ERR(
"Continuous counter sync mode is enabled, but mmap() failed (%s).\n"
" - CountersBegin: %p\n"
" - PageAlignedCountersLength: %" PRIu64 "\n"
" - Fileno: %d\n"
" - FileOffsetToCounters: %" PRIu64 "\n",
strerror(errno), CountersBegin, PageAlignedCountersLength, Fileno,
FileOffsetToCounters);
}
}
if (ProfileRequiresUnlock)
unlockProfile(&ProfileRequiresUnlock, File);
}
#elif defined(__ELF__) || defined(_WIN32)
#define INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR \
INSTR_PROF_CONCAT(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR, _default)
intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR = 0;
/* This variable is a weak external reference which could be used to detect
* whether or not the compiler defined this symbol. */
-#if defined(_WIN32)
+#if defined(_MSC_VER)
COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
-#pragma comment(linker, "/alternatename:" \
- INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" \
- INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))
+#if defined(_M_IX86) || defined(__i386__)
+#define WIN_SYM_PREFIX "_"
+#else
+#define WIN_SYM_PREFIX
+#endif
+#pragma comment( \
+ linker, "/alternatename:" WIN_SYM_PREFIX INSTR_PROF_QUOTE( \
+ INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" WIN_SYM_PREFIX \
+ INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))
#else
COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR
__attribute__((weak, alias(INSTR_PROF_QUOTE(
INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))));
#endif
static int writeMMappedFile(FILE *OutputFile, char **Profile) {
if (!OutputFile)
return -1;
/* Write the data into a file. */
setupIOBuffer();
ProfDataWriter fileWriter;
initFileWriter(&fileWriter, OutputFile);
if (lprofWriteData(&fileWriter, NULL, 0)) {
PROF_ERR("Failed to write profile: %s\n", strerror(errno));
return -1;
}
fflush(OutputFile);
/* Get the file size. */
uint64_t FileSize = ftell(OutputFile);
/* Map the profile. */
*Profile = (char *)mmap(
NULL, FileSize, PROT_READ | PROT_WRITE, MAP_SHARED, fileno(OutputFile), 0);
if (*Profile == MAP_FAILED) {
PROF_ERR("Unable to mmap profile: %s\n", strerror(errno));
return -1;
}
return 0;
}
static void initializeProfileForContinuousMode(void) {
if (!__llvm_profile_is_continuous_mode_enabled())
return;
/* This symbol is defined by the compiler when runtime counter relocation is
* used; the runtime provides a weak alias so we can check whether it is defined. */
void *BiasAddr = &INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
void *BiasDefaultAddr = &INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR;
if (BiasAddr == BiasDefaultAddr) {
PROF_ERR("%s\n", "__llvm_profile_counter_bias is undefined");
return;
}
/* Get the sizes of various profile data sections. Taken from
* __llvm_profile_get_size_for_buffer(). */
const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
const uint64_t *CountersBegin = __llvm_profile_begin_counters();
const uint64_t *CountersEnd = __llvm_profile_end_counters();
uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
- const uint64_t CountersOffset =
- sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data));
+ const uint64_t CountersOffset = sizeof(__llvm_profile_header) +
+ __llvm_write_binary_ids(NULL) +
+ (DataSize * sizeof(__llvm_profile_data));
int Length = getCurFilenameLength();
char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
const char *Filename = getCurFilename(FilenameBuf, 0);
if (!Filename)
return;
FILE *File = NULL;
char *Profile = NULL;
if (!doMerging()) {
File = fopen(Filename, "w+b");
if (!File)
return;
if (writeMMappedFile(File, &Profile) == -1) {
fclose(File);
return;
}
} else {
File = lprofOpenFileEx(Filename);
if (!File)
return;
uint64_t ProfileFileSize = 0;
if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1) {
lprofUnlockFileHandle(File);
fclose(File);
return;
}
if (!ProfileFileSize) {
if (writeMMappedFile(File, &Profile) == -1) {
fclose(File);
return;
}
} else {
/* The merged profile has a non-zero length. Check that it is compatible
* with the data in this process. */
if (mmapProfileForMerging(File, ProfileFileSize, &Profile) == -1) {
fclose(File);
return;
}
}
lprofUnlockFileHandle(File);
}
/* Update the profile fields based on the current mapping. */
INSTR_PROF_PROFILE_COUNTER_BIAS_VAR =
(intptr_t)Profile - (uintptr_t)CountersBegin +
CountersOffset;
/* Return the memory allocated for counters to OS. */
lprofReleaseMemoryPagesToOS((uintptr_t)CountersBegin, (uintptr_t)CountersEnd);
}
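/* Illustrative sketch, not from the upstream sources: with runtime counter
 * relocation enabled, instrumented code is assumed to add this bias to each
 * counter's link-time address, so counter increments land inside the
 * mmap()ed profile set up above rather than in the original counters
 * section. The helper below is hypothetical. */
static uint64_t *demoRelocatedCounterAddr(const uint64_t *CounterAddr) {
  /* CounterAddr is expected to lie in [__llvm_profile_begin_counters(),
   * __llvm_profile_end_counters()). */
  return (uint64_t *)((uintptr_t)CounterAddr +
                      (uintptr_t)INSTR_PROF_PROFILE_COUNTER_BIAS_VAR);
}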
#else
static void initializeProfileForContinuousMode(void) {
PROF_ERR("%s\n", "continuous mode is unsupported on this platform");
}
#endif
static const char *DefaultProfileName = "default.profraw";
static void resetFilenameToDefault(void) {
if (lprofCurFilename.FilenamePat && lprofCurFilename.OwnsFilenamePat) {
free((void *)lprofCurFilename.FilenamePat);
}
memset(&lprofCurFilename, 0, sizeof(lprofCurFilename));
lprofCurFilename.FilenamePat = DefaultProfileName;
lprofCurFilename.PNS = PNS_default;
}
static unsigned getMergePoolSize(const char *FilenamePat, int *I) {
unsigned J = 0, Num = 0;
for (;; ++J) {
char C = FilenamePat[*I + J];
if (C == 'm') {
*I += J;
return Num ? Num : 1;
}
if (C < '0' || C > '9')
break;
Num = Num * 10 + C - '0';
/* If FilenamePat[*I+J] is between '0' and '9', the next byte is guaranteed
* to be in-bound as the string is null terminated. */
}
return 0;
}
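/* Illustrative sketch, not part of the upstream diff: how the %[0-9]m
 * specifier is consumed by getMergePoolSize() above. With *I pointing just
 * past the '%', a pattern such as "x_%2m.profraw" yields a pool size of 2
 * and leaves *I on the 'm'; a bare "%m" yields 1, and a non-merge specifier
 * such as "%p" yields 0. The demo function and its literal pattern are
 * hypothetical. */
static void demoMergePoolSpecifier(void) {
  int I = 3; /* Index of '2' in "x_%2m.profraw", the character after '%'. */
  unsigned PoolSize = getMergePoolSize("x_%2m.profraw", &I);
  (void)PoolSize; /* PoolSize == 2 and I == 4 (the 'm') at this point. */
}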
/* Assert that Idx does not index past a string's null terminator. Return the
* result of the check. */
static int checkBounds(int Idx, int Strlen) {
assert(Idx <= Strlen && "Indexing past string null terminator");
return Idx <= Strlen;
}
/* Parses the pattern string \p FilenamePat and stores the result in the
* lprofCurFilename structure. */
static int parseFilenamePattern(const char *FilenamePat,
unsigned CopyFilenamePat) {
int NumPids = 0, NumHosts = 0, I;
char *PidChars = &lprofCurFilename.PidChars[0];
char *Hostname = &lprofCurFilename.Hostname[0];
int MergingEnabled = 0;
int FilenamePatLen = strlen(FilenamePat);
/* Clean up cached prefix and filename. */
if (lprofCurFilename.ProfilePathPrefix)
free((void *)lprofCurFilename.ProfilePathPrefix);
if (lprofCurFilename.FilenamePat && lprofCurFilename.OwnsFilenamePat) {
free((void *)lprofCurFilename.FilenamePat);
}
memset(&lprofCurFilename, 0, sizeof(lprofCurFilename));
if (!CopyFilenamePat)
lprofCurFilename.FilenamePat = FilenamePat;
else {
lprofCurFilename.FilenamePat = strdup(FilenamePat);
lprofCurFilename.OwnsFilenamePat = 1;
}
/* Check the filename for "%p", which indicates a pid-substitution. */
for (I = 0; checkBounds(I, FilenamePatLen) && FilenamePat[I]; ++I) {
if (FilenamePat[I] == '%') {
++I; /* Advance to the next character. */
if (!checkBounds(I, FilenamePatLen))
break;
if (FilenamePat[I] == 'p') {
if (!NumPids++) {
if (snprintf(PidChars, MAX_PID_SIZE, "%ld", (long)getpid()) <= 0) {
PROF_WARN("Unable to get pid for filename pattern %s. Using the "
"default name.",
FilenamePat);
return -1;
}
}
} else if (FilenamePat[I] == 'h') {
if (!NumHosts++)
if (COMPILER_RT_GETHOSTNAME(Hostname, COMPILER_RT_MAX_HOSTLEN)) {
PROF_WARN("Unable to get hostname for filename pattern %s. Using "
"the default name.",
FilenamePat);
return -1;
}
} else if (FilenamePat[I] == 't') {
lprofCurFilename.TmpDir = getenv("TMPDIR");
if (!lprofCurFilename.TmpDir) {
PROF_WARN("Unable to get the TMPDIR environment variable, referenced "
"in %s. Using the default path.",
FilenamePat);
return -1;
}
} else if (FilenamePat[I] == 'c') {
if (__llvm_profile_is_continuous_mode_enabled()) {
PROF_WARN("%%c specifier can only be specified once in %s.\n",
FilenamePat);
return -1;
}
#if defined(__APPLE__) || defined(__ELF__) || defined(_WIN32)
__llvm_profile_set_page_size(getpagesize());
__llvm_profile_enable_continuous_mode();
#else
PROF_WARN("%s", "Continous mode is currently only supported for Mach-O,"
" ELF and COFF formats.");
return -1;
#endif
} else {
unsigned MergePoolSize = getMergePoolSize(FilenamePat, &I);
if (!MergePoolSize)
continue;
if (MergingEnabled) {
PROF_WARN("%%m specifier can only be specified once in %s.\n",
FilenamePat);
return -1;
}
MergingEnabled = 1;
lprofCurFilename.MergePoolSize = MergePoolSize;
}
}
}
lprofCurFilename.NumPids = NumPids;
lprofCurFilename.NumHosts = NumHosts;
return 0;
}
static void parseAndSetFilename(const char *FilenamePat,
ProfileNameSpecifier PNS,
unsigned CopyFilenamePat) {
const char *OldFilenamePat = lprofCurFilename.FilenamePat;
ProfileNameSpecifier OldPNS = lprofCurFilename.PNS;
/* Keep the old profile name specifier if the new one has lower precedence. */
if (PNS < OldPNS)
return;
if (!FilenamePat)
FilenamePat = DefaultProfileName;
if (OldFilenamePat && !strcmp(OldFilenamePat, FilenamePat)) {
lprofCurFilename.PNS = PNS;
return;
}
/* When PNS >= OldPNS, the last one wins. */
if (!FilenamePat || parseFilenamePattern(FilenamePat, CopyFilenamePat))
resetFilenameToDefault();
lprofCurFilename.PNS = PNS;
if (!OldFilenamePat) {
if (getenv("LLVM_PROFILE_VERBOSE"))
PROF_NOTE("Set profile file path to \"%s\" via %s.\n",
lprofCurFilename.FilenamePat, getPNSStr(PNS));
} else {
if (getenv("LLVM_PROFILE_VERBOSE"))
PROF_NOTE("Override old profile path \"%s\" via %s to \"%s\" via %s.\n",
OldFilenamePat, getPNSStr(OldPNS), lprofCurFilename.FilenamePat,
getPNSStr(PNS));
}
truncateCurrentFile();
if (__llvm_profile_is_continuous_mode_enabled())
initializeProfileForContinuousMode();
}
/* Return the buffer length required to store the current profile
* filename with PID and hostname substitutions. */
/* The length needed to hold a uint64_t followed by a '_' and a 3-digit pool id. */
#define SIGLEN 24
static int getCurFilenameLength() {
int Len;
if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0])
return 0;
if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize))
return strlen(lprofCurFilename.FilenamePat);
Len = strlen(lprofCurFilename.FilenamePat) +
lprofCurFilename.NumPids * (strlen(lprofCurFilename.PidChars) - 2) +
lprofCurFilename.NumHosts * (strlen(lprofCurFilename.Hostname) - 2) +
(lprofCurFilename.TmpDir ? (strlen(lprofCurFilename.TmpDir) - 1) : 0);
if (lprofCurFilename.MergePoolSize)
Len += SIGLEN;
return Len;
}
/* Return the pointer to the current profile file name (after substituting
* PIDs and hostnames in the filename pattern). \p FilenameBuf is the buffer
* to store the resulting filename. If no substitution is needed, the
* current filename pattern string is directly returned, unless ForceUseBuf
* is enabled. */
static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf) {
int I, J, PidLength, HostNameLength, TmpDirLength, FilenamePatLength;
const char *FilenamePat = lprofCurFilename.FilenamePat;
if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0])
return 0;
if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize ||
__llvm_profile_is_continuous_mode_enabled())) {
if (!ForceUseBuf)
return lprofCurFilename.FilenamePat;
FilenamePatLength = strlen(lprofCurFilename.FilenamePat);
memcpy(FilenameBuf, lprofCurFilename.FilenamePat, FilenamePatLength);
FilenameBuf[FilenamePatLength] = '\0';
return FilenameBuf;
}
PidLength = strlen(lprofCurFilename.PidChars);
HostNameLength = strlen(lprofCurFilename.Hostname);
TmpDirLength = lprofCurFilename.TmpDir ? strlen(lprofCurFilename.TmpDir) : 0;
/* Construct the new filename. */
for (I = 0, J = 0; FilenamePat[I]; ++I)
if (FilenamePat[I] == '%') {
if (FilenamePat[++I] == 'p') {
memcpy(FilenameBuf + J, lprofCurFilename.PidChars, PidLength);
J += PidLength;
} else if (FilenamePat[I] == 'h') {
memcpy(FilenameBuf + J, lprofCurFilename.Hostname, HostNameLength);
J += HostNameLength;
} else if (FilenamePat[I] == 't') {
memcpy(FilenameBuf + J, lprofCurFilename.TmpDir, TmpDirLength);
FilenameBuf[J + TmpDirLength] = DIR_SEPARATOR;
J += TmpDirLength + 1;
} else {
if (!getMergePoolSize(FilenamePat, &I))
continue;
char LoadModuleSignature[SIGLEN + 1];
int S;
int ProfilePoolId = getpid() % lprofCurFilename.MergePoolSize;
S = snprintf(LoadModuleSignature, SIGLEN + 1, "%" PRIu64 "_%d",
lprofGetLoadModuleSignature(), ProfilePoolId);
if (S == -1 || S > SIGLEN)
S = SIGLEN;
memcpy(FilenameBuf + J, LoadModuleSignature, S);
J += S;
}
/* Drop any unknown substitutions. */
} else
FilenameBuf[J++] = FilenamePat[I];
FilenameBuf[J] = 0;
return FilenameBuf;
}
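/* Illustrative sketch, not part of the upstream diff: the expansion performed
 * by getCurFilename() above. With PidChars "1234" and Hostname "node1", the
 * pattern "prof_%p_%h.profraw" becomes "prof_1234_node1.profraw"; "%t" expands
 * to $TMPDIR plus a directory separator, and unrecognized substitutions are
 * dropped. Callers size the buffer with getCurFilenameLength() first, as the
 * other routines in this file do; the helper below is hypothetical. */
static void demoExpandCurFilename(void) {
  int Length = getCurFilenameLength();
  char *Buf = (char *)COMPILER_RT_ALLOCA(Length + 1);
  const char *Name = getCurFilename(Buf, 0);
  (void)Name; /* Name is Buf after substitution, or the raw pattern when no
               * specifiers are present and ForceUseBuf is 0. */
}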
/* Returns the pointer to the environment variable
* string. Returns null if the env var is not set. */
static const char *getFilenamePatFromEnv(void) {
const char *Filename = getenv("LLVM_PROFILE_FILE");
if (!Filename || !Filename[0])
return 0;
return Filename;
}
COMPILER_RT_VISIBILITY
const char *__llvm_profile_get_path_prefix(void) {
int Length;
char *FilenameBuf, *Prefix;
const char *Filename, *PrefixEnd;
if (lprofCurFilename.ProfilePathPrefix)
return lprofCurFilename.ProfilePathPrefix;
Length = getCurFilenameLength();
FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
Filename = getCurFilename(FilenameBuf, 0);
if (!Filename)
return "\0";
PrefixEnd = lprofFindLastDirSeparator(Filename);
if (!PrefixEnd)
return "\0";
Length = PrefixEnd - Filename + 1;
Prefix = (char *)malloc(Length + 1);
if (!Prefix) {
PROF_ERR("Failed to %s\n", "allocate memory.");
return "\0";
}
memcpy(Prefix, Filename, Length);
Prefix[Length] = '\0';
lprofCurFilename.ProfilePathPrefix = Prefix;
return Prefix;
}
COMPILER_RT_VISIBILITY
const char *__llvm_profile_get_filename(void) {
int Length;
char *FilenameBuf;
const char *Filename;
Length = getCurFilenameLength();
FilenameBuf = (char *)malloc(Length + 1);
if (!FilenameBuf) {
PROF_ERR("Failed to %s\n", "allocate memory.");
return "\0";
}
Filename = getCurFilename(FilenameBuf, 1);
if (!Filename)
return "\0";
return FilenameBuf;
}
/* This API initializes the file handling. Both the user-specified
* profile path (via -fprofile-instr-generate=) and the LLVM_PROFILE_FILE
* environment variable can override the default value.
*/
COMPILER_RT_VISIBILITY
void __llvm_profile_initialize_file(void) {
const char *EnvFilenamePat;
const char *SelectedPat = NULL;
ProfileNameSpecifier PNS = PNS_unknown;
int hasCommandLineOverrider = (INSTR_PROF_PROFILE_NAME_VAR[0] != 0);
EnvFilenamePat = getFilenamePatFromEnv();
if (EnvFilenamePat) {
/* Pass CopyFilenamePat = 1 to ensure that the filename is still valid
when __llvm_profile_write_file() gets executed. */
parseAndSetFilename(EnvFilenamePat, PNS_environment, 1);
return;
} else if (hasCommandLineOverrider) {
SelectedPat = INSTR_PROF_PROFILE_NAME_VAR;
PNS = PNS_command_line;
} else {
SelectedPat = NULL;
PNS = PNS_default;
}
parseAndSetFilename(SelectedPat, PNS, 0);
}
/* This method is invoked by the runtime initialization hook
* InstrProfilingRuntime.o if it is linked in.
*/
COMPILER_RT_VISIBILITY
void __llvm_profile_initialize(void) {
__llvm_profile_initialize_file();
if (!__llvm_profile_is_continuous_mode_enabled())
__llvm_profile_register_write_file_atexit();
}
/* This API is directly called by the user application code. It has the
* highest precedence compared with LLVM_PROFILE_FILE environment variable
* and command line option -fprofile-instr-generate=<profile_name>.
*/
COMPILER_RT_VISIBILITY
void __llvm_profile_set_filename(const char *FilenamePat) {
if (__llvm_profile_is_continuous_mode_enabled())
return;
parseAndSetFilename(FilenamePat, PNS_runtime_api, 1);
}
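/* Illustrative sketch, not part of the upstream diff: how instrumented
 * application code (built with -fprofile-instr-generate) might drive these
 * entry points at run time. Both functions are declared in InstrProfiling.h,
 * included above; the helper name and the file pattern are hypothetical. */
static void demoSwitchProfileAndDump(void) {
  /* The runtime API takes the highest precedence over LLVM_PROFILE_FILE and
   * -fprofile-instr-generate=, and is ignored in continuous mode. */
  __llvm_profile_set_filename("phase1_%p.profraw");
  /* Flush the counters accumulated so far into the newly selected file. */
  if (__llvm_profile_write_file() != 0)
    PROF_ERR("%s\n", "failed to write phase profile");
}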
/* The public API for writing profile data into the file with name
* set by previous calls to __llvm_profile_set_filename or
* __llvm_profile_override_default_filename or
* __llvm_profile_initialize_file. */
COMPILER_RT_VISIBILITY
int __llvm_profile_write_file(void) {
int rc, Length;
const char *Filename;
char *FilenameBuf;
int PDeathSig = 0;
if (lprofProfileDumped() || __llvm_profile_is_continuous_mode_enabled()) {
PROF_NOTE("Profile data not written to file: %s.\n", "already written");
return 0;
}
Length = getCurFilenameLength();
FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
Filename = getCurFilename(FilenameBuf, 0);
/* Check the filename. */
if (!Filename) {
PROF_ERR("Failed to write file : %s\n", "Filename not set");
return -1;
}
/* Check if there is llvm/runtime version mismatch. */
if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) {
PROF_ERR("Runtime and instrumentation version mismatch : "
"expected %d, but get %d\n",
INSTR_PROF_RAW_VERSION,
(int)GET_VERSION(__llvm_profile_get_version()));
return -1;
}
// Temporarily suspend getting SIGKILL when the parent exits.
PDeathSig = lprofSuspendSigKill();
/* Write profile data to the file. */
rc = writeFile(Filename);
if (rc)
PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
// Restore SIGKILL.
if (PDeathSig == 1)
lprofRestoreSigKill();
return rc;
}
COMPILER_RT_VISIBILITY
int __llvm_profile_dump(void) {
if (!doMerging())
PROF_WARN("Later invocation of __llvm_profile_dump can lead to clobbering "
" of previously dumped profile data : %s. Either use %%m "
"in profile name or change profile name before dumping.\n",
"online profile merging is not on");
int rc = __llvm_profile_write_file();
lprofSetProfileDumped(1);
return rc;
}
/* Order file data will be saved in a file with suffix .order. */
static const char *OrderFileSuffix = ".order";
COMPILER_RT_VISIBILITY
int __llvm_orderfile_write_file(void) {
int rc, Length, LengthBeforeAppend, SuffixLength;
const char *Filename;
char *FilenameBuf;
int PDeathSig = 0;
SuffixLength = strlen(OrderFileSuffix);
Length = getCurFilenameLength() + SuffixLength;
FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
Filename = getCurFilename(FilenameBuf, 1);
/* Check the filename. */
if (!Filename) {
PROF_ERR("Failed to write file : %s\n", "Filename not set");
return -1;
}
/* Append order file suffix */
LengthBeforeAppend = strlen(Filename);
memcpy(FilenameBuf + LengthBeforeAppend, OrderFileSuffix, SuffixLength);
FilenameBuf[LengthBeforeAppend + SuffixLength] = '\0';
/* Check if there is llvm/runtime version mismatch. */
if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) {
PROF_ERR("Runtime and instrumentation version mismatch : "
"expected %d, but get %d\n",
INSTR_PROF_RAW_VERSION,
(int)GET_VERSION(__llvm_profile_get_version()));
return -1;
}
// Temporarily suspend getting SIGKILL when the parent exits.
PDeathSig = lprofSuspendSigKill();
/* Write order data to the file. */
rc = writeOrderFile(Filename);
if (rc)
PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
// Restore SIGKILL.
if (PDeathSig == 1)
lprofRestoreSigKill();
return rc;
}
COMPILER_RT_VISIBILITY
int __llvm_orderfile_dump(void) {
int rc = __llvm_orderfile_write_file();
return rc;
}
static void writeFileWithoutReturn(void) { __llvm_profile_write_file(); }
COMPILER_RT_VISIBILITY
int __llvm_profile_register_write_file_atexit(void) {
static int HasBeenRegistered = 0;
if (HasBeenRegistered)
return 0;
lprofSetupValueProfiler();
HasBeenRegistered = 1;
return atexit(writeFileWithoutReturn);
}
#endif
diff --git a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c
index 0146b14c193f..1be0ef36a288 100644
--- a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c
+++ b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c
@@ -1,192 +1,193 @@
/*===- InstrProfilingPlatformFuchsia.c - Profile data Fuchsia platform ----===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/
/*
* This file implements the profiling runtime for Fuchsia and defines the
* shared profile runtime interface. Each module (executable or DSO) statically
* links in the whole profile runtime to satisfy the calls from its
* instrumented code. Several modules in the same program might be separately
* compiled and even use different versions of the instrumentation ABI and data
* format. All they share in common is the VMO and the offset, which live in
* exported globals so that exactly one definition will be shared across all
* modules. Each module has its own independent runtime that registers its own
* atexit hook to append its own data into the shared VMO which is published
* via the data sink hook provided by Fuchsia's dynamic linker.
*/
#if defined(__Fuchsia__)
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdlib.h>
#include <zircon/process.h>
#include <zircon/sanitizer.h>
#include <zircon/status.h>
#include <zircon/syscalls.h>
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
#include "InstrProfilingUtil.h"
/* This variable is an external reference to a symbol defined by the compiler. */
COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
COMPILER_RT_VISIBILITY unsigned lprofProfileDumped() {
return 1;
}
COMPILER_RT_VISIBILITY void lprofSetProfileDumped(unsigned Value) {}
static const char ProfileSinkName[] = "llvm-profile";
static inline void lprofWrite(const char *fmt, ...) {
char s[256];
va_list ap;
va_start(ap, fmt);
int ret = vsnprintf(s, sizeof(s), fmt, ap);
va_end(ap);
__sanitizer_log_write(s, ret + 1);
}
struct lprofVMOWriterCtx {
/* VMO that contains the profile data for this module. */
zx_handle_t Vmo;
/* Current offset within the VMO where data should be written next. */
uint64_t Offset;
};
static uint32_t lprofVMOWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs,
uint32_t NumIOVecs) {
struct lprofVMOWriterCtx *Ctx = (struct lprofVMOWriterCtx *)This->WriterCtx;
/* Compute the total length of data to be written. */
size_t Length = 0;
for (uint32_t I = 0; I < NumIOVecs; I++)
Length += IOVecs[I].ElmSize * IOVecs[I].NumElm;
/* Resize the VMO to ensure there's sufficient space for the data. */
zx_status_t Status = _zx_vmo_set_size(Ctx->Vmo, Ctx->Offset + Length);
if (Status != ZX_OK)
return -1;
/* Copy the data into VMO. */
for (uint32_t I = 0; I < NumIOVecs; I++) {
size_t Length = IOVecs[I].ElmSize * IOVecs[I].NumElm;
if (IOVecs[I].Data) {
Status = _zx_vmo_write(Ctx->Vmo, IOVecs[I].Data, Ctx->Offset, Length);
if (Status != ZX_OK)
return -1;
} else if (IOVecs[I].UseZeroPadding) {
/* Resizing the VMO should zero fill. */
}
Ctx->Offset += Length;
}
/* Record the profile size as a property of the VMO. */
_zx_object_set_property(Ctx->Vmo, ZX_PROP_VMO_CONTENT_SIZE, &Ctx->Offset,
sizeof(Ctx->Offset));
return 0;
}
static void initVMOWriter(ProfDataWriter *This, struct lprofVMOWriterCtx *Ctx) {
This->Write = lprofVMOWriter;
This->WriterCtx = Ctx;
}
/* This method is invoked by the runtime initialization hook
* InstrProfilingRuntime.o if it is linked in. */
COMPILER_RT_VISIBILITY
void __llvm_profile_initialize(void) {
/* Check if there is llvm/runtime version mismatch. */
if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) {
lprofWrite("LLVM Profile: runtime and instrumentation version mismatch: "
"expected %d, but got %d\n",
INSTR_PROF_RAW_VERSION,
(int)GET_VERSION(__llvm_profile_get_version()));
return;
}
const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
const uint64_t *CountersBegin = __llvm_profile_begin_counters();
const uint64_t *CountersEnd = __llvm_profile_end_counters();
const uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
- const uint64_t CountersOffset =
- sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data));
+ const uint64_t CountersOffset = sizeof(__llvm_profile_header) +
+ __llvm_write_binary_ids(NULL) +
+ (DataSize * sizeof(__llvm_profile_data));
uint64_t CountersSize = CountersEnd - CountersBegin;
/* Don't publish a VMO if there are no counters. */
if (!CountersSize)
return;
zx_status_t Status;
/* Create a VMO to hold the profile data. */
zx_handle_t Vmo = ZX_HANDLE_INVALID;
Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &Vmo);
if (Status != ZX_OK) {
lprofWrite("LLVM Profile: cannot create VMO: %s\n",
_zx_status_get_string(Status));
return;
}
/* Give the VMO a name that includes the module signature. */
char VmoName[ZX_MAX_NAME_LEN];
snprintf(VmoName, sizeof(VmoName), "%" PRIu64 ".profraw",
lprofGetLoadModuleSignature());
_zx_object_set_property(Vmo, ZX_PROP_NAME, VmoName, strlen(VmoName));
/* Write the profile data into the mapped region. */
ProfDataWriter VMOWriter;
struct lprofVMOWriterCtx Ctx = {.Vmo = Vmo, .Offset = 0};
initVMOWriter(&VMOWriter, &Ctx);
if (lprofWriteData(&VMOWriter, 0, 0) != 0) {
lprofWrite("LLVM Profile: failed to write data\n");
_zx_handle_close(Vmo);
return;
}
uint64_t Len = 0;
Status = _zx_vmo_get_size(Vmo, &Len);
if (Status != ZX_OK) {
lprofWrite("LLVM Profile: failed to get the VMO size: %s\n",
_zx_status_get_string(Status));
_zx_handle_close(Vmo);
return;
}
uintptr_t Mapping;
Status =
_zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
Vmo, 0, Len, &Mapping);
if (Status != ZX_OK) {
lprofWrite("LLVM Profile: failed to map the VMO: %s\n",
_zx_status_get_string(Status));
_zx_handle_close(Vmo);
return;
}
/* Publish the VMO which contains profile data to the system. Note that this
* also consumes the VMO handle. */
__sanitizer_publish_data(ProfileSinkName, Vmo);
/* Use the dumpfile symbolizer markup element to write the name of VMO. */
lprofWrite("LLVM Profile: {{{dumpfile:%s:%s}}}\n", ProfileSinkName, VmoName);
/* Update the profile fields based on the current mapping. */
INSTR_PROF_PROFILE_COUNTER_BIAS_VAR =
(intptr_t)Mapping - (uintptr_t)CountersBegin + CountersOffset;
/* Return the memory allocated for counters to OS. */
lprofReleaseMemoryPagesToOS((uintptr_t)CountersBegin, (uintptr_t)CountersEnd);
}
#endif
diff --git a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index 7c15f97aff89..5d47083b8bfe 100644
--- a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -1,202 +1,203 @@
/*===- InstrProfilingPlatformLinux.c - Profile data Linux platform ------===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/
#if defined(__linux__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
(defined(__sun__) && defined(__svr4__)) || defined(__NetBSD__)
#include <elf.h>
#include <link.h>
#include <stdlib.h>
#include <string.h>
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
#if defined(__FreeBSD__) && !defined(ElfW)
/*
* FreeBSD's elf.h and link.h headers do not define the ElfW(type) macro yet.
* If this is added to all supported FreeBSD versions in the future, this
* compatibility macro can be removed.
*/
#define ElfW(type) __ElfN(type)
#endif
#define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON)
#define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
#define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
#define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON)
#define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON)
#define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON)
#define PROF_ORDERFILE_START INSTR_PROF_SECT_START(INSTR_PROF_ORDERFILE_COMMON)
#define PROF_VNODES_START INSTR_PROF_SECT_START(INSTR_PROF_VNODES_COMMON)
#define PROF_VNODES_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNODES_COMMON)
/* Declare section start and stop symbols for various sections
* generated by compiler instrumentation.
*/
extern __llvm_profile_data PROF_DATA_START COMPILER_RT_VISIBILITY
COMPILER_RT_WEAK;
extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY
COMPILER_RT_WEAK;
extern uint64_t PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern uint64_t PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern uint32_t PROF_ORDERFILE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern char PROF_NAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern char PROF_NAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern ValueProfNode PROF_VNODES_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern ValueProfNode PROF_VNODES_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_begin_data(void) {
return &PROF_DATA_START;
}
COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_end_data(void) {
return &PROF_DATA_STOP;
}
COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_names(void) {
return &PROF_NAME_START;
}
COMPILER_RT_VISIBILITY const char *__llvm_profile_end_names(void) {
return &PROF_NAME_STOP;
}
COMPILER_RT_VISIBILITY uint64_t *__llvm_profile_begin_counters(void) {
return &PROF_CNTS_START;
}
COMPILER_RT_VISIBILITY uint64_t *__llvm_profile_end_counters(void) {
return &PROF_CNTS_STOP;
}
COMPILER_RT_VISIBILITY uint32_t *__llvm_profile_begin_orderfile(void) {
return &PROF_ORDERFILE_START;
}
COMPILER_RT_VISIBILITY ValueProfNode *
__llvm_profile_begin_vnodes(void) {
return &PROF_VNODES_START;
}
COMPILER_RT_VISIBILITY ValueProfNode *__llvm_profile_end_vnodes(void) {
return &PROF_VNODES_STOP;
}
COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = &PROF_VNODES_START;
COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = &PROF_VNODES_STOP;
#ifdef NT_GNU_BUILD_ID
static size_t RoundUp(size_t size, size_t align) {
return (size + align - 1) & ~(align - 1);
}
/*
* Write binary id length and then its data, because binary id does not
* have a fixed length.
*/
-int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
- const uint8_t *BinaryIdData) {
+static int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
+ const uint8_t *BinaryIdData) {
ProfDataIOVec BinaryIdIOVec[] = {
{&BinaryIdLen, sizeof(uint64_t), 1, 0},
{BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0}};
if (Writer->Write(Writer, BinaryIdIOVec,
sizeof(BinaryIdIOVec) / sizeof(*BinaryIdIOVec)))
return -1;
/* Successfully wrote binary id, report success. */
return 0;
}
/*
* Look for the note that has the name "GNU\0" and type NT_GNU_BUILD_ID
* that contains build id. If build id exists, write binary id.
*
* Each note in notes section starts with a struct which includes
* n_namesz, n_descsz, and n_type members. It is followed by the name
* (whose length is defined in n_namesz) and then by the descriptor
* (whose length is defined in n_descsz).
*
* Note sections like .note.ABI-tag and .note.gnu.build-id are aligned
* to 4 bytes, so round n_namesz and n_descsz to the nearest 4 bytes.
*/
-int WriteBinaryIdForNote(ProfDataWriter *Writer, const ElfW(Nhdr) * Note) {
+static int WriteBinaryIdForNote(ProfDataWriter *Writer,
+ const ElfW(Nhdr) * Note) {
int BinaryIdSize = 0;
const char *NoteName = (const char *)Note + sizeof(ElfW(Nhdr));
if (Note->n_type == NT_GNU_BUILD_ID && Note->n_namesz == 4 &&
memcmp(NoteName, "GNU\0", 4) == 0) {
uint64_t BinaryIdLen = Note->n_descsz;
const uint8_t *BinaryIdData =
(const uint8_t *)(NoteName + RoundUp(Note->n_namesz, 4));
if (Writer != NULL &&
WriteOneBinaryId(Writer, BinaryIdLen, BinaryIdData) == -1)
return -1;
BinaryIdSize = sizeof(BinaryIdLen) + BinaryIdLen;
}
return BinaryIdSize;
}
/*
* Helper function that iterates through the notes section and finds build ids.
* If writer is given, write binary ids into profiles.
* If an error happens while writing, return -1.
*/
-int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
- const ElfW(Nhdr) * NotesEnd) {
+static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
+ const ElfW(Nhdr) * NotesEnd) {
int TotalBinaryIdsSize = 0;
while (Note < NotesEnd) {
int Result = WriteBinaryIdForNote(Writer, Note);
if (Result == -1)
return -1;
TotalBinaryIdsSize += Result;
/* Calculate the offset of the next note in notes section. */
size_t NoteOffset = sizeof(ElfW(Nhdr)) + RoundUp(Note->n_namesz, 4) +
RoundUp(Note->n_descsz, 4);
Note = (const ElfW(Nhdr) *)((const char *)(Note) + NoteOffset);
}
return TotalBinaryIdsSize;
}
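/* Illustrative sketch, not part of the upstream diff: size accounting for a
 * typical NT_GNU_BUILD_ID note carrying a 20-byte (SHA-1 style) build id,
 * i.e. n_namesz == 4 for "GNU\0" and n_descsz == 20. The demo values are
 * hypothetical. */
static void demoBuildIdNoteSizes(void) {
  uint64_t BinaryIdLen = 20;
  /* Bytes WriteBinaryIdForNote() contributes to the raw profile: an 8-byte
   * length field followed by the id itself, 28 bytes in total. */
  size_t ProfileBytes = sizeof(BinaryIdLen) + (size_t)BinaryIdLen;
  /* Offset from this note header to the next note in the PT_NOTE segment,
   * with the name and descriptor rounded up to 4-byte alignment. */
  size_t NextNoteOffset = sizeof(ElfW(Nhdr)) + RoundUp(4, 4) + RoundUp(20, 4);
  (void)ProfileBytes;
  (void)NextNoteOffset;
}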
/*
* Write binary ids into profiles if writer is given.
* Return the total size of binary ids.
* If an error happens while writing, return -1.
*/
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden")));
const ElfW(Ehdr) *ElfHeader = &__ehdr_start;
const ElfW(Phdr) *ProgramHeader =
(const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff);
uint32_t I;
/* Iterate through entries in the program header. */
for (I = 0; I < ElfHeader->e_phnum; I++) {
/* Look for the notes section in program header entries. */
if (ProgramHeader[I].p_type != PT_NOTE)
continue;
const ElfW(Nhdr) *Note =
(const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_offset);
const ElfW(Nhdr) *NotesEnd =
(const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_filesz);
return WriteBinaryIds(Writer, Note, NotesEnd);
}
return 0;
}
#else /* !NT_GNU_BUILD_ID */
/*
* Fallback implementation for targets that don't support the GNU
* extensions NT_GNU_BUILD_ID and __ehdr_start.
*/
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
return 0;
}
#endif
#endif
diff --git a/contrib/llvm-project/libcxx/include/cwctype b/contrib/llvm-project/libcxx/include/cwctype
index 17c68d6d4544..27eea2f15730 100644
--- a/contrib/llvm-project/libcxx/include/cwctype
+++ b/contrib/llvm-project/libcxx/include/cwctype
@@ -1,86 +1,88 @@
// -*- C++ -*-
//===--------------------------- cwctype ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_CWCTYPE
#define _LIBCPP_CWCTYPE
/*
cwctype synopsis
Macros:
WEOF
namespace std
{
Types:
wint_t
wctrans_t
wctype_t
int iswalnum(wint_t wc);
int iswalpha(wint_t wc);
int iswblank(wint_t wc); // C99
int iswcntrl(wint_t wc);
int iswdigit(wint_t wc);
int iswgraph(wint_t wc);
int iswlower(wint_t wc);
int iswprint(wint_t wc);
int iswpunct(wint_t wc);
int iswspace(wint_t wc);
int iswupper(wint_t wc);
int iswxdigit(wint_t wc);
int iswctype(wint_t wc, wctype_t desc);
wctype_t wctype(const char* property);
wint_t towlower(wint_t wc);
wint_t towupper(wint_t wc);
wint_t towctrans(wint_t wc, wctrans_t desc);
wctrans_t wctrans(const char* property);
} // std
*/
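// Illustrative usage only (not part of the header): the classification and
// conversion functions mirror their <cctype> counterparts for wide
// characters, e.g. std::iswdigit(L'7') returns nonzero and
// std::towupper(L'a') yields L'A' in the default locale.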
#include <__config>
#include <cctype>
#include <wctype.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
+#if defined(_LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H)
using ::wint_t _LIBCPP_USING_IF_EXISTS;
using ::wctrans_t _LIBCPP_USING_IF_EXISTS;
using ::wctype_t _LIBCPP_USING_IF_EXISTS;
using ::iswalnum _LIBCPP_USING_IF_EXISTS;
using ::iswalpha _LIBCPP_USING_IF_EXISTS;
using ::iswblank _LIBCPP_USING_IF_EXISTS;
using ::iswcntrl _LIBCPP_USING_IF_EXISTS;
using ::iswdigit _LIBCPP_USING_IF_EXISTS;
using ::iswgraph _LIBCPP_USING_IF_EXISTS;
using ::iswlower _LIBCPP_USING_IF_EXISTS;
using ::iswprint _LIBCPP_USING_IF_EXISTS;
using ::iswpunct _LIBCPP_USING_IF_EXISTS;
using ::iswspace _LIBCPP_USING_IF_EXISTS;
using ::iswupper _LIBCPP_USING_IF_EXISTS;
using ::iswxdigit _LIBCPP_USING_IF_EXISTS;
using ::iswctype _LIBCPP_USING_IF_EXISTS;
using ::wctype _LIBCPP_USING_IF_EXISTS;
using ::towlower _LIBCPP_USING_IF_EXISTS;
using ::towupper _LIBCPP_USING_IF_EXISTS;
using ::towctrans _LIBCPP_USING_IF_EXISTS;
using ::wctrans _LIBCPP_USING_IF_EXISTS;
+#endif // _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_CWCTYPE
diff --git a/contrib/llvm-project/libcxx/include/string b/contrib/llvm-project/libcxx/include/string
index 4940021b0c68..4159ea580345 100644
--- a/contrib/llvm-project/libcxx/include/string
+++ b/contrib/llvm-project/libcxx/include/string
@@ -1,4566 +1,4585 @@
// -*- C++ -*-
//===--------------------------- string -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_STRING
#define _LIBCPP_STRING
/*
string synopsis
namespace std
{
template <class stateT>
class fpos
{
private:
stateT st;
public:
fpos(streamoff = streamoff());
operator streamoff() const;
stateT state() const;
void state(stateT);
fpos& operator+=(streamoff);
fpos operator+ (streamoff) const;
fpos& operator-=(streamoff);
fpos operator- (streamoff) const;
};
template <class stateT> streamoff operator-(const fpos<stateT>& x, const fpos<stateT>& y);
template <class stateT> bool operator==(const fpos<stateT>& x, const fpos<stateT>& y);
template <class stateT> bool operator!=(const fpos<stateT>& x, const fpos<stateT>& y);
template <class charT>
struct char_traits
{
typedef charT char_type;
typedef ... int_type;
typedef streamoff off_type;
typedef streampos pos_type;
typedef mbstate_t state_type;
static void assign(char_type& c1, const char_type& c2) noexcept;
static constexpr bool eq(char_type c1, char_type c2) noexcept;
static constexpr bool lt(char_type c1, char_type c2) noexcept;
static int compare(const char_type* s1, const char_type* s2, size_t n);
static size_t length(const char_type* s);
static const char_type* find(const char_type* s, size_t n, const char_type& a);
static char_type* move(char_type* s1, const char_type* s2, size_t n);
static char_type* copy(char_type* s1, const char_type* s2, size_t n);
static char_type* assign(char_type* s, size_t n, char_type a);
static constexpr int_type not_eof(int_type c) noexcept;
static constexpr char_type to_char_type(int_type c) noexcept;
static constexpr int_type to_int_type(char_type c) noexcept;
static constexpr bool eq_int_type(int_type c1, int_type c2) noexcept;
static constexpr int_type eof() noexcept;
};
template <> struct char_traits<char>;
template <> struct char_traits<wchar_t>;
template <> struct char_traits<char8_t>; // C++20
template <> struct char_traits<char16_t>;
template <> struct char_traits<char32_t>;
template<class charT, class traits = char_traits<charT>, class Allocator = allocator<charT> >
class basic_string
{
public:
// types:
typedef traits traits_type;
typedef typename traits_type::char_type value_type;
typedef Allocator allocator_type;
typedef typename allocator_type::size_type size_type;
typedef typename allocator_type::difference_type difference_type;
typedef typename allocator_type::reference reference;
typedef typename allocator_type::const_reference const_reference;
typedef typename allocator_type::pointer pointer;
typedef typename allocator_type::const_pointer const_pointer;
typedef implementation-defined iterator;
typedef implementation-defined const_iterator;
typedef std::reverse_iterator<iterator> reverse_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
static const size_type npos = -1;
basic_string()
noexcept(is_nothrow_default_constructible<allocator_type>::value);
explicit basic_string(const allocator_type& a);
basic_string(const basic_string& str);
basic_string(basic_string&& str)
noexcept(is_nothrow_move_constructible<allocator_type>::value);
basic_string(const basic_string& str, size_type pos,
const allocator_type& a = allocator_type());
basic_string(const basic_string& str, size_type pos, size_type n,
const Allocator& a = Allocator());
template<class T>
basic_string(const T& t, size_type pos, size_type n, const Allocator& a = Allocator()); // C++17
template <class T>
explicit basic_string(const T& t, const Allocator& a = Allocator()); // C++17
basic_string(const value_type* s, const allocator_type& a = allocator_type());
basic_string(const value_type* s, size_type n, const allocator_type& a = allocator_type());
basic_string(nullptr_t) = delete; // C++2b
basic_string(size_type n, value_type c, const allocator_type& a = allocator_type());
template<class InputIterator>
basic_string(InputIterator begin, InputIterator end,
const allocator_type& a = allocator_type());
basic_string(initializer_list<value_type>, const Allocator& = Allocator());
basic_string(const basic_string&, const Allocator&);
basic_string(basic_string&&, const Allocator&);
~basic_string();
operator basic_string_view<charT, traits>() const noexcept;
basic_string& operator=(const basic_string& str);
template <class T>
basic_string& operator=(const T& t); // C++17
basic_string& operator=(basic_string&& str)
noexcept(
allocator_type::propagate_on_container_move_assignment::value ||
allocator_type::is_always_equal::value ); // C++17
basic_string& operator=(const value_type* s);
basic_string& operator=(nullptr_t) = delete; // C++2b
basic_string& operator=(value_type c);
basic_string& operator=(initializer_list<value_type>);
iterator begin() noexcept;
const_iterator begin() const noexcept;
iterator end() noexcept;
const_iterator end() const noexcept;
reverse_iterator rbegin() noexcept;
const_reverse_iterator rbegin() const noexcept;
reverse_iterator rend() noexcept;
const_reverse_iterator rend() const noexcept;
const_iterator cbegin() const noexcept;
const_iterator cend() const noexcept;
const_reverse_iterator crbegin() const noexcept;
const_reverse_iterator crend() const noexcept;
size_type size() const noexcept;
size_type length() const noexcept;
size_type max_size() const noexcept;
size_type capacity() const noexcept;
void resize(size_type n, value_type c);
void resize(size_type n);
void reserve(size_type res_arg);
void reserve(); // deprecated in C++20
void shrink_to_fit();
void clear() noexcept;
bool empty() const noexcept;
const_reference operator[](size_type pos) const;
reference operator[](size_type pos);
const_reference at(size_type n) const;
reference at(size_type n);
basic_string& operator+=(const basic_string& str);
template <class T>
basic_string& operator+=(const T& t); // C++17
basic_string& operator+=(const value_type* s);
basic_string& operator+=(value_type c);
basic_string& operator+=(initializer_list<value_type>);
basic_string& append(const basic_string& str);
template <class T>
basic_string& append(const T& t); // C++17
basic_string& append(const basic_string& str, size_type pos, size_type n=npos); //C++14
template <class T>
basic_string& append(const T& t, size_type pos, size_type n=npos); // C++17
basic_string& append(const value_type* s, size_type n);
basic_string& append(const value_type* s);
basic_string& append(size_type n, value_type c);
template<class InputIterator>
basic_string& append(InputIterator first, InputIterator last);
basic_string& append(initializer_list<value_type>);
void push_back(value_type c);
void pop_back();
reference front();
const_reference front() const;
reference back();
const_reference back() const;
basic_string& assign(const basic_string& str);
template <class T>
basic_string& assign(const T& t); // C++17
basic_string& assign(basic_string&& str);
basic_string& assign(const basic_string& str, size_type pos, size_type n=npos); // C++14
template <class T>
basic_string& assign(const T& t, size_type pos, size_type n=npos); // C++17
basic_string& assign(const value_type* s, size_type n);
basic_string& assign(const value_type* s);
basic_string& assign(size_type n, value_type c);
template<class InputIterator>
basic_string& assign(InputIterator first, InputIterator last);
basic_string& assign(initializer_list<value_type>);
basic_string& insert(size_type pos1, const basic_string& str);
template <class T>
basic_string& insert(size_type pos1, const T& t);
basic_string& insert(size_type pos1, const basic_string& str,
size_type pos2, size_type n);
template <class T>
basic_string& insert(size_type pos1, const T& t, size_type pos2, size_type n); // C++17
basic_string& insert(size_type pos, const value_type* s, size_type n=npos); //C++14
basic_string& insert(size_type pos, const value_type* s);
basic_string& insert(size_type pos, size_type n, value_type c);
iterator insert(const_iterator p, value_type c);
iterator insert(const_iterator p, size_type n, value_type c);
template<class InputIterator>
iterator insert(const_iterator p, InputIterator first, InputIterator last);
iterator insert(const_iterator p, initializer_list<value_type>);
basic_string& erase(size_type pos = 0, size_type n = npos);
iterator erase(const_iterator position);
iterator erase(const_iterator first, const_iterator last);
basic_string& replace(size_type pos1, size_type n1, const basic_string& str);
template <class T>
basic_string& replace(size_type pos1, size_type n1, const T& t); // C++17
basic_string& replace(size_type pos1, size_type n1, const basic_string& str,
size_type pos2, size_type n2=npos); // C++14
template <class T>
basic_string& replace(size_type pos1, size_type n1, const T& t,
size_type pos2, size_type n); // C++17
basic_string& replace(size_type pos, size_type n1, const value_type* s, size_type n2);
basic_string& replace(size_type pos, size_type n1, const value_type* s);
basic_string& replace(size_type pos, size_type n1, size_type n2, value_type c);
basic_string& replace(const_iterator i1, const_iterator i2, const basic_string& str);
template <class T>
basic_string& replace(const_iterator i1, const_iterator i2, const T& t); // C++17
basic_string& replace(const_iterator i1, const_iterator i2, const value_type* s, size_type n);
basic_string& replace(const_iterator i1, const_iterator i2, const value_type* s);
basic_string& replace(const_iterator i1, const_iterator i2, size_type n, value_type c);
template<class InputIterator>
basic_string& replace(const_iterator i1, const_iterator i2, InputIterator j1, InputIterator j2);
basic_string& replace(const_iterator i1, const_iterator i2, initializer_list<value_type>);
size_type copy(value_type* s, size_type n, size_type pos = 0) const;
basic_string substr(size_type pos = 0, size_type n = npos) const;
void swap(basic_string& str)
noexcept(allocator_traits<allocator_type>::propagate_on_container_swap::value ||
allocator_traits<allocator_type>::is_always_equal::value); // C++17
const value_type* c_str() const noexcept;
const value_type* data() const noexcept;
value_type* data() noexcept; // C++17
allocator_type get_allocator() const noexcept;
size_type find(const basic_string& str, size_type pos = 0) const noexcept;
template <class T>
size_type find(const T& t, size_type pos = 0) const noexcept; // C++17, noexcept as an extension
size_type find(const value_type* s, size_type pos, size_type n) const noexcept;
size_type find(const value_type* s, size_type pos = 0) const noexcept;
size_type find(value_type c, size_type pos = 0) const noexcept;
size_type rfind(const basic_string& str, size_type pos = npos) const noexcept;
template <class T>
size_type rfind(const T& t, size_type pos = npos) const noexcept; // C++17, noexcept as an extension
size_type rfind(const value_type* s, size_type pos, size_type n) const noexcept;
size_type rfind(const value_type* s, size_type pos = npos) const noexcept;
size_type rfind(value_type c, size_type pos = npos) const noexcept;
size_type find_first_of(const basic_string& str, size_type pos = 0) const noexcept;
template <class T>
size_type find_first_of(const T& t, size_type pos = 0) const noexcept; // C++17, noexcept as an extension
size_type find_first_of(const value_type* s, size_type pos, size_type n) const noexcept;
size_type find_first_of(const value_type* s, size_type pos = 0) const noexcept;
size_type find_first_of(value_type c, size_type pos = 0) const noexcept;
size_type find_last_of(const basic_string& str, size_type pos = npos) const noexcept;
template <class T>
size_type find_last_of(const T& t, size_type pos = npos) const noexcept; // C++17, noexcept as an extension
size_type find_last_of(const value_type* s, size_type pos, size_type n) const noexcept;
size_type find_last_of(const value_type* s, size_type pos = npos) const noexcept;
size_type find_last_of(value_type c, size_type pos = npos) const noexcept;
size_type find_first_not_of(const basic_string& str, size_type pos = 0) const noexcept;
template <class T>
size_type find_first_not_of(const T& t, size_type pos = 0) const noexcept; // C++17, noexcept as an extension
size_type find_first_not_of(const value_type* s, size_type pos, size_type n) const noexcept;
size_type find_first_not_of(const value_type* s, size_type pos = 0) const noexcept;
size_type find_first_not_of(value_type c, size_type pos = 0) const noexcept;
size_type find_last_not_of(const basic_string& str, size_type pos = npos) const noexcept;
template <class T>
size_type find_last_not_of(const T& t, size_type pos = npos) const noexcept; // C++17, noexcept as an extension
size_type find_last_not_of(const value_type* s, size_type pos, size_type n) const noexcept;
size_type find_last_not_of(const value_type* s, size_type pos = npos) const noexcept;
size_type find_last_not_of(value_type c, size_type pos = npos) const noexcept;
int compare(const basic_string& str) const noexcept;
template <class T>
int compare(const T& t) const noexcept; // C++17, noexcept as an extension
int compare(size_type pos1, size_type n1, const basic_string& str) const;
template <class T>
int compare(size_type pos1, size_type n1, const T& t) const; // C++17
int compare(size_type pos1, size_type n1, const basic_string& str,
size_type pos2, size_type n2=npos) const; // C++14
template <class T>
int compare(size_type pos1, size_type n1, const T& t,
size_type pos2, size_type n2=npos) const; // C++17
int compare(const value_type* s) const noexcept;
int compare(size_type pos1, size_type n1, const value_type* s) const;
int compare(size_type pos1, size_type n1, const value_type* s, size_type n2) const;
bool starts_with(basic_string_view<charT, traits> sv) const noexcept; // C++20
bool starts_with(charT c) const noexcept; // C++20
bool starts_with(const charT* s) const; // C++20
bool ends_with(basic_string_view<charT, traits> sv) const noexcept; // C++20
bool ends_with(charT c) const noexcept; // C++20
bool ends_with(const charT* s) const; // C++20
constexpr bool contains(basic_string_view<charT, traits> sv) const noexcept; // C++2b
constexpr bool contains(charT c) const noexcept; // C++2b
constexpr bool contains(const charT* s) const; // C++2b
bool __invariants() const;
};
template<class InputIterator,
class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
basic_string(InputIterator, InputIterator, Allocator = Allocator())
-> basic_string<typename iterator_traits<InputIterator>::value_type,
char_traits<typename iterator_traits<InputIterator>::value_type>,
Allocator>; // C++17
template<class charT, class traits, class Allocator>
basic_string<charT, traits, Allocator>
operator+(const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs);
template<class charT, class traits, class Allocator>
basic_string<charT, traits, Allocator>
operator+(const charT* lhs , const basic_string<charT,traits,Allocator>&rhs);
template<class charT, class traits, class Allocator>
basic_string<charT, traits, Allocator>
operator+(charT lhs, const basic_string<charT,traits,Allocator>& rhs);
template<class charT, class traits, class Allocator>
basic_string<charT, traits, Allocator>
operator+(const basic_string<charT, traits, Allocator>& lhs, const charT* rhs);
template<class charT, class traits, class Allocator>
basic_string<charT, traits, Allocator>
operator+(const basic_string<charT, traits, Allocator>& lhs, charT rhs);
template<class charT, class traits, class Allocator>
bool operator==(const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator==(const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator==(const basic_string<charT,traits,Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator!=(const basic_string<charT,traits,Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator!=(const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator!=(const basic_string<charT, traits, Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator< (const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator< (const basic_string<charT, traits, Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator< (const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator> (const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator> (const basic_string<charT, traits, Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator> (const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator<=(const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator<=(const basic_string<charT, traits, Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator<=(const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator>=(const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator>=(const basic_string<charT, traits, Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator>=(const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
void swap(basic_string<charT, traits, Allocator>& lhs,
basic_string<charT, traits, Allocator>& rhs)
noexcept(noexcept(lhs.swap(rhs)));
template<class charT, class traits, class Allocator>
basic_istream<charT, traits>&
operator>>(basic_istream<charT, traits>& is, basic_string<charT, traits, Allocator>& str);
template<class charT, class traits, class Allocator>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const basic_string<charT, traits, Allocator>& str);
template<class charT, class traits, class Allocator>
basic_istream<charT, traits>&
getline(basic_istream<charT, traits>& is, basic_string<charT, traits, Allocator>& str,
charT delim);
template<class charT, class traits, class Allocator>
basic_istream<charT, traits>&
getline(basic_istream<charT, traits>& is, basic_string<charT, traits, Allocator>& str);
template<class charT, class traits, class Allocator, class U>
typename basic_string<charT, traits, Allocator>::size_type
erase(basic_string<charT, traits, Allocator>& c, const U& value); // C++20
template<class charT, class traits, class Allocator, class Predicate>
typename basic_string<charT, traits, Allocator>::size_type
erase_if(basic_string<charT, traits, Allocator>& c, Predicate pred); // C++20
typedef basic_string<char> string;
typedef basic_string<wchar_t> wstring;
typedef basic_string<char8_t> u8string; // C++20
typedef basic_string<char16_t> u16string;
typedef basic_string<char32_t> u32string;
int stoi (const string& str, size_t* idx = nullptr, int base = 10);
long stol (const string& str, size_t* idx = nullptr, int base = 10);
unsigned long stoul (const string& str, size_t* idx = nullptr, int base = 10);
long long stoll (const string& str, size_t* idx = nullptr, int base = 10);
unsigned long long stoull(const string& str, size_t* idx = nullptr, int base = 10);
float stof (const string& str, size_t* idx = nullptr);
double stod (const string& str, size_t* idx = nullptr);
long double stold(const string& str, size_t* idx = nullptr);
string to_string(int val);
string to_string(unsigned val);
string to_string(long val);
string to_string(unsigned long val);
string to_string(long long val);
string to_string(unsigned long long val);
string to_string(float val);
string to_string(double val);
string to_string(long double val);
int stoi (const wstring& str, size_t* idx = nullptr, int base = 10);
long stol (const wstring& str, size_t* idx = nullptr, int base = 10);
unsigned long stoul (const wstring& str, size_t* idx = nullptr, int base = 10);
long long stoll (const wstring& str, size_t* idx = nullptr, int base = 10);
unsigned long long stoull(const wstring& str, size_t* idx = nullptr, int base = 10);
float stof (const wstring& str, size_t* idx = nullptr);
double stod (const wstring& str, size_t* idx = nullptr);
long double stold(const wstring& str, size_t* idx = nullptr);
wstring to_wstring(int val);
wstring to_wstring(unsigned val);
wstring to_wstring(long val);
wstring to_wstring(unsigned long val);
wstring to_wstring(long long val);
wstring to_wstring(unsigned long long val);
wstring to_wstring(float val);
wstring to_wstring(double val);
wstring to_wstring(long double val);
template <> struct hash<string>;
template <> struct hash<u8string>; // C++20
template <> struct hash<u16string>;
template <> struct hash<u32string>;
template <> struct hash<wstring>;
basic_string<char> operator "" s( const char *str, size_t len ); // C++14
basic_string<wchar_t> operator "" s( const wchar_t *str, size_t len ); // C++14
basic_string<char8_t> operator "" s( const char8_t *str, size_t len ); // C++20
basic_string<char16_t> operator "" s( const char16_t *str, size_t len ); // C++14
basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++14
} // std
*/
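// Illustrative usage only (not part of the header): a few of the operations
// declared in the synopsis above.
//   std::string s("abc");
//   s += 'd';                                       // s == "abcd"
//   bool found = s.find("bc") != std::string::npos; // true
//   std::string t = s.substr(1, 2);                 // t == "bc"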
#include <__config>
#include <__debug>
#include <__functional_base>
#include <__iterator/wrap_iter.h>
#include <algorithm>
#include <compare>
#include <cstdio> // EOF
+#include <cstdlib>
#include <cstring>
#include <cwchar>
#include <initializer_list>
#include <iosfwd>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string_view>
#include <type_traits>
#include <utility>
#include <version>
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
# include <cstdint>
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
// fpos
template <class _StateT>
class _LIBCPP_TEMPLATE_VIS fpos
{
private:
_StateT __st_;
streamoff __off_;
public:
_LIBCPP_INLINE_VISIBILITY fpos(streamoff __off = streamoff()) : __st_(), __off_(__off) {}
_LIBCPP_INLINE_VISIBILITY operator streamoff() const {return __off_;}
_LIBCPP_INLINE_VISIBILITY _StateT state() const {return __st_;}
_LIBCPP_INLINE_VISIBILITY void state(_StateT __st) {__st_ = __st;}
_LIBCPP_INLINE_VISIBILITY fpos& operator+=(streamoff __off) {__off_ += __off; return *this;}
_LIBCPP_INLINE_VISIBILITY fpos operator+ (streamoff __off) const {fpos __t(*this); __t += __off; return __t;}
_LIBCPP_INLINE_VISIBILITY fpos& operator-=(streamoff __off) {__off_ -= __off; return *this;}
_LIBCPP_INLINE_VISIBILITY fpos operator- (streamoff __off) const {fpos __t(*this); __t -= __off; return __t;}
};
template <class _StateT>
inline _LIBCPP_INLINE_VISIBILITY
streamoff operator-(const fpos<_StateT>& __x, const fpos<_StateT>& __y)
{return streamoff(__x) - streamoff(__y);}
template <class _StateT>
inline _LIBCPP_INLINE_VISIBILITY
bool operator==(const fpos<_StateT>& __x, const fpos<_StateT>& __y)
{return streamoff(__x) == streamoff(__y);}
template <class _StateT>
inline _LIBCPP_INLINE_VISIBILITY
bool operator!=(const fpos<_StateT>& __x, const fpos<_StateT>& __y)
{return streamoff(__x) != streamoff(__y);}
// basic_string
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __x,
const basic_string<_CharT, _Traits, _Allocator>& __y);
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const _CharT* __x, const basic_string<_CharT,_Traits,_Allocator>& __y);
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(_CharT __x, const basic_string<_CharT,_Traits,_Allocator>& __y);
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __x, const _CharT* __y);
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __x, _CharT __y);
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS string operator+<char, char_traits<char>, allocator<char> >(char const*, string const&))
template <bool>
class _LIBCPP_TEMPLATE_VIS __basic_string_common
{
protected:
_LIBCPP_NORETURN void __throw_length_error() const;
_LIBCPP_NORETURN void __throw_out_of_range() const;
};
template <bool __b>
void
__basic_string_common<__b>::__throw_length_error() const
{
_VSTD::__throw_length_error("basic_string");
}
template <bool __b>
void
__basic_string_common<__b>::__throw_out_of_range() const
{
_VSTD::__throw_out_of_range("basic_string");
}
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __basic_string_common<true>)
template <class _Iter>
struct __string_is_trivial_iterator : public false_type {};
template <class _Tp>
struct __string_is_trivial_iterator<_Tp*>
: public is_arithmetic<_Tp> {};
template <class _Iter>
struct __string_is_trivial_iterator<__wrap_iter<_Iter> >
: public __string_is_trivial_iterator<_Iter> {};
template <class _CharT, class _Traits, class _Tp>
struct __can_be_converted_to_string_view : public _BoolConstant<
is_convertible<const _Tp&, basic_string_view<_CharT, _Traits> >::value &&
!is_convertible<const _Tp&, const _CharT*>::value
> {};
#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
template <class _CharT, size_t = sizeof(_CharT)>
struct __padding
{
unsigned char __xx[sizeof(_CharT)-1];
};
template <class _CharT>
struct __padding<_CharT, 1>
{
};
#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
#ifndef _LIBCPP_HAS_NO_CHAR8_T
typedef basic_string<char8_t> u8string;
#endif
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
typedef basic_string<char16_t> u16string;
typedef basic_string<char32_t> u32string;
#endif // _LIBCPP_HAS_NO_UNICODE_CHARS
template<class _CharT, class _Traits, class _Allocator>
class
_LIBCPP_TEMPLATE_VIS
#ifndef _LIBCPP_HAS_NO_CHAR8_T
_LIBCPP_PREFERRED_NAME(u8string)
#endif
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
_LIBCPP_PREFERRED_NAME(u16string)
_LIBCPP_PREFERRED_NAME(u32string)
#endif
basic_string
: private __basic_string_common<true>
{
public:
typedef basic_string __self;
typedef basic_string_view<_CharT, _Traits> __self_view;
typedef _Traits traits_type;
typedef _CharT value_type;
typedef _Allocator allocator_type;
typedef allocator_traits<allocator_type> __alloc_traits;
typedef typename __alloc_traits::size_type size_type;
typedef typename __alloc_traits::difference_type difference_type;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef typename __alloc_traits::pointer pointer;
typedef typename __alloc_traits::const_pointer const_pointer;
static_assert((!is_array<value_type>::value), "Character type of basic_string must not be an array");
static_assert(( is_standard_layout<value_type>::value), "Character type of basic_string must be standard-layout");
static_assert(( is_trivial<value_type>::value), "Character type of basic_string must be trivial");
static_assert(( is_same<_CharT, typename traits_type::char_type>::value),
"traits_type::char_type must be the same type as CharT");
static_assert(( is_same<typename allocator_type::value_type, value_type>::value),
"Allocator::value_type must be same type as value_type");
typedef __wrap_iter<pointer> iterator;
typedef __wrap_iter<const_pointer> const_iterator;
typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
private:
#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
struct __long
{
pointer __data_;
size_type __size_;
size_type __cap_;
};
#ifdef _LIBCPP_BIG_ENDIAN
static const size_type __short_mask = 0x01;
static const size_type __long_mask = 0x1ul;
#else // _LIBCPP_BIG_ENDIAN
static const size_type __short_mask = 0x80;
static const size_type __long_mask = ~(size_type(~0) >> 1);
#endif // _LIBCPP_BIG_ENDIAN
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
(sizeof(__long) - 1)/sizeof(value_type) : 2};
struct __short
{
value_type __data_[__min_cap];
struct
: __padding<value_type>
{
unsigned char __size_;
};
};
#else
struct __long
{
size_type __cap_;
size_type __size_;
pointer __data_;
};
#ifdef _LIBCPP_BIG_ENDIAN
static const size_type __short_mask = 0x80;
static const size_type __long_mask = ~(size_type(~0) >> 1);
#else // _LIBCPP_BIG_ENDIAN
static const size_type __short_mask = 0x01;
static const size_type __long_mask = 0x1ul;
#endif // _LIBCPP_BIG_ENDIAN
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
(sizeof(__long) - 1)/sizeof(value_type) : 2};
struct __short
{
union
{
unsigned char __size_;
value_type __lx;
};
value_type __data_[__min_cap];
};
#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
union __ulx{__long __lx; __short __lxx;};
enum {__n_words = sizeof(__ulx) / sizeof(size_type)};
struct __raw
{
size_type __words[__n_words];
};
struct __rep
{
union
{
__long __l;
__short __s;
__raw __r;
};
};
__compressed_pair<__rep, allocator_type> __r_;
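// Worked example (an observation about the default, non-alternate layout on
// a 64-bit little-endian target, not normative): for basic_string<char>,
// sizeof(__long) == 24, so __min_cap == 23 and a short string stores up to
// 22 characters plus the null terminator entirely inside __r_; bit 0 of
// __s.__size_ (__short_mask == 0x01) is what flags the long representation.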
public:
_LIBCPP_TEMPLATE_DATA_VIS
static const size_type npos = -1;
_LIBCPP_INLINE_VISIBILITY basic_string()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value);
_LIBCPP_INLINE_VISIBILITY explicit basic_string(const allocator_type& __a)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_copy_constructible<allocator_type>::value);
#else
_NOEXCEPT;
#endif
basic_string(const basic_string& __str);
basic_string(const basic_string& __str, const allocator_type& __a);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string(basic_string&& __str)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value);
#else
_NOEXCEPT;
#endif
_LIBCPP_INLINE_VISIBILITY
basic_string(basic_string&& __str, const allocator_type& __a);
#endif // _LIBCPP_CXX03_LANG
template <class = _EnableIf<__is_allocator<_Allocator>::value, nullptr_t> >
_LIBCPP_INLINE_VISIBILITY
basic_string(const _CharT* __s) : __r_(__default_init_tag(), __default_init_tag()) {
_LIBCPP_ASSERT(__s != nullptr, "basic_string(const char*) detected nullptr");
__init(__s, traits_type::length(__s));
# if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
# endif
}
template <class = _EnableIf<__is_allocator<_Allocator>::value, nullptr_t> >
_LIBCPP_INLINE_VISIBILITY
basic_string(const _CharT* __s, const _Allocator& __a);
#if _LIBCPP_STD_VER > 20
basic_string(nullptr_t) = delete;
#endif
_LIBCPP_INLINE_VISIBILITY
basic_string(const _CharT* __s, size_type __n);
_LIBCPP_INLINE_VISIBILITY
basic_string(const _CharT* __s, size_type __n, const _Allocator& __a);
_LIBCPP_INLINE_VISIBILITY
basic_string(size_type __n, _CharT __c);
template <class = _EnableIf<__is_allocator<_Allocator>::value, nullptr_t> >
_LIBCPP_INLINE_VISIBILITY
basic_string(size_type __n, _CharT __c, const _Allocator& __a);
basic_string(const basic_string& __str, size_type __pos, size_type __n,
const _Allocator& __a = _Allocator());
_LIBCPP_INLINE_VISIBILITY
basic_string(const basic_string& __str, size_type __pos,
const _Allocator& __a = _Allocator());
template<class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
basic_string(const _Tp& __t, size_type __pos, size_type __n,
const allocator_type& __a = allocator_type());
template<class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
!__is_same_uncvref<_Tp, basic_string>::value> >
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
explicit basic_string(const _Tp& __t);
template<class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
explicit basic_string(const _Tp& __t, const allocator_type& __a);
template<class _InputIterator, class = _EnableIf<__is_cpp17_input_iterator<_InputIterator>::value> >
_LIBCPP_INLINE_VISIBILITY
basic_string(_InputIterator __first, _InputIterator __last);
template<class _InputIterator, class = _EnableIf<__is_cpp17_input_iterator<_InputIterator>::value> >
_LIBCPP_INLINE_VISIBILITY
basic_string(_InputIterator __first, _InputIterator __last, const allocator_type& __a);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string(initializer_list<_CharT> __il);
_LIBCPP_INLINE_VISIBILITY
basic_string(initializer_list<_CharT> __il, const _Allocator& __a);
#endif // _LIBCPP_CXX03_LANG
inline ~basic_string();
_LIBCPP_INLINE_VISIBILITY
operator __self_view() const _NOEXCEPT { return __self_view(data(), size()); }
basic_string& operator=(const basic_string& __str);
template <class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
basic_string& operator=(const _Tp& __t)
{__self_view __sv = __t; return assign(__sv);}
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& operator=(basic_string&& __str)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value));
_LIBCPP_INLINE_VISIBILITY
basic_string& operator=(initializer_list<value_type> __il) {return assign(__il.begin(), __il.size());}
#endif
_LIBCPP_INLINE_VISIBILITY basic_string& operator=(const value_type* __s) {return assign(__s);}
#if _LIBCPP_STD_VER > 20
basic_string& operator=(nullptr_t) = delete;
#endif
basic_string& operator=(value_type __c);
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_INLINE_VISIBILITY
iterator begin() _NOEXCEPT
{return iterator(this, __get_pointer());}
_LIBCPP_INLINE_VISIBILITY
const_iterator begin() const _NOEXCEPT
{return const_iterator(this, __get_pointer());}
_LIBCPP_INLINE_VISIBILITY
iterator end() _NOEXCEPT
{return iterator(this, __get_pointer() + size());}
_LIBCPP_INLINE_VISIBILITY
const_iterator end() const _NOEXCEPT
{return const_iterator(this, __get_pointer() + size());}
#else
_LIBCPP_INLINE_VISIBILITY
iterator begin() _NOEXCEPT
{return iterator(__get_pointer());}
_LIBCPP_INLINE_VISIBILITY
const_iterator begin() const _NOEXCEPT
{return const_iterator(__get_pointer());}
_LIBCPP_INLINE_VISIBILITY
iterator end() _NOEXCEPT
{return iterator(__get_pointer() + size());}
_LIBCPP_INLINE_VISIBILITY
const_iterator end() const _NOEXCEPT
{return const_iterator(__get_pointer() + size());}
#endif // _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rbegin() _NOEXCEPT
{return reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rbegin() const _NOEXCEPT
{return const_reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rend() _NOEXCEPT
{return reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rend() const _NOEXCEPT
{return const_reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_iterator cbegin() const _NOEXCEPT
{return begin();}
_LIBCPP_INLINE_VISIBILITY
const_iterator cend() const _NOEXCEPT
{return end();}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crbegin() const _NOEXCEPT
{return rbegin();}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crend() const _NOEXCEPT
{return rend();}
_LIBCPP_INLINE_VISIBILITY size_type size() const _NOEXCEPT
{return __is_long() ? __get_long_size() : __get_short_size();}
_LIBCPP_INLINE_VISIBILITY size_type length() const _NOEXCEPT {return size();}
_LIBCPP_INLINE_VISIBILITY size_type max_size() const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY size_type capacity() const _NOEXCEPT
{return (__is_long() ? __get_long_cap()
: static_cast<size_type>(__min_cap)) - 1;}
void resize(size_type __n, value_type __c);
_LIBCPP_INLINE_VISIBILITY void resize(size_type __n) {resize(__n, value_type());}
void reserve(size_type __requested_capacity);
_LIBCPP_INLINE_VISIBILITY void __resize_default_init(size_type __n);
_LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_INLINE_VISIBILITY
void reserve() _NOEXCEPT {shrink_to_fit();}
_LIBCPP_INLINE_VISIBILITY
void shrink_to_fit() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
void clear() _NOEXCEPT;
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY
bool empty() const _NOEXCEPT {return size() == 0;}
_LIBCPP_INLINE_VISIBILITY const_reference operator[](size_type __pos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY reference operator[](size_type __pos) _NOEXCEPT;
const_reference at(size_type __n) const;
reference at(size_type __n);
_LIBCPP_INLINE_VISIBILITY basic_string& operator+=(const basic_string& __str) {return append(__str);}
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string >::value,
basic_string&
>
operator+=(const _Tp& __t) {__self_view __sv = __t; return append(__sv);}
_LIBCPP_INLINE_VISIBILITY basic_string& operator+=(const value_type* __s) {return append(__s);}
_LIBCPP_INLINE_VISIBILITY basic_string& operator+=(value_type __c) {push_back(__c); return *this;}
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY basic_string& operator+=(initializer_list<value_type> __il) {return append(__il);}
#endif // _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& append(const basic_string& __str);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
>
append(const _Tp& __t) { __self_view __sv = __t; return append(__sv.data(), __sv.size()); }
basic_string& append(const basic_string& __str, size_type __pos, size_type __n=npos);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
>
append(const _Tp& __t, size_type __pos, size_type __n=npos);
basic_string& append(const value_type* __s, size_type __n);
basic_string& append(const value_type* __s);
basic_string& append(size_type __n, value_type __c);
_LIBCPP_INLINE_VISIBILITY
void __append_default_init(size_type __n);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
basic_string&
>
_LIBCPP_INLINE_VISIBILITY
append(_InputIterator __first, _InputIterator __last) {
const basic_string __temp(__first, __last, __alloc());
append(__temp.data(), __temp.size());
return *this;
}
template<class _ForwardIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string&
>
_LIBCPP_INLINE_VISIBILITY
append(_ForwardIterator __first, _ForwardIterator __last);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& append(initializer_list<value_type> __il) {return append(__il.begin(), __il.size());}
#endif // _LIBCPP_CXX03_LANG
void push_back(value_type __c);
_LIBCPP_INLINE_VISIBILITY
void pop_back();
_LIBCPP_INLINE_VISIBILITY reference front() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY const_reference front() const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY reference back() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY const_reference back() const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
>
assign(const _Tp & __t) { __self_view __sv = __t; return assign(__sv.data(), __sv.size()); }
_LIBCPP_INLINE_VISIBILITY
basic_string& assign(const basic_string& __str) { return *this = __str; }
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& assign(basic_string&& __str)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value))
{*this = _VSTD::move(__str); return *this;}
#endif
basic_string& assign(const basic_string& __str, size_type __pos, size_type __n=npos);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
>
assign(const _Tp & __t, size_type __pos, size_type __n=npos);
basic_string& assign(const value_type* __s, size_type __n);
basic_string& assign(const value_type* __s);
basic_string& assign(size_type __n, value_type __c);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
basic_string&
>
assign(_InputIterator __first, _InputIterator __last);
template<class _ForwardIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string&
>
assign(_ForwardIterator __first, _ForwardIterator __last);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& assign(initializer_list<value_type> __il) {return assign(__il.begin(), __il.size());}
#endif // _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& insert(size_type __pos1, const basic_string& __str);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
>
insert(size_type __pos1, const _Tp& __t)
{ __self_view __sv = __t; return insert(__pos1, __sv.data(), __sv.size()); }
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
>
insert(size_type __pos1, const _Tp& __t, size_type __pos2, size_type __n=npos);
basic_string& insert(size_type __pos1, const basic_string& __str, size_type __pos2, size_type __n=npos);
basic_string& insert(size_type __pos, const value_type* __s, size_type __n);
basic_string& insert(size_type __pos, const value_type* __s);
basic_string& insert(size_type __pos, size_type __n, value_type __c);
iterator insert(const_iterator __pos, value_type __c);
_LIBCPP_INLINE_VISIBILITY
iterator insert(const_iterator __pos, size_type __n, value_type __c);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
iterator
>
insert(const_iterator __pos, _InputIterator __first, _InputIterator __last);
template<class _ForwardIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
iterator
>
insert(const_iterator __pos, _ForwardIterator __first, _ForwardIterator __last);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
iterator insert(const_iterator __pos, initializer_list<value_type> __il)
{return insert(__pos, __il.begin(), __il.end());}
#endif // _LIBCPP_CXX03_LANG
basic_string& erase(size_type __pos = 0, size_type __n = npos);
_LIBCPP_INLINE_VISIBILITY
iterator erase(const_iterator __pos);
_LIBCPP_INLINE_VISIBILITY
iterator erase(const_iterator __first, const_iterator __last);
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(size_type __pos1, size_type __n1, const basic_string& __str);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
>
replace(size_type __pos1, size_type __n1, const _Tp& __t) { __self_view __sv = __t; return replace(__pos1, __n1, __sv.data(), __sv.size()); }
basic_string& replace(size_type __pos1, size_type __n1, const basic_string& __str, size_type __pos2, size_type __n2=npos);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
>
replace(size_type __pos1, size_type __n1, const _Tp& __t, size_type __pos2, size_type __n2=npos);
basic_string& replace(size_type __pos, size_type __n1, const value_type* __s, size_type __n2);
basic_string& replace(size_type __pos, size_type __n1, const value_type* __s);
basic_string& replace(size_type __pos, size_type __n1, size_type __n2, value_type __c);
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(const_iterator __i1, const_iterator __i2, const basic_string& __str);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
>
replace(const_iterator __i1, const_iterator __i2, const _Tp& __t) { __self_view __sv = __t; return replace(__i1 - begin(), __i2 - __i1, __sv); }
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n);
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(const_iterator __i1, const_iterator __i2, const value_type* __s);
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_cpp17_input_iterator<_InputIterator>::value,
basic_string&
>
replace(const_iterator __i1, const_iterator __i2, _InputIterator __j1, _InputIterator __j2);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(const_iterator __i1, const_iterator __i2, initializer_list<value_type> __il)
{return replace(__i1, __i2, __il.begin(), __il.end());}
#endif // _LIBCPP_CXX03_LANG
size_type copy(value_type* __s, size_type __n, size_type __pos = 0) const;
_LIBCPP_INLINE_VISIBILITY
basic_string substr(size_type __pos = 0, size_type __n = npos) const;
_LIBCPP_INLINE_VISIBILITY
void swap(basic_string& __str)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT;
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value);
#endif
_LIBCPP_INLINE_VISIBILITY
const value_type* c_str() const _NOEXCEPT {return data();}
_LIBCPP_INLINE_VISIBILITY
const value_type* data() const _NOEXCEPT {return _VSTD::__to_address(__get_pointer());}
#if _LIBCPP_STD_VER > 14 || defined(_LIBCPP_BUILDING_LIBRARY)
_LIBCPP_INLINE_VISIBILITY
value_type* data() _NOEXCEPT {return _VSTD::__to_address(__get_pointer());}
#endif
_LIBCPP_INLINE_VISIBILITY
allocator_type get_allocator() const _NOEXCEPT {return __alloc();}
_LIBCPP_INLINE_VISIBILITY
size_type find(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
find(const _Tp& __t, size_type __pos = 0) const _NOEXCEPT;
size_type find(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find(const value_type* __s, size_type __pos = 0) const _NOEXCEPT;
size_type find(value_type __c, size_type __pos = 0) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type rfind(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
rfind(const _Tp& __t, size_type __pos = npos) const _NOEXCEPT;
size_type rfind(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type rfind(const value_type* __s, size_type __pos = npos) const _NOEXCEPT;
size_type rfind(value_type __c, size_type __pos = npos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_of(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
find_first_of(const _Tp& __t, size_type __pos = 0) const _NOEXCEPT;
size_type find_first_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_of(const value_type* __s, size_type __pos = 0) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_of(value_type __c, size_type __pos = 0) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_of(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
find_last_of(const _Tp& __t, size_type __pos = npos) const _NOEXCEPT;
size_type find_last_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_of(const value_type* __s, size_type __pos = npos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_of(value_type __c, size_type __pos = npos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_not_of(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
find_first_not_of(const _Tp &__t, size_type __pos = 0) const _NOEXCEPT;
size_type find_first_not_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_not_of(const value_type* __s, size_type __pos = 0) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_not_of(value_type __c, size_type __pos = 0) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_not_of(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
find_last_not_of(const _Tp& __t, size_type __pos = npos) const _NOEXCEPT;
size_type find_last_not_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_not_of(const value_type* __s, size_type __pos = npos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_not_of(value_type __c, size_type __pos = npos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
int compare(const basic_string& __str) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
>
compare(const _Tp &__t) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
>
compare(size_type __pos1, size_type __n1, const _Tp& __t) const;
_LIBCPP_INLINE_VISIBILITY
int compare(size_type __pos1, size_type __n1, const basic_string& __str) const;
int compare(size_type __pos1, size_type __n1, const basic_string& __str, size_type __pos2, size_type __n2=npos) const;
template <class _Tp>
inline _LIBCPP_INLINE_VISIBILITY
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value,
int
>
compare(size_type __pos1, size_type __n1, const _Tp& __t, size_type __pos2, size_type __n2=npos) const;
int compare(const value_type* __s) const _NOEXCEPT;
int compare(size_type __pos1, size_type __n1, const value_type* __s) const;
int compare(size_type __pos1, size_type __n1, const value_type* __s, size_type __n2) const;
#if _LIBCPP_STD_VER > 17
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool starts_with(__self_view __sv) const _NOEXCEPT
{ return __self_view(data(), size()).starts_with(__sv); }
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool starts_with(value_type __c) const _NOEXCEPT
{ return !empty() && _Traits::eq(front(), __c); }
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool starts_with(const value_type* __s) const _NOEXCEPT
{ return starts_with(__self_view(__s)); }
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool ends_with(__self_view __sv) const _NOEXCEPT
{ return __self_view(data(), size()).ends_with( __sv); }
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool ends_with(value_type __c) const _NOEXCEPT
{ return !empty() && _Traits::eq(back(), __c); }
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool ends_with(const value_type* __s) const _NOEXCEPT
{ return ends_with(__self_view(__s)); }
#endif
#if _LIBCPP_STD_VER > 20
constexpr _LIBCPP_INLINE_VISIBILITY
bool contains(__self_view __sv) const noexcept
{ return __self_view(data(), size()).contains(__sv); }
constexpr _LIBCPP_INLINE_VISIBILITY
bool contains(value_type __c) const noexcept
{ return __self_view(data(), size()).contains(__c); }
constexpr _LIBCPP_INLINE_VISIBILITY
bool contains(const value_type* __s) const
{ return __self_view(data(), size()).contains(__s); }
#endif
_LIBCPP_INLINE_VISIBILITY bool __invariants() const;
_LIBCPP_INLINE_VISIBILITY void __clear_and_shrink() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY void __shrink_or_extend(size_type __target_capacity);
_LIBCPP_INLINE_VISIBILITY
bool __is_long() const _NOEXCEPT
{return bool(__r_.first().__s.__size_ & __short_mask);}
#if _LIBCPP_DEBUG_LEVEL == 2
bool __dereferenceable(const const_iterator* __i) const;
bool __decrementable(const const_iterator* __i) const;
bool __addable(const const_iterator* __i, ptrdiff_t __n) const;
bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const;
#endif // _LIBCPP_DEBUG_LEVEL == 2
private:
_LIBCPP_INLINE_VISIBILITY
allocator_type& __alloc() _NOEXCEPT
{return __r_.second();}
_LIBCPP_INLINE_VISIBILITY
const allocator_type& __alloc() const _NOEXCEPT
{return __r_.second();}
#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
_LIBCPP_INLINE_VISIBILITY
void __set_short_size(size_type __s) _NOEXCEPT
# ifdef _LIBCPP_BIG_ENDIAN
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
# else
{__r_.first().__s.__size_ = (unsigned char)(__s);}
# endif
_LIBCPP_INLINE_VISIBILITY
size_type __get_short_size() const _NOEXCEPT
# ifdef _LIBCPP_BIG_ENDIAN
{return __r_.first().__s.__size_ >> 1;}
# else
{return __r_.first().__s.__size_;}
# endif
#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
_LIBCPP_INLINE_VISIBILITY
void __set_short_size(size_type __s) _NOEXCEPT
# ifdef _LIBCPP_BIG_ENDIAN
{__r_.first().__s.__size_ = (unsigned char)(__s);}
# else
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
# endif
_LIBCPP_INLINE_VISIBILITY
size_type __get_short_size() const _NOEXCEPT
# ifdef _LIBCPP_BIG_ENDIAN
{return __r_.first().__s.__size_;}
# else
{return __r_.first().__s.__size_ >> 1;}
# endif
#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
_LIBCPP_INLINE_VISIBILITY
void __set_long_size(size_type __s) _NOEXCEPT
{__r_.first().__l.__size_ = __s;}
_LIBCPP_INLINE_VISIBILITY
size_type __get_long_size() const _NOEXCEPT
{return __r_.first().__l.__size_;}
_LIBCPP_INLINE_VISIBILITY
void __set_size(size_type __s) _NOEXCEPT
{if (__is_long()) __set_long_size(__s); else __set_short_size(__s);}
_LIBCPP_INLINE_VISIBILITY
void __set_long_cap(size_type __s) _NOEXCEPT
{__r_.first().__l.__cap_ = __long_mask | __s;}
_LIBCPP_INLINE_VISIBILITY
size_type __get_long_cap() const _NOEXCEPT
{return __r_.first().__l.__cap_ & size_type(~__long_mask);}
_LIBCPP_INLINE_VISIBILITY
void __set_long_pointer(pointer __p) _NOEXCEPT
{__r_.first().__l.__data_ = __p;}
_LIBCPP_INLINE_VISIBILITY
pointer __get_long_pointer() _NOEXCEPT
{return __r_.first().__l.__data_;}
_LIBCPP_INLINE_VISIBILITY
const_pointer __get_long_pointer() const _NOEXCEPT
{return __r_.first().__l.__data_;}
_LIBCPP_INLINE_VISIBILITY
pointer __get_short_pointer() _NOEXCEPT
{return pointer_traits<pointer>::pointer_to(__r_.first().__s.__data_[0]);}
_LIBCPP_INLINE_VISIBILITY
const_pointer __get_short_pointer() const _NOEXCEPT
{return pointer_traits<const_pointer>::pointer_to(__r_.first().__s.__data_[0]);}
_LIBCPP_INLINE_VISIBILITY
pointer __get_pointer() _NOEXCEPT
{return __is_long() ? __get_long_pointer() : __get_short_pointer();}
_LIBCPP_INLINE_VISIBILITY
const_pointer __get_pointer() const _NOEXCEPT
{return __is_long() ? __get_long_pointer() : __get_short_pointer();}
_LIBCPP_INLINE_VISIBILITY
void __zero() _NOEXCEPT
{
size_type (&__a)[__n_words] = __r_.first().__r.__words;
for (unsigned __i = 0; __i < __n_words; ++__i)
__a[__i] = 0;
}
template <size_type __a> static
_LIBCPP_INLINE_VISIBILITY
size_type __align_it(size_type __s) _NOEXCEPT
{return (__s + (__a-1)) & ~(__a-1);}
enum {__alignment = 16};
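// __recommend() maps a requested size onto the capacity that will actually be
// used: requests below __min_cap stay in the short (SSO) buffer, and longer
// requests are rounded up with __align_it so that capacity + 1 (including the
// null terminator) fills a whole multiple of __alignment bytes when
// sizeof(value_type) < __alignment. The result deliberately skips __min_cap
// itself, presumably so a long capacity can never be confused with the
// short-buffer capacity.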
static _LIBCPP_INLINE_VISIBILITY
size_type __recommend(size_type __s) _NOEXCEPT
{
if (__s < __min_cap) return static_cast<size_type>(__min_cap) - 1;
size_type __guess = __align_it<sizeof(value_type) < __alignment ?
__alignment/sizeof(value_type) : 1 > (__s+1) - 1;
if (__guess == __min_cap) ++__guess;
return __guess;
}
inline
void __init(const value_type* __s, size_type __sz, size_type __reserve);
inline
void __init(const value_type* __s, size_type __sz);
inline
void __init(size_type __n, value_type __c);
// Slow path for the (inlined) copy constructor for 'long' strings.
// Always externally instantiated and not inlined.
// Requires that __s is zero terminated.
// The main reason this function exists is that, for the unstable ABI, we
// want to allow inlining of the copy constructor. However, we don't want
// to call the __init() functions, as those are marked inline, which may
// result in over-aggressive inlining by the compiler; our aim is
// to inline only the fast path code directly in the ctor.
void __init_copy_ctor_external(const value_type* __s, size_type __sz);
template <class _InputIterator>
inline
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value
>
__init(_InputIterator __first, _InputIterator __last);
template <class _ForwardIterator>
inline
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value
>
__init(_ForwardIterator __first, _ForwardIterator __last);
void __grow_by(size_type __old_cap, size_type __delta_cap, size_type __old_sz,
size_type __n_copy, size_type __n_del, size_type __n_add = 0);
void __grow_by_and_replace(size_type __old_cap, size_type __delta_cap, size_type __old_sz,
size_type __n_copy, size_type __n_del,
size_type __n_add, const value_type* __p_new_stuff);
// __assign_no_alias is invoked for assignment operations where we
// have proof that the input does not alias the current instance.
// For example, operator=(basic_string) performs a 'self' check.
template <bool __is_short>
basic_string& __assign_no_alias(const value_type* __s, size_type __n);
_LIBCPP_INLINE_VISIBILITY
void __erase_to_end(size_type __pos);
// __erase_external_with_move is invoked for erase() invocations where
// `__n != npos`, likely requiring memory moves on the string data.
void __erase_external_with_move(size_type __pos, size_type __n);
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const basic_string& __str)
{__copy_assign_alloc(__str, integral_constant<bool,
__alloc_traits::propagate_on_container_copy_assignment::value>());}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const basic_string& __str, true_type)
{
if (__alloc() == __str.__alloc())
__alloc() = __str.__alloc();
else
{
if (!__str.__is_long())
{
__clear_and_shrink();
__alloc() = __str.__alloc();
}
else
{
allocator_type __a = __str.__alloc();
pointer __p = __alloc_traits::allocate(__a, __str.__get_long_cap());
__clear_and_shrink();
__alloc() = _VSTD::move(__a);
__set_long_pointer(__p);
__set_long_cap(__str.__get_long_cap());
__set_long_size(__str.size());
}
}
}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const basic_string&, false_type) _NOEXCEPT
{}
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
void __move_assign(basic_string& __str, false_type)
_NOEXCEPT_(__alloc_traits::is_always_equal::value);
_LIBCPP_INLINE_VISIBILITY
void __move_assign(basic_string& __str, true_type)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT;
#else
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value);
#endif
#endif
_LIBCPP_INLINE_VISIBILITY
void
__move_assign_alloc(basic_string& __str)
_NOEXCEPT_(
!__alloc_traits::propagate_on_container_move_assignment::value ||
is_nothrow_move_assignable<allocator_type>::value)
{__move_assign_alloc(__str, integral_constant<bool,
__alloc_traits::propagate_on_container_move_assignment::value>());}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(basic_string& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__alloc() = _VSTD::move(__c.__alloc());
}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(basic_string&, false_type)
_NOEXCEPT
{}
basic_string& __assign_external(const value_type* __s);
basic_string& __assign_external(const value_type* __s, size_type __n);
// Assigns the value in __s, which is guaranteed to have length __n < __min_cap.
inline basic_string& __assign_short(const value_type* __s, size_type __n) {
pointer __p = __is_long()
? (__set_long_size(__n), __get_long_pointer())
: (__set_short_size(__n), __get_short_pointer());
traits_type::move(_VSTD::__to_address(__p), __s, __n);
traits_type::assign(__p[__n], value_type());
return *this;
}
_LIBCPP_INLINE_VISIBILITY void __invalidate_all_iterators();
_LIBCPP_INLINE_VISIBILITY void __invalidate_iterators_past(size_type);
template<class _Tp>
_LIBCPP_INLINE_VISIBILITY
bool __addr_in_range(_Tp&& __t) const {
const volatile void *__p = _VSTD::addressof(__t);
return data() <= __p && __p <= data() + size();
}
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_length_error() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __basic_string_common<true>::__throw_length_error();
+#else
+ _VSTD::abort();
+#endif
+ }
+
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_out_of_range() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __basic_string_common<true>::__throw_out_of_range();
+#else
+ _VSTD::abort();
+#endif
+ }
+
friend basic_string operator+<>(const basic_string&, const basic_string&);
friend basic_string operator+<>(const value_type*, const basic_string&);
friend basic_string operator+<>(value_type, const basic_string&);
friend basic_string operator+<>(const basic_string&, const value_type*);
friend basic_string operator+<>(const basic_string&, value_type);
};
// These declarations must appear before any functions are implicitly used
// so that they have the correct visibility specifier.
#ifdef _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
_LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, char)
_LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, wchar_t)
#else
_LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, char)
_LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, wchar_t)
#endif
#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
template<class _InputIterator,
class _CharT = __iter_value_type<_InputIterator>,
class _Allocator = allocator<_CharT>,
class = _EnableIf<__is_cpp17_input_iterator<_InputIterator>::value>,
class = _EnableIf<__is_allocator<_Allocator>::value>
>
basic_string(_InputIterator, _InputIterator, _Allocator = _Allocator())
-> basic_string<_CharT, char_traits<_CharT>, _Allocator>;
template<class _CharT,
class _Traits,
class _Allocator = allocator<_CharT>,
class = _EnableIf<__is_allocator<_Allocator>::value>
>
explicit basic_string(basic_string_view<_CharT, _Traits>, const _Allocator& = _Allocator())
-> basic_string<_CharT, _Traits, _Allocator>;
template<class _CharT,
class _Traits,
class _Allocator = allocator<_CharT>,
class = _EnableIf<__is_allocator<_Allocator>::value>,
class _Sz = typename allocator_traits<_Allocator>::size_type
>
basic_string(basic_string_view<_CharT, _Traits>, _Sz, _Sz, const _Allocator& = _Allocator())
-> basic_string<_CharT, _Traits, _Allocator>;
#endif
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__invalidate_all_iterators()
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__invalidate_all(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__invalidate_iterators_past(size_type __pos)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__c_node* __c = __get_db()->__find_c_and_lock(this);
if (__c)
{
const_pointer __new_last = __get_pointer() + __pos;
for (__i_node** __p = __c->end_; __p != __c->beg_; )
{
--__p;
const_iterator* __i = static_cast<const_iterator*>((*__p)->__i_);
if (__i->base() > __new_last)
{
(*__p)->__c_ = nullptr;
if (--__c->end_ != __p)
_VSTD::memmove(__p, __p+1, (__c->end_ - __p)*sizeof(__i_node*));
}
}
__get_db()->unlock();
}
#else
(void)__pos;
#endif // _LIBCPP_DEBUG_LEVEL == 2
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
: __r_(__default_init_tag(), __default_init_tag())
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
__zero();
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(const allocator_type& __a)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_copy_constructible<allocator_type>::value)
#else
_NOEXCEPT
#endif
: __r_(__default_init_tag(), __a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
__zero();
}
template <class _CharT, class _Traits, class _Allocator>
void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s,
size_type __sz,
size_type __reserve)
{
if (__reserve > max_size())
this->__throw_length_error();
pointer __p;
if (__reserve < __min_cap)
{
__set_short_size(__sz);
__p = __get_short_pointer();
}
else
{
size_type __cap = __recommend(__reserve);
__p = __alloc_traits::allocate(__alloc(), __cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
__set_long_size(__sz);
}
traits_type::copy(_VSTD::__to_address(__p), __s, __sz);
traits_type::assign(__p[__sz], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz)
{
if (__sz > max_size())
this->__throw_length_error();
pointer __p;
if (__sz < __min_cap)
{
__set_short_size(__sz);
__p = __get_short_pointer();
}
else
{
size_type __cap = __recommend(__sz);
__p = __alloc_traits::allocate(__alloc(), __cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
__set_long_size(__sz);
}
traits_type::copy(_VSTD::__to_address(__p), __s, __sz);
traits_type::assign(__p[__sz], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
template <class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
_LIBCPP_ASSERT(__s != nullptr, "basic_string(const char*, allocator) detected nullptr");
__init(__s, traits_type::length(__s));
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, size_type __n)
: __r_(__default_init_tag(), __default_init_tag())
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "basic_string(const char*, n) detected nullptr");
__init(__s, __n);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, size_type __n, const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "basic_string(const char*, n, allocator) detected nullptr");
__init(__s, __n);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __str)
: __r_(__default_init_tag(), __alloc_traits::select_on_container_copy_construction(__str.__alloc()))
{
if (!__str.__is_long())
__r_.first().__r = __str.__r_.first().__r;
else
__init_copy_ctor_external(_VSTD::__to_address(__str.__get_long_pointer()),
__str.__get_long_size());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>::basic_string(
const basic_string& __str, const allocator_type& __a)
: __r_(__default_init_tag(), __a)
{
if (!__str.__is_long())
__r_.first().__r = __str.__r_.first().__r;
else
__init_copy_ctor_external(_VSTD::__to_address(__str.__get_long_pointer()),
__str.__get_long_size());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
void basic_string<_CharT, _Traits, _Allocator>::__init_copy_ctor_external(
const value_type* __s, size_type __sz) {
pointer __p;
if (__sz < __min_cap) {
__p = __get_short_pointer();
__set_short_size(__sz);
} else {
if (__sz > max_size())
this->__throw_length_error();
size_t __cap = __recommend(__sz);
__p = __alloc_traits::allocate(__alloc(), __cap + 1);
__set_long_pointer(__p);
__set_long_cap(__cap + 1);
__set_long_size(__sz);
}
traits_type::copy(_VSTD::__to_address(__p), __s, __sz + 1);
}
#ifndef _LIBCPP_CXX03_LANG
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(basic_string&& __str)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value)
#else
_NOEXCEPT
#endif
: __r_(_VSTD::move(__str.__r_))
{
__str.__zero();
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
if (__is_long())
__get_db()->swap(this, &__str);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(basic_string&& __str, const allocator_type& __a)
: __r_(__default_init_tag(), __a)
{
if (__str.__is_long() && __a != __str.__alloc()) // copy, not move
__init(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size());
else
{
__r_.first().__r = __str.__r_.first().__r;
__str.__zero();
}
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
if (__is_long())
__get_db()->swap(this, &__str);
#endif
}
#endif // _LIBCPP_CXX03_LANG
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__init(size_type __n, value_type __c)
{
if (__n > max_size())
this->__throw_length_error();
pointer __p;
if (__n < __min_cap)
{
__set_short_size(__n);
__p = __get_short_pointer();
}
else
{
size_type __cap = __recommend(__n);
__p = __alloc_traits::allocate(__alloc(), __cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
__set_long_size(__n);
}
traits_type::assign(_VSTD::__to_address(__p), __n, __c);
traits_type::assign(__p[__n], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(size_type __n, _CharT __c)
: __r_(__default_init_tag(), __default_init_tag())
{
__init(__n, __c);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template <class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(size_type __n, _CharT __c, const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
__init(__n, __c);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __str,
size_type __pos, size_type __n,
const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
size_type __str_sz = __str.size();
if (__pos > __str_sz)
this->__throw_out_of_range();
__init(__str.data() + __pos, _VSTD::min(__n, __str_sz - __pos));
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __str, size_type __pos,
const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
size_type __str_sz = __str.size();
if (__pos > __str_sz)
this->__throw_out_of_range();
__init(__str.data() + __pos, __str_sz - __pos);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp, class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(
const _Tp& __t, size_type __pos, size_type __n, const allocator_type& __a)
: __r_(__default_init_tag(), __a)
{
__self_view __sv0 = __t;
__self_view __sv = __sv0.substr(__pos, __n);
__init(__sv.data(), __sv.size());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp, class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _Tp & __t)
: __r_(__default_init_tag(), __default_init_tag())
{
__self_view __sv = __t;
__init(__sv.data(), __sv.size());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp, class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _Tp & __t, const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
__self_view __sv = __t;
__init(__sv.data(), __sv.size());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template <class _InputIterator>
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value
>
basic_string<_CharT, _Traits, _Allocator>::__init(_InputIterator __first, _InputIterator __last)
{
__zero();
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
for (; __first != __last; ++__first)
push_back(*__first);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
if (__is_long())
__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap());
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
template <class _CharT, class _Traits, class _Allocator>
template <class _ForwardIterator>
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value
>
basic_string<_CharT, _Traits, _Allocator>::__init(_ForwardIterator __first, _ForwardIterator __last)
{
size_type __sz = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__sz > max_size())
this->__throw_length_error();
pointer __p;
if (__sz < __min_cap)
{
__set_short_size(__sz);
__p = __get_short_pointer();
}
else
{
size_type __cap = __recommend(__sz);
__p = __alloc_traits::allocate(__alloc(), __cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
__set_long_size(__sz);
}
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
for (; __first != __last; ++__first, (void) ++__p)
traits_type::assign(*__p, *__first);
traits_type::assign(*__p, value_type());
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
if (__is_long())
__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap());
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator, class>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(_InputIterator __first, _InputIterator __last)
: __r_(__default_init_tag(), __default_init_tag())
{
__init(__first, __last);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator, class>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(_InputIterator __first, _InputIterator __last,
const allocator_type& __a)
: __r_(__default_init_tag(), __a)
{
__init(__first, __last);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
#ifndef _LIBCPP_CXX03_LANG
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(
initializer_list<_CharT> __il)
: __r_(__default_init_tag(), __default_init_tag())
{
__init(__il.begin(), __il.end());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(
initializer_list<_CharT> __il, const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
__init(__il.begin(), __il.end());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
#endif // _LIBCPP_CXX03_LANG
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>::~basic_string()
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__erase_c(this);
#endif
if (__is_long())
__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap());
}
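// __grow_by_and_replace() is the common slow path for append/insert/replace
// when the current capacity is insufficient. It allocates a larger buffer
// (roughly max(old + delta, 2 * old), clamped near max_size()), copies the
// first __n_copy characters of the old string, writes __n_add characters from
// __p_new_stuff, skips __n_del old characters, copies the remaining tail,
// frees the old long buffer, and switches *this to the long representation.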
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__grow_by_and_replace
(size_type __old_cap, size_type __delta_cap, size_type __old_sz,
size_type __n_copy, size_type __n_del, size_type __n_add, const value_type* __p_new_stuff)
{
size_type __ms = max_size();
if (__delta_cap > __ms - __old_cap - 1)
this->__throw_length_error();
pointer __old_p = __get_pointer();
size_type __cap = __old_cap < __ms / 2 - __alignment ?
__recommend(_VSTD::max(__old_cap + __delta_cap, 2 * __old_cap)) :
__ms - 1;
pointer __p = __alloc_traits::allocate(__alloc(), __cap+1);
__invalidate_all_iterators();
if (__n_copy != 0)
traits_type::copy(_VSTD::__to_address(__p),
_VSTD::__to_address(__old_p), __n_copy);
if (__n_add != 0)
traits_type::copy(_VSTD::__to_address(__p) + __n_copy, __p_new_stuff, __n_add);
size_type __sec_cp_sz = __old_sz - __n_del - __n_copy;
if (__sec_cp_sz != 0)
traits_type::copy(_VSTD::__to_address(__p) + __n_copy + __n_add,
_VSTD::__to_address(__old_p) + __n_copy + __n_del, __sec_cp_sz);
if (__old_cap+1 != __min_cap)
__alloc_traits::deallocate(__alloc(), __old_p, __old_cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
__old_sz = __n_copy + __n_add + __sec_cp_sz;
__set_long_size(__old_sz);
traits_type::assign(__p[__old_sz], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__grow_by(size_type __old_cap, size_type __delta_cap, size_type __old_sz,
size_type __n_copy, size_type __n_del, size_type __n_add)
{
size_type __ms = max_size();
if (__delta_cap > __ms - __old_cap)
this->__throw_length_error();
pointer __old_p = __get_pointer();
size_type __cap = __old_cap < __ms / 2 - __alignment ?
__recommend(_VSTD::max(__old_cap + __delta_cap, 2 * __old_cap)) :
__ms - 1;
pointer __p = __alloc_traits::allocate(__alloc(), __cap+1);
__invalidate_all_iterators();
if (__n_copy != 0)
traits_type::copy(_VSTD::__to_address(__p),
_VSTD::__to_address(__old_p), __n_copy);
size_type __sec_cp_sz = __old_sz - __n_del - __n_copy;
if (__sec_cp_sz != 0)
traits_type::copy(_VSTD::__to_address(__p) + __n_copy + __n_add,
_VSTD::__to_address(__old_p) + __n_copy + __n_del,
__sec_cp_sz);
if (__old_cap+1 != __min_cap)
__alloc_traits::deallocate(__alloc(), __old_p, __old_cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
}
// assign
template <class _CharT, class _Traits, class _Allocator>
template <bool __is_short>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::__assign_no_alias(
const value_type* __s, size_type __n) {
size_type __cap = __is_short ? __min_cap : __get_long_cap();
if (__n < __cap) {
pointer __p = __is_short ? __get_short_pointer() : __get_long_pointer();
__is_short ? __set_short_size(__n) : __set_long_size(__n);
traits_type::copy(_VSTD::__to_address(__p), __s, __n);
traits_type::assign(__p[__n], value_type());
__invalidate_iterators_past(__n);
} else {
size_type __sz = __is_short ? __get_short_size() : __get_long_size();
__grow_by_and_replace(__cap - 1, __n - __cap + 1, __sz, 0, __sz, __n, __s);
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::__assign_external(
const value_type* __s, size_type __n) {
size_type __cap = capacity();
if (__cap >= __n) {
value_type* __p = _VSTD::__to_address(__get_pointer());
traits_type::move(__p, __s, __n);
traits_type::assign(__p[__n], value_type());
__set_size(__n);
__invalidate_iterators_past(__n);
} else {
size_type __sz = size();
__grow_by_and_replace(__cap, __n - __cap, __sz, 0, __sz, __n, __s);
}
return *this;
}
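// assign(const value_type*, size_type) has a small-size fast path: when __n is
// a compile-time constant (per _LIBCPP_BUILTIN_CONSTANT_P) that fits in the
// short buffer, the inlined __assign_short() is used; otherwise the call goes
// to the externally instantiated __assign_external().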
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::assign(const value_type* __s, size_type __n)
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::assign received nullptr");
return (_LIBCPP_BUILTIN_CONSTANT_P(__n) && __n < __min_cap)
? __assign_short(__s, __n)
: __assign_external(__s, __n);
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::assign(size_type __n, value_type __c)
{
size_type __cap = capacity();
if (__cap < __n)
{
size_type __sz = size();
__grow_by(__cap, __n - __cap, __sz, 0, __sz);
}
value_type* __p = _VSTD::__to_address(__get_pointer());
traits_type::assign(__p, __n, __c);
traits_type::assign(__p[__n], value_type());
__set_size(__n);
__invalidate_iterators_past(__n);
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::operator=(value_type __c)
{
pointer __p;
if (__is_long())
{
__p = __get_long_pointer();
__set_long_size(1);
}
else
{
__p = __get_short_pointer();
__set_short_size(1);
}
traits_type::assign(*__p, __c);
traits_type::assign(*++__p, value_type());
__invalidate_iterators_past(1);
return *this;
}
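// Copy assignment: the short/short case is a plain representation copy; the
// other cases funnel into __assign_no_alias, whose template argument describes
// the current representation of *this* (short or long), not of __str. The
// self-assignment check above is what establishes the "no alias" precondition.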
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::operator=(const basic_string& __str)
{
if (this != &__str) {
__copy_assign_alloc(__str);
if (!__is_long()) {
if (!__str.__is_long()) {
__r_.first().__r = __str.__r_.first().__r;
} else {
return __assign_no_alias<true>(__str.data(), __str.size());
}
} else {
return __assign_no_alias<false>(__str.data(), __str.size());
}
}
return *this;
}
#ifndef _LIBCPP_CXX03_LANG
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, false_type)
_NOEXCEPT_(__alloc_traits::is_always_equal::value)
{
if (__alloc() != __str.__alloc())
assign(__str);
else
__move_assign(__str, true_type());
}
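// Move assignment when the allocator propagates on move assignment (or the
// allocators compare equal): steal __str's representation wholesale and leave
// __str as a valid empty short string. Before C++17 the allocator's move
// assignment may throw, so *this is first reset to an empty short string so
// it is not left pointing at the just-freed long buffer if
// __move_assign_alloc() throws.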
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, true_type)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT
#else
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
#endif
{
if (__is_long()) {
__alloc_traits::deallocate(__alloc(), __get_long_pointer(),
__get_long_cap());
#if _LIBCPP_STD_VER <= 14
if (!is_nothrow_move_assignable<allocator_type>::value) {
__set_short_size(0);
traits_type::assign(__get_short_pointer()[0], value_type());
}
#endif
}
__move_assign_alloc(__str);
__r_.first() = __str.__r_.first();
__str.__set_short_size(0);
traits_type::assign(__str.__get_short_pointer()[0], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::operator=(basic_string&& __str)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value))
{
__move_assign(__str, integral_constant<bool,
__alloc_traits::propagate_on_container_move_assignment::value>());
return *this;
}
#endif
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator>
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::assign(_InputIterator __first, _InputIterator __last)
{
const basic_string __temp(__first, __last, __alloc());
assign(__temp.data(), __temp.size());
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
template<class _ForwardIterator>
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::assign(_ForwardIterator __first, _ForwardIterator __last)
{
size_type __cap = capacity();
size_type __n = __string_is_trivial_iterator<_ForwardIterator>::value ?
static_cast<size_type>(_VSTD::distance(__first, __last)) : 0;
if (__string_is_trivial_iterator<_ForwardIterator>::value &&
(__cap >= __n || !__addr_in_range(*__first)))
{
if (__cap < __n)
{
size_type __sz = size();
__grow_by(__cap, __n - __cap, __sz, 0, __sz);
}
pointer __p = __get_pointer();
for (; __first != __last; ++__first, ++__p)
traits_type::assign(*__p, *__first);
traits_type::assign(*__p, value_type());
__set_size(__n);
__invalidate_iterators_past(__n);
}
else
{
const basic_string __temp(__first, __last, __alloc());
assign(__temp.data(), __temp.size());
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::assign(const basic_string& __str, size_type __pos, size_type __n)
{
size_type __sz = __str.size();
if (__pos > __sz)
this->__throw_out_of_range();
return assign(__str.data() + __pos, _VSTD::min(__n, __sz - __pos));
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::assign(const _Tp & __t, size_type __pos, size_type __n)
{
__self_view __sv = __t;
size_type __sz = __sv.size();
if (__pos > __sz)
this->__throw_out_of_range();
return assign(__sv.data() + __pos, _VSTD::min(__n, __sz - __pos));
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::__assign_external(const value_type* __s) {
return __assign_external(__s, traits_type::length(__s));
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::assign(const value_type* __s)
{
_LIBCPP_ASSERT(__s != nullptr, "string::assign received nullptr");
return _LIBCPP_BUILTIN_CONSTANT_P(*__s)
? (traits_type::length(__s) < __min_cap
? __assign_short(__s, traits_type::length(__s))
: __assign_external(__s, traits_type::length(__s)))
: __assign_external(__s);
}
// append
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::append(const value_type* __s, size_type __n)
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::append received nullptr");
size_type __cap = capacity();
size_type __sz = size();
if (__cap - __sz >= __n)
{
if (__n)
{
value_type* __p = _VSTD::__to_address(__get_pointer());
traits_type::copy(__p + __sz, __s, __n);
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
}
}
else
__grow_by_and_replace(__cap, __sz + __n - __cap, __sz, __sz, 0, __n, __s);
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::append(size_type __n, value_type __c)
{
if (__n)
{
size_type __cap = capacity();
size_type __sz = size();
if (__cap - __sz < __n)
__grow_by(__cap, __sz + __n - __cap, __sz, __sz, 0);
pointer __p = __get_pointer();
traits_type::assign(_VSTD::__to_address(__p) + __sz, __n, __c);
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
inline void
basic_string<_CharT, _Traits, _Allocator>::__append_default_init(size_type __n)
{
if (__n)
{
size_type __cap = capacity();
size_type __sz = size();
if (__cap - __sz < __n)
__grow_by(__cap, __sz + __n - __cap, __sz, __sz, 0);
pointer __p = __get_pointer();
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
}
}
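// push_back() reads the current size/capacity from whichever representation is
// active, grows by exactly one character via __grow_by() if the string is full
// (which may flip it to the long form), then writes the character and
// re-terminates.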
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::push_back(value_type __c)
{
bool __is_short = !__is_long();
size_type __cap;
size_type __sz;
if (__is_short)
{
__cap = __min_cap - 1;
__sz = __get_short_size();
}
else
{
__cap = __get_long_cap() - 1;
__sz = __get_long_size();
}
if (__sz == __cap)
{
__grow_by(__cap, 1, __sz, __sz, 0);
__is_short = !__is_long();
}
pointer __p;
if (__is_short)
{
__p = __get_short_pointer() + __sz;
__set_short_size(__sz+1);
}
else
{
__p = __get_long_pointer() + __sz;
__set_long_size(__sz+1);
}
traits_type::assign(*__p, __c);
traits_type::assign(*++__p, value_type());
}
template <class _CharT, class _Traits, class _Allocator>
template<class _ForwardIterator>
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::append(
_ForwardIterator __first, _ForwardIterator __last)
{
size_type __sz = size();
size_type __cap = capacity();
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n)
{
if (__string_is_trivial_iterator<_ForwardIterator>::value &&
!__addr_in_range(*__first))
{
if (__cap - __sz < __n)
__grow_by(__cap, __sz + __n - __cap, __sz, __sz, 0);
pointer __p = __get_pointer() + __sz;
for (; __first != __last; ++__p, ++__first)
traits_type::assign(*__p, *__first);
traits_type::assign(*__p, value_type());
__set_size(__sz + __n);
}
else
{
const basic_string __temp(__first, __last, __alloc());
append(__temp.data(), __temp.size());
}
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::append(const basic_string& __str)
{
return append(__str.data(), __str.size());
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::append(const basic_string& __str, size_type __pos, size_type __n)
{
size_type __sz = __str.size();
if (__pos > __sz)
this->__throw_out_of_range();
return append(__str.data() + __pos, _VSTD::min(__n, __sz - __pos));
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::append(const _Tp & __t, size_type __pos, size_type __n)
{
__self_view __sv = __t;
size_type __sz = __sv.size();
if (__pos > __sz)
this->__throw_out_of_range();
return append(__sv.data() + __pos, _VSTD::min(__n, __sz - __pos));
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::append(const value_type* __s)
{
_LIBCPP_ASSERT(__s != nullptr, "string::append received nullptr");
return append(__s, traits_type::length(__s));
}
// insert
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, const value_type* __s, size_type __n)
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::insert received nullptr");
size_type __sz = size();
if (__pos > __sz)
this->__throw_out_of_range();
size_type __cap = capacity();
if (__cap - __sz >= __n)
{
if (__n)
{
value_type* __p = _VSTD::__to_address(__get_pointer());
size_type __n_move = __sz - __pos;
if (__n_move != 0)
{
if (__p + __pos <= __s && __s < __p + __sz)
__s += __n;
traits_type::move(__p + __pos + __n, __p + __pos, __n_move);
}
traits_type::move(__p + __pos, __s, __n);
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
}
}
else
__grow_by_and_replace(__cap, __sz + __n - __cap, __sz, __pos, 0, __n, __s);
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, size_type __n, value_type __c)
{
size_type __sz = size();
if (__pos > __sz)
this->__throw_out_of_range();
if (__n)
{
size_type __cap = capacity();
value_type* __p;
if (__cap - __sz >= __n)
{
__p = _VSTD::__to_address(__get_pointer());
size_type __n_move = __sz - __pos;
if (__n_move != 0)
traits_type::move(__p + __pos + __n, __p + __pos, __n_move);
}
else
{
__grow_by(__cap, __sz + __n - __cap, __sz, __pos, 0, __n);
__p = _VSTD::__to_address(__get_long_pointer());
}
traits_type::assign(__p + __pos, __n, __c);
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator>
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
typename basic_string<_CharT, _Traits, _Allocator>::iterator
>
basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, _InputIterator __first, _InputIterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this,
"string::insert(iterator, range) called with an iterator not"
" referring to this string");
#endif
const basic_string __temp(__first, __last, __alloc());
return insert(__pos, __temp.data(), __temp.data() + __temp.size());
}
template <class _CharT, class _Traits, class _Allocator>
template<class _ForwardIterator>
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
typename basic_string<_CharT, _Traits, _Allocator>::iterator
>
basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, _ForwardIterator __first, _ForwardIterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this,
"string::insert(iterator, range) called with an iterator not"
" referring to this string");
#endif
size_type __ip = static_cast<size_type>(__pos - begin());
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n)
{
if (__string_is_trivial_iterator<_ForwardIterator>::value &&
!__addr_in_range(*__first))
{
size_type __sz = size();
size_type __cap = capacity();
value_type* __p;
if (__cap - __sz >= __n)
{
__p = _VSTD::__to_address(__get_pointer());
size_type __n_move = __sz - __ip;
if (__n_move != 0)
traits_type::move(__p + __ip + __n, __p + __ip, __n_move);
}
else
{
__grow_by(__cap, __sz + __n - __cap, __sz, __ip, 0, __n);
__p = _VSTD::__to_address(__get_long_pointer());
}
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
for (__p += __ip; __first != __last; ++__p, ++__first)
traits_type::assign(*__p, *__first);
}
else
{
const basic_string __temp(__first, __last, __alloc());
return insert(__pos, __temp.data(), __temp.data() + __temp.size());
}
}
return begin() + __ip;
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const basic_string& __str)
{
return insert(__pos1, __str.data(), __str.size());
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const basic_string& __str,
size_type __pos2, size_type __n)
{
size_type __str_sz = __str.size();
if (__pos2 > __str_sz)
this->__throw_out_of_range();
return insert(__pos1, __str.data() + __pos2, _VSTD::min(__n, __str_sz - __pos2));
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const _Tp& __t,
size_type __pos2, size_type __n)
{
__self_view __sv = __t;
size_type __str_sz = __sv.size();
if (__pos2 > __str_sz)
this->__throw_out_of_range();
return insert(__pos1, __sv.data() + __pos2, _VSTD::min(__n, __str_sz - __pos2));
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, const value_type* __s)
{
_LIBCPP_ASSERT(__s != nullptr, "string::insert received nullptr");
return insert(__pos, __s, traits_type::length(__s));
}
template <class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::iterator
basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, value_type __c)
{
size_type __ip = static_cast<size_type>(__pos - begin());
size_type __sz = size();
size_type __cap = capacity();
value_type* __p;
if (__cap == __sz)
{
__grow_by(__cap, 1, __sz, __ip, 0, 1);
__p = _VSTD::__to_address(__get_long_pointer());
}
else
{
__p = _VSTD::__to_address(__get_pointer());
size_type __n_move = __sz - __ip;
if (__n_move != 0)
traits_type::move(__p + __ip + 1, __p + __ip, __n_move);
}
traits_type::assign(__p[__ip], __c);
traits_type::assign(__p[++__sz], value_type());
__set_size(__sz);
return begin() + static_cast<difference_type>(__ip);
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::iterator
basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, size_type __n, value_type __c)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this,
"string::insert(iterator, n, value) called with an iterator not"
" referring to this string");
#endif
difference_type __p = __pos - begin();
insert(static_cast<size_type>(__p), __n, __c);
return begin() + __p;
}
// replace
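// This overload does the replacement in place whenever the existing capacity
// can absorb the size change. Because __s may point into this string's own
// buffer, the code either copies the replacement before shifting the tail
// (when shrinking), or adjusts/splits the copy from __s (when growing), so
// that the traits_type::move of the tail cannot clobber the source characters.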
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __n1, const value_type* __s, size_type __n2)
_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
{
_LIBCPP_ASSERT(__n2 == 0 || __s != nullptr, "string::replace received nullptr");
size_type __sz = size();
if (__pos > __sz)
this->__throw_out_of_range();
__n1 = _VSTD::min(__n1, __sz - __pos);
size_type __cap = capacity();
if (__cap - __sz + __n1 >= __n2)
{
value_type* __p = _VSTD::__to_address(__get_pointer());
if (__n1 != __n2)
{
size_type __n_move = __sz - __pos - __n1;
if (__n_move != 0)
{
if (__n1 > __n2)
{
traits_type::move(__p + __pos, __s, __n2);
traits_type::move(__p + __pos + __n2, __p + __pos + __n1, __n_move);
goto __finish;
}
if (__p + __pos < __s && __s < __p + __sz)
{
if (__p + __pos + __n1 <= __s)
__s += __n2 - __n1;
else // __p + __pos < __s < __p + __pos + __n1
{
traits_type::move(__p + __pos, __s, __n1);
__pos += __n1;
__s += __n2;
__n2 -= __n1;
__n1 = 0;
}
}
traits_type::move(__p + __pos + __n2, __p + __pos + __n1, __n_move);
}
}
traits_type::move(__p + __pos, __s, __n2);
__finish:
// `__sz += __n2 - __n1;` in this function and the one below can cause unsigned
// integer overflow, but this is a safe operation, so we disable the check.
__sz += __n2 - __n1;
__set_size(__sz);
__invalidate_iterators_past(__sz);
traits_type::assign(__p[__sz], value_type());
}
else
__grow_by_and_replace(__cap, __sz - __n1 + __n2 - __cap, __sz, __pos, __n1, __n2, __s);
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __n1, size_type __n2, value_type __c)
_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
{
size_type __sz = size();
if (__pos > __sz)
this->__throw_out_of_range();
__n1 = _VSTD::min(__n1, __sz - __pos);
size_type __cap = capacity();
value_type* __p;
if (__cap - __sz + __n1 >= __n2)
{
__p = _VSTD::__to_address(__get_pointer());
if (__n1 != __n2)
{
size_type __n_move = __sz - __pos - __n1;
if (__n_move != 0)
traits_type::move(__p + __pos + __n2, __p + __pos + __n1, __n_move);
}
}
else
{
__grow_by(__cap, __sz - __n1 + __n2 - __cap, __sz, __pos, __n1, __n2);
__p = _VSTD::__to_address(__get_long_pointer());
}
traits_type::assign(__p + __pos, __n2, __c);
__sz += __n2 - __n1;
__set_size(__sz);
__invalidate_iterators_past(__sz);
traits_type::assign(__p[__sz], value_type());
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator>
_EnableIf
<
__is_cpp17_input_iterator<_InputIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2,
_InputIterator __j1, _InputIterator __j2)
{
const basic_string __temp(__j1, __j2, __alloc());
return this->replace(__i1, __i2, __temp);
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type __n1, const basic_string& __str)
{
return replace(__pos1, __n1, __str.data(), __str.size());
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type __n1, const basic_string& __str,
size_type __pos2, size_type __n2)
{
size_type __str_sz = __str.size();
if (__pos2 > __str_sz)
this->__throw_out_of_range();
return replace(__pos1, __n1, __str.data() + __pos2, _VSTD::min(__n2, __str_sz - __pos2));
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type __n1, const _Tp& __t,
size_type __pos2, size_type __n2)
{
__self_view __sv = __t;
size_type __str_sz = __sv.size();
if (__pos2 > __str_sz)
this->__throw_out_of_range();
return replace(__pos1, __n1, __sv.data() + __pos2, _VSTD::min(__n2, __str_sz - __pos2));
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __n1, const value_type* __s)
{
_LIBCPP_ASSERT(__s != nullptr, "string::replace received nullptr");
return replace(__pos, __n1, __s, traits_type::length(__s));
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const basic_string& __str)
{
return replace(static_cast<size_type>(__i1 - begin()), static_cast<size_type>(__i2 - __i1),
__str.data(), __str.size());
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n)
{
return replace(static_cast<size_type>(__i1 - begin()), static_cast<size_type>(__i2 - __i1), __s, __n);
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const value_type* __s)
{
return replace(static_cast<size_type>(__i1 - begin()), static_cast<size_type>(__i2 - __i1), __s);
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c)
{
return replace(static_cast<size_type>(__i1 - begin()), static_cast<size_type>(__i2 - __i1), __n, __c);
}
// erase
// 'externally instantiated' erase() implementation, called when __n != npos.
// Does not check __pos against size()
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__erase_external_with_move(
size_type __pos, size_type __n)
{
if (__n)
{
size_type __sz = size();
value_type* __p = _VSTD::__to_address(__get_pointer());
__n = _VSTD::min(__n, __sz - __pos);
size_type __n_move = __sz - __pos - __n;
if (__n_move != 0)
traits_type::move(__p + __pos, __p + __pos + __n, __n_move);
__sz -= __n;
__set_size(__sz);
__invalidate_iterators_past(__sz);
traits_type::assign(__p[__sz], value_type());
}
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::erase(size_type __pos,
size_type __n) {
if (__pos > size()) this->__throw_out_of_range();
if (__n == npos) {
__erase_to_end(__pos);
} else {
__erase_external_with_move(__pos, __n);
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::iterator
basic_string<_CharT, _Traits, _Allocator>::erase(const_iterator __pos)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this,
"string::erase(iterator) called with an iterator not"
" referring to this string");
#endif
_LIBCPP_ASSERT(__pos != end(),
"string::erase(iterator) called with a non-dereferenceable iterator");
iterator __b = begin();
size_type __r = static_cast<size_type>(__pos - __b);
erase(__r, 1);
return __b + static_cast<difference_type>(__r);
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::iterator
basic_string<_CharT, _Traits, _Allocator>::erase(const_iterator __first, const_iterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this,
"string::erase(iterator, iterator) called with an iterator not"
" referring to this string");
#endif
_LIBCPP_ASSERT(__first <= __last, "string::erase(first, last) called with invalid range");
iterator __b = begin();
size_type __r = static_cast<size_type>(__first - __b);
erase(__r, static_cast<size_type>(__last - __first));
return __b + static_cast<difference_type>(__r);
}
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::pop_back()
{
_LIBCPP_ASSERT(!empty(), "string::pop_back(): string is already empty");
size_type __sz;
if (__is_long())
{
__sz = __get_long_size() - 1;
__set_long_size(__sz);
traits_type::assign(*(__get_long_pointer() + __sz), value_type());
}
else
{
__sz = __get_short_size() - 1;
__set_short_size(__sz);
traits_type::assign(*(__get_short_pointer() + __sz), value_type());
}
__invalidate_iterators_past(__sz);
}
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::clear() _NOEXCEPT
{
__invalidate_all_iterators();
if (__is_long())
{
traits_type::assign(*__get_long_pointer(), value_type());
__set_long_size(0);
}
else
{
traits_type::assign(*__get_short_pointer(), value_type());
__set_short_size(0);
}
}
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__erase_to_end(size_type __pos)
{
if (__is_long())
{
traits_type::assign(*(__get_long_pointer() + __pos), value_type());
__set_long_size(__pos);
}
else
{
traits_type::assign(*(__get_short_pointer() + __pos), value_type());
__set_short_size(__pos);
}
__invalidate_iterators_past(__pos);
}
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::resize(size_type __n, value_type __c)
{
size_type __sz = size();
if (__n > __sz)
append(__n - __sz, __c);
else
__erase_to_end(__n);
}
template <class _CharT, class _Traits, class _Allocator>
inline void
basic_string<_CharT, _Traits, _Allocator>::__resize_default_init(size_type __n)
{
size_type __sz = size();
if (__n > __sz) {
__append_default_init(__n - __sz);
} else
__erase_to_end(__n);
}
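// max_size() starts from the allocator's limit and subtracts __alignment,
// presumably to leave headroom for __recommend()'s rounding and the null
// terminator. On big-endian targets the capacity word also holds the
// long-string flag bit, so the limit is halved whenever the allocator's
// maximum would overlap it.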
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::max_size() const _NOEXCEPT
{
size_type __m = __alloc_traits::max_size(__alloc());
#ifdef _LIBCPP_BIG_ENDIAN
return (__m <= ~__long_mask ? __m : __m/2) - __alignment;
#else
return __m - __alignment;
#endif
}
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::reserve(size_type __requested_capacity)
{
if (__requested_capacity > max_size())
this->__throw_length_error();
#if _LIBCPP_STD_VER > 17
// Reserve never shrinks as of C++20.
if (__requested_capacity <= capacity()) return;
#endif
size_type __target_capacity = _VSTD::max(__requested_capacity, size());
__target_capacity = __recommend(__target_capacity);
if (__target_capacity == capacity()) return;
__shrink_or_extend(__target_capacity);
}
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::shrink_to_fit() _NOEXCEPT
{
size_type __target_capacity = __recommend(size());
if (__target_capacity == capacity()) return;
__shrink_or_extend(__target_capacity);
}
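// __shrink_or_extend() moves the contents into a buffer of exactly
// __target_capacity characters: back into the short buffer when the target is
// __min_cap - 1, otherwise into a freshly allocated long buffer. When
// shrinking, an allocation failure is swallowed and the string is simply left
// at its current capacity. Its callers here are reserve() and shrink_to_fit(),
// which only invoke it after deciding the capacity should change.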
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__shrink_or_extend(size_type __target_capacity)
{
size_type __cap = capacity();
size_type __sz = size();
pointer __new_data, __p;
bool __was_long, __now_long;
if (__target_capacity == __min_cap - 1)
{
__was_long = true;
__now_long = false;
__new_data = __get_short_pointer();
__p = __get_long_pointer();
}
else
{
if (__target_capacity > __cap)
__new_data = __alloc_traits::allocate(__alloc(), __target_capacity+1);
else
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
__new_data = __alloc_traits::allocate(__alloc(), __target_capacity+1);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
return;
}
#else // _LIBCPP_NO_EXCEPTIONS
if (__new_data == nullptr)
return;
#endif // _LIBCPP_NO_EXCEPTIONS
}
__now_long = true;
__was_long = __is_long();
__p = __get_pointer();
}
traits_type::copy(_VSTD::__to_address(__new_data),
_VSTD::__to_address(__p), size()+1);
if (__was_long)
__alloc_traits::deallocate(__alloc(), __p, __cap+1);
if (__now_long)
{
__set_long_cap(__target_capacity+1);
__set_long_size(__sz);
__set_long_pointer(__new_data);
}
else
__set_short_size(__sz);
__invalidate_all_iterators();
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::const_reference
basic_string<_CharT, _Traits, _Allocator>::operator[](size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__pos <= size(), "string index out of bounds");
return *(data() + __pos);
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::reference
basic_string<_CharT, _Traits, _Allocator>::operator[](size_type __pos) _NOEXCEPT
{
_LIBCPP_ASSERT(__pos <= size(), "string index out of bounds");
return *(__get_pointer() + __pos);
}
template <class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::const_reference
basic_string<_CharT, _Traits, _Allocator>::at(size_type __n) const
{
if (__n >= size())
this->__throw_out_of_range();
return (*this)[__n];
}
template <class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::reference
basic_string<_CharT, _Traits, _Allocator>::at(size_type __n)
{
if (__n >= size())
this->__throw_out_of_range();
return (*this)[__n];
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::reference
basic_string<_CharT, _Traits, _Allocator>::front() _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "string::front(): string is empty");
return *__get_pointer();
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::const_reference
basic_string<_CharT, _Traits, _Allocator>::front() const _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "string::front(): string is empty");
return *data();
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::reference
basic_string<_CharT, _Traits, _Allocator>::back() _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "string::back(): string is empty");
return *(__get_pointer() + size() - 1);
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::const_reference
basic_string<_CharT, _Traits, _Allocator>::back() const _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "string::back(): string is empty");
return *(data() + size() - 1);
}
template <class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::copy(value_type* __s, size_type __n, size_type __pos) const
{
size_type __sz = size();
if (__pos > __sz)
this->__throw_out_of_range();
size_type __rlen = _VSTD::min(__n, __sz - __pos);
traits_type::copy(__s, data() + __pos, __rlen);
return __rlen;
}
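// Editor's note: an illustrative sketch, not part of the libc++ sources. The
// copy() member above writes at most __n characters starting at __pos and,
// unlike c_str()/data(), does not append a null terminator; the return value is
// the number of characters actually copied. copy_into_buffer is a hypothetical
// caller.
#include <cstddef>
#include <string>

inline std::size_t copy_into_buffer(const std::string& src, char (&buf)[16])
{
    std::size_t n = src.copy(buf, sizeof(buf) - 1);  // copies at most 15 chars from position 0
    buf[n] = '\0';                                   // copy() itself does not null-terminate
    return n;
}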
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>
basic_string<_CharT, _Traits, _Allocator>::substr(size_type __pos, size_type __n) const
{
return basic_string(*this, __pos, __n, __alloc());
}
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::swap(basic_string& __str)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value)
#endif
{
#if _LIBCPP_DEBUG_LEVEL == 2
if (!__is_long())
__get_db()->__invalidate_all(this);
if (!__str.__is_long())
__get_db()->__invalidate_all(&__str);
__get_db()->swap(this, &__str);
#endif
_LIBCPP_ASSERT(
__alloc_traits::propagate_on_container_swap::value ||
__alloc_traits::is_always_equal::value ||
__alloc() == __str.__alloc(), "swapping non-equal allocators");
_VSTD::swap(__r_.first(), __str.__r_.first());
_VSTD::__swap_allocator(__alloc(), __str.__alloc());
}
// find
template <class _Traits>
struct _LIBCPP_HIDDEN __traits_eq
{
typedef typename _Traits::char_type char_type;
_LIBCPP_INLINE_VISIBILITY
bool operator()(const char_type& __x, const char_type& __y) _NOEXCEPT
{return _Traits::eq(__x, __y);}
};
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::find(): received nullptr");
return __str_find<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_find<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::find(const _Tp &__t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_find<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::find(): received nullptr");
return __str_find<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find(value_type __c,
size_type __pos) const _NOEXCEPT
{
return __str_find<value_type, size_type, traits_type, npos>
(data(), size(), __c, __pos);
}
// rfind
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::rfind(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::rfind(): received nullptr");
return __str_rfind<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::rfind(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_rfind<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::rfind(const _Tp& __t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_rfind<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::rfind(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::rfind(): received nullptr");
return __str_rfind<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::rfind(value_type __c,
size_type __pos) const _NOEXCEPT
{
return __str_rfind<value_type, size_type, traits_type, npos>
(data(), size(), __c, __pos);
}
// find_first_of
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_of(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::find_first_of(): received nullptr");
return __str_find_first_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_of(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_find_first_of<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::find_first_of(const _Tp& __t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_find_first_of<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_of(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::find_first_of(): received nullptr");
return __str_find_first_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_of(value_type __c,
size_type __pos) const _NOEXCEPT
{
return find(__c, __pos);
}
// find_last_of
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_of(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::find_last_of(): received nullptr");
return __str_find_last_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_of(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_find_last_of<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::find_last_of(const _Tp& __t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_find_last_of<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_of(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::find_last_of(): received nullptr");
return __str_find_last_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_of(value_type __c,
size_type __pos) const _NOEXCEPT
{
return rfind(__c, __pos);
}
// find_first_not_of
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::find_first_not_of(): received nullptr");
return __str_find_first_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_find_first_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const _Tp& __t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_find_first_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::find_first_not_of(): received nullptr");
return __str_find_first_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(value_type __c,
size_type __pos) const _NOEXCEPT
{
return __str_find_first_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __c, __pos);
}
// find_last_not_of
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::find_last_not_of(): received nullptr");
return __str_find_last_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_find_last_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const _Tp& __t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_find_last_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::find_last_not_of(): received nullptr");
return __str_find_last_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(value_type __c,
size_type __pos) const _NOEXCEPT
{
return __str_find_last_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __c, __pos);
}
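// Editor's note: an illustrative sketch, not part of the libc++ sources, for
// the find family defined above. Every overload returns npos when nothing
// matches, and the *_of / *_not_of variants treat their argument as a set of
// characters rather than as a substring. The helper name is hypothetical.
#include <cassert>
#include <string>

inline void find_family_sketch()
{
    const std::string s = "key = value";
    assert(s.find("=") == 4);                        // substring search
    assert(s.rfind('e') == 10);                      // search from the end
    assert(s.find_first_of("=:") == 4);              // any character of the set
    assert(s.find_first_not_of("key ") == 4);        // first char outside the set
    assert(s.find("missing") == std::string::npos);  // no match -> npos
}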
// compare
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
>
basic_string<_CharT, _Traits, _Allocator>::compare(const _Tp& __t) const _NOEXCEPT
{
__self_view __sv = __t;
size_t __lhs_sz = size();
size_t __rhs_sz = __sv.size();
int __result = traits_type::compare(data(), __sv.data(),
_VSTD::min(__lhs_sz, __rhs_sz));
if (__result != 0)
return __result;
if (__lhs_sz < __rhs_sz)
return -1;
if (__lhs_sz > __rhs_sz)
return 1;
return 0;
}
template <class _CharT, class _Traits, class _Allocator>
inline
int
basic_string<_CharT, _Traits, _Allocator>::compare(const basic_string& __str) const _NOEXCEPT
{
return compare(__self_view(__str));
}
template <class _CharT, class _Traits, class _Allocator>
int
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const value_type* __s,
size_type __n2) const
{
_LIBCPP_ASSERT(__n2 == 0 || __s != nullptr, "string::compare(): received nullptr");
size_type __sz = size();
if (__pos1 > __sz || __n2 == npos)
this->__throw_out_of_range();
size_type __rlen = _VSTD::min(__n1, __sz - __pos1);
int __r = traits_type::compare(data() + __pos1, __s, _VSTD::min(__rlen, __n2));
if (__r == 0)
{
if (__rlen < __n2)
__r = -1;
else if (__rlen > __n2)
__r = 1;
}
return __r;
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
>
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const _Tp& __t) const
{
__self_view __sv = __t;
return compare(__pos1, __n1, __sv.data(), __sv.size());
}
template <class _CharT, class _Traits, class _Allocator>
inline
int
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const basic_string& __str) const
{
return compare(__pos1, __n1, __str.data(), __str.size());
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
int
>
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const _Tp& __t,
size_type __pos2,
size_type __n2) const
{
__self_view __sv = __t;
return __self_view(*this).substr(__pos1, __n1).compare(__sv.substr(__pos2, __n2));
}
template <class _CharT, class _Traits, class _Allocator>
int
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const basic_string& __str,
size_type __pos2,
size_type __n2) const
{
return compare(__pos1, __n1, __self_view(__str), __pos2, __n2);
}
template <class _CharT, class _Traits, class _Allocator>
int
basic_string<_CharT, _Traits, _Allocator>::compare(const value_type* __s) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::compare(): received nullptr");
return compare(0, npos, __s, traits_type::length(__s));
}
template <class _CharT, class _Traits, class _Allocator>
int
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const value_type* __s) const
{
_LIBCPP_ASSERT(__s != nullptr, "string::compare(): received nullptr");
return compare(__pos1, __n1, __s, traits_type::length(__s));
}
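// Editor's note: an illustrative sketch, not part of the libc++ sources. The
// compare() overloads above return a negative value, zero, or a positive value,
// ordering first by the compared character range and then by length. The helper
// name is hypothetical.
#include <cassert>
#include <string>

inline void compare_sketch()
{
    const std::string a = "apple", b = "apples";
    assert(a.compare(b) < 0);              // equal prefix, but a is shorter
    assert(b.compare(a) > 0);
    assert(a.compare(0, 3, "app") == 0);   // substring vs. C-string overload
}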
// __invariants
template<class _CharT, class _Traits, class _Allocator>
inline
bool
basic_string<_CharT, _Traits, _Allocator>::__invariants() const
{
if (size() > capacity())
return false;
if (capacity() < __min_cap - 1)
return false;
if (data() == nullptr)
return false;
if (data()[size()] != value_type())
return false;
return true;
}
// __clear_and_shrink
template<class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__clear_and_shrink() _NOEXCEPT
{
clear();
if(__is_long())
{
__alloc_traits::deallocate(__alloc(), __get_long_pointer(), capacity() + 1);
__set_long_cap(0);
__set_short_size(0);
traits_type::assign(*__get_short_pointer(), value_type());
}
}
// operator==
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
size_t __lhs_sz = __lhs.size();
return __lhs_sz == __rhs.size() && _Traits::compare(__lhs.data(),
__rhs.data(),
__lhs_sz) == 0;
}
template<class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const basic_string<char, char_traits<char>, _Allocator>& __lhs,
const basic_string<char, char_traits<char>, _Allocator>& __rhs) _NOEXCEPT
{
size_t __lhs_sz = __lhs.size();
if (__lhs_sz != __rhs.size())
return false;
const char* __lp = __lhs.data();
const char* __rp = __rhs.data();
if (__lhs.__is_long())
return char_traits<char>::compare(__lp, __rp, __lhs_sz) == 0;
for (; __lhs_sz != 0; --__lhs_sz, ++__lp, ++__rp)
if (*__lp != *__rp)
return false;
return true;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
typedef basic_string<_CharT, _Traits, _Allocator> _String;
_LIBCPP_ASSERT(__lhs != nullptr, "operator==(char*, basic_string): received nullptr");
size_t __lhs_len = _Traits::length(__lhs);
if (__lhs_len != __rhs.size()) return false;
return __rhs.compare(0, _String::npos, __lhs, __lhs_len) == 0;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const basic_string<_CharT,_Traits,_Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
typedef basic_string<_CharT, _Traits, _Allocator> _String;
_LIBCPP_ASSERT(__rhs != nullptr, "operator==(basic_string, char*): received nullptr");
size_t __rhs_len = _Traits::length(__rhs);
if (__rhs_len != __lhs.size()) return false;
return __lhs.compare(0, _String::npos, __rhs, __rhs_len) == 0;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const basic_string<_CharT,_Traits,_Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__lhs == __rhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__lhs == __rhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
return !(__lhs == __rhs);
}
// operator<
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator< (const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return __lhs.compare(__rhs) < 0;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator< (const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
return __lhs.compare(__rhs) < 0;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator< (const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return __rhs.compare(__lhs) > 0;
}
// operator>
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator> (const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return __rhs < __lhs;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator> (const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
return __rhs < __lhs;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator> (const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return __rhs < __lhs;
}
// operator<=
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator<=(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__rhs < __lhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator<=(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
return !(__rhs < __lhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator<=(const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__rhs < __lhs);
}
// operator>=
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator>=(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__lhs < __rhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator>=(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
return !(__lhs < __rhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator>=(const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__lhs < __rhs);
}
// operator +
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs)
{
basic_string<_CharT, _Traits, _Allocator> __r(__lhs.get_allocator());
typename basic_string<_CharT, _Traits, _Allocator>::size_type __lhs_sz = __lhs.size();
typename basic_string<_CharT, _Traits, _Allocator>::size_type __rhs_sz = __rhs.size();
__r.__init(__lhs.data(), __lhs_sz, __lhs_sz + __rhs_sz);
__r.append(__rhs.data(), __rhs_sz);
return __r;
}
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const _CharT* __lhs , const basic_string<_CharT,_Traits,_Allocator>& __rhs)
{
basic_string<_CharT, _Traits, _Allocator> __r(__rhs.get_allocator());
typename basic_string<_CharT, _Traits, _Allocator>::size_type __lhs_sz = _Traits::length(__lhs);
typename basic_string<_CharT, _Traits, _Allocator>::size_type __rhs_sz = __rhs.size();
__r.__init(__lhs, __lhs_sz, __lhs_sz + __rhs_sz);
__r.append(__rhs.data(), __rhs_sz);
return __r;
}
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(_CharT __lhs, const basic_string<_CharT,_Traits,_Allocator>& __rhs)
{
basic_string<_CharT, _Traits, _Allocator> __r(__rhs.get_allocator());
typename basic_string<_CharT, _Traits, _Allocator>::size_type __rhs_sz = __rhs.size();
__r.__init(&__lhs, 1, 1 + __rhs_sz);
__r.append(__rhs.data(), __rhs_sz);
return __r;
}
template<class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __lhs, const _CharT* __rhs)
{
basic_string<_CharT, _Traits, _Allocator> __r(__lhs.get_allocator());
typename basic_string<_CharT, _Traits, _Allocator>::size_type __lhs_sz = __lhs.size();
typename basic_string<_CharT, _Traits, _Allocator>::size_type __rhs_sz = _Traits::length(__rhs);
__r.__init(__lhs.data(), __lhs_sz, __lhs_sz + __rhs_sz);
__r.append(__rhs, __rhs_sz);
return __r;
}
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __lhs, _CharT __rhs)
{
basic_string<_CharT, _Traits, _Allocator> __r(__lhs.get_allocator());
typename basic_string<_CharT, _Traits, _Allocator>::size_type __lhs_sz = __lhs.size();
__r.__init(__lhs.data(), __lhs_sz, __lhs_sz + 1);
__r.push_back(__rhs);
return __r;
}
#ifndef _LIBCPP_CXX03_LANG
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(basic_string<_CharT, _Traits, _Allocator>&& __lhs, const basic_string<_CharT, _Traits, _Allocator>& __rhs)
{
return _VSTD::move(__lhs.append(__rhs));
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __lhs, basic_string<_CharT, _Traits, _Allocator>&& __rhs)
{
return _VSTD::move(__rhs.insert(0, __lhs));
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(basic_string<_CharT, _Traits, _Allocator>&& __lhs, basic_string<_CharT, _Traits, _Allocator>&& __rhs)
{
return _VSTD::move(__lhs.append(__rhs));
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(const _CharT* __lhs , basic_string<_CharT,_Traits,_Allocator>&& __rhs)
{
return _VSTD::move(__rhs.insert(0, __lhs));
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(_CharT __lhs, basic_string<_CharT,_Traits,_Allocator>&& __rhs)
{
__rhs.insert(__rhs.begin(), __lhs);
return _VSTD::move(__rhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(basic_string<_CharT, _Traits, _Allocator>&& __lhs, const _CharT* __rhs)
{
return _VSTD::move(__lhs.append(__rhs));
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(basic_string<_CharT, _Traits, _Allocator>&& __lhs, _CharT __rhs)
{
__lhs.push_back(__rhs);
return _VSTD::move(__lhs);
}
#endif // _LIBCPP_CXX03_LANG
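// Editor's note: an illustrative sketch, not part of the libc++ sources. The
// rvalue operator+ overloads above append into (or insert into) the temporary
// operand and move it out, so chained concatenation reuses the intermediate
// buffer instead of copying it. make_greeting is a hypothetical helper.
#include <cassert>
#include <string>

inline std::string make_greeting(const std::string& name)
{
    return "Hello, " + name + "!";   // intermediate temporaries are moved, not copied
}

inline void concat_sketch()
{
    assert(make_greeting("world") == "Hello, world!");
}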
// swap
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
swap(basic_string<_CharT, _Traits, _Allocator>& __lhs,
basic_string<_CharT, _Traits, _Allocator>& __rhs)
_NOEXCEPT_(_NOEXCEPT_(__lhs.swap(__rhs)))
{
__lhs.swap(__rhs);
}
_LIBCPP_FUNC_VIS int stoi (const string& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS long stol (const string& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS unsigned long stoul (const string& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS long long stoll (const string& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS unsigned long long stoull(const string& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS float stof (const string& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS double stod (const string& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS long double stold(const string& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS string to_string(int __val);
_LIBCPP_FUNC_VIS string to_string(unsigned __val);
_LIBCPP_FUNC_VIS string to_string(long __val);
_LIBCPP_FUNC_VIS string to_string(unsigned long __val);
_LIBCPP_FUNC_VIS string to_string(long long __val);
_LIBCPP_FUNC_VIS string to_string(unsigned long long __val);
_LIBCPP_FUNC_VIS string to_string(float __val);
_LIBCPP_FUNC_VIS string to_string(double __val);
_LIBCPP_FUNC_VIS string to_string(long double __val);
_LIBCPP_FUNC_VIS int stoi (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS long stol (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS unsigned long stoul (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS long long stoll (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS unsigned long long stoull(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS float stof (const wstring& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS double stod (const wstring& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS long double stold(const wstring& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS wstring to_wstring(int __val);
_LIBCPP_FUNC_VIS wstring to_wstring(unsigned __val);
_LIBCPP_FUNC_VIS wstring to_wstring(long __val);
_LIBCPP_FUNC_VIS wstring to_wstring(unsigned long __val);
_LIBCPP_FUNC_VIS wstring to_wstring(long long __val);
_LIBCPP_FUNC_VIS wstring to_wstring(unsigned long long __val);
_LIBCPP_FUNC_VIS wstring to_wstring(float __val);
_LIBCPP_FUNC_VIS wstring to_wstring(double __val);
_LIBCPP_FUNC_VIS wstring to_wstring(long double __val);
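// Editor's note: an illustrative sketch, not part of the libc++ sources, for
// the sto* / to_string / to_wstring declarations above. The optional size_t*
// argument receives the index of the first unconverted character, the integer
// overloads take a numeric base, and on failure these functions throw
// std::invalid_argument or std::out_of_range. The helper name is hypothetical.
#include <cassert>
#include <cstddef>
#include <string>

inline void conversions_sketch()
{
    std::size_t idx = 0;
    assert(std::stoi("42 apples", &idx) == 42 && idx == 2);
    assert(std::stoi("ff", nullptr, 16) == 255);
    assert(std::to_string(42) == "42");
    // std::stoi("not a number") would throw std::invalid_argument.
}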
template<class _CharT, class _Traits, class _Allocator>
_LIBCPP_TEMPLATE_DATA_VIS
const typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::npos;
template <class _CharT, class _Allocator>
struct _LIBCPP_TEMPLATE_VIS
hash<basic_string<_CharT, char_traits<_CharT>, _Allocator> >
: public unary_function<
basic_string<_CharT, char_traits<_CharT>, _Allocator>, size_t>
{
size_t
operator()(const basic_string<_CharT, char_traits<_CharT>, _Allocator>& __val) const _NOEXCEPT
{ return __do_string_hash(__val.data(), __val.data() + __val.size()); }
};
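// Editor's note: an illustrative sketch, not part of the libc++ sources. The
// hash specialization above is what lets basic_string instantiations over
// char_traits serve directly as keys of unordered containers. The helper name
// is hypothetical.
#include <cassert>
#include <string>
#include <unordered_map>

inline void hash_sketch()
{
    std::unordered_map<std::string, int> counts;
    ++counts["apple"];
    ++counts["apple"];
    assert(counts["apple"] == 2);
    assert(std::hash<std::string>()("apple") == std::hash<std::string>()("apple"));
}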
template<class _CharT, class _Traits, class _Allocator>
basic_ostream<_CharT, _Traits>&
operator<<(basic_ostream<_CharT, _Traits>& __os,
const basic_string<_CharT, _Traits, _Allocator>& __str);
template<class _CharT, class _Traits, class _Allocator>
basic_istream<_CharT, _Traits>&
operator>>(basic_istream<_CharT, _Traits>& __is,
basic_string<_CharT, _Traits, _Allocator>& __str);
template<class _CharT, class _Traits, class _Allocator>
basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>& __is,
basic_string<_CharT, _Traits, _Allocator>& __str, _CharT __dlm);
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>& __is,
basic_string<_CharT, _Traits, _Allocator>& __str);
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>&& __is,
basic_string<_CharT, _Traits, _Allocator>& __str, _CharT __dlm);
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>&& __is,
basic_string<_CharT, _Traits, _Allocator>& __str);
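// Editor's note: an illustrative sketch, not part of the libc++ sources, for
// the getline() overloads declared above: the two-argument form reads up to
// '\n', the three-argument form up to a caller-supplied delimiter, and neither
// stores the delimiter in the result. The helper name is hypothetical.
#include <cassert>
#include <sstream>
#include <string>

inline void getline_sketch()
{
    std::istringstream in("first line\nkey:value\n");
    std::string line, key;
    std::getline(in, line);           // "first line"
    std::getline(in, key, ':');       // "key"
    assert(line == "first line" && key == "key");
}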
#if _LIBCPP_STD_VER > 17
template <class _CharT, class _Traits, class _Allocator, class _Up>
inline _LIBCPP_INLINE_VISIBILITY
typename basic_string<_CharT, _Traits, _Allocator>::size_type
erase(basic_string<_CharT, _Traits, _Allocator>& __str, const _Up& __v) {
auto __old_size = __str.size();
__str.erase(_VSTD::remove(__str.begin(), __str.end(), __v), __str.end());
return __old_size - __str.size();
}
template <class _CharT, class _Traits, class _Allocator, class _Predicate>
inline _LIBCPP_INLINE_VISIBILITY
typename basic_string<_CharT, _Traits, _Allocator>::size_type
erase_if(basic_string<_CharT, _Traits, _Allocator>& __str,
_Predicate __pred) {
auto __old_size = __str.size();
__str.erase(_VSTD::remove_if(__str.begin(), __str.end(), __pred),
__str.end());
return __old_size - __str.size();
}
#endif
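// Editor's note: an illustrative sketch, not part of the libc++ sources, of the
// C++20 uniform erasure helpers defined just above; both return the number of
// characters removed. The helper name is hypothetical.
#if __cplusplus > 201703L
#include <cassert>
#include <cctype>
#include <string>

inline void erase_sketch()
{
    std::string s = "a1b2c3";
    assert(std::erase(s, 'b') == 1);                                            // s == "a12c3"
    assert(std::erase_if(s, [](unsigned char c) { return std::isdigit(c) != 0; }) == 3);
    assert(s == "ac");
}
#endif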
#if _LIBCPP_DEBUG_LEVEL == 2
template<class _CharT, class _Traits, class _Allocator>
bool
basic_string<_CharT, _Traits, _Allocator>::__dereferenceable(const const_iterator* __i) const
{
return this->data() <= _VSTD::__to_address(__i->base()) &&
_VSTD::__to_address(__i->base()) < this->data() + this->size();
}
template<class _CharT, class _Traits, class _Allocator>
bool
basic_string<_CharT, _Traits, _Allocator>::__decrementable(const const_iterator* __i) const
{
return this->data() < _VSTD::__to_address(__i->base()) &&
_VSTD::__to_address(__i->base()) <= this->data() + this->size();
}
template<class _CharT, class _Traits, class _Allocator>
bool
basic_string<_CharT, _Traits, _Allocator>::__addable(const const_iterator* __i, ptrdiff_t __n) const
{
const value_type* __p = _VSTD::__to_address(__i->base()) + __n;
return this->data() <= __p && __p <= this->data() + this->size();
}
template<class _CharT, class _Traits, class _Allocator>
bool
basic_string<_CharT, _Traits, _Allocator>::__subscriptable(const const_iterator* __i, ptrdiff_t __n) const
{
const value_type* __p = _VSTD::__to_address(__i->base()) + __n;
return this->data() <= __p && __p < this->data() + this->size();
}
#endif // _LIBCPP_DEBUG_LEVEL == 2
#if _LIBCPP_STD_VER > 11
// Literal suffixes for basic_string [basic.string.literals]
inline namespace literals
{
inline namespace string_literals
{
inline _LIBCPP_INLINE_VISIBILITY
basic_string<char> operator "" s( const char *__str, size_t __len )
{
return basic_string<char> (__str, __len);
}
inline _LIBCPP_INLINE_VISIBILITY
basic_string<wchar_t> operator "" s( const wchar_t *__str, size_t __len )
{
return basic_string<wchar_t> (__str, __len);
}
#ifndef _LIBCPP_HAS_NO_CHAR8_T
inline _LIBCPP_INLINE_VISIBILITY
basic_string<char8_t> operator "" s(const char8_t *__str, size_t __len) _NOEXCEPT
{
return basic_string<char8_t> (__str, __len);
}
#endif
inline _LIBCPP_INLINE_VISIBILITY
basic_string<char16_t> operator "" s( const char16_t *__str, size_t __len )
{
return basic_string<char16_t> (__str, __len);
}
inline _LIBCPP_INLINE_VISIBILITY
basic_string<char32_t> operator "" s( const char32_t *__str, size_t __len )
{
return basic_string<char32_t> (__str, __len);
}
}
}
#endif
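// Editor's note: an illustrative sketch, not part of the libc++ sources. The
// "s" literal suffixes defined above construct a basic_string from the literal
// and its length, so embedded null characters are preserved, unlike
// construction from a const char*. The helper name is hypothetical.
#if __cplusplus >= 201402L
#include <cassert>
#include <string>

inline void literal_sketch()
{
    using namespace std::string_literals;
    auto a = "abc"s;                              // std::string of size 3
    auto b = "ab\0cd"s;                           // size 5: the embedded '\0' is preserved
    assert(a.size() == 3 && b.size() == 5);
    assert(std::string("ab\0cd").size() == 2);    // the const char* constructor stops at '\0'
}
#endif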
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP_STRING
diff --git a/contrib/llvm-project/libcxx/include/vector b/contrib/llvm-project/libcxx/include/vector
index 9189ed44a80c..90d8b946f135 100644
--- a/contrib/llvm-project/libcxx/include/vector
+++ b/contrib/llvm-project/libcxx/include/vector
@@ -1,3416 +1,3436 @@
// -*- C++ -*-
//===------------------------------ vector --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_VECTOR
#define _LIBCPP_VECTOR
/*
vector synopsis
namespace std
{
template <class T, class Allocator = allocator<T> >
class vector
{
public:
typedef T value_type;
typedef Allocator allocator_type;
typedef typename allocator_type::reference reference;
typedef typename allocator_type::const_reference const_reference;
typedef implementation-defined iterator;
typedef implementation-defined const_iterator;
typedef typename allocator_type::size_type size_type;
typedef typename allocator_type::difference_type difference_type;
typedef typename allocator_type::pointer pointer;
typedef typename allocator_type::const_pointer const_pointer;
typedef std::reverse_iterator<iterator> reverse_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
vector()
noexcept(is_nothrow_default_constructible<allocator_type>::value);
explicit vector(const allocator_type&);
explicit vector(size_type n);
explicit vector(size_type n, const allocator_type&); // C++14
vector(size_type n, const value_type& value, const allocator_type& = allocator_type());
template <class InputIterator>
vector(InputIterator first, InputIterator last, const allocator_type& = allocator_type());
vector(const vector& x);
vector(vector&& x)
noexcept(is_nothrow_move_constructible<allocator_type>::value);
vector(initializer_list<value_type> il);
vector(initializer_list<value_type> il, const allocator_type& a);
~vector();
vector& operator=(const vector& x);
vector& operator=(vector&& x)
noexcept(
allocator_type::propagate_on_container_move_assignment::value ||
allocator_type::is_always_equal::value); // C++17
vector& operator=(initializer_list<value_type> il);
template <class InputIterator>
void assign(InputIterator first, InputIterator last);
void assign(size_type n, const value_type& u);
void assign(initializer_list<value_type> il);
allocator_type get_allocator() const noexcept;
iterator begin() noexcept;
const_iterator begin() const noexcept;
iterator end() noexcept;
const_iterator end() const noexcept;
reverse_iterator rbegin() noexcept;
const_reverse_iterator rbegin() const noexcept;
reverse_iterator rend() noexcept;
const_reverse_iterator rend() const noexcept;
const_iterator cbegin() const noexcept;
const_iterator cend() const noexcept;
const_reverse_iterator crbegin() const noexcept;
const_reverse_iterator crend() const noexcept;
size_type size() const noexcept;
size_type max_size() const noexcept;
size_type capacity() const noexcept;
bool empty() const noexcept;
void reserve(size_type n);
void shrink_to_fit() noexcept;
reference operator[](size_type n);
const_reference operator[](size_type n) const;
reference at(size_type n);
const_reference at(size_type n) const;
reference front();
const_reference front() const;
reference back();
const_reference back() const;
value_type* data() noexcept;
const value_type* data() const noexcept;
void push_back(const value_type& x);
void push_back(value_type&& x);
template <class... Args>
reference emplace_back(Args&&... args); // reference in C++17
void pop_back();
template <class... Args> iterator emplace(const_iterator position, Args&&... args);
iterator insert(const_iterator position, const value_type& x);
iterator insert(const_iterator position, value_type&& x);
iterator insert(const_iterator position, size_type n, const value_type& x);
template <class InputIterator>
iterator insert(const_iterator position, InputIterator first, InputIterator last);
iterator insert(const_iterator position, initializer_list<value_type> il);
iterator erase(const_iterator position);
iterator erase(const_iterator first, const_iterator last);
void clear() noexcept;
void resize(size_type sz);
void resize(size_type sz, const value_type& c);
void swap(vector&)
noexcept(allocator_traits<allocator_type>::propagate_on_container_swap::value ||
allocator_traits<allocator_type>::is_always_equal::value); // C++17
bool __invariants() const;
};
template <class Allocator = allocator<T> >
class vector<bool, Allocator>
{
public:
typedef bool value_type;
typedef Allocator allocator_type;
typedef implementation-defined iterator;
typedef implementation-defined const_iterator;
typedef typename allocator_type::size_type size_type;
typedef typename allocator_type::difference_type difference_type;
typedef iterator pointer;
typedef const_iterator const_pointer;
typedef std::reverse_iterator<iterator> reverse_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
class reference
{
public:
reference(const reference&) noexcept;
operator bool() const noexcept;
reference& operator=(bool x) noexcept;
reference& operator=(const reference& x) noexcept;
iterator operator&() const noexcept;
void flip() noexcept;
};
class const_reference
{
public:
const_reference(const reference&) noexcept;
operator bool() const noexcept;
const_iterator operator&() const noexcept;
};
vector()
noexcept(is_nothrow_default_constructible<allocator_type>::value);
explicit vector(const allocator_type&);
explicit vector(size_type n, const allocator_type& a = allocator_type()); // C++14
vector(size_type n, const value_type& value, const allocator_type& = allocator_type());
template <class InputIterator>
vector(InputIterator first, InputIterator last, const allocator_type& = allocator_type());
vector(const vector& x);
vector(vector&& x)
noexcept(is_nothrow_move_constructible<allocator_type>::value);
vector(initializer_list<value_type> il);
vector(initializer_list<value_type> il, const allocator_type& a);
~vector();
vector& operator=(const vector& x);
vector& operator=(vector&& x)
noexcept(
allocator_type::propagate_on_container_move_assignment::value ||
allocator_type::is_always_equal::value); // C++17
vector& operator=(initializer_list<value_type> il);
template <class InputIterator>
void assign(InputIterator first, InputIterator last);
void assign(size_type n, const value_type& u);
void assign(initializer_list<value_type> il);
allocator_type get_allocator() const noexcept;
iterator begin() noexcept;
const_iterator begin() const noexcept;
iterator end() noexcept;
const_iterator end() const noexcept;
reverse_iterator rbegin() noexcept;
const_reverse_iterator rbegin() const noexcept;
reverse_iterator rend() noexcept;
const_reverse_iterator rend() const noexcept;
const_iterator cbegin() const noexcept;
const_iterator cend() const noexcept;
const_reverse_iterator crbegin() const noexcept;
const_reverse_iterator crend() const noexcept;
size_type size() const noexcept;
size_type max_size() const noexcept;
size_type capacity() const noexcept;
bool empty() const noexcept;
void reserve(size_type n);
void shrink_to_fit() noexcept;
reference operator[](size_type n);
const_reference operator[](size_type n) const;
reference at(size_type n);
const_reference at(size_type n) const;
reference front();
const_reference front() const;
reference back();
const_reference back() const;
void push_back(const value_type& x);
template <class... Args> reference emplace_back(Args&&... args); // C++14; reference in C++17
void pop_back();
template <class... Args> iterator emplace(const_iterator position, Args&&... args); // C++14
iterator insert(const_iterator position, const value_type& x);
iterator insert(const_iterator position, size_type n, const value_type& x);
template <class InputIterator>
iterator insert(const_iterator position, InputIterator first, InputIterator last);
iterator insert(const_iterator position, initializer_list<value_type> il);
iterator erase(const_iterator position);
iterator erase(const_iterator first, const_iterator last);
void clear() noexcept;
void resize(size_type sz);
void resize(size_type sz, value_type x);
void swap(vector&)
noexcept(allocator_traits<allocator_type>::propagate_on_container_swap::value ||
allocator_traits<allocator_type>::is_always_equal::value); // C++17
void flip() noexcept;
bool __invariants() const;
};
template <class InputIterator, class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
vector(InputIterator, InputIterator, Allocator = Allocator())
-> vector<typename iterator_traits<InputIterator>::value_type, Allocator>;
template <class Allocator> struct hash<std::vector<bool, Allocator>>;
template <class T, class Allocator> bool operator==(const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator> bool operator< (const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator> bool operator!=(const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator> bool operator> (const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator> bool operator>=(const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator> bool operator<=(const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator>
void swap(vector<T,Allocator>& x, vector<T,Allocator>& y)
noexcept(noexcept(x.swap(y)));
template <class T, class Allocator, class U>
typename vector<T, Allocator>::size_type
erase(vector<T, Allocator>& c, const U& value); // C++20
template <class T, class Allocator, class Predicate>
typename vector<T, Allocator>::size_type
erase_if(vector<T, Allocator>& c, Predicate pred); // C++20
} // std
*/
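// Editor's note: an illustrative sketch, not part of the libc++ sources, of the
// interface summarized in the synopsis above, including the C++20 erase helper
// declared at its end. The helper name is hypothetical and would live in a
// separate test translation unit.
#include <cassert>
#include <vector>

inline void vector_synopsis_sketch()
{
    std::vector<int> v;
    v.reserve(4);                    // capacity() >= 4, size() still 0
    v.push_back(1);
    v.emplace_back(2);               // constructs in place; returns a reference since C++17
    v.insert(v.begin(), 0);          // v == {0, 1, 2}
    assert(v.size() == 3 && v.front() == 0 && v.back() == 2);
    v.erase(v.begin());              // v == {1, 2}
#if __cplusplus > 201703L
    assert(std::erase(v, 2) == 1);   // C++20 uniform erasure declared in the synopsis
#endif
}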
#include <__config>
#include <__bit_reference>
#include <__debug>
#include <__functional_base>
#include <__iterator/wrap_iter.h>
#include <__split_buffer>
#include <__utility/forward.h>
#include <algorithm>
#include <climits>
#include <compare>
+#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <iosfwd> // for forward declaration of vector
#include <limits>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include <version>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
template <bool>
class _LIBCPP_TEMPLATE_VIS __vector_base_common
{
protected:
_LIBCPP_INLINE_VISIBILITY __vector_base_common() {}
_LIBCPP_NORETURN void __throw_length_error() const;
_LIBCPP_NORETURN void __throw_out_of_range() const;
};
template <bool __b>
void
__vector_base_common<__b>::__throw_length_error() const
{
_VSTD::__throw_length_error("vector");
}
template <bool __b>
void
__vector_base_common<__b>::__throw_out_of_range() const
{
_VSTD::__throw_out_of_range("vector");
}
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __vector_base_common<true>)
template <class _Tp, class _Allocator>
class __vector_base
: protected __vector_base_common<true>
{
public:
typedef _Allocator allocator_type;
typedef allocator_traits<allocator_type> __alloc_traits;
typedef typename __alloc_traits::size_type size_type;
protected:
typedef _Tp value_type;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef typename __alloc_traits::difference_type difference_type;
typedef typename __alloc_traits::pointer pointer;
typedef typename __alloc_traits::const_pointer const_pointer;
typedef pointer iterator;
typedef const_pointer const_iterator;
pointer __begin_;
pointer __end_;
__compressed_pair<pointer, allocator_type> __end_cap_;
_LIBCPP_INLINE_VISIBILITY
allocator_type& __alloc() _NOEXCEPT
{return __end_cap_.second();}
_LIBCPP_INLINE_VISIBILITY
const allocator_type& __alloc() const _NOEXCEPT
{return __end_cap_.second();}
_LIBCPP_INLINE_VISIBILITY
pointer& __end_cap() _NOEXCEPT
{return __end_cap_.first();}
_LIBCPP_INLINE_VISIBILITY
const pointer& __end_cap() const _NOEXCEPT
{return __end_cap_.first();}
_LIBCPP_INLINE_VISIBILITY
__vector_base()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value);
_LIBCPP_INLINE_VISIBILITY __vector_base(const allocator_type& __a);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY __vector_base(allocator_type&& __a) _NOEXCEPT;
#endif
~__vector_base();
_LIBCPP_INLINE_VISIBILITY
void clear() _NOEXCEPT {__destruct_at_end(__begin_);}
_LIBCPP_INLINE_VISIBILITY
size_type capacity() const _NOEXCEPT
{return static_cast<size_type>(__end_cap() - __begin_);}
_LIBCPP_INLINE_VISIBILITY
void __destruct_at_end(pointer __new_last) _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const __vector_base& __c)
{__copy_assign_alloc(__c, integral_constant<bool,
__alloc_traits::propagate_on_container_copy_assignment::value>());}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(__vector_base& __c)
_NOEXCEPT_(
!__alloc_traits::propagate_on_container_move_assignment::value ||
is_nothrow_move_assignable<allocator_type>::value)
{__move_assign_alloc(__c, integral_constant<bool,
__alloc_traits::propagate_on_container_move_assignment::value>());}
+
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_length_error() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __vector_base_common<true>::__throw_length_error();
+#else
+ _VSTD::abort();
+#endif
+ }
+
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_out_of_range() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __vector_base_common<true>::__throw_out_of_range();
+#else
+ _VSTD::abort();
+#endif
+ }
+
private:
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const __vector_base& __c, true_type)
{
if (__alloc() != __c.__alloc())
{
clear();
__alloc_traits::deallocate(__alloc(), __begin_, capacity());
__begin_ = __end_ = __end_cap() = nullptr;
}
__alloc() = __c.__alloc();
}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const __vector_base&, false_type)
{}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(__vector_base& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__alloc() = _VSTD::move(__c.__alloc());
}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(__vector_base&, false_type)
_NOEXCEPT
{}
};
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
__vector_base<_Tp, _Allocator>::__destruct_at_end(pointer __new_last) _NOEXCEPT
{
pointer __soon_to_be_end = __end_;
while (__new_last != __soon_to_be_end)
__alloc_traits::destroy(__alloc(), _VSTD::__to_address(--__soon_to_be_end));
__end_ = __new_last;
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
__vector_base<_Tp, _Allocator>::__vector_base()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
: __begin_(nullptr),
__end_(nullptr),
__end_cap_(nullptr, __default_init_tag())
{
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
__vector_base<_Tp, _Allocator>::__vector_base(const allocator_type& __a)
: __begin_(nullptr),
__end_(nullptr),
__end_cap_(nullptr, __a)
{
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
__vector_base<_Tp, _Allocator>::__vector_base(allocator_type&& __a) _NOEXCEPT
: __begin_(nullptr),
__end_(nullptr),
__end_cap_(nullptr, _VSTD::move(__a)) {}
#endif
template <class _Tp, class _Allocator>
__vector_base<_Tp, _Allocator>::~__vector_base()
{
if (__begin_ != nullptr)
{
clear();
__alloc_traits::deallocate(__alloc(), __begin_, capacity());
}
}
template <class _Tp, class _Allocator /* = allocator<_Tp> */>
class _LIBCPP_TEMPLATE_VIS vector
: private __vector_base<_Tp, _Allocator>
{
private:
typedef __vector_base<_Tp, _Allocator> __base;
typedef allocator<_Tp> __default_allocator_type;
public:
typedef vector __self;
typedef _Tp value_type;
typedef _Allocator allocator_type;
typedef typename __base::__alloc_traits __alloc_traits;
typedef typename __base::reference reference;
typedef typename __base::const_reference const_reference;
typedef typename __base::size_type size_type;
typedef typename __base::difference_type difference_type;
typedef typename __base::pointer pointer;
typedef typename __base::const_pointer const_pointer;
typedef __wrap_iter<pointer> iterator;
typedef __wrap_iter<const_pointer> const_iterator;
typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
static_assert((is_same<typename allocator_type::value_type, value_type>::value),
"Allocator::value_type must be same type as value_type");
_LIBCPP_INLINE_VISIBILITY
vector() _NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
_LIBCPP_INLINE_VISIBILITY explicit vector(const allocator_type& __a)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_copy_constructible<allocator_type>::value)
#else
_NOEXCEPT
#endif
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
explicit vector(size_type __n);
#if _LIBCPP_STD_VER > 11
explicit vector(size_type __n, const allocator_type& __a);
#endif
vector(size_type __n, const value_type& __x);
vector(size_type __n, const value_type& __x, const allocator_type& __a);
template <class _InputIterator>
vector(_InputIterator __first,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value,
_InputIterator>::type __last);
template <class _InputIterator>
vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value>::type* = 0);
template <class _ForwardIterator>
vector(_ForwardIterator __first,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value,
_ForwardIterator>::type __last);
template <class _ForwardIterator>
vector(_ForwardIterator __first, _ForwardIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value>::type* = 0);
_LIBCPP_INLINE_VISIBILITY
~vector()
{
__annotate_delete();
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__erase_c(this);
#endif
}
vector(const vector& __x);
vector(const vector& __x, const __identity_t<allocator_type>& __a);
_LIBCPP_INLINE_VISIBILITY
vector& operator=(const vector& __x);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
vector(initializer_list<value_type> __il);
_LIBCPP_INLINE_VISIBILITY
vector(initializer_list<value_type> __il, const allocator_type& __a);
_LIBCPP_INLINE_VISIBILITY
vector(vector&& __x)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT;
#else
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value);
#endif
_LIBCPP_INLINE_VISIBILITY
vector(vector&& __x, const __identity_t<allocator_type>& __a);
_LIBCPP_INLINE_VISIBILITY
vector& operator=(vector&& __x)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value));
_LIBCPP_INLINE_VISIBILITY
vector& operator=(initializer_list<value_type> __il)
{assign(__il.begin(), __il.end()); return *this;}
#endif // !_LIBCPP_CXX03_LANG
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value,
void
>::type
assign(_InputIterator __first, _InputIterator __last);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value,
void
>::type
assign(_ForwardIterator __first, _ForwardIterator __last);
void assign(size_type __n, const_reference __u);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
void assign(initializer_list<value_type> __il)
{assign(__il.begin(), __il.end());}
#endif
_LIBCPP_INLINE_VISIBILITY
allocator_type get_allocator() const _NOEXCEPT
{return this->__alloc();}
_LIBCPP_INLINE_VISIBILITY iterator begin() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY const_iterator begin() const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY iterator end() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY const_iterator end() const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rbegin() _NOEXCEPT
{return reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rbegin() const _NOEXCEPT
{return const_reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rend() _NOEXCEPT
{return reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rend() const _NOEXCEPT
{return const_reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_iterator cbegin() const _NOEXCEPT
{return begin();}
_LIBCPP_INLINE_VISIBILITY
const_iterator cend() const _NOEXCEPT
{return end();}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crbegin() const _NOEXCEPT
{return rbegin();}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crend() const _NOEXCEPT
{return rend();}
_LIBCPP_INLINE_VISIBILITY
size_type size() const _NOEXCEPT
{return static_cast<size_type>(this->__end_ - this->__begin_);}
_LIBCPP_INLINE_VISIBILITY
size_type capacity() const _NOEXCEPT
{return __base::capacity();}
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY
bool empty() const _NOEXCEPT
{return this->__begin_ == this->__end_;}
size_type max_size() const _NOEXCEPT;
void reserve(size_type __n);
void shrink_to_fit() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY reference operator[](size_type __n) _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY const_reference operator[](size_type __n) const _NOEXCEPT;
reference at(size_type __n);
const_reference at(size_type __n) const;
_LIBCPP_INLINE_VISIBILITY reference front() _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "front() called on an empty vector");
return *this->__begin_;
}
_LIBCPP_INLINE_VISIBILITY const_reference front() const _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "front() called on an empty vector");
return *this->__begin_;
}
_LIBCPP_INLINE_VISIBILITY reference back() _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "back() called on an empty vector");
return *(this->__end_ - 1);
}
_LIBCPP_INLINE_VISIBILITY const_reference back() const _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "back() called on an empty vector");
return *(this->__end_ - 1);
}
_LIBCPP_INLINE_VISIBILITY
value_type* data() _NOEXCEPT
{return _VSTD::__to_address(this->__begin_);}
_LIBCPP_INLINE_VISIBILITY
const value_type* data() const _NOEXCEPT
{return _VSTD::__to_address(this->__begin_);}
#ifdef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
void __emplace_back(const value_type& __x) { push_back(__x); }
#else
template <class _Arg>
_LIBCPP_INLINE_VISIBILITY
void __emplace_back(_Arg&& __arg) {
emplace_back(_VSTD::forward<_Arg>(__arg));
}
#endif
_LIBCPP_INLINE_VISIBILITY void push_back(const_reference __x);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY void push_back(value_type&& __x);
template <class... _Args>
_LIBCPP_INLINE_VISIBILITY
#if _LIBCPP_STD_VER > 14
reference emplace_back(_Args&&... __args);
#else
void emplace_back(_Args&&... __args);
#endif
#endif // !_LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
void pop_back();
iterator insert(const_iterator __position, const_reference __x);
#ifndef _LIBCPP_CXX03_LANG
iterator insert(const_iterator __position, value_type&& __x);
template <class... _Args>
iterator emplace(const_iterator __position, _Args&&... __args);
#endif // !_LIBCPP_CXX03_LANG
iterator insert(const_iterator __position, size_type __n, const_reference __x);
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value,
iterator
>::type
insert(const_iterator __position, _InputIterator __first, _InputIterator __last);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value,
iterator
>::type
insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
iterator insert(const_iterator __position, initializer_list<value_type> __il)
{return insert(__position, __il.begin(), __il.end());}
#endif
_LIBCPP_INLINE_VISIBILITY iterator erase(const_iterator __position);
iterator erase(const_iterator __first, const_iterator __last);
_LIBCPP_INLINE_VISIBILITY
void clear() _NOEXCEPT
{
size_type __old_size = size();
__base::clear();
__annotate_shrink(__old_size);
__invalidate_all_iterators();
}
void resize(size_type __sz);
void resize(size_type __sz, const_reference __x);
void swap(vector&)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT;
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value);
#endif
bool __invariants() const;
#if _LIBCPP_DEBUG_LEVEL == 2
bool __dereferenceable(const const_iterator* __i) const;
bool __decrementable(const const_iterator* __i) const;
bool __addable(const const_iterator* __i, ptrdiff_t __n) const;
bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const;
#endif // _LIBCPP_DEBUG_LEVEL == 2
private:
_LIBCPP_INLINE_VISIBILITY void __invalidate_all_iterators();
_LIBCPP_INLINE_VISIBILITY void __invalidate_iterators_past(pointer __new_last);
void __vallocate(size_type __n);
void __vdeallocate() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY size_type __recommend(size_type __new_size) const;
void __construct_at_end(size_type __n);
_LIBCPP_INLINE_VISIBILITY
void __construct_at_end(size_type __n, const_reference __x);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
__construct_at_end(_ForwardIterator __first, _ForwardIterator __last, size_type __n);
void __append(size_type __n);
void __append(size_type __n, const_reference __x);
_LIBCPP_INLINE_VISIBILITY
iterator __make_iter(pointer __p) _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
const_iterator __make_iter(const_pointer __p) const _NOEXCEPT;
void __swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v);
pointer __swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v, pointer __p);
void __move_range(pointer __from_s, pointer __from_e, pointer __to);
void __move_assign(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value);
void __move_assign(vector& __c, false_type)
_NOEXCEPT_(__alloc_traits::is_always_equal::value);
_LIBCPP_INLINE_VISIBILITY
void __destruct_at_end(pointer __new_last) _NOEXCEPT
{
__invalidate_iterators_past(__new_last);
size_type __old_size = size();
__base::__destruct_at_end(__new_last);
__annotate_shrink(__old_size);
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Up>
_LIBCPP_INLINE_VISIBILITY
inline void __push_back_slow_path(_Up&& __x);
template <class... _Args>
_LIBCPP_INLINE_VISIBILITY
inline void __emplace_back_slow_path(_Args&&... __args);
#else
template <class _Up>
_LIBCPP_INLINE_VISIBILITY
inline void __push_back_slow_path(_Up& __x);
#endif
// The following functions are no-ops outside of AddressSanitizer mode.
// We call annotations only for the default Allocator because other allocators
// may not meet the AddressSanitizer alignment constraints.
// See the documentation for __sanitizer_annotate_contiguous_container for more details.
#ifndef _LIBCPP_HAS_NO_ASAN
void __annotate_contiguous_container(const void *__beg, const void *__end,
const void *__old_mid,
const void *__new_mid) const
{
if (__beg && is_same<allocator_type, __default_allocator_type>::value)
__sanitizer_annotate_contiguous_container(__beg, __end, __old_mid, __new_mid);
}
#else
_LIBCPP_INLINE_VISIBILITY
void __annotate_contiguous_container(const void*, const void*, const void*,
const void*) const _NOEXCEPT {}
#endif
_LIBCPP_INLINE_VISIBILITY
void __annotate_new(size_type __current_size) const _NOEXCEPT {
__annotate_contiguous_container(data(), data() + capacity(),
data() + capacity(), data() + __current_size);
}
_LIBCPP_INLINE_VISIBILITY
void __annotate_delete() const _NOEXCEPT {
__annotate_contiguous_container(data(), data() + capacity(),
data() + size(), data() + capacity());
}
_LIBCPP_INLINE_VISIBILITY
void __annotate_increase(size_type __n) const _NOEXCEPT
{
__annotate_contiguous_container(data(), data() + capacity(),
data() + size(), data() + size() + __n);
}
_LIBCPP_INLINE_VISIBILITY
void __annotate_shrink(size_type __old_size) const _NOEXCEPT
{
__annotate_contiguous_container(data(), data() + capacity(),
data() + __old_size, data() + size());
}
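// A worked example of the annotation protocol (hypothetical sizes: assume
// capacity() == 8 and size() == 3, and that __n == 2 elements are about to
// be constructed):
//
//   __annotate_increase(2) forwards
//     __beg     = data()           // start of the allocation
//     __end     = data() + 8       // one past the allocation
//     __old_mid = data() + 3       // old boundary of live elements
//     __new_mid = data() + 5       // new boundary of live elements
//   to __sanitizer_annotate_contiguous_container, telling ASan that
//   [data() + 3, data() + 5) is now valid container storage; __annotate_shrink
//   moves the boundary in the other direction.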
struct _ConstructTransaction {
explicit _ConstructTransaction(vector &__v, size_type __n)
: __v_(__v), __pos_(__v.__end_), __new_end_(__v.__end_ + __n) {
#ifndef _LIBCPP_HAS_NO_ASAN
__v_.__annotate_increase(__n);
#endif
}
~_ConstructTransaction() {
__v_.__end_ = __pos_;
#ifndef _LIBCPP_HAS_NO_ASAN
if (__pos_ != __new_end_) {
__v_.__annotate_shrink(__new_end_ - __v_.__begin_);
}
#endif
}
vector &__v_;
pointer __pos_;
const_pointer const __new_end_;
private:
_ConstructTransaction(_ConstructTransaction const&) = delete;
_ConstructTransaction& operator=(_ConstructTransaction const&) = delete;
};
template <class ..._Args>
_LIBCPP_INLINE_VISIBILITY
void __construct_one_at_end(_Args&& ...__args) {
_ConstructTransaction __tx(*this, 1);
__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__tx.__pos_),
_VSTD::forward<_Args>(__args)...);
++__tx.__pos_;
}
};
#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
template<class _InputIterator,
class _Alloc = allocator<__iter_value_type<_InputIterator>>,
class = _EnableIf<__is_allocator<_Alloc>::value>
>
vector(_InputIterator, _InputIterator)
-> vector<__iter_value_type<_InputIterator>, _Alloc>;
template<class _InputIterator,
class _Alloc,
class = _EnableIf<__is_allocator<_Alloc>::value>
>
vector(_InputIterator, _InputIterator, _Alloc)
-> vector<__iter_value_type<_InputIterator>, _Alloc>;
#endif
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v)
{
__annotate_delete();
_VSTD::__construct_backward_with_exception_guarantees(this->__alloc(), this->__begin_, this->__end_, __v.__begin_);
_VSTD::swap(this->__begin_, __v.__begin_);
_VSTD::swap(this->__end_, __v.__end_);
_VSTD::swap(this->__end_cap(), __v.__end_cap());
__v.__first_ = __v.__begin_;
__annotate_new(size());
__invalidate_all_iterators();
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::pointer
vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v, pointer __p)
{
__annotate_delete();
pointer __r = __v.__begin_;
_VSTD::__construct_backward_with_exception_guarantees(this->__alloc(), this->__begin_, __p, __v.__begin_);
_VSTD::__construct_forward_with_exception_guarantees(this->__alloc(), __p, this->__end_, __v.__end_);
_VSTD::swap(this->__begin_, __v.__begin_);
_VSTD::swap(this->__end_, __v.__end_);
_VSTD::swap(this->__end_cap(), __v.__end_cap());
__v.__first_ = __v.__begin_;
__annotate_new(size());
__invalidate_all_iterators();
return __r;
}
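// The two overloads above implement the shared "grow by swapping buffers"
// step used by reserve(), the push_back/emplace_back slow paths and insert():
// any new elements have already been constructed inside the __split_buffer
// __v, the existing elements are move/copy-constructed around them (backward
// in front of __v.__begin_, and additionally forward after __v.__end_ in the
// two-argument form, which splits the old contents at __p), and then the
// three pointers __begin_/__end_/__end_cap() are swapped. A rough sketch of
// the same idea in plain terms (illustrative only; the real code goes through
// the allocator and keeps the strong exception guarantee):
//
//   // new_buf = allocate(new_cap);
//   // place the new element(s) at their final offset in new_buf;
//   // relocate the old elements into the remaining slots;
//   // swap ownership of the buffers;        // *this now owns new_buf
//   // destroy old elements, free old_buf;   // done by __v's destructor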
// Allocate space for __n objects
// throws length_error if __n > max_size()
// throws (probably bad_alloc) if memory runs out
// Precondition: __begin_ == __end_ == __end_cap() == 0
// Precondition: __n > 0
// Postcondition: capacity() == __n
// Postcondition: size() == 0
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__vallocate(size_type __n)
{
if (__n > max_size())
this->__throw_length_error();
this->__begin_ = this->__end_ = __alloc_traits::allocate(this->__alloc(), __n);
this->__end_cap() = this->__begin_ + __n;
__annotate_new(0);
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__vdeallocate() _NOEXCEPT
{
if (this->__begin_ != nullptr)
{
clear();
__alloc_traits::deallocate(this->__alloc(), this->__begin_, capacity());
this->__begin_ = this->__end_ = this->__end_cap() = nullptr;
}
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::size_type
vector<_Tp, _Allocator>::max_size() const _NOEXCEPT
{
return _VSTD::min<size_type>(__alloc_traits::max_size(this->__alloc()),
numeric_limits<difference_type>::max());
}
// Precondition: __new_size > capacity()
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::size_type
vector<_Tp, _Allocator>::__recommend(size_type __new_size) const
{
const size_type __ms = max_size();
if (__new_size > __ms)
this->__throw_length_error();
const size_type __cap = capacity();
if (__cap >= __ms / 2)
return __ms;
return _VSTD::max<size_type>(2 * __cap, __new_size);
}
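// A worked example of the growth policy above (hypothetical sizes, assuming
// max_size() is far larger than the values shown):
//
//   capacity() == 0, __new_size == 1   ->  max(0, 1)   == 1
//   capacity() == 4, __new_size == 5   ->  max(8, 5)   == 8
//   capacity() == 8, __new_size == 25  ->  max(16, 25) == 25
//
// i.e. the capacity at least doubles on each reallocation, and once it
// reaches half of max_size() the recommendation saturates at max_size().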
// Default constructs __n objects starting at __end_
// throws if construction throws
// Precondition: __n > 0
// Precondition: size() + __n <= capacity()
// Postcondition: size() == old size() + __n
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__construct_at_end(size_type __n)
{
_ConstructTransaction __tx(*this, __n);
const_pointer __new_end = __tx.__new_end_;
for (pointer __pos = __tx.__pos_; __pos != __new_end; ++__pos, __tx.__pos_ = __pos) {
__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__pos));
}
}
// Copy constructs __n objects starting at __end_ from __x
// throws if construction throws
// Precondition: __n > 0
// Precondition: size() + __n <= capacity()
// Postcondition: size() == old size() + __n
// Postcondition: [i] == __x for all i in [size() - __n, size())
template <class _Tp, class _Allocator>
inline
void
vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
{
_ConstructTransaction __tx(*this, __n);
const_pointer __new_end = __tx.__new_end_;
for (pointer __pos = __tx.__pos_; __pos != __new_end; ++__pos, __tx.__pos_ = __pos) {
__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__pos), __x);
}
}
template <class _Tp, class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
vector<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last, size_type __n)
{
_ConstructTransaction __tx(*this, __n);
_VSTD::__construct_range_forward(this->__alloc(), __first, __last, __tx.__pos_);
}
// Default constructs __n objects starting at __end_
// throws if construction throws
// Postcondition: size() == old size() + __n
// Exception safety: strong.
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__append(size_type __n)
{
if (static_cast<size_type>(this->__end_cap() - this->__end_) >= __n)
this->__construct_at_end(__n);
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), size(), __a);
__v.__construct_at_end(__n);
__swap_out_circular_buffer(__v);
}
}
// Copy constructs __n objects starting at __end_ from __x
// throws if construction throws
// Postcondition: size() == old size() + __n
// Exception safety: strong.
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__append(size_type __n, const_reference __x)
{
if (static_cast<size_type>(this->__end_cap() - this->__end_) >= __n)
this->__construct_at_end(__n, __x);
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), size(), __a);
__v.__construct_at_end(__n, __x);
__swap_out_circular_buffer(__v);
}
}
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(size_type __n)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n);
}
}
#if _LIBCPP_STD_VER > 11
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(size_type __n, const allocator_type& __a)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n);
}
}
#endif
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, __x);
}
}
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x, const allocator_type& __a)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, __x);
}
}
template <class _Tp, class _Allocator>
template <class _InputIterator>
vector<_Tp, _Allocator>::vector(_InputIterator __first,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value,
_InputIterator>::type __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
for (; __first != __last; ++__first)
__emplace_back(*__first);
}
template <class _Tp, class _Allocator>
template <class _InputIterator>
vector<_Tp, _Allocator>::vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value>::type*)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
for (; __first != __last; ++__first)
__emplace_back(*__first);
}
template <class _Tp, class _Allocator>
template <class _ForwardIterator>
vector<_Tp, _Allocator>::vector(_ForwardIterator __first,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value,
_ForwardIterator>::type __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__first, __last, __n);
}
}
template <class _Tp, class _Allocator>
template <class _ForwardIterator>
vector<_Tp, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value>::type*)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__first, __last, __n);
}
}
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(const vector& __x)
: __base(__alloc_traits::select_on_container_copy_construction(__x.__alloc()))
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
size_type __n = __x.size();
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__x.__begin_, __x.__end_, __n);
}
}
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(const vector& __x, const __identity_t<allocator_type>& __a)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
size_type __n = __x.size();
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__x.__begin_, __x.__end_, __n);
}
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>::vector(vector&& __x)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT
#else
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value)
#endif
: __base(_VSTD::move(__x.__alloc()))
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
__get_db()->swap(this, &__x);
#endif
this->__begin_ = __x.__begin_;
this->__end_ = __x.__end_;
this->__end_cap() = __x.__end_cap();
__x.__begin_ = __x.__end_ = __x.__end_cap() = nullptr;
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>::vector(vector&& __x, const __identity_t<allocator_type>& __a)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__a == __x.__alloc())
{
this->__begin_ = __x.__begin_;
this->__end_ = __x.__end_;
this->__end_cap() = __x.__end_cap();
__x.__begin_ = __x.__end_ = __x.__end_cap() = nullptr;
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->swap(this, &__x);
#endif
}
else
{
typedef move_iterator<iterator> _Ip;
assign(_Ip(__x.begin()), _Ip(__x.end()));
}
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>::vector(initializer_list<value_type> __il)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__il.size() > 0)
{
__vallocate(__il.size());
__construct_at_end(__il.begin(), __il.end(), __il.size());
}
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>::vector(initializer_list<value_type> __il, const allocator_type& __a)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__il.size() > 0)
{
__vallocate(__il.size());
__construct_at_end(__il.begin(), __il.end(), __il.size());
}
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>&
vector<_Tp, _Allocator>::operator=(vector&& __x)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value))
{
__move_assign(__x, integral_constant<bool,
__alloc_traits::propagate_on_container_move_assignment::value>());
return *this;
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__move_assign(vector& __c, false_type)
_NOEXCEPT_(__alloc_traits::is_always_equal::value)
{
if (__base::__alloc() != __c.__alloc())
{
typedef move_iterator<iterator> _Ip;
assign(_Ip(__c.begin()), _Ip(__c.end()));
}
else
__move_assign(__c, true_type());
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__move_assign(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__vdeallocate();
__base::__move_assign_alloc(__c); // this can throw
this->__begin_ = __c.__begin_;
this->__end_ = __c.__end_;
this->__end_cap() = __c.__end_cap();
__c.__begin_ = __c.__end_ = __c.__end_cap() = nullptr;
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->swap(this, &__c);
#endif
}
#endif // !_LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>&
vector<_Tp, _Allocator>::operator=(const vector& __x)
{
if (this != &__x)
{
__base::__copy_assign_alloc(__x);
assign(__x.__begin_, __x.__end_);
}
return *this;
}
template <class _Tp, class _Allocator>
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
_Tp,
typename iterator_traits<_InputIterator>::reference>::value,
void
>::type
vector<_Tp, _Allocator>::assign(_InputIterator __first, _InputIterator __last)
{
clear();
for (; __first != __last; ++__first)
__emplace_back(*__first);
}
template <class _Tp, class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
_Tp,
typename iterator_traits<_ForwardIterator>::reference>::value,
void
>::type
vector<_Tp, _Allocator>::assign(_ForwardIterator __first, _ForwardIterator __last)
{
size_type __new_size = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__new_size <= capacity())
{
_ForwardIterator __mid = __last;
bool __growing = false;
if (__new_size > size())
{
__growing = true;
__mid = __first;
_VSTD::advance(__mid, size());
}
pointer __m = _VSTD::copy(__first, __mid, this->__begin_);
if (__growing)
__construct_at_end(__mid, __last, __new_size - size());
else
this->__destruct_at_end(__m);
}
else
{
__vdeallocate();
__vallocate(__recommend(__new_size));
__construct_at_end(__first, __last, __new_size);
}
__invalidate_all_iterators();
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::assign(size_type __n, const_reference __u)
{
if (__n <= capacity())
{
size_type __s = size();
_VSTD::fill_n(this->__begin_, _VSTD::min(__n, __s), __u);
if (__n > __s)
__construct_at_end(__n - __s, __u);
else
this->__destruct_at_end(this->__begin_ + __n);
}
else
{
__vdeallocate();
__vallocate(__recommend(static_cast<size_type>(__n)));
__construct_at_end(__n, __u);
}
__invalidate_all_iterators();
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::__make_iter(pointer __p) _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
return iterator(this, __p);
#else
return iterator(__p);
#endif
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::const_iterator
vector<_Tp, _Allocator>::__make_iter(const_pointer __p) const _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
return const_iterator(this, __p);
#else
return const_iterator(__p);
#endif
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::begin() _NOEXCEPT
{
return __make_iter(this->__begin_);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::const_iterator
vector<_Tp, _Allocator>::begin() const _NOEXCEPT
{
return __make_iter(this->__begin_);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::end() _NOEXCEPT
{
return __make_iter(this->__end_);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::const_iterator
vector<_Tp, _Allocator>::end() const _NOEXCEPT
{
return __make_iter(this->__end_);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::reference
vector<_Tp, _Allocator>::operator[](size_type __n) _NOEXCEPT
{
_LIBCPP_ASSERT(__n < size(), "vector[] index out of bounds");
return this->__begin_[__n];
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::const_reference
vector<_Tp, _Allocator>::operator[](size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n < size(), "vector[] index out of bounds");
return this->__begin_[__n];
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::reference
vector<_Tp, _Allocator>::at(size_type __n)
{
if (__n >= size())
this->__throw_out_of_range();
return this->__begin_[__n];
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::const_reference
vector<_Tp, _Allocator>::at(size_type __n) const
{
if (__n >= size())
this->__throw_out_of_range();
return this->__begin_[__n];
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::reserve(size_type __n)
{
if (__n > capacity())
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__n, size(), __a);
__swap_out_circular_buffer(__v);
}
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::shrink_to_fit() _NOEXCEPT
{
if (capacity() > size())
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(size(), size(), __a);
__swap_out_circular_buffer(__v);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
}
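// Usage sketch (hypothetical values): shrink_to_fit() is a non-binding
// request, and the try/catch above deliberately swallows a failed
// reallocation, leaving the vector unchanged on failure.
//
//   // std::vector<int> v;
//   // v.reserve(1000);        // capacity() >= 1000
//   // v.assign({1, 2, 3});    // size() == 3, capacity unchanged
//   // v.shrink_to_fit();      // capacity() typically becomes 3, though the
//   //                         // standard does not require any reduction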
template <class _Tp, class _Allocator>
template <class _Up>
void
#ifndef _LIBCPP_CXX03_LANG
vector<_Tp, _Allocator>::__push_back_slow_path(_Up&& __x)
#else
vector<_Tp, _Allocator>::__push_back_slow_path(_Up& __x)
#endif
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), size(), __a);
// __v.push_back(_VSTD::forward<_Up>(__x));
__alloc_traits::construct(__a, _VSTD::__to_address(__v.__end_), _VSTD::forward<_Up>(__x));
__v.__end_++;
__swap_out_circular_buffer(__v);
}
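// Note on the slow path above: the new element is constructed into the fresh
// __split_buffer *before* the existing elements are relocated, so if that
// construction throws, *this is left untouched (strong guarantee). Combined
// with __recommend()'s doubling this keeps push_back amortized O(1); e.g.
// pushing 1000 elements into an empty vector reallocates only about
// log2(1000) ~ 10 times (capacities 1, 2, 4, ..., 1024).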
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<_Tp, _Allocator>::push_back(const_reference __x)
{
if (this->__end_ != this->__end_cap())
{
__construct_one_at_end(__x);
}
else
__push_back_slow_path(__x);
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<_Tp, _Allocator>::push_back(value_type&& __x)
{
if (this->__end_ < this->__end_cap())
{
__construct_one_at_end(_VSTD::move(__x));
}
else
__push_back_slow_path(_VSTD::move(__x));
}
template <class _Tp, class _Allocator>
template <class... _Args>
void
vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args)
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), size(), __a);
// __v.emplace_back(_VSTD::forward<_Args>(__args)...);
__alloc_traits::construct(__a, _VSTD::__to_address(__v.__end_), _VSTD::forward<_Args>(__args)...);
__v.__end_++;
__swap_out_circular_buffer(__v);
}
template <class _Tp, class _Allocator>
template <class... _Args>
inline
#if _LIBCPP_STD_VER > 14
typename vector<_Tp, _Allocator>::reference
#else
void
#endif
vector<_Tp, _Allocator>::emplace_back(_Args&&... __args)
{
if (this->__end_ < this->__end_cap())
{
__construct_one_at_end(_VSTD::forward<_Args>(__args)...);
}
else
__emplace_back_slow_path(_VSTD::forward<_Args>(__args)...);
#if _LIBCPP_STD_VER > 14
return this->back();
#endif
}
#endif // !_LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
inline
void
vector<_Tp, _Allocator>::pop_back()
{
_LIBCPP_ASSERT(!empty(), "vector::pop_back called on an empty vector");
this->__destruct_at_end(this->__end_ - 1);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::erase(const_iterator __position)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::erase(iterator) called with an iterator not"
" referring to this vector");
#endif
_LIBCPP_ASSERT(__position != end(),
"vector::erase(iterator) called with a non-dereferenceable iterator");
difference_type __ps = __position - cbegin();
pointer __p = this->__begin_ + __ps;
this->__destruct_at_end(_VSTD::move(__p + 1, this->__end_, __p));
this->__invalidate_iterators_past(__p-1);
iterator __r = __make_iter(__p);
return __r;
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::erase(const_iterator __first, const_iterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this,
"vector::erase(iterator, iterator) called with an iterator not"
" referring to this vector");
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__last) == this,
"vector::erase(iterator, iterator) called with an iterator not"
" referring to this vector");
#endif
_LIBCPP_ASSERT(__first <= __last, "vector::erase(first, last) called with invalid range");
pointer __p = this->__begin_ + (__first - begin());
if (__first != __last) {
this->__destruct_at_end(_VSTD::move(__p + (__last - __first), this->__end_, __p));
this->__invalidate_iterators_past(__p - 1);
}
iterator __r = __make_iter(__p);
return __r;
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__move_range(pointer __from_s, pointer __from_e, pointer __to)
{
pointer __old_last = this->__end_;
difference_type __n = __old_last - __to;
{
pointer __i = __from_s + __n;
_ConstructTransaction __tx(*this, __from_e - __i);
for (pointer __pos = __tx.__pos_; __i < __from_e;
++__i, ++__pos, __tx.__pos_ = __pos) {
__alloc_traits::construct(this->__alloc(),
_VSTD::__to_address(__pos),
_VSTD::move(*__i));
}
}
_VSTD::move_backward(__from_s, __from_s + __n, __old_last);
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::insert(const_iterator __position, const_reference __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::insert(iterator, x) called with an iterator not"
" referring to this vector");
#endif
pointer __p = this->__begin_ + (__position - begin());
if (this->__end_ < this->__end_cap())
{
if (__p == this->__end_)
{
__construct_one_at_end(__x);
}
else
{
__move_range(__p, this->__end_, __p + 1);
const_pointer __xr = pointer_traits<const_pointer>::pointer_to(__x);
if (__p <= __xr && __xr < this->__end_)
++__xr;
*__p = *__xr;
}
}
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), __p - this->__begin_, __a);
__v.push_back(__x);
__p = __swap_out_circular_buffer(__v, __p);
}
return __make_iter(__p);
}
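// The __xr adjustment above handles self-insertion, where __x aliases an
// element of this vector that __move_range has just shifted one slot to the
// right. A hypothetical case:
//
//   // std::vector<int> v = {10, 20, 30};
//   // v.reserve(4);               // guarantee the in-place branch is taken
//   // v.insert(v.begin(), v[1]);  // &v[1] lies inside [__p, __end_)
//   // // result: {20, 10, 20, 30}; the value is read from its shifted
//   // // position instead of the now-moved-from original slot.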
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::insert(const_iterator __position, value_type&& __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::insert(iterator, x) called with an iterator not"
" referring to this vector");
#endif
pointer __p = this->__begin_ + (__position - begin());
if (this->__end_ < this->__end_cap())
{
if (__p == this->__end_)
{
__construct_one_at_end(_VSTD::move(__x));
}
else
{
__move_range(__p, this->__end_, __p + 1);
*__p = _VSTD::move(__x);
}
}
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), __p - this->__begin_, __a);
__v.push_back(_VSTD::move(__x));
__p = __swap_out_circular_buffer(__v, __p);
}
return __make_iter(__p);
}
template <class _Tp, class _Allocator>
template <class... _Args>
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::emplace(const_iterator __position, _Args&&... __args)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::emplace(iterator, x) called with an iterator not"
" referring to this vector");
#endif
pointer __p = this->__begin_ + (__position - begin());
if (this->__end_ < this->__end_cap())
{
if (__p == this->__end_)
{
__construct_one_at_end(_VSTD::forward<_Args>(__args)...);
}
else
{
__temp_value<value_type, _Allocator> __tmp(this->__alloc(), _VSTD::forward<_Args>(__args)...);
__move_range(__p, this->__end_, __p + 1);
*__p = _VSTD::move(__tmp.get());
}
}
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), __p - this->__begin_, __a);
__v.emplace_back(_VSTD::forward<_Args>(__args)...);
__p = __swap_out_circular_buffer(__v, __p);
}
return __make_iter(__p);
}
#endif // !_LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::insert(const_iterator __position, size_type __n, const_reference __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::insert(iterator, n, x) called with an iterator not"
" referring to this vector");
#endif
pointer __p = this->__begin_ + (__position - begin());
if (__n > 0)
{
if (__n <= static_cast<size_type>(this->__end_cap() - this->__end_))
{
size_type __old_n = __n;
pointer __old_last = this->__end_;
if (__n > static_cast<size_type>(this->__end_ - __p))
{
size_type __cx = __n - (this->__end_ - __p);
__construct_at_end(__cx, __x);
__n -= __cx;
}
if (__n > 0)
{
__move_range(__p, __old_last, __p + __old_n);
const_pointer __xr = pointer_traits<const_pointer>::pointer_to(__x);
if (__p <= __xr && __xr < this->__end_)
__xr += __old_n;
_VSTD::fill_n(__p, __n, *__xr);
}
}
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), __p - this->__begin_, __a);
__v.__construct_at_end(__n, __x);
__p = __swap_out_circular_buffer(__v, __p);
}
}
return __make_iter(__p);
}
template <class _Tp, class _Allocator>
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
_Tp,
typename iterator_traits<_InputIterator>::reference>::value,
typename vector<_Tp, _Allocator>::iterator
>::type
vector<_Tp, _Allocator>::insert(const_iterator __position, _InputIterator __first, _InputIterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::insert(iterator, range) called with an iterator not"
" referring to this vector");
#endif
difference_type __off = __position - begin();
pointer __p = this->__begin_ + __off;
allocator_type& __a = this->__alloc();
pointer __old_last = this->__end_;
for (; this->__end_ != this->__end_cap() && __first != __last; ++__first)
{
__construct_one_at_end(*__first);
}
__split_buffer<value_type, allocator_type&> __v(__a);
if (__first != __last)
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
__v.__construct_at_end(__first, __last);
difference_type __old_size = __old_last - this->__begin_;
difference_type __old_p = __p - this->__begin_;
reserve(__recommend(size() + __v.size()));
__p = this->__begin_ + __old_p;
__old_last = this->__begin_ + __old_size;
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
erase(__make_iter(__old_last), end());
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
__p = _VSTD::rotate(__p, __old_last, this->__end_);
insert(__make_iter(__p), _VSTD::make_move_iterator(__v.begin()),
_VSTD::make_move_iterator(__v.end()));
return begin() + __off;
}
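// Strategy note for the overload above: an input iterator can be traversed
// only once and its length is unknown, so the new elements are first
// appended — into spare capacity while it lasts, then into the temporary
// __split_buffer __v — and _VSTD::rotate slides the appended block from the
// end of the vector to the requested position before the buffered remainder
// is re-inserted. A hypothetical trace with enough spare capacity:
//
//   // v == {a, b, c, d}, insert {x, y} before b:
//   //   after the append step:  {a, b, c, d, x, y}
//   //   after the rotate step:  {a, x, y, b, c, d}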
template <class _Tp, class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
_Tp,
typename iterator_traits<_ForwardIterator>::reference>::value,
typename vector<_Tp, _Allocator>::iterator
>::type
vector<_Tp, _Allocator>::insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::insert(iterator, range) called with an iterator not"
" referring to this vector");
#endif
pointer __p = this->__begin_ + (__position - begin());
difference_type __n = _VSTD::distance(__first, __last);
if (__n > 0)
{
if (__n <= this->__end_cap() - this->__end_)
{
size_type __old_n = __n;
pointer __old_last = this->__end_;
_ForwardIterator __m = __last;
difference_type __dx = this->__end_ - __p;
if (__n > __dx)
{
__m = __first;
difference_type __diff = this->__end_ - __p;
_VSTD::advance(__m, __diff);
__construct_at_end(__m, __last, __n - __diff);
__n = __dx;
}
if (__n > 0)
{
__move_range(__p, __old_last, __p + __old_n);
_VSTD::copy(__first, __m, __p);
}
}
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), __p - this->__begin_, __a);
__v.__construct_at_end(__first, __last);
__p = __swap_out_circular_buffer(__v, __p);
}
}
return __make_iter(__p);
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::resize(size_type __sz)
{
size_type __cs = size();
if (__cs < __sz)
this->__append(__sz - __cs);
else if (__cs > __sz)
this->__destruct_at_end(this->__begin_ + __sz);
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::resize(size_type __sz, const_reference __x)
{
size_type __cs = size();
if (__cs < __sz)
this->__append(__sz - __cs, __x);
else if (__cs > __sz)
this->__destruct_at_end(this->__begin_ + __sz);
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::swap(vector& __x)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value)
#endif
{
_LIBCPP_ASSERT(__alloc_traits::propagate_on_container_swap::value ||
this->__alloc() == __x.__alloc(),
"vector::swap: Either propagate_on_container_swap must be true"
" or the allocators must compare equal");
_VSTD::swap(this->__begin_, __x.__begin_);
_VSTD::swap(this->__end_, __x.__end_);
_VSTD::swap(this->__end_cap(), __x.__end_cap());
_VSTD::__swap_allocator(this->__alloc(), __x.__alloc(),
integral_constant<bool,__alloc_traits::propagate_on_container_swap::value>());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->swap(this, &__x);
#endif
}
template <class _Tp, class _Allocator>
bool
vector<_Tp, _Allocator>::__invariants() const
{
if (this->__begin_ == nullptr)
{
if (this->__end_ != nullptr || this->__end_cap() != nullptr)
return false;
}
else
{
if (this->__begin_ > this->__end_)
return false;
if (this->__begin_ == this->__end_cap())
return false;
if (this->__end_ > this->__end_cap())
return false;
}
return true;
}
#if _LIBCPP_DEBUG_LEVEL == 2
template <class _Tp, class _Allocator>
bool
vector<_Tp, _Allocator>::__dereferenceable(const const_iterator* __i) const
{
return this->__begin_ <= __i->base() && __i->base() < this->__end_;
}
template <class _Tp, class _Allocator>
bool
vector<_Tp, _Allocator>::__decrementable(const const_iterator* __i) const
{
return this->__begin_ < __i->base() && __i->base() <= this->__end_;
}
template <class _Tp, class _Allocator>
bool
vector<_Tp, _Allocator>::__addable(const const_iterator* __i, ptrdiff_t __n) const
{
const_pointer __p = __i->base() + __n;
return this->__begin_ <= __p && __p <= this->__end_;
}
template <class _Tp, class _Allocator>
bool
vector<_Tp, _Allocator>::__subscriptable(const const_iterator* __i, ptrdiff_t __n) const
{
const_pointer __p = __i->base() + __n;
return this->__begin_ <= __p && __p < this->__end_;
}
#endif // _LIBCPP_DEBUG_LEVEL == 2
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<_Tp, _Allocator>::__invalidate_all_iterators()
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__invalidate_all(this);
#endif
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<_Tp, _Allocator>::__invalidate_iterators_past(pointer __new_last) {
#if _LIBCPP_DEBUG_LEVEL == 2
__c_node* __c = __get_db()->__find_c_and_lock(this);
for (__i_node** __p = __c->end_; __p != __c->beg_; ) {
--__p;
const_iterator* __i = static_cast<const_iterator*>((*__p)->__i_);
if (__i->base() > __new_last) {
(*__p)->__c_ = nullptr;
if (--__c->end_ != __p)
_VSTD::memmove(__p, __p+1, (__c->end_ - __p)*sizeof(__i_node*));
}
}
__get_db()->unlock();
#else
((void)__new_last);
#endif
}
// vector<bool>
template <class _Allocator> class vector<bool, _Allocator>;
template <class _Allocator> struct hash<vector<bool, _Allocator> >;
template <class _Allocator>
struct __has_storage_type<vector<bool, _Allocator> >
{
static const bool value = true;
};
template <class _Allocator>
class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator>
: private __vector_base_common<true>
{
public:
typedef vector __self;
typedef bool value_type;
typedef _Allocator allocator_type;
typedef allocator_traits<allocator_type> __alloc_traits;
typedef typename __alloc_traits::size_type size_type;
typedef typename __alloc_traits::difference_type difference_type;
typedef size_type __storage_type;
typedef __bit_iterator<vector, false> pointer;
typedef __bit_iterator<vector, true> const_pointer;
typedef pointer iterator;
typedef const_pointer const_iterator;
typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
private:
typedef typename __rebind_alloc_helper<__alloc_traits, __storage_type>::type __storage_allocator;
typedef allocator_traits<__storage_allocator> __storage_traits;
typedef typename __storage_traits::pointer __storage_pointer;
typedef typename __storage_traits::const_pointer __const_storage_pointer;
__storage_pointer __begin_;
size_type __size_;
__compressed_pair<size_type, __storage_allocator> __cap_alloc_;
public:
typedef __bit_reference<vector> reference;
typedef __bit_const_reference<vector> const_reference;
private:
_LIBCPP_INLINE_VISIBILITY
size_type& __cap() _NOEXCEPT
{return __cap_alloc_.first();}
_LIBCPP_INLINE_VISIBILITY
const size_type& __cap() const _NOEXCEPT
{return __cap_alloc_.first();}
_LIBCPP_INLINE_VISIBILITY
__storage_allocator& __alloc() _NOEXCEPT
{return __cap_alloc_.second();}
_LIBCPP_INLINE_VISIBILITY
const __storage_allocator& __alloc() const _NOEXCEPT
{return __cap_alloc_.second();}
static const unsigned __bits_per_word = static_cast<unsigned>(sizeof(__storage_type) * CHAR_BIT);
_LIBCPP_INLINE_VISIBILITY
static size_type __internal_cap_to_external(size_type __n) _NOEXCEPT
{return __n * __bits_per_word;}
_LIBCPP_INLINE_VISIBILITY
static size_type __external_cap_to_internal(size_type __n) _NOEXCEPT
{return (__n - 1) / __bits_per_word + 1;}
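// Worked example of the capacity conversions (assuming a 64-bit
// __storage_type; the real width is sizeof(__storage_type) * CHAR_BIT):
//
//   __external_cap_to_internal(1)   == (1 - 1) / 64 + 1   == 1 word
//   __external_cap_to_internal(64)  == (64 - 1) / 64 + 1  == 1 word
//   __external_cap_to_internal(65)  == (65 - 1) / 64 + 1  == 2 words
//   __internal_cap_to_external(2)   == 2 * 64             == 128 bits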
public:
_LIBCPP_INLINE_VISIBILITY
vector() _NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value);
_LIBCPP_INLINE_VISIBILITY explicit vector(const allocator_type& __a)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_copy_constructible<allocator_type>::value);
#else
_NOEXCEPT;
#endif
~vector();
explicit vector(size_type __n);
#if _LIBCPP_STD_VER > 11
explicit vector(size_type __n, const allocator_type& __a);
#endif
vector(size_type __n, const value_type& __v);
vector(size_type __n, const value_type& __v, const allocator_type& __a);
template <class _InputIterator>
vector(_InputIterator __first, _InputIterator __last,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value>::type* = 0);
template <class _InputIterator>
vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value>::type* = 0);
template <class _ForwardIterator>
vector(_ForwardIterator __first, _ForwardIterator __last,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value>::type* = 0);
template <class _ForwardIterator>
vector(_ForwardIterator __first, _ForwardIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value>::type* = 0);
vector(const vector& __v);
vector(const vector& __v, const allocator_type& __a);
vector& operator=(const vector& __v);
#ifndef _LIBCPP_CXX03_LANG
vector(initializer_list<value_type> __il);
vector(initializer_list<value_type> __il, const allocator_type& __a);
_LIBCPP_INLINE_VISIBILITY
vector(vector&& __v)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT;
#else
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value);
#endif
vector(vector&& __v, const __identity_t<allocator_type>& __a);
_LIBCPP_INLINE_VISIBILITY
vector& operator=(vector&& __v)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value));
_LIBCPP_INLINE_VISIBILITY
vector& operator=(initializer_list<value_type> __il)
{assign(__il.begin(), __il.end()); return *this;}
#endif // !_LIBCPP_CXX03_LANG
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator<_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value,
void
>::type
assign(_InputIterator __first, _InputIterator __last);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
assign(_ForwardIterator __first, _ForwardIterator __last);
void assign(size_type __n, const value_type& __x);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
void assign(initializer_list<value_type> __il)
{assign(__il.begin(), __il.end());}
#endif
_LIBCPP_INLINE_VISIBILITY allocator_type get_allocator() const _NOEXCEPT
{return allocator_type(this->__alloc());}
size_type max_size() const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type capacity() const _NOEXCEPT
{return __internal_cap_to_external(__cap());}
_LIBCPP_INLINE_VISIBILITY
size_type size() const _NOEXCEPT
{return __size_;}
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY
bool empty() const _NOEXCEPT
{return __size_ == 0;}
void reserve(size_type __n);
void shrink_to_fit() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
iterator begin() _NOEXCEPT
{return __make_iter(0);}
_LIBCPP_INLINE_VISIBILITY
const_iterator begin() const _NOEXCEPT
{return __make_iter(0);}
_LIBCPP_INLINE_VISIBILITY
iterator end() _NOEXCEPT
{return __make_iter(__size_);}
_LIBCPP_INLINE_VISIBILITY
const_iterator end() const _NOEXCEPT
{return __make_iter(__size_);}
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rbegin() _NOEXCEPT
{return reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rbegin() const _NOEXCEPT
{return const_reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rend() _NOEXCEPT
{return reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rend() const _NOEXCEPT
{return const_reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_iterator cbegin() const _NOEXCEPT
{return __make_iter(0);}
_LIBCPP_INLINE_VISIBILITY
const_iterator cend() const _NOEXCEPT
{return __make_iter(__size_);}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crbegin() const _NOEXCEPT
{return rbegin();}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crend() const _NOEXCEPT
{return rend();}
_LIBCPP_INLINE_VISIBILITY reference operator[](size_type __n) {return __make_ref(__n);}
_LIBCPP_INLINE_VISIBILITY const_reference operator[](size_type __n) const {return __make_ref(__n);}
reference at(size_type __n);
const_reference at(size_type __n) const;
_LIBCPP_INLINE_VISIBILITY reference front() {return __make_ref(0);}
_LIBCPP_INLINE_VISIBILITY const_reference front() const {return __make_ref(0);}
_LIBCPP_INLINE_VISIBILITY reference back() {return __make_ref(__size_ - 1);}
_LIBCPP_INLINE_VISIBILITY const_reference back() const {return __make_ref(__size_ - 1);}
void push_back(const value_type& __x);
#if _LIBCPP_STD_VER > 11
template <class... _Args>
#if _LIBCPP_STD_VER > 14
_LIBCPP_INLINE_VISIBILITY reference emplace_back(_Args&&... __args)
#else
_LIBCPP_INLINE_VISIBILITY void emplace_back(_Args&&... __args)
#endif
{
push_back(value_type(_VSTD::forward<_Args>(__args)...));
#if _LIBCPP_STD_VER > 14
return this->back();
#endif
}
#endif
_LIBCPP_INLINE_VISIBILITY void pop_back() {--__size_;}
#if _LIBCPP_STD_VER > 11
template <class... _Args>
_LIBCPP_INLINE_VISIBILITY iterator emplace(const_iterator __position, _Args&&... __args)
{ return insert(__position, value_type(_VSTD::forward<_Args>(__args)...)); }
#endif
iterator insert(const_iterator __position, const value_type& __x);
iterator insert(const_iterator __position, size_type __n, const value_type& __x);
iterator insert(const_iterator __position, size_type __n, const_reference __x);
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value,
iterator
>::type
insert(const_iterator __position, _InputIterator __first, _InputIterator __last);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
iterator
>::type
insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
iterator insert(const_iterator __position, initializer_list<value_type> __il)
{return insert(__position, __il.begin(), __il.end());}
#endif
_LIBCPP_INLINE_VISIBILITY iterator erase(const_iterator __position);
iterator erase(const_iterator __first, const_iterator __last);
_LIBCPP_INLINE_VISIBILITY
void clear() _NOEXCEPT {__size_ = 0;}
void swap(vector&)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT;
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value);
#endif
static void swap(reference __x, reference __y) _NOEXCEPT { _VSTD::swap(__x, __y); }
void resize(size_type __sz, value_type __x = false);
void flip() _NOEXCEPT;
bool __invariants() const;
private:
_LIBCPP_INLINE_VISIBILITY void __invalidate_all_iterators();
void __vallocate(size_type __n);
void __vdeallocate() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
static size_type __align_it(size_type __new_size) _NOEXCEPT
{return __new_size + (__bits_per_word-1) & ~((size_type)__bits_per_word-1);}
_LIBCPP_INLINE_VISIBILITY size_type __recommend(size_type __new_size) const;
_LIBCPP_INLINE_VISIBILITY void __construct_at_end(size_type __n, bool __x);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
__construct_at_end(_ForwardIterator __first, _ForwardIterator __last);
void __append(size_type __n, const_reference __x);
_LIBCPP_INLINE_VISIBILITY
reference __make_ref(size_type __pos) _NOEXCEPT
{return reference(__begin_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word);}
_LIBCPP_INLINE_VISIBILITY
const_reference __make_ref(size_type __pos) const _NOEXCEPT
{return const_reference(__begin_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word);}
_LIBCPP_INLINE_VISIBILITY
iterator __make_iter(size_type __pos) _NOEXCEPT
{return iterator(__begin_ + __pos / __bits_per_word, static_cast<unsigned>(__pos % __bits_per_word));}
_LIBCPP_INLINE_VISIBILITY
const_iterator __make_iter(size_type __pos) const _NOEXCEPT
{return const_iterator(__begin_ + __pos / __bits_per_word, static_cast<unsigned>(__pos % __bits_per_word));}
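// Bit-addressing sketch (hypothetical position, again assuming 64 bits per
// word): element __pos == 70 lives in word 70 / 64 == 1 at bit 70 % 64 == 6,
// so __make_ref(70) pairs the word pointer __begin_ + 1 with the mask
// __storage_type(1) << 6, and __make_iter(70) stores the same word pointer
// together with the bit offset 6.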
_LIBCPP_INLINE_VISIBILITY
iterator __const_iterator_cast(const_iterator __p) _NOEXCEPT
{return begin() + (__p - cbegin());}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const vector& __v)
{__copy_assign_alloc(__v, integral_constant<bool,
__storage_traits::propagate_on_container_copy_assignment::value>());}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const vector& __c, true_type)
{
if (__alloc() != __c.__alloc())
__vdeallocate();
__alloc() = __c.__alloc();
}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const vector&, false_type)
{}
void __move_assign(vector& __c, false_type);
void __move_assign(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value);
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(vector& __c)
_NOEXCEPT_(
!__storage_traits::propagate_on_container_move_assignment::value ||
is_nothrow_move_assignable<allocator_type>::value)
{__move_assign_alloc(__c, integral_constant<bool,
__storage_traits::propagate_on_container_move_assignment::value>());}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__alloc() = _VSTD::move(__c.__alloc());
}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(vector&, false_type)
_NOEXCEPT
{}
size_t __hash_code() const _NOEXCEPT;
friend class __bit_reference<vector>;
friend class __bit_const_reference<vector>;
friend class __bit_iterator<vector, false>;
friend class __bit_iterator<vector, true>;
friend struct __bit_array<vector>;
friend struct _LIBCPP_TEMPLATE_VIS hash<vector>;
};
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<bool, _Allocator>::__invalidate_all_iterators()
{
}
// Allocate space for __n objects
// throws length_error if __n > max_size()
// throws (probably bad_alloc) if memory runs out
// Precondition: __begin_ == __end_ == __cap() == 0
// Precondition: __n > 0
// Postcondition: capacity() == __n
// Postcondition: size() == 0
template <class _Allocator>
void
vector<bool, _Allocator>::__vallocate(size_type __n)
{
if (__n > max_size())
this->__throw_length_error();
__n = __external_cap_to_internal(__n);
this->__begin_ = __storage_traits::allocate(this->__alloc(), __n);
this->__size_ = 0;
this->__cap() = __n;
}
template <class _Allocator>
void
vector<bool, _Allocator>::__vdeallocate() _NOEXCEPT
{
if (this->__begin_ != nullptr)
{
__storage_traits::deallocate(this->__alloc(), this->__begin_, __cap());
__invalidate_all_iterators();
this->__begin_ = nullptr;
this->__size_ = this->__cap() = 0;
}
}
template <class _Allocator>
typename vector<bool, _Allocator>::size_type
vector<bool, _Allocator>::max_size() const _NOEXCEPT
{
size_type __amax = __storage_traits::max_size(__alloc());
size_type __nmax = numeric_limits<size_type>::max() / 2; // end() >= begin(), always
if (__nmax / __bits_per_word <= __amax)
return __nmax;
return __internal_cap_to_external(__amax);
}
// Precondition: __new_size > capacity()
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<bool, _Allocator>::size_type
vector<bool, _Allocator>::__recommend(size_type __new_size) const
{
const size_type __ms = max_size();
if (__new_size > __ms)
this->__throw_length_error();
const size_type __cap = capacity();
if (__cap >= __ms / 2)
return __ms;
return _VSTD::max(2 * __cap, __align_it(__new_size));
}
// Constructs __n objects with value __x starting at __end_
// Precondition: __n > 0
// Precondition: size() + __n <= capacity()
// Postcondition: size() == previous size() + __n
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<bool, _Allocator>::__construct_at_end(size_type __n, bool __x)
{
size_type __old_size = this->__size_;
this->__size_ += __n;
if (__old_size == 0 || ((__old_size - 1) / __bits_per_word) != ((this->__size_ - 1) / __bits_per_word))
{
if (this->__size_ <= __bits_per_word)
this->__begin_[0] = __storage_type(0);
else
this->__begin_[(this->__size_ - 1) / __bits_per_word] = __storage_type(0);
}
_VSTD::fill_n(__make_iter(__old_size), __n, __x);
}
template <class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
vector<bool, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last)
{
size_type __old_size = this->__size_;
this->__size_ += _VSTD::distance(__first, __last);
if (__old_size == 0 || ((__old_size - 1) / __bits_per_word) != ((this->__size_ - 1) / __bits_per_word))
{
if (this->__size_ <= __bits_per_word)
this->__begin_[0] = __storage_type(0);
else
this->__begin_[(this->__size_ - 1) / __bits_per_word] = __storage_type(0);
}
_VSTD::copy(__first, __last, __make_iter(__old_size));
}
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<bool, _Allocator>::vector()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
}
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<bool, _Allocator>::vector(const allocator_type& __a)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_copy_constructible<allocator_type>::value)
#else
_NOEXCEPT
#endif
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
}
template <class _Allocator>
vector<bool, _Allocator>::vector(size_type __n)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, false);
}
}
#if _LIBCPP_STD_VER > 11
template <class _Allocator>
vector<bool, _Allocator>::vector(size_type __n, const allocator_type& __a)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, false);
}
}
#endif
template <class _Allocator>
vector<bool, _Allocator>::vector(size_type __n, const value_type& __x)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, __x);
}
}
template <class _Allocator>
vector<bool, _Allocator>::vector(size_type __n, const value_type& __x, const allocator_type& __a)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, __x);
}
}
template <class _Allocator>
template <class _InputIterator>
vector<bool, _Allocator>::vector(_InputIterator __first, _InputIterator __last,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value>::type*)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
for (; __first != __last; ++__first)
push_back(*__first);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
if (__begin_ != nullptr)
__storage_traits::deallocate(__alloc(), __begin_, __cap());
__invalidate_all_iterators();
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
template <class _Allocator>
template <class _InputIterator>
vector<bool, _Allocator>::vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value>::type*)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
for (; __first != __last; ++__first)
push_back(*__first);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
if (__begin_ != nullptr)
__storage_traits::deallocate(__alloc(), __begin_, __cap());
__invalidate_all_iterators();
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
template <class _Allocator>
template <class _ForwardIterator>
vector<bool, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __last,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value>::type*)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__first, __last);
}
}
template <class _Allocator>
template <class _ForwardIterator>
vector<bool, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value>::type*)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__first, __last);
}
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Allocator>
vector<bool, _Allocator>::vector(initializer_list<value_type> __il)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
size_type __n = static_cast<size_type>(__il.size());
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__il.begin(), __il.end());
}
}
template <class _Allocator>
vector<bool, _Allocator>::vector(initializer_list<value_type> __il, const allocator_type& __a)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
size_type __n = static_cast<size_type>(__il.size());
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__il.begin(), __il.end());
}
}
#endif // _LIBCPP_CXX03_LANG
template <class _Allocator>
vector<bool, _Allocator>::~vector()
{
if (__begin_ != nullptr)
__storage_traits::deallocate(__alloc(), __begin_, __cap());
__invalidate_all_iterators();
}
template <class _Allocator>
vector<bool, _Allocator>::vector(const vector& __v)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __storage_traits::select_on_container_copy_construction(__v.__alloc()))
{
if (__v.size() > 0)
{
__vallocate(__v.size());
__construct_at_end(__v.begin(), __v.end());
}
}
template <class _Allocator>
vector<bool, _Allocator>::vector(const vector& __v, const allocator_type& __a)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __a)
{
if (__v.size() > 0)
{
__vallocate(__v.size());
__construct_at_end(__v.begin(), __v.end());
}
}
template <class _Allocator>
vector<bool, _Allocator>&
vector<bool, _Allocator>::operator=(const vector& __v)
{
if (this != &__v)
{
__copy_assign_alloc(__v);
if (__v.__size_)
{
if (__v.__size_ > capacity())
{
__vdeallocate();
__vallocate(__v.__size_);
}
_VSTD::copy(__v.__begin_, __v.__begin_ + __external_cap_to_internal(__v.__size_), __begin_);
}
__size_ = __v.__size_;
}
return *this;
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY vector<bool, _Allocator>::vector(vector&& __v)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT
#else
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value)
#endif
: __begin_(__v.__begin_),
__size_(__v.__size_),
__cap_alloc_(_VSTD::move(__v.__cap_alloc_)) {
__v.__begin_ = nullptr;
__v.__size_ = 0;
__v.__cap() = 0;
}
template <class _Allocator>
vector<bool, _Allocator>::vector(vector&& __v, const __identity_t<allocator_type>& __a)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __a)
{
if (__a == allocator_type(__v.__alloc()))
{
this->__begin_ = __v.__begin_;
this->__size_ = __v.__size_;
this->__cap() = __v.__cap();
__v.__begin_ = nullptr;
__v.__cap() = __v.__size_ = 0;
}
else if (__v.size() > 0)
{
__vallocate(__v.size());
__construct_at_end(__v.begin(), __v.end());
}
}
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<bool, _Allocator>&
vector<bool, _Allocator>::operator=(vector&& __v)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value))
{
__move_assign(__v, integral_constant<bool,
__storage_traits::propagate_on_container_move_assignment::value>());
return *this;
}
template <class _Allocator>
void
vector<bool, _Allocator>::__move_assign(vector& __c, false_type)
{
if (__alloc() != __c.__alloc())
assign(__c.begin(), __c.end());
else
__move_assign(__c, true_type());
}
template <class _Allocator>
void
vector<bool, _Allocator>::__move_assign(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__vdeallocate();
__move_assign_alloc(__c);
this->__begin_ = __c.__begin_;
this->__size_ = __c.__size_;
this->__cap() = __c.__cap();
__c.__begin_ = nullptr;
__c.__cap() = __c.__size_ = 0;
}
#endif // !_LIBCPP_CXX03_LANG
template <class _Allocator>
void
vector<bool, _Allocator>::assign(size_type __n, const value_type& __x)
{
__size_ = 0;
if (__n > 0)
{
size_type __c = capacity();
if (__n <= __c)
__size_ = __n;
else
{
vector __v(__alloc());
__v.reserve(__recommend(__n));
__v.__size_ = __n;
swap(__v);
}
_VSTD::fill_n(begin(), __n, __x);
}
__invalidate_all_iterators();
}
template <class _Allocator>
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator<_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value,
void
>::type
vector<bool, _Allocator>::assign(_InputIterator __first, _InputIterator __last)
{
clear();
for (; __first != __last; ++__first)
push_back(*__first);
}
template <class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
vector<bool, _Allocator>::assign(_ForwardIterator __first, _ForwardIterator __last)
{
clear();
difference_type __ns = _VSTD::distance(__first, __last);
_LIBCPP_ASSERT(__ns >= 0, "invalid range specified");
const size_t __n = static_cast<size_type>(__ns);
if (__n)
{
if (__n > capacity())
{
__vdeallocate();
__vallocate(__n);
}
__construct_at_end(__first, __last);
}
}
template <class _Allocator>
void
vector<bool, _Allocator>::reserve(size_type __n)
{
if (__n > capacity())
{
vector __v(this->__alloc());
__v.__vallocate(__n);
__v.__construct_at_end(this->begin(), this->end());
swap(__v);
__invalidate_all_iterators();
}
}
template <class _Allocator>
void
vector<bool, _Allocator>::shrink_to_fit() _NOEXCEPT
{
if (__external_cap_to_internal(size()) > __cap())
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
vector(*this, allocator_type(__alloc())).swap(*this);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
}
template <class _Allocator>
typename vector<bool, _Allocator>::reference
vector<bool, _Allocator>::at(size_type __n)
{
if (__n >= size())
this->__throw_out_of_range();
return (*this)[__n];
}
template <class _Allocator>
typename vector<bool, _Allocator>::const_reference
vector<bool, _Allocator>::at(size_type __n) const
{
if (__n >= size())
this->__throw_out_of_range();
return (*this)[__n];
}
template <class _Allocator>
void
vector<bool, _Allocator>::push_back(const value_type& __x)
{
if (this->__size_ == this->capacity())
reserve(__recommend(this->__size_ + 1));
++this->__size_;
back() = __x;
}
template <class _Allocator>
typename vector<bool, _Allocator>::iterator
vector<bool, _Allocator>::insert(const_iterator __position, const value_type& __x)
{
iterator __r;
if (size() < capacity())
{
const_iterator __old_end = end();
++__size_;
_VSTD::copy_backward(__position, __old_end, end());
__r = __const_iterator_cast(__position);
}
else
{
vector __v(__alloc());
__v.reserve(__recommend(__size_ + 1));
__v.__size_ = __size_ + 1;
__r = _VSTD::copy(cbegin(), __position, __v.begin());
_VSTD::copy_backward(__position, cend(), __v.end());
swap(__v);
}
*__r = __x;
return __r;
}
template <class _Allocator>
typename vector<bool, _Allocator>::iterator
vector<bool, _Allocator>::insert(const_iterator __position, size_type __n, const value_type& __x)
{
iterator __r;
size_type __c = capacity();
if (__n <= __c && size() <= __c - __n)
{
const_iterator __old_end = end();
__size_ += __n;
_VSTD::copy_backward(__position, __old_end, end());
__r = __const_iterator_cast(__position);
}
else
{
vector __v(__alloc());
__v.reserve(__recommend(__size_ + __n));
__v.__size_ = __size_ + __n;
__r = _VSTD::copy(cbegin(), __position, __v.begin());
_VSTD::copy_backward(__position, cend(), __v.end());
swap(__v);
}
_VSTD::fill_n(__r, __n, __x);
return __r;
}
template <class _Allocator>
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value,
typename vector<bool, _Allocator>::iterator
>::type
vector<bool, _Allocator>::insert(const_iterator __position, _InputIterator __first, _InputIterator __last)
{
difference_type __off = __position - begin();
iterator __p = __const_iterator_cast(__position);
iterator __old_end = end();
for (; size() != capacity() && __first != __last; ++__first)
{
++this->__size_;
back() = *__first;
}
vector __v(__alloc());
if (__first != __last)
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
__v.assign(__first, __last);
difference_type __old_size = static_cast<difference_type>(__old_end - begin());
difference_type __old_p = __p - begin();
reserve(__recommend(size() + __v.size()));
__p = begin() + __old_p;
__old_end = begin() + __old_size;
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
erase(__old_end, end());
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
__p = _VSTD::rotate(__p, __old_end, end());
insert(__p, __v.begin(), __v.end());
return begin() + __off;
}
template <class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
typename vector<bool, _Allocator>::iterator
>::type
vector<bool, _Allocator>::insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last)
{
const difference_type __n_signed = _VSTD::distance(__first, __last);
_LIBCPP_ASSERT(__n_signed >= 0, "invalid range specified");
const size_type __n = static_cast<size_type>(__n_signed);
iterator __r;
size_type __c = capacity();
if (__n <= __c && size() <= __c - __n)
{
const_iterator __old_end = end();
__size_ += __n;
_VSTD::copy_backward(__position, __old_end, end());
__r = __const_iterator_cast(__position);
}
else
{
vector __v(__alloc());
__v.reserve(__recommend(__size_ + __n));
__v.__size_ = __size_ + __n;
__r = _VSTD::copy(cbegin(), __position, __v.begin());
_VSTD::copy_backward(__position, cend(), __v.end());
swap(__v);
}
_VSTD::copy(__first, __last, __r);
return __r;
}
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<bool, _Allocator>::iterator
vector<bool, _Allocator>::erase(const_iterator __position)
{
iterator __r = __const_iterator_cast(__position);
_VSTD::copy(__position + 1, this->cend(), __r);
--__size_;
return __r;
}
template <class _Allocator>
typename vector<bool, _Allocator>::iterator
vector<bool, _Allocator>::erase(const_iterator __first, const_iterator __last)
{
iterator __r = __const_iterator_cast(__first);
difference_type __d = __last - __first;
_VSTD::copy(__last, this->cend(), __r);
__size_ -= __d;
return __r;
}
template <class _Allocator>
void
vector<bool, _Allocator>::swap(vector& __x)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value)
#endif
{
_VSTD::swap(this->__begin_, __x.__begin_);
_VSTD::swap(this->__size_, __x.__size_);
_VSTD::swap(this->__cap(), __x.__cap());
_VSTD::__swap_allocator(this->__alloc(), __x.__alloc(),
integral_constant<bool, __alloc_traits::propagate_on_container_swap::value>());
}
template <class _Allocator>
void
vector<bool, _Allocator>::resize(size_type __sz, value_type __x)
{
size_type __cs = size();
if (__cs < __sz)
{
iterator __r;
size_type __c = capacity();
size_type __n = __sz - __cs;
if (__n <= __c && __cs <= __c - __n)
{
__r = end();
__size_ += __n;
}
else
{
vector __v(__alloc());
__v.reserve(__recommend(__size_ + __n));
__v.__size_ = __size_ + __n;
__r = _VSTD::copy(cbegin(), cend(), __v.begin());
swap(__v);
}
_VSTD::fill_n(__r, __n, __x);
}
else
__size_ = __sz;
}
template <class _Allocator>
void
vector<bool, _Allocator>::flip() _NOEXCEPT
{
// do middle whole words
size_type __n = __size_;
__storage_pointer __p = __begin_;
for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word)
*__p = ~*__p;
// do last partial word
if (__n > 0)
{
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
__storage_type __b = *__p & __m;
*__p &= ~__m;
*__p |= ~__b & __m;
}
}
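// A minimal usage sketch of flip() (hypothetical example, assumes <cassert>):
//
//   std::vector<bool> v = {true, false, true};
//   v.flip();                                    // inverts every bit in place
//   assert(!v[0] && v[1] && !v[2]);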
template <class _Allocator>
bool
vector<bool, _Allocator>::__invariants() const
{
if (this->__begin_ == nullptr)
{
if (this->__size_ != 0 || this->__cap() != 0)
return false;
}
else
{
if (this->__cap() == 0)
return false;
if (this->__size_ > this->capacity())
return false;
}
return true;
}
template <class _Allocator>
size_t
vector<bool, _Allocator>::__hash_code() const _NOEXCEPT
{
size_t __h = 0;
// do middle whole words
size_type __n = __size_;
__storage_pointer __p = __begin_;
for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word)
__h ^= *__p;
// do last partial word
if (__n > 0)
{
const __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
__h ^= *__p & __m;
}
return __h;
}
template <class _Allocator>
struct _LIBCPP_TEMPLATE_VIS hash<vector<bool, _Allocator> >
: public unary_function<vector<bool, _Allocator>, size_t>
{
_LIBCPP_INLINE_VISIBILITY
size_t operator()(const vector<bool, _Allocator>& __vec) const _NOEXCEPT
{return __vec.__hash_code();}
};
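// A minimal usage sketch of the hash specialization above (hypothetical example,
// assumes <unordered_map>): equal vectors hash to equal values, so vector<bool>
// can key an unordered container directly.
//
//   std::vector<bool> key = {true, true, false};
//   std::size_t h = std::hash<std::vector<bool> >{}(key);
//   std::unordered_map<std::vector<bool>, int> m;
//   m[key] = 42;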
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
const typename vector<_Tp, _Allocator>::size_type __sz = __x.size();
return __sz == __y.size() && _VSTD::equal(__x.begin(), __x.end(), __y.begin());
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
return !(__x == __y);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator< (const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
return _VSTD::lexicographical_compare(__x.begin(), __x.end(), __y.begin(), __y.end());
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator> (const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
return __y < __x;
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator>=(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
return !(__x < __y);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator<=(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
return !(__y < __x);
}
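// A minimal sketch of the relational operators above (hypothetical example):
// comparison is lexicographical, so the first differing element decides.
//
//   std::vector<int> a = {1, 2, 3};
//   std::vector<int> b = {1, 2, 4};
//   assert(a < b && a != b && b >= a);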
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
swap(vector<_Tp, _Allocator>& __x, vector<_Tp, _Allocator>& __y)
_NOEXCEPT_(_NOEXCEPT_(__x.swap(__y)))
{
__x.swap(__y);
}
#if _LIBCPP_STD_VER > 17
template <class _Tp, class _Allocator, class _Up>
inline _LIBCPP_INLINE_VISIBILITY typename vector<_Tp, _Allocator>::size_type
erase(vector<_Tp, _Allocator>& __c, const _Up& __v) {
auto __old_size = __c.size();
__c.erase(_VSTD::remove(__c.begin(), __c.end(), __v), __c.end());
return __old_size - __c.size();
}
template <class _Tp, class _Allocator, class _Predicate>
inline _LIBCPP_INLINE_VISIBILITY typename vector<_Tp, _Allocator>::size_type
erase_if(vector<_Tp, _Allocator>& __c, _Predicate __pred) {
auto __old_size = __c.size();
__c.erase(_VSTD::remove_if(__c.begin(), __c.end(), __pred), __c.end());
return __old_size - __c.size();
}
#endif
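// A minimal usage sketch of the C++20 uniform erasure helpers above
// (hypothetical example, requires C++20):
//
//   std::vector<int> v = {1, 2, 3, 2, 4};
//   auto removed = std::erase(v, 2);                               // v == {1, 3, 4}
//   auto odd = std::erase_if(v, [](int x) { return x % 2 != 0; }); // v == {4}
//   assert(removed == 2 && odd == 2);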
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP_VECTOR
diff --git a/contrib/llvm-project/libcxx/include/wctype.h b/contrib/llvm-project/libcxx/include/wctype.h
index 1b4b1461496c..3b614759ac6d 100644
--- a/contrib/llvm-project/libcxx/include/wctype.h
+++ b/contrib/llvm-project/libcxx/include/wctype.h
@@ -1,80 +1,90 @@
// -*- C++ -*-
//===--------------------------- wctype.h ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_WCTYPE_H
#define _LIBCPP_WCTYPE_H
/*
wctype.h synopsis
Macros:
WEOF
Types:
wint_t
wctrans_t
wctype_t
int iswalnum(wint_t wc);
int iswalpha(wint_t wc);
int iswblank(wint_t wc); // C99
int iswcntrl(wint_t wc);
int iswdigit(wint_t wc);
int iswgraph(wint_t wc);
int iswlower(wint_t wc);
int iswprint(wint_t wc);
int iswpunct(wint_t wc);
int iswspace(wint_t wc);
int iswupper(wint_t wc);
int iswxdigit(wint_t wc);
int iswctype(wint_t wc, wctype_t desc);
wctype_t wctype(const char* property);
wint_t towlower(wint_t wc);
wint_t towupper(wint_t wc);
wint_t towctrans(wint_t wc, wctrans_t desc);
wctrans_t wctrans(const char* property);
*/
#include <__config>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
+// TODO:
+// In the future, we should unconditionally include_next <wctype.h> here and instead
+// have a mode under which the library does not need libc++'s <wctype.h> or <cwctype>
+// at all (i.e. a mode without wchar_t). As it stands, we need to do that to completely
+// bypass the using declarations in <cwctype> when we did not include <wctype.h>.
+// Otherwise, a using declaration like `using ::wint_t` in <cwctype> will refer to
+// nothing (with using_if_exists), and if we include another header that defines one
+// of these declarations (e.g. <wchar.h>), the second `using ::wint_t` with using_if_exists
+// will fail because it does not refer to the same declaration.
#if __has_include_next(<wctype.h>)
# include_next <wctype.h>
+# define _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
#endif
#ifdef __cplusplus
#undef iswalnum
#undef iswalpha
#undef iswblank
#undef iswcntrl
#undef iswdigit
#undef iswgraph
#undef iswlower
#undef iswprint
#undef iswpunct
#undef iswspace
#undef iswupper
#undef iswxdigit
#undef iswctype
#undef wctype
#undef towlower
#undef towupper
#undef towctrans
#undef wctrans
#endif // __cplusplus
#endif // _LIBCPP_WCTYPE_H
diff --git a/contrib/llvm-project/libunwind/src/Unwind-EHABI.cpp b/contrib/llvm-project/libunwind/src/Unwind-EHABI.cpp
index 32b5cbc3be92..8843db7f54c3 100644
--- a/contrib/llvm-project/libunwind/src/Unwind-EHABI.cpp
+++ b/contrib/llvm-project/libunwind/src/Unwind-EHABI.cpp
@@ -1,1003 +1,1005 @@
//===--------------------------- Unwind-EHABI.cpp -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//
// Implements ARM zero-cost C++ exceptions
//
//===----------------------------------------------------------------------===//
#include "Unwind-EHABI.h"
#if defined(_LIBUNWIND_ARM_EHABI)
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "libunwind.h"
#include "libunwind_ext.h"
#include "unwind.h"
namespace {
// Strange order: take words in order, but inside each word, take from most to
// least significant byte.
uint8_t getByte(const uint32_t* data, size_t offset) {
const uint8_t* byteData = reinterpret_cast<const uint8_t*>(data);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))];
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return byteData[offset];
#else
#error "Unable to determine endianess"
#endif
}
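// A worked example of that ordering (illustrative, assuming 32-bit words): if
// data[0] holds 0xAABBCCDD, then regardless of host endianness
//
//   getByte(data, 0) == 0xAA   // most significant byte of the first word
//   getByte(data, 1) == 0xBB
//   getByte(data, 2) == 0xCC
//   getByte(data, 3) == 0xDD   // offset 4 continues with data[1]'s MSB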
const char* getNextWord(const char* data, uint32_t* out) {
*out = *reinterpret_cast<const uint32_t*>(data);
return data + 4;
}
const char* getNextNibble(const char* data, uint32_t* out) {
*out = *reinterpret_cast<const uint16_t*>(data);
return data + 2;
}
struct Descriptor {
// See # 9.2
typedef enum {
SU16 = 0, // Short descriptor, 16-bit entries
LU16 = 1, // Long descriptor, 16-bit entries
LU32 = 3, // Long descriptor, 32-bit entries
RESERVED0 = 4, RESERVED1 = 5, RESERVED2 = 6, RESERVED3 = 7,
RESERVED4 = 8, RESERVED5 = 9, RESERVED6 = 10, RESERVED7 = 11,
RESERVED8 = 12, RESERVED9 = 13, RESERVED10 = 14, RESERVED11 = 15
} Format;
// See # 9.2
typedef enum {
CLEANUP = 0x0,
FUNC = 0x1,
CATCH = 0x2,
INVALID = 0x4
} Kind;
};
_Unwind_Reason_Code ProcessDescriptors(
_Unwind_State state,
_Unwind_Control_Block* ucbp,
struct _Unwind_Context* context,
Descriptor::Format format,
const char* descriptorStart,
uint32_t flags) {
// EHT is inlined in the index using compact form. No descriptors. #5
if (flags & 0x1)
return _URC_CONTINUE_UNWIND;
// TODO: We should check the state here, and determine whether we need to
// perform phase1 or phase2 unwinding.
(void)state;
const char* descriptor = descriptorStart;
uint32_t descriptorWord;
getNextWord(descriptor, &descriptorWord);
while (descriptorWord) {
// Read descriptor based on # 9.2.
uint32_t length;
uint32_t offset;
switch (format) {
case Descriptor::LU32:
descriptor = getNextWord(descriptor, &length);
descriptor = getNextWord(descriptor, &offset);
+ break;
case Descriptor::LU16:
descriptor = getNextNibble(descriptor, &length);
descriptor = getNextNibble(descriptor, &offset);
+ break;
default:
assert(false);
return _URC_FAILURE;
}
// See # 9.2 table for decoding the kind of descriptor. It's a 2-bit value.
Descriptor::Kind kind =
static_cast<Descriptor::Kind>((length & 0x1) | ((offset & 0x1) << 1));
// Clear off flag from last bit.
length &= ~1u;
offset &= ~1u;
uintptr_t scopeStart = ucbp->pr_cache.fnstart + offset;
uintptr_t scopeEnd = scopeStart + length;
uintptr_t pc = _Unwind_GetIP(context);
bool isInScope = (scopeStart <= pc) && (pc < scopeEnd);
switch (kind) {
case Descriptor::CLEANUP: {
// TODO(ajwong): Handle cleanup descriptors.
break;
}
case Descriptor::FUNC: {
// TODO(ajwong): Handle function descriptors.
break;
}
case Descriptor::CATCH: {
// Catch descriptors require gobbling one more word.
uint32_t landing_pad;
descriptor = getNextWord(descriptor, &landing_pad);
if (isInScope) {
// TODO(ajwong): This is only phase1 compatible logic. Implement
// phase2.
landing_pad = signExtendPrel31(landing_pad & ~0x80000000);
if (landing_pad == 0xffffffff) {
return _URC_HANDLER_FOUND;
} else if (landing_pad == 0xfffffffe) {
return _URC_FAILURE;
} else {
/*
bool is_reference_type = landing_pad & 0x80000000;
void* matched_object;
if (__cxxabiv1::__cxa_type_match(
ucbp, reinterpret_cast<const std::type_info *>(landing_pad),
is_reference_type,
&matched_object) != __cxxabiv1::ctm_failed)
return _URC_HANDLER_FOUND;
*/
_LIBUNWIND_ABORT("Type matching not implemented");
}
}
break;
}
default:
_LIBUNWIND_ABORT("Invalid descriptor kind found.");
}
getNextWord(descriptor, &descriptorWord);
}
return _URC_CONTINUE_UNWIND;
}
static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state,
_Unwind_Control_Block* ucbp,
struct _Unwind_Context* context) {
// Read the compact model EHT entry's header # 6.3
const uint32_t* unwindingData = ucbp->pr_cache.ehtp;
assert((*unwindingData & 0xf0000000) == 0x80000000 && "Must be a compact entry");
Descriptor::Format format =
static_cast<Descriptor::Format>((*unwindingData & 0x0f000000) >> 24);
const char *lsda =
reinterpret_cast<const char *>(_Unwind_GetLanguageSpecificData(context));
// Handle descriptors before unwinding so they are processed in the context
// of the correct stack frame.
_Unwind_Reason_Code result =
ProcessDescriptors(state, ucbp, context, format, lsda,
ucbp->pr_cache.additional);
if (result != _URC_CONTINUE_UNWIND)
return result;
if (__unw_step(reinterpret_cast<unw_cursor_t *>(context)) != UNW_STEP_SUCCESS)
return _URC_FAILURE;
return _URC_CONTINUE_UNWIND;
}
// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE /
// _UVRSD_UINT32.
uint32_t RegisterMask(uint8_t start, uint8_t count_minus_one) {
return ((1U << (count_minus_one + 1)) - 1) << start;
}
// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_VFP /
// _UVRSD_DOUBLE.
uint32_t RegisterRange(uint8_t start, uint8_t count_minus_one) {
return ((uint32_t)start << 16) | ((uint32_t)count_minus_one + 1);
}
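// Worked examples of the two discriminator encodings (illustrative only):
//
//   RegisterMask(4, 3)  == 0x00f0       // bit mask covering r4-r7
//   RegisterMask(0, 0)  == 0x0001       // just r0
//   RegisterRange(8, 3) == 0x00080004   // start register 8, count 4 (d8-d11)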
} // end anonymous namespace
/**
* Decodes an EHT entry.
*
* @param data Pointer to EHT.
* @param[out] off Offset from return value (in bytes) to begin interpretation.
* @param[out] len Number of bytes in unwind code.
* @return Pointer to beginning of unwind code.
*/
extern "C" const uint32_t*
decode_eht_entry(const uint32_t* data, size_t* off, size_t* len) {
if ((*data & 0x80000000) == 0) {
// 6.2: Generic Model
//
// EHT entry is a prel31 pointing to the PR, followed by data understood
// only by the personality routine. Fortunately, all existing assembler
// implementations, including GNU assembler, LLVM integrated assembler,
// and ARM assembler, assume that the unwind opcodes come after the
// personality routine address.
*off = 1; // First byte is size data.
*len = (((data[1] >> 24) & 0xff) + 1) * 4;
data++; // Skip the first word, which is the prel31 offset.
} else {
// 6.3: ARM Compact Model
//
// EHT entries here correspond to the __aeabi_unwind_cpp_pr[012] PRs indexed
// by format:
Descriptor::Format format =
static_cast<Descriptor::Format>((*data & 0x0f000000) >> 24);
switch (format) {
case Descriptor::SU16:
*len = 4;
*off = 1;
break;
case Descriptor::LU16:
case Descriptor::LU32:
*len = 4 + 4 * ((*data & 0x00ff0000) >> 16);
*off = 2;
break;
default:
return nullptr;
}
}
return data;
}
_LIBUNWIND_EXPORT _Unwind_Reason_Code
_Unwind_VRS_Interpret(_Unwind_Context *context, const uint32_t *data,
size_t offset, size_t len) {
bool wrotePC = false;
bool finish = false;
while (offset < len && !finish) {
uint8_t byte = getByte(data, offset++);
if ((byte & 0x80) == 0) {
uint32_t sp;
_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
if (byte & 0x40)
sp -= (((uint32_t)byte & 0x3f) << 2) + 4;
else
sp += ((uint32_t)byte << 2) + 4;
_Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
} else {
switch (byte & 0xf0) {
case 0x80: {
if (offset >= len)
return _URC_FAILURE;
uint32_t registers =
(((uint32_t)byte & 0x0f) << 12) |
(((uint32_t)getByte(data, offset++)) << 4);
if (!registers)
return _URC_FAILURE;
if (registers & (1 << 15))
wrotePC = true;
_Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
break;
}
case 0x90: {
uint8_t reg = byte & 0x0f;
if (reg == 13 || reg == 15)
return _URC_FAILURE;
uint32_t sp;
_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_R0 + reg,
_UVRSD_UINT32, &sp);
_Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
&sp);
break;
}
case 0xa0: {
uint32_t registers = RegisterMask(4, byte & 0x07);
if (byte & 0x08)
registers |= 1 << 14;
_Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
break;
}
case 0xb0: {
switch (byte) {
case 0xb0:
finish = true;
break;
case 0xb1: {
if (offset >= len)
return _URC_FAILURE;
uint8_t registers = getByte(data, offset++);
if (registers & 0xf0 || !registers)
return _URC_FAILURE;
_Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
break;
}
case 0xb2: {
uint32_t addend = 0;
uint32_t shift = 0;
// This decodes a uleb128 value.
while (true) {
if (offset >= len)
return _URC_FAILURE;
uint32_t v = getByte(data, offset++);
addend |= (v & 0x7f) << shift;
if ((v & 0x80) == 0)
break;
shift += 7;
}
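// For reference, ULEB128 stores 7 payload bits per byte, least significant
// group first, with the top bit as a continuation flag; e.g. the bytes
// 0xe5 0x8e 0x26 decode to 0x98765 (624485).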
uint32_t sp;
_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
&sp);
sp += 0x204 + (addend << 2);
_Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
&sp);
break;
}
case 0xb3: {
uint8_t v = getByte(data, offset++);
_Unwind_VRS_Pop(context, _UVRSC_VFP,
RegisterRange(static_cast<uint8_t>(v >> 4),
v & 0x0f), _UVRSD_VFPX);
break;
}
case 0xb4:
case 0xb5:
case 0xb6:
case 0xb7:
return _URC_FAILURE;
default:
_Unwind_VRS_Pop(context, _UVRSC_VFP,
RegisterRange(8, byte & 0x07), _UVRSD_VFPX);
break;
}
break;
}
case 0xc0: {
switch (byte) {
#if defined(__ARM_WMMX)
case 0xc0:
case 0xc1:
case 0xc2:
case 0xc3:
case 0xc4:
case 0xc5:
_Unwind_VRS_Pop(context, _UVRSC_WMMXD,
RegisterRange(10, byte & 0x7), _UVRSD_DOUBLE);
break;
case 0xc6: {
uint8_t v = getByte(data, offset++);
uint8_t start = static_cast<uint8_t>(v >> 4);
uint8_t count_minus_one = v & 0xf;
if (start + count_minus_one >= 16)
return _URC_FAILURE;
_Unwind_VRS_Pop(context, _UVRSC_WMMXD,
RegisterRange(start, count_minus_one),
_UVRSD_DOUBLE);
break;
}
case 0xc7: {
uint8_t v = getByte(data, offset++);
if (!v || v & 0xf0)
return _URC_FAILURE;
_Unwind_VRS_Pop(context, _UVRSC_WMMXC, v, _UVRSD_DOUBLE);
break;
}
#endif
case 0xc8:
case 0xc9: {
uint8_t v = getByte(data, offset++);
uint8_t start =
static_cast<uint8_t>(((byte == 0xc8) ? 16 : 0) + (v >> 4));
uint8_t count_minus_one = v & 0xf;
if (start + count_minus_one >= 32)
return _URC_FAILURE;
_Unwind_VRS_Pop(context, _UVRSC_VFP,
RegisterRange(start, count_minus_one),
_UVRSD_DOUBLE);
break;
}
default:
return _URC_FAILURE;
}
break;
}
case 0xd0: {
if (byte & 0x08)
return _URC_FAILURE;
_Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x7),
_UVRSD_DOUBLE);
break;
}
default:
return _URC_FAILURE;
}
}
}
if (!wrotePC) {
uint32_t lr;
_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_LR, _UVRSD_UINT32, &lr);
_Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_IP, _UVRSD_UINT32, &lr);
}
return _URC_CONTINUE_UNWIND;
}
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__aeabi_unwind_cpp_pr0(_Unwind_State state, _Unwind_Control_Block *ucbp,
_Unwind_Context *context) {
return unwindOneFrame(state, ucbp, context);
}
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__aeabi_unwind_cpp_pr1(_Unwind_State state, _Unwind_Control_Block *ucbp,
_Unwind_Context *context) {
return unwindOneFrame(state, ucbp, context);
}
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__aeabi_unwind_cpp_pr2(_Unwind_State state, _Unwind_Control_Block *ucbp,
_Unwind_Context *context) {
return unwindOneFrame(state, ucbp, context);
}
static _Unwind_Reason_Code
unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) {
// EHABI #7.3 discusses preserving the VRS in a "temporary VRS" during
// phase 1 and then restoring it to the "primary VRS" for phase 2. The
// effect is phase 2 doesn't see any of the VRS manipulations from phase 1.
// In this implementation, the phases don't share the VRS backing store.
// Instead, they are passed the original |uc| and they create a new VRS
// from scratch thus achieving the same effect.
__unw_init_local(cursor, uc);
// Walk each frame looking for a place to stop.
for (bool handlerNotFound = true; handlerNotFound;) {
// See if frame has code to run (has personality routine).
unw_proc_info_t frameInfo;
if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): __unw_get_proc_info "
"failed => _URC_FATAL_PHASE1_ERROR",
static_cast<void *>(exception_object));
return _URC_FATAL_PHASE1_ERROR;
}
// When tracing, print state information.
if (_LIBUNWIND_TRACING_UNWINDING) {
char functionBuf[512];
const char *functionName = functionBuf;
unw_word_t offset;
if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf),
&offset) != UNW_ESUCCESS) ||
(frameInfo.start_ip + offset > frameInfo.end_ip))
functionName = ".anonymous.";
unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR ", func=%s, "
"lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR,
static_cast<void *>(exception_object), pc,
frameInfo.start_ip, functionName,
frameInfo.lsda, frameInfo.handler);
}
// If there is a personality routine, ask it if it will want to stop at
// this frame.
if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
(_Unwind_Personality_Fn)(long)(frameInfo.handler);
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): calling personality function %p",
static_cast<void *>(exception_object),
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(p)));
struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor);
exception_object->pr_cache.fnstart = frameInfo.start_ip;
exception_object->pr_cache.ehtp =
(_Unwind_EHT_Header *)frameInfo.unwind_info;
exception_object->pr_cache.additional = frameInfo.flags;
_Unwind_Reason_Code personalityResult =
(*p)(_US_VIRTUAL_UNWIND_FRAME, exception_object, context);
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): personality result %d start_ip %x ehtp %p "
"additional %x",
static_cast<void *>(exception_object), personalityResult,
exception_object->pr_cache.fnstart,
static_cast<void *>(exception_object->pr_cache.ehtp),
exception_object->pr_cache.additional);
switch (personalityResult) {
case _URC_HANDLER_FOUND:
// found a catch clause or locals that need destructing in this frame
// stop search and remember stack pointer at the frame
handlerNotFound = false;
// p should have initialized barrier_cache. EHABI #7.3.5
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND",
static_cast<void *>(exception_object));
return _URC_NO_REASON;
case _URC_CONTINUE_UNWIND:
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND",
static_cast<void *>(exception_object));
// continue unwinding
break;
// EHABI #7.3.3
case _URC_FAILURE:
return _URC_FAILURE;
default:
// something went wrong
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR",
static_cast<void *>(exception_object));
return _URC_FATAL_PHASE1_ERROR;
}
}
}
return _URC_NO_REASON;
}
static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor,
_Unwind_Exception *exception_object,
bool resume) {
// See comment at the start of unwind_phase1 regarding VRS integrity.
__unw_init_local(cursor, uc);
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)",
static_cast<void *>(exception_object));
int frame_count = 0;
// Walk each frame until we reach where search phase said to stop.
while (true) {
// Ask libunwind to get next frame (skip over first which is
// _Unwind_RaiseException or _Unwind_Resume).
//
// Resume only ever makes sense for 1 frame.
_Unwind_State state =
resume ? _US_UNWIND_FRAME_RESUME : _US_UNWIND_FRAME_STARTING;
if (resume && frame_count == 1) {
// On a resume, first unwind the _Unwind_Resume() frame. The next frame
// is now the landing pad for the cleanup from a previous execution of
// phase2. To continue unwinding correctly, replace VRS[15] with the
// IP of the frame that the previous run of phase2 installed the context
// for. After this, continue unwinding as if normal.
//
// See #7.4.6 for details.
__unw_set_reg(cursor, UNW_REG_IP,
exception_object->unwinder_cache.reserved2);
resume = false;
}
// Get info about this frame.
unw_word_t sp;
unw_proc_info_t frameInfo;
__unw_get_reg(cursor, UNW_REG_SP, &sp);
if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): __unw_get_proc_info "
"failed => _URC_FATAL_PHASE2_ERROR",
static_cast<void *>(exception_object));
return _URC_FATAL_PHASE2_ERROR;
}
// When tracing, print state information.
if (_LIBUNWIND_TRACING_UNWINDING) {
char functionBuf[512];
const char *functionName = functionBuf;
unw_word_t offset;
if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf),
&offset) != UNW_ESUCCESS) ||
(frameInfo.start_ip + offset > frameInfo.end_ip))
functionName = ".anonymous.";
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR ", func=%s, sp=0x%" PRIxPTR ", "
"lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "",
static_cast<void *>(exception_object), frameInfo.start_ip,
functionName, sp, frameInfo.lsda,
frameInfo.handler);
}
// If there is a personality routine, tell it we are unwinding.
if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
(_Unwind_Personality_Fn)(long)(frameInfo.handler);
struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor);
// EHABI #7.2
exception_object->pr_cache.fnstart = frameInfo.start_ip;
exception_object->pr_cache.ehtp =
(_Unwind_EHT_Header *)frameInfo.unwind_info;
exception_object->pr_cache.additional = frameInfo.flags;
_Unwind_Reason_Code personalityResult =
(*p)(state, exception_object, context);
switch (personalityResult) {
case _URC_CONTINUE_UNWIND:
// Continue unwinding
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND",
static_cast<void *>(exception_object));
// EHABI #7.2
if (sp == exception_object->barrier_cache.sp) {
// Phase 1 said we would stop at this frame, but we did not...
_LIBUNWIND_ABORT("during phase1 personality function said it would "
"stop here, but now in phase2 it did not stop here");
}
break;
case _URC_INSTALL_CONTEXT:
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT",
static_cast<void *>(exception_object));
// Personality routine says to transfer control to landing pad.
// We may get control back if landing pad calls _Unwind_Resume().
if (_LIBUNWIND_TRACING_UNWINDING) {
unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
__unw_get_reg(cursor, UNW_REG_SP, &sp);
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering "
"user code with ip=0x%" PRIxPTR ", sp=0x%" PRIxPTR,
static_cast<void *>(exception_object),
pc, sp);
}
{
// EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume
// is called back, to find this same frame.
unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
exception_object->unwinder_cache.reserved2 = (uint32_t)pc;
}
__unw_resume(cursor);
// __unw_resume() only returns if there was an error.
return _URC_FATAL_PHASE2_ERROR;
// # EHABI #7.4.3
case _URC_FAILURE:
abort();
default:
// Personality routine returned an unknown result code.
_LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d",
personalityResult);
return _URC_FATAL_PHASE2_ERROR;
}
}
frame_count++;
}
// Clean up phase did not resume at the frame that the search phase
// said it would...
return _URC_FATAL_PHASE2_ERROR;
}
/// Called by __cxa_throw. Only returns if there is a fatal error.
_LIBUNWIND_EXPORT _Unwind_Reason_Code
_Unwind_RaiseException(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)",
static_cast<void *>(exception_object));
unw_context_t uc;
unw_cursor_t cursor;
__unw_getcontext(&uc);
// This field is for compatibility with GCC to say this isn't a forced
// unwind. EHABI #7.2
exception_object->unwinder_cache.reserved1 = 0;
// phase 1: the search phase
_Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object);
if (phase1 != _URC_NO_REASON)
return phase1;
// phase 2: the clean up phase
return unwind_phase2(&uc, &cursor, exception_object, false);
}
_LIBUNWIND_EXPORT void _Unwind_Complete(_Unwind_Exception* exception_object) {
// This is to be called when exception handling completes to give us a chance
// to perform any housekeeping. EHABI #7.2. But we have nothing to do here.
(void)exception_object;
}
/// When _Unwind_RaiseException() is in phase2, it hands control
/// to the personality function at each frame. The personality
/// may force a jump to a landing pad in that function; the landing
/// pad code may then call _Unwind_Resume() to continue with the
/// unwinding. Note: the call to _Unwind_Resume() is from compiler
/// generated user code. All other _Unwind_* routines are called
/// by the C++ runtime __cxa_* routines.
///
/// Note: re-throwing an exception (as opposed to continuing the unwind)
/// is implemented by having the code call __cxa_rethrow() which
/// in turn calls _Unwind_Resume_or_Rethrow().
_LIBUNWIND_EXPORT void
_Unwind_Resume(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)",
static_cast<void *>(exception_object));
unw_context_t uc;
unw_cursor_t cursor;
__unw_getcontext(&uc);
// _Unwind_RaiseException on EHABI will always set the reserved1 field to 0,
// which is in the same position as private_1 below.
// TODO(ajwong): Who wrote the above? Why is it true?
unwind_phase2(&uc, &cursor, exception_object, true);
// Clients assume _Unwind_Resume() does not return, so all we can do is abort.
_LIBUNWIND_ABORT("_Unwind_Resume() can't return");
}
/// Called by personality handler during phase 2 to get LSDA for current frame.
_LIBUNWIND_EXPORT uintptr_t
_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) {
unw_cursor_t *cursor = (unw_cursor_t *)context;
unw_proc_info_t frameInfo;
uintptr_t result = 0;
if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS)
result = (uintptr_t)frameInfo.lsda;
_LIBUNWIND_TRACE_API(
"_Unwind_GetLanguageSpecificData(context=%p) => 0x%llx",
static_cast<void *>(context), (long long)result);
return result;
}
static uint64_t ValueAsBitPattern(_Unwind_VRS_DataRepresentation representation,
void* valuep) {
uint64_t value = 0;
switch (representation) {
case _UVRSD_UINT32:
case _UVRSD_FLOAT:
memcpy(&value, valuep, sizeof(uint32_t));
break;
case _UVRSD_VFPX:
case _UVRSD_UINT64:
case _UVRSD_DOUBLE:
memcpy(&value, valuep, sizeof(uint64_t));
break;
}
return value;
}
_LIBUNWIND_EXPORT _Unwind_VRS_Result
_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
uint32_t regno, _Unwind_VRS_DataRepresentation representation,
void *valuep) {
_LIBUNWIND_TRACE_API("_Unwind_VRS_Set(context=%p, regclass=%d, reg=%d, "
"rep=%d, value=0x%llX)",
static_cast<void *>(context), regclass, regno,
representation,
ValueAsBitPattern(representation, valuep));
unw_cursor_t *cursor = (unw_cursor_t *)context;
switch (regclass) {
case _UVRSC_CORE:
if (representation != _UVRSD_UINT32 || regno > 15)
return _UVRSR_FAILED;
return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno),
*(unw_word_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
case _UVRSC_VFP:
if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
return _UVRSR_FAILED;
if (representation == _UVRSD_VFPX) {
// Can only touch d0-15 with FSTMFDX.
if (regno > 15)
return _UVRSR_FAILED;
__unw_save_vfp_as_X(cursor);
} else {
if (regno > 31)
return _UVRSR_FAILED;
}
return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno),
*(unw_fpreg_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
#if defined(__ARM_WMMX)
case _UVRSC_WMMXC:
if (representation != _UVRSD_UINT32 || regno > 3)
return _UVRSR_FAILED;
return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno),
*(unw_word_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
case _UVRSC_WMMXD:
if (representation != _UVRSD_DOUBLE || regno > 31)
return _UVRSR_FAILED;
return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno),
*(unw_fpreg_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
#else
case _UVRSC_WMMXC:
case _UVRSC_WMMXD:
break;
#endif
}
_LIBUNWIND_ABORT("unsupported register class");
}
static _Unwind_VRS_Result
_Unwind_VRS_Get_Internal(_Unwind_Context *context,
_Unwind_VRS_RegClass regclass, uint32_t regno,
_Unwind_VRS_DataRepresentation representation,
void *valuep) {
unw_cursor_t *cursor = (unw_cursor_t *)context;
switch (regclass) {
case _UVRSC_CORE:
if (representation != _UVRSD_UINT32 || regno > 15)
return _UVRSR_FAILED;
return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno),
(unw_word_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
case _UVRSC_VFP:
if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
return _UVRSR_FAILED;
if (representation == _UVRSD_VFPX) {
// Can only touch d0-15 with FSTMFDX.
if (regno > 15)
return _UVRSR_FAILED;
__unw_save_vfp_as_X(cursor);
} else {
if (regno > 31)
return _UVRSR_FAILED;
}
return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno),
(unw_fpreg_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
#if defined(__ARM_WMMX)
case _UVRSC_WMMXC:
if (representation != _UVRSD_UINT32 || regno > 3)
return _UVRSR_FAILED;
return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno),
(unw_word_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
case _UVRSC_WMMXD:
if (representation != _UVRSD_DOUBLE || regno > 31)
return _UVRSR_FAILED;
return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno),
(unw_fpreg_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
#else
case _UVRSC_WMMXC:
case _UVRSC_WMMXD:
break;
#endif
}
_LIBUNWIND_ABORT("unsupported register class");
}
_LIBUNWIND_EXPORT _Unwind_VRS_Result
_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
uint32_t regno, _Unwind_VRS_DataRepresentation representation,
void *valuep) {
_Unwind_VRS_Result result =
_Unwind_VRS_Get_Internal(context, regclass, regno, representation,
valuep);
_LIBUNWIND_TRACE_API("_Unwind_VRS_Get(context=%p, regclass=%d, reg=%d, "
"rep=%d, value=0x%llX, result = %d)",
static_cast<void *>(context), regclass, regno,
representation,
ValueAsBitPattern(representation, valuep), result);
return result;
}
_Unwind_VRS_Result
_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
uint32_t discriminator,
_Unwind_VRS_DataRepresentation representation) {
_LIBUNWIND_TRACE_API("_Unwind_VRS_Pop(context=%p, regclass=%d, "
"discriminator=%d, representation=%d)",
static_cast<void *>(context), regclass, discriminator,
representation);
switch (regclass) {
case _UVRSC_WMMXC:
#if !defined(__ARM_WMMX)
break;
#endif
case _UVRSC_CORE: {
if (representation != _UVRSD_UINT32)
return _UVRSR_FAILED;
// When popping SP from the stack, we don't want to override it from the
// computed new stack location. See EHABI #7.5.4 table 3.
bool poppedSP = false;
uint32_t* sp;
if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP,
_UVRSD_UINT32, &sp) != _UVRSR_OK) {
return _UVRSR_FAILED;
}
for (uint32_t i = 0; i < 16; ++i) {
if (!(discriminator & static_cast<uint32_t>(1 << i)))
continue;
uint32_t value = *sp++;
if (regclass == _UVRSC_CORE && i == 13)
poppedSP = true;
if (_Unwind_VRS_Set(context, regclass, i,
_UVRSD_UINT32, &value) != _UVRSR_OK) {
return _UVRSR_FAILED;
}
}
if (!poppedSP) {
return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP,
_UVRSD_UINT32, &sp);
}
return _UVRSR_OK;
}
case _UVRSC_WMMXD:
#if !defined(__ARM_WMMX)
break;
#endif
case _UVRSC_VFP: {
if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
return _UVRSR_FAILED;
uint32_t first = discriminator >> 16;
uint32_t count = discriminator & 0xffff;
uint32_t end = first+count;
uint32_t* sp;
if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP,
_UVRSD_UINT32, &sp) != _UVRSR_OK) {
return _UVRSR_FAILED;
}
// For _UVRSD_VFPX, we're assuming the data is stored in FSTMX "standard
// format 1", which is equivalent to FSTMD + a padding word.
for (uint32_t i = first; i < end; ++i) {
// SP is only 32-bit aligned so don't copy 64-bit at a time.
uint64_t w0 = *sp++;
uint64_t w1 = *sp++;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
uint64_t value = (w1 << 32) | w0;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
uint64_t value = (w0 << 32) | w1;
#else
#error "Unable to determine endianess"
#endif
if (_Unwind_VRS_Set(context, regclass, i, representation, &value) !=
_UVRSR_OK)
return _UVRSR_FAILED;
}
if (representation == _UVRSD_VFPX)
++sp;
return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
&sp);
}
}
_LIBUNWIND_ABORT("unsupported register class");
}
/// Called by personality handler during phase 2 to find the start of the
/// function.
_LIBUNWIND_EXPORT uintptr_t
_Unwind_GetRegionStart(struct _Unwind_Context *context) {
unw_cursor_t *cursor = (unw_cursor_t *)context;
unw_proc_info_t frameInfo;
uintptr_t result = 0;
if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS)
result = (uintptr_t)frameInfo.start_ip;
_LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%llX",
static_cast<void *>(context), (long long)result);
return result;
}
/// Called by personality handler during phase 2 if a foreign exception
/// is caught.
_LIBUNWIND_EXPORT void
_Unwind_DeleteException(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)",
static_cast<void *>(exception_object));
if (exception_object->exception_cleanup != NULL)
(*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT,
exception_object);
}
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__gnu_unwind_frame(_Unwind_Exception *exception_object,
struct _Unwind_Context *context) {
unw_cursor_t *cursor = (unw_cursor_t *)context;
if (__unw_step(cursor) != UNW_STEP_SUCCESS)
return _URC_FAILURE;
return _URC_OK;
}
#endif // defined(_LIBUNWIND_ARM_EHABI)
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h
index ca276d2f3cf8..81500905c0f5 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -1,1327 +1,1327 @@
//===- LazyCallGraph.h - Analysis of a Module's call graph ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Implements a lazy call graph analysis and related passes for the new pass
/// manager.
///
/// NB: This is *not* a traditional call graph! It is a graph which models both
/// the current calls and potential calls. As a consequence there are many
/// edges in this call graph that do not correspond to a 'call' or 'invoke'
/// instruction.
///
/// The primary use case of this graph analysis is to facilitate iterating
/// across the functions of a module in ways that ensure all callees are
/// visited prior to a caller (given any SCC constraints), or vice versa. As
/// such it is particularly well suited to organizing CGSCC optimizations such
/// as inlining, outlining, argument promotion, etc. That is its primary use
/// case and motivates the design. It may not be appropriate for other
/// purposes. The use graph of functions or some other conservative analysis of
/// call instructions may be interesting for optimizations and subsequent
/// analyses which don't work in the context of an overly specified
/// potential-call-edge graph.
///
/// To understand the specific rules and nature of this call graph analysis,
/// see the documentation of the \c LazyCallGraph below.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_LAZYCALLGRAPH_H
#define LLVM_ANALYSIS_LAZYCALLGRAPH_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <string>
#include <utility>
namespace llvm {
template <class GraphType> struct GraphTraits;
class Module;
class Value;
/// A lazily constructed view of the call graph of a module.
///
/// With the edges of this graph, the motivating constraint that we are
/// attempting to maintain is that function-local optimization, CGSCC-local
/// optimizations, and optimizations transforming a pair of functions connected
/// by an edge in the graph, do not invalidate a bottom-up traversal of the SCC
/// DAG. That is, no optimizations will delete, remove, or add an edge such
/// that functions already visited in a bottom-up order of the SCC DAG are no
/// longer valid to have visited, or such that functions not yet visited in
/// a bottom-up order of the SCC DAG are not required to have already been
/// visited.
///
/// Within this constraint, the desire is to minimize the merge points of the
/// SCC DAG. The greater the fanout of the SCC DAG and the fewer merge points
/// in the SCC DAG, the more independence there is in optimizing within it.
/// There is a strong desire to enable parallelization of optimizations over
/// the call graph, and both limited fanout and merge points will (artificially
/// in some cases) limit the scaling of such an effort.
///
/// To this end, the graph represents both direct calls and any potential resolution of
/// an indirect call edge. Another way to think about it is that it represents
/// both the direct call edges and any direct call edges that might be formed
/// through static optimizations. Specifically, it considers taking the address
/// of a function to be an edge in the call graph because this might be
/// forwarded to become a direct call by some subsequent function-local
/// optimization. The result is that the graph closely follows the use-def
/// edges for functions. Walking "up" the graph can be done by looking at all
/// of the uses of a function.
///
/// The roots of the call graph are the external functions and functions
/// escaped into global variables. Those functions can be called from outside
/// of the module or via unknowable means in the IR -- we may not be able to
/// form even a potential call edge from a function body which may dynamically
/// load the function and call it.
///
/// This analysis still requires updates to remain valid after optimizations
/// which could potentially change the set of potential callees. The
/// constraints it operates under only make the traversal order remain valid.
///
/// The entire analysis must be re-computed if full interprocedural
/// optimizations run at any point. For example, globalopt completely
/// invalidates the information in this analysis.
///
/// FIXME: This class is named LazyCallGraph in a lame attempt to distinguish
/// it from the existing CallGraph. At some point, it is expected that this
/// will be the only call graph and it will be renamed accordingly.
class LazyCallGraph {
public:
class Node;
class EdgeSequence;
class SCC;
class RefSCC;
/// A class used to represent edges in the call graph.
///
/// The lazy call graph models both *call* edges and *reference* edges. Call
/// edges are much what you would expect, and exist when there is a 'call' or
/// 'invoke' instruction calling some function. Reference edges are also tracked
/// alongside these, and exist whenever any instruction (transitively
/// through its operands) references a function. All call edges are
/// inherently reference edges, and so the reference graph forms a superset
/// of the formal call graph.
///
/// All of these forms of edges are fundamentally represented as outgoing
/// edges. The edges are stored in the source node and point at the target
/// node. This allows the edge structure itself to be a very compact data
/// structure: essentially a tagged pointer.
class Edge {
public:
/// The kind of edge in the graph.
enum Kind : bool { Ref = false, Call = true };
Edge();
explicit Edge(Node &N, Kind K);
/// Test whether the edge is null.
///
/// This happens when an edge has been deleted. We leave the edge objects
/// around but clear them.
explicit operator bool() const;
/// Returns the \c Kind of the edge.
Kind getKind() const;
/// Test whether the edge represents a direct call to a function.
///
/// This requires that the edge is not null.
bool isCall() const;
/// Get the call graph node referenced by this edge.
///
/// This requires that the edge is not null.
Node &getNode() const;
/// Get the function referenced by this edge.
///
/// This requires that the edge is not null.
Function &getFunction() const;
private:
friend class LazyCallGraph::EdgeSequence;
friend class LazyCallGraph::RefSCC;
PointerIntPair<Node *, 1, Kind> Value;
void setKind(Kind K) { Value.setInt(K); }
};
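// --- Editorial sketch: illustrative client code, not part of this header ---
// A minimal example of the Edge accessors declared above, assuming the edge
// was obtained from an already populated node. The helper name `printEdge`
// is hypothetical.
static void printEdge(const LazyCallGraph::Edge &E, raw_ostream &OS) {
  if (!E) {
    // Deleted edges remain in the edge vector but test false.
    OS << "<null edge>\n";
    return;
  }
  OS << (E.isCall() ? "call" : "ref") << " -> " << E.getFunction().getName()
     << "\n";
}
// ----------------------------------------------------------------------------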
/// The edge sequence object.
///
/// This typically exists entirely within the node but is exposed as
/// a separate type because a node doesn't initially have edges. An explicit
/// population step is required to produce this sequence at first and it is
/// then cached in the node. It is also used to represent edges entering the
/// graph from outside the module to model the graph's roots.
///
/// The sequence itself is both iterable and indexable. The indexes remain
/// stable even as the sequence mutates (including removal).
class EdgeSequence {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
friend class LazyCallGraph::RefSCC;
using VectorT = SmallVector<Edge, 4>;
using VectorImplT = SmallVectorImpl<Edge>;
public:
/// An iterator used for the edges to both entry nodes and child nodes.
class iterator
: public iterator_adaptor_base<iterator, VectorImplT::iterator,
std::forward_iterator_tag> {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
VectorImplT::iterator E;
// Build the iterator for a specific position in the edge list.
iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E)
: iterator_adaptor_base(BaseI), E(E) {
while (I != E && !*I)
++I;
}
public:
iterator() = default;
using iterator_adaptor_base::operator++;
iterator &operator++() {
do {
++I;
} while (I != E && !*I);
return *this;
}
};
/// An iterator over specifically call edges.
///
/// This has the same iteration properties as the \c iterator, but
/// restricts itself to edges which represent actual calls.
class call_iterator
: public iterator_adaptor_base<call_iterator, VectorImplT::iterator,
std::forward_iterator_tag> {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
VectorImplT::iterator E;
/// Advance the iterator to the next valid, call edge.
void advanceToNextEdge() {
while (I != E && (!*I || !I->isCall()))
++I;
}
// Build the iterator for a specific position in the edge list.
call_iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E)
: iterator_adaptor_base(BaseI), E(E) {
advanceToNextEdge();
}
public:
call_iterator() = default;
using iterator_adaptor_base::operator++;
call_iterator &operator++() {
++I;
advanceToNextEdge();
return *this;
}
};
iterator begin() { return iterator(Edges.begin(), Edges.end()); }
iterator end() { return iterator(Edges.end(), Edges.end()); }
Edge &operator[](Node &N) {
assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!");
auto &E = Edges[EdgeIndexMap.find(&N)->second];
assert(E && "Dead or null edge!");
return E;
}
Edge *lookup(Node &N) {
auto EI = EdgeIndexMap.find(&N);
if (EI == EdgeIndexMap.end())
return nullptr;
auto &E = Edges[EI->second];
return E ? &E : nullptr;
}
call_iterator call_begin() {
return call_iterator(Edges.begin(), Edges.end());
}
call_iterator call_end() { return call_iterator(Edges.end(), Edges.end()); }
iterator_range<call_iterator> calls() {
return make_range(call_begin(), call_end());
}
bool empty() {
for (auto &E : Edges)
if (E)
return false;
return true;
}
private:
VectorT Edges;
DenseMap<Node *, int> EdgeIndexMap;
EdgeSequence() = default;
/// Internal helper to insert an edge to a node.
void insertEdgeInternal(Node &ChildN, Edge::Kind EK);
/// Internal helper to change an edge kind.
void setEdgeKind(Node &ChildN, Edge::Kind EK);
/// Internal helper to remove the edge to the given function.
bool removeEdgeInternal(Node &ChildN);
};
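// --- Editorial sketch: illustrative client code, not part of this header ---
// Iterating an EdgeSequence as documented above: the plain iterators skip
// null (deleted) edges, and calls() restricts the walk to call edges. The
// helper name `countDirectCalls` is hypothetical.
static unsigned countDirectCalls(LazyCallGraph::Node &N) {
  unsigned NumCalls = 0;
  for (LazyCallGraph::Edge &E : N.populate().calls()) {
    (void)E; // Each surviving call edge has a concrete callee node.
    ++NumCalls;
  }
  return NumCalls;
}
// ----------------------------------------------------------------------------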
/// A node in the call graph.
///
/// This represents a single node. Its primary roles are to cache the list of
/// callees, de-duplicate and provide fast testing of whether a function is
/// a callee, and facilitate iteration of child nodes in the graph.
///
/// The node works much like an optional in order to lazily populate the
/// edges of each node. Until populated, there are no edges. Once populated,
/// you can access the edges by dereferencing the node or using the `->`
/// operator as if the node was an `Optional<EdgeSequence>`.
class Node {
friend class LazyCallGraph;
friend class LazyCallGraph::RefSCC;
public:
LazyCallGraph &getGraph() const { return *G; }
Function &getFunction() const { return *F; }
StringRef getName() const { return F->getName(); }
/// Equality is defined as address equality.
bool operator==(const Node &N) const { return this == &N; }
bool operator!=(const Node &N) const { return !operator==(N); }
/// Tests whether the node has been populated with edges.
bool isPopulated() const { return Edges.hasValue(); }
/// Tests whether this is actually a dead node and no longer valid.
///
/// Users rarely interact with nodes in this state and other methods are
/// invalid. This is used to model a node in an edge list where the
/// function has been completely removed.
bool isDead() const {
assert(!G == !F &&
"Both graph and function pointers should be null or non-null.");
return !G;
}
// We allow accessing the edges by dereferencing or using the arrow
// operator, essentially wrapping the internal optional.
EdgeSequence &operator*() const {
// Rip const off because the node itself isn't changing here.
return const_cast<EdgeSequence &>(*Edges);
}
EdgeSequence *operator->() const { return &**this; }
/// Populate the edges of this node if necessary.
///
/// The first time this is called it will populate the edges for this node
/// in the graph. It does this by scanning the underlying function, so once
/// this is done, any changes to that function must be explicitly reflected
/// in updates to the graph.
///
/// \returns the populated \c EdgeSequence to simplify walking it.
///
/// This will not update or re-scan anything if called repeatedly. Instead,
/// the edge sequence is cached and returned immediately on subsequent
/// calls.
EdgeSequence &populate() {
if (Edges)
return *Edges;
return populateSlow();
}
private:
LazyCallGraph *G;
Function *F;
// We provide for the DFS numbering and Tarjan walk lowlink numbers to be
// stored directly within the node. These are both '-1' when nodes are part
// of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk.
int DFSNumber = 0;
int LowLink = 0;
Optional<EdgeSequence> Edges;
/// Basic constructor implements the scanning of F into Edges and
/// EdgeIndexMap.
Node(LazyCallGraph &G, Function &F) : G(&G), F(&F) {}
/// Implementation of the scan when populating.
EdgeSequence &populateSlow();
/// Internal helper to directly replace the function with a new one.
///
/// This is used to facilitate transformations which need to replace the
/// formal Function object but directly move the body and users from one to
/// the other.
void replaceFunction(Function &NewF);
void clear() { Edges.reset(); }
/// Print the name of this node's function.
friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) {
return OS << N.F->getName();
}
/// Dump the name of this node's function to stderr.
void dump() const;
};
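// --- Editorial sketch: illustrative client code, not part of this header ---
// The lazy-population contract described above: a node has no edges until
// populate() is called, after which the cached EdgeSequence is returned on
// every subsequent call. The helper name `visitCallees` is hypothetical.
static void visitCallees(LazyCallGraph &CG, Function &F,
                         function_ref<void(Function &)> Visit) {
  LazyCallGraph::Node &N = CG.get(F);                // Creates the node if needed.
  LazyCallGraph::EdgeSequence &Edges = N.populate(); // Scans F at most once.
  for (LazyCallGraph::Edge &E : Edges.calls())
    Visit(E.getFunction());
}
// ----------------------------------------------------------------------------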
/// An SCC of the call graph.
///
/// This represents a Strongly Connected Component of the direct call graph
/// -- ignoring indirect calls and function references. It stores this as
/// a collection of call graph nodes. While the order of nodes in the SCC is
/// stable, it is not any particular order.
///
/// The SCCs are nested within a \c RefSCC, see below for details about that
/// outer structure. SCCs do not support mutation of the call graph; that
/// must be done through the containing \c RefSCC in order to fully reason
/// about the ordering and connections of the graph.
- class SCC {
+ class LLVM_EXTERNAL_VISIBILITY SCC {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
RefSCC *OuterRefSCC;
SmallVector<Node *, 1> Nodes;
template <typename NodeRangeT>
SCC(RefSCC &OuterRefSCC, NodeRangeT &&Nodes)
: OuterRefSCC(&OuterRefSCC), Nodes(std::forward<NodeRangeT>(Nodes)) {}
void clear() {
OuterRefSCC = nullptr;
Nodes.clear();
}
/// Print a short description useful for debugging or logging.
///
/// We print the function names in the SCC wrapped in '()'s and skipping
/// the middle functions if there are a large number.
//
// Note: this is defined inline to dodge issues with GCC's interpretation
// of enclosing namespaces for friend function declarations.
friend raw_ostream &operator<<(raw_ostream &OS, const SCC &C) {
OS << '(';
int i = 0;
for (LazyCallGraph::Node &N : C) {
if (i > 0)
OS << ", ";
// Elide the inner elements if there are too many.
if (i > 8) {
OS << "..., " << *C.Nodes.back();
break;
}
OS << N;
++i;
}
OS << ')';
return OS;
}
/// Dump a short description of this SCC to stderr.
void dump() const;
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
/// Verify invariants about the SCC.
///
/// This will attempt to validate all of the basic invariants within an
/// SCC, but not that it is a strongly connected component per se. Primarily
/// useful while building and updating the graph to check that basic
/// properties are in place rather than having inexplicable crashes later.
void verify();
#endif
public:
using iterator = pointee_iterator<SmallVectorImpl<Node *>::const_iterator>;
iterator begin() const { return Nodes.begin(); }
iterator end() const { return Nodes.end(); }
int size() const { return Nodes.size(); }
RefSCC &getOuterRefSCC() const { return *OuterRefSCC; }
/// Test if this SCC is a parent of \a C.
///
/// Note that this is linear in the number of edges departing the current
/// SCC.
bool isParentOf(const SCC &C) const;
/// Test if this SCC is an ancestor of \a C.
///
/// Note that in the worst case this is linear in the number of edges
/// departing the current SCC and every SCC in the entire graph reachable
/// from this SCC. Thus this very well may walk every edge in the entire
/// call graph! Do not call this in a tight loop!
bool isAncestorOf(const SCC &C) const;
/// Test if this SCC is a child of \a C.
///
/// See the comments for \c isParentOf for detailed notes about the
/// complexity of this routine.
bool isChildOf(const SCC &C) const { return C.isParentOf(*this); }
/// Test if this SCC is a descendant of \a C.
///
/// See the comments for \c isParentOf for detailed notes about the
/// complexity of this routine.
bool isDescendantOf(const SCC &C) const { return C.isAncestorOf(*this); }
/// Provide a short name by printing this SCC to a std::string.
///
/// This copes with the fact that we don't have a name per se for an SCC
/// while still making the use of this in debugging and logging useful.
std::string getName() const {
std::string Name;
raw_string_ostream OS(Name);
OS << *this;
OS.flush();
return Name;
}
};
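// --- Editorial sketch: illustrative client code, not part of this header ---
// SCCs are iterable ranges of nodes with a stable (but otherwise
// unspecified) order, and getName() produces the same "(f, g, ...)" text as
// operator<<. The helper name `dumpSCCMembers` is hypothetical.
static void dumpSCCMembers(LazyCallGraph::SCC &C, raw_ostream &OS) {
  OS << C.getName() << " contains " << C.size() << " function(s):\n";
  for (LazyCallGraph::Node &N : C)
    OS << "  " << N.getFunction().getName() << "\n";
}
// ----------------------------------------------------------------------------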
/// A RefSCC of the call graph.
///
/// This models a Strongly Connected Component of function reference edges in
/// the call graph. As opposed to actual SCCs, these can be used to scope
/// subgraphs of the module which are independent from other subgraphs of the
/// module because they do not reference it in any way. This is also the unit
/// where we do mutation of the graph in order to restrict mutations to those
/// which don't violate this independence.
///
/// A RefSCC contains a DAG of actual SCCs. All the nodes within the RefSCC
/// are necessarily within some actual SCC that nests within it. Since
/// a direct call *is* a reference, there will always be at least one RefSCC
/// around any SCC.
class RefSCC {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
LazyCallGraph *G;
/// A postorder list of the inner SCCs.
SmallVector<SCC *, 4> SCCs;
/// A map from SCC to index in the postorder list.
SmallDenseMap<SCC *, int, 4> SCCIndices;
/// Fast-path constructor. RefSCCs should instead be constructed by calling
/// formRefSCCFast on the graph itself.
RefSCC(LazyCallGraph &G);
void clear() {
SCCs.clear();
SCCIndices.clear();
}
/// Print a short description useful for debugging or logging.
///
/// We print the SCCs wrapped in '[]'s and skipping the middle SCCs if
/// there are a large number.
//
// Note: this is defined inline to dodge issues with GCC's interpretation
// of enclosing namespaces for friend function declarations.
friend raw_ostream &operator<<(raw_ostream &OS, const RefSCC &RC) {
OS << '[';
int i = 0;
for (LazyCallGraph::SCC &C : RC) {
if (i > 0)
OS << ", ";
// Elide the inner elements if there are too many.
if (i > 4) {
OS << "..., " << *RC.SCCs.back();
break;
}
OS << C;
++i;
}
OS << ']';
return OS;
}
/// Dump a short description of this RefSCC to stderr.
void dump() const;
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
/// Verify invariants about the RefSCC and all its SCCs.
///
/// This will attempt to validate all of the invariants *within* the
/// RefSCC, but not that it is a strongly connected component of the larger
/// graph. This makes it useful even when partially through an update.
///
/// Invariants checked:
/// - SCCs and their indices match.
/// - The SCCs list is in fact in post-order.
void verify();
#endif
public:
using iterator = pointee_iterator<SmallVectorImpl<SCC *>::const_iterator>;
using range = iterator_range<iterator>;
using parent_iterator =
pointee_iterator<SmallPtrSetImpl<RefSCC *>::const_iterator>;
iterator begin() const { return SCCs.begin(); }
iterator end() const { return SCCs.end(); }
ssize_t size() const { return SCCs.size(); }
SCC &operator[](int Idx) { return *SCCs[Idx]; }
iterator find(SCC &C) const {
return SCCs.begin() + SCCIndices.find(&C)->second;
}
/// Test if this RefSCC is a parent of \a RC.
///
/// CAUTION: This method walks every edge in the \c RefSCC, it can be very
/// expensive.
bool isParentOf(const RefSCC &RC) const;
/// Test if this RefSCC is an ancestor of \a RC.
///
/// CAUTION: This method walks the directed graph of edges as far as
/// necessary to find a possible path to the argument. In the worst case
/// this may walk the entire graph and can be extremely expensive.
bool isAncestorOf(const RefSCC &RC) const;
/// Test if this RefSCC is a child of \a RC.
///
/// CAUTION: This method walks every edge in the argument \c RefSCC, it can
/// be very expensive.
bool isChildOf(const RefSCC &RC) const { return RC.isParentOf(*this); }
/// Test if this RefSCC is a descendant of \a RC.
///
/// CAUTION: This method walks the directed graph of edges as far as
/// necessary to find a possible path from the argument. In the worst case
/// this may walk the entire graph and can be extremely expensive.
bool isDescendantOf(const RefSCC &RC) const {
return RC.isAncestorOf(*this);
}
/// Provide a short name by printing this RefSCC to a std::string.
///
/// This copes with the fact that we don't have a name per se for a RefSCC
/// while still making the use of this in debugging and logging useful.
std::string getName() const {
std::string Name;
raw_string_ostream OS(Name);
OS << *this;
OS.flush();
return Name;
}
///@{
/// \name Mutation API
///
/// These methods provide the core API for updating the call graph in the
/// presence of (potentially still in-flight) DFS-found RefSCCs and SCCs.
///
/// Note that these methods sometimes have complex runtimes, so be careful
/// how you call them.
/// Make an existing internal ref edge into a call edge.
///
/// This may form a larger cycle and thus collapse SCCs into TargetN's SCC.
/// If that happens, the optional callback \p MergeCB will be invoked (if
/// provided) on the SCCs being merged away prior to actually performing
/// the merge. Note that this will never include the target SCC as that
/// will be the SCC functions are merged into to resolve the cycle. Once
/// this function returns, these merged SCCs are not in a valid state but
/// the pointers will remain valid until destruction of the parent graph
/// instance for the purpose of clearing cached information. As a
/// convenience, this function also returns 'true' if a cycle was formed and
/// some SCCs were merged away.
///
/// After this operation, both SourceN's SCC and TargetN's SCC may move
/// position within this RefSCC's postorder list. Any SCCs merged are
/// merged into TargetN's SCC in order to preserve reachability analyses
/// which took place on that SCC.
bool switchInternalEdgeToCall(
Node &SourceN, Node &TargetN,
function_ref<void(ArrayRef<SCC *> MergedSCCs)> MergeCB = {});
/// Make an existing internal call edge between separate SCCs into a ref
/// edge.
///
/// If SourceN and TargetN are in separate SCCs within this RefSCC, changing
/// the call edge between them to a ref edge is a trivial operation that
/// does not require any structural changes to the call graph.
void switchTrivialInternalEdgeToRef(Node &SourceN, Node &TargetN);
/// Make an existing internal call edge within a single SCC into a ref
/// edge.
///
/// Since SourceN and TargetN are part of a single SCC, this SCC may be
/// split up due to breaking a cycle in the call edges that formed it. If
/// that happens, then this routine will insert new SCCs into the postorder
/// list *before* the SCC of TargetN (previously the SCC of both). This
/// preserves postorder as the TargetN can reach all of the other nodes by
/// definition of previously being in a single SCC formed by the cycle from
/// SourceN to TargetN.
///
/// The newly added SCCs are inserted *immediately* and contiguously
/// prior to the TargetN SCC, and the returned range covers the new SCCs in
/// the RefSCC's postorder sequence. You can directly iterate the returned
/// range to observe all of the new SCCs in postorder.
///
/// Note that if SourceN and TargetN are in separate SCCs, the simpler
/// routine `switchTrivialInternalEdgeToRef` should be used instead.
iterator_range<iterator> switchInternalEdgeToRef(Node &SourceN,
Node &TargetN);
/// Make an existing outgoing ref edge into a call edge.
///
/// Note that this is trivial as there are no cyclic impacts and there
/// remains a reference edge.
void switchOutgoingEdgeToCall(Node &SourceN, Node &TargetN);
/// Make an existing outgoing call edge into a ref edge.
///
/// This is trivial as there are no cyclic impacts and there remains
/// a reference edge.
void switchOutgoingEdgeToRef(Node &SourceN, Node &TargetN);
/// Insert a ref edge from one node in this RefSCC to another in this
/// RefSCC.
///
/// This is always a trivial operation as it doesn't change any part of the
/// graph structure besides connecting the two nodes.
///
/// Note that we don't support directly inserting internal *call* edges
/// because that could change the graph structure and requires returning
/// information about what became invalid. As a consequence, the pattern
/// should be to first insert the necessary ref edge, and then to switch it
/// to a call edge if needed and handle any invalidation that results. See
/// the \c switchInternalEdgeToCall routine for details.
void insertInternalRefEdge(Node &SourceN, Node &TargetN);
/// Insert an edge whose parent is in this RefSCC and child is in some
/// child RefSCC.
///
/// There must be an existing path from the \p SourceN to the \p TargetN.
/// This operation is inexpensive and does not change the set of SCCs and
/// RefSCCs in the graph.
void insertOutgoingEdge(Node &SourceN, Node &TargetN, Edge::Kind EK);
/// Insert an edge whose source is in a descendant RefSCC and target is in
/// this RefSCC.
///
/// There must be an existing path from the target to the source in this
/// case.
///
/// NB! This has the potential to be a very expensive function. It
/// inherently forms a cycle in the prior RefSCC DAG and we have to merge
/// RefSCCs to resolve that cycle. But finding all of the RefSCCs which
/// participate in the cycle can in the worst case require traversing every
/// RefSCC in the graph. Every attempt is made to avoid that, but passes
/// must still exercise caution calling this routine repeatedly.
///
/// Also note that this can only insert ref edges. In order to insert
/// a call edge, first insert a ref edge and then switch it to a call edge.
/// These are intentionally kept as separate interfaces because each step
/// of the operation invalidates a different set of data structures.
///
/// This returns all the RefSCCs which were merged into this RefSCC
/// (the target's). This allows callers to invalidate any cached
/// information.
///
/// FIXME: We could possibly optimize this quite a bit for cases where the
/// caller and callee are very nearby in the graph. See comments in the
/// implementation for details, but that use case might impact users.
SmallVector<RefSCC *, 1> insertIncomingRefEdge(Node &SourceN,
Node &TargetN);
/// Remove an edge whose source is in this RefSCC and target is *not*.
///
/// This removes an inter-RefSCC edge. All inter-RefSCC edges originating
/// from this SCC have been fully explored by any in-flight DFS graph
/// formation, so this is always safe to call once you have the source
/// RefSCC.
///
/// This operation does not change the cyclic structure of the graph and so
/// is very inexpensive. It may change the connectivity graph of the SCCs
/// though, so be careful calling this while iterating over them.
void removeOutgoingEdge(Node &SourceN, Node &TargetN);
/// Remove a list of ref edges which are entirely within this RefSCC.
///
/// Both the \a SourceN and all of the \a TargetNs must be within this
/// RefSCC. Removing these edges may break cycles that form this RefSCC and
/// thus this operation may change the RefSCC graph significantly. In
/// particular, this operation will re-form new RefSCCs based on the
/// remaining connectivity of the graph. The following invariants are
/// guaranteed to hold after calling this method:
///
/// 1) If a ref-cycle remains after removal, it leaves this RefSCC intact
/// and in the graph. No new RefSCCs are built.
/// 2) Otherwise, this RefSCC will be dead after this call and no longer in
/// the graph or the postorder traversal of the call graph. Any iterator
/// pointing at this RefSCC will become invalid.
/// 3) All newly formed RefSCCs will be returned and the order of the
/// RefSCCs returned will be a valid postorder traversal of the new
/// RefSCCs.
/// 4) No RefSCC other than this RefSCC has its member set changed (this is
/// inherent in the definition of removing such an edge).
///
/// These invariants are very important to ensure that we can build
/// optimization pipelines on top of the CGSCC pass manager which
/// intelligently update the RefSCC graph without invalidating other parts
/// of the RefSCC graph.
///
/// Note that we provide no routine to remove a *call* edge. Instead, you
/// must first switch it to a ref edge using \c switchInternalEdgeToRef.
/// This split API is intentional as each of these two steps can invalidate
/// a different aspect of the graph structure and needs to have the
/// invalidation handled independently.
///
/// The runtime complexity of this method is, in the worst case, O(V+E)
/// where V is the number of nodes in this RefSCC and E is the number of
/// edges leaving the nodes in this RefSCC. Note that E includes both edges
/// within this RefSCC and edges from this RefSCC to child RefSCCs. Some
/// effort has been made to minimize the overhead of common cases such as
/// self-edges and edge removals which result in a spanning tree with no
/// more cycles.
SmallVector<RefSCC *, 1> removeInternalRefEdge(Node &SourceN,
ArrayRef<Node *> TargetNs);
/// A convenience wrapper around the above to handle trivial cases of
/// inserting a new call edge.
///
/// This is trivial whenever the target is in the same SCC as the source or
/// the edge is an outgoing edge to some descendant SCC. In these cases
/// there is no change to the cyclic structure of SCCs or RefSCCs.
///
/// To further make calling this convenient, it also handles inserting
/// already existing edges.
void insertTrivialCallEdge(Node &SourceN, Node &TargetN);
/// A convenience wrapper around the above to handle trivial cases of
/// inserting a new ref edge.
///
/// This is trivial whenever the target is in the same RefSCC as the source
/// or the edge is an outgoing edge to some descendant RefSCC. In these
/// cases there is no change to the cyclic structure of the RefSCCs.
///
/// To further make calling this convenient, it also handles inserting
/// already existing edges.
void insertTrivialRefEdge(Node &SourceN, Node &TargetN);
/// Directly replace a node's function with a new function.
///
/// This should be used when moving the body and users of a function to
/// a new formal function object but not otherwise changing the call graph
/// structure in any way.
///
/// It requires that the old function in the provided node have zero uses
/// and the new function must have calls and references to it establishing
/// an equivalent graph.
void replaceNodeFunction(Node &N, Function &NewF);
///@}
};
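// --- Editorial sketch: illustrative client code, not part of this header ---
// The two-step mutation pattern documented above for introducing an internal
// call edge: first insert the ref edge (always trivial), then switch it to a
// call edge and observe any SCCs merged away. Assumes both nodes are inside
// RC and no edge between them exists yet; `promoteToCall` is a hypothetical
// helper name.
static bool promoteToCall(LazyCallGraph::RefSCC &RC,
                          LazyCallGraph::Node &SourceN,
                          LazyCallGraph::Node &TargetN) {
  RC.insertInternalRefEdge(SourceN, TargetN);
  // Returns true if collapsing a new cycle merged SCCs into TargetN's SCC.
  return RC.switchInternalEdgeToCall(
      SourceN, TargetN, [](ArrayRef<LazyCallGraph::SCC *> MergedSCCs) {
        // A real caller would invalidate cached per-SCC data here.
        (void)MergedSCCs;
      });
}
// ----------------------------------------------------------------------------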
/// A post-order depth-first RefSCC iterator over the call graph.
///
/// This iterator walks the cached post-order sequence of RefSCCs. However,
/// it trades stability for flexibility. It is restricted to a forward
/// iterator but will survive mutations which insert new RefSCCs and continue
/// to point to the same RefSCC even if it moves in the post-order sequence.
class postorder_ref_scc_iterator
: public iterator_facade_base<postorder_ref_scc_iterator,
std::forward_iterator_tag, RefSCC> {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
/// Nonce type to select the constructor for the end iterator.
struct IsAtEndT {};
LazyCallGraph *G;
RefSCC *RC = nullptr;
/// Build the begin iterator for a node.
postorder_ref_scc_iterator(LazyCallGraph &G) : G(&G), RC(getRC(G, 0)) {}
/// Build the end iterator for a node. This is selected purely by overload.
postorder_ref_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/) : G(&G) {}
/// Get the post-order RefSCC at the given index of the postorder walk,
/// populating it if necessary.
static RefSCC *getRC(LazyCallGraph &G, int Index) {
if (Index == (int)G.PostOrderRefSCCs.size())
// We're at the end.
return nullptr;
return G.PostOrderRefSCCs[Index];
}
public:
bool operator==(const postorder_ref_scc_iterator &Arg) const {
return G == Arg.G && RC == Arg.RC;
}
reference operator*() const { return *RC; }
using iterator_facade_base::operator++;
postorder_ref_scc_iterator &operator++() {
assert(RC && "Cannot increment the end iterator!");
RC = getRC(*G, G->RefSCCIndices.find(RC)->second + 1);
return *this;
}
};
/// Construct a graph for the given module.
///
/// This sets up the graph and computes all of the entry points of the graph.
/// No function definitions are scanned until their nodes in the graph are
/// requested during traversal.
LazyCallGraph(Module &M,
function_ref<TargetLibraryInfo &(Function &)> GetTLI);
LazyCallGraph(LazyCallGraph &&G);
LazyCallGraph &operator=(LazyCallGraph &&RHS);
bool invalidate(Module &, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &);
EdgeSequence::iterator begin() { return EntryEdges.begin(); }
EdgeSequence::iterator end() { return EntryEdges.end(); }
void buildRefSCCs();
postorder_ref_scc_iterator postorder_ref_scc_begin() {
if (!EntryEdges.empty())
assert(!PostOrderRefSCCs.empty() &&
"Must form RefSCCs before iterating them!");
return postorder_ref_scc_iterator(*this);
}
postorder_ref_scc_iterator postorder_ref_scc_end() {
if (!EntryEdges.empty())
assert(!PostOrderRefSCCs.empty() &&
"Must form RefSCCs before iterating them!");
return postorder_ref_scc_iterator(*this,
postorder_ref_scc_iterator::IsAtEndT());
}
iterator_range<postorder_ref_scc_iterator> postorder_ref_sccs() {
return make_range(postorder_ref_scc_begin(), postorder_ref_scc_end());
}
/// Lookup a function in the graph which has already been scanned and added.
Node *lookup(const Function &F) const { return NodeMap.lookup(&F); }
/// Lookup a function's SCC in the graph.
///
/// \returns null if the function hasn't been assigned an SCC via the RefSCC
/// iterator walk.
SCC *lookupSCC(Node &N) const { return SCCMap.lookup(&N); }
/// Lookup a function's RefSCC in the graph.
///
/// \returns null if the function hasn't been assigned a RefSCC via the
/// RefSCC iterator walk.
RefSCC *lookupRefSCC(Node &N) const {
if (SCC *C = lookupSCC(N))
return &C->getOuterRefSCC();
return nullptr;
}
/// Get a graph node for a given function, scanning it to populate the graph
/// data as necessary.
Node &get(Function &F) {
Node *&N = NodeMap[&F];
if (N)
return *N;
return insertInto(F, N);
}
/// Get the sequence of known and defined library functions.
///
/// These functions, because they are known to LLVM, can have calls
/// introduced out of thin air from arbitrary IR.
ArrayRef<Function *> getLibFunctions() const {
return LibFunctions.getArrayRef();
}
/// Test whether a function is a known and defined library function tracked by
/// the call graph.
///
/// Because these functions are known to LLVM they are specially modeled in
/// the call graph and even when all IR-level references have been removed
/// remain active and reachable.
bool isLibFunction(Function &F) const { return LibFunctions.count(&F); }
///@{
/// \name Pre-SCC Mutation API
///
/// These methods are only valid to call prior to forming any SCCs for this
/// call graph. They can be used to update the core node-graph during
/// a node-based inorder traversal that precedes any SCC-based traversal.
///
/// Once you begin manipulating a call graph's SCCs, most mutation of the
/// graph must be performed via a RefSCC method. There are some exceptions
/// below.
/// Update the call graph after inserting a new edge.
void insertEdge(Node &SourceN, Node &TargetN, Edge::Kind EK);
/// Update the call graph after inserting a new edge.
void insertEdge(Function &Source, Function &Target, Edge::Kind EK) {
return insertEdge(get(Source), get(Target), EK);
}
/// Update the call graph after deleting an edge.
void removeEdge(Node &SourceN, Node &TargetN);
/// Update the call graph after deleting an edge.
void removeEdge(Function &Source, Function &Target) {
return removeEdge(get(Source), get(Target));
}
///@}
///@{
/// \name General Mutation API
///
/// There is a very limited set of mutations allowed on the graph as a whole
/// once SCCs have started to be formed. These routines have strict contracts
/// but may be called at any point.
/// Remove a dead function from the call graph (typically to delete it).
///
/// Note that the function must have an empty use list, and the call graph
/// must be up-to-date prior to calling this. That means it is by itself in
/// a maximal SCC which is by itself in a maximal RefSCC, etc. No structural
/// changes result from calling this routine other than potentially removing
/// entry points into the call graph.
///
/// If SCC formation has begun, the function must not be part of the current
/// DFS for this to be called safely. Typically, the function will have been
/// fully visited by the DFS prior to calling this routine.
void removeDeadFunction(Function &F);
/// Add a new function split/outlined from an existing function.
///
/// The new function may only reference other functions that the original
/// function did.
///
/// The original function must reference (either directly or indirectly) the
/// new function.
///
/// The new function may also reference the original function.
/// It may end up in a parent SCC in the case that the original function's
/// edge to the new function is a ref edge, and the edge back is a call edge.
void addSplitFunction(Function &OriginalFunction, Function &NewFunction);
/// Add new ref-recursive functions split/outlined from an existing function.
///
/// The new functions may only reference other functions that the original
/// function did. The new functions may reference (not call) the original
/// function.
///
/// The original function must reference (not call) all new functions.
/// All new functions must reference (not call) each other.
void addSplitRefRecursiveFunctions(Function &OriginalFunction,
ArrayRef<Function *> NewFunctions);
///@}
///@{
/// \name Static helpers for code doing updates to the call graph.
///
/// These helpers are used to implement parts of the call graph but are also
/// useful to code doing updates or otherwise wanting to walk the IR in the
/// same patterns as when we build the call graph.
/// Recursively visits the defined functions whose address is reachable from
/// every constant in the \p Worklist.
///
/// Doesn't recurse through any constants already in the \p Visited set, and
/// updates that set with every constant visited.
///
/// For each defined function, calls \p Callback with that function.
template <typename CallbackT>
static void visitReferences(SmallVectorImpl<Constant *> &Worklist,
SmallPtrSetImpl<Constant *> &Visited,
CallbackT Callback) {
while (!Worklist.empty()) {
Constant *C = Worklist.pop_back_val();
if (Function *F = dyn_cast<Function>(C)) {
if (!F->isDeclaration())
Callback(*F);
continue;
}
// The blockaddress constant expression is a weird special case; we can't
// generically walk its operands the way we do for all other constants.
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
// If we've already visited the function referred to by the block
// address, we don't need to revisit it.
if (Visited.count(BA->getFunction()))
continue;
// If all of the blockaddress' users are instructions within the
// referred to function, we don't need to insert a cycle.
if (llvm::all_of(BA->users(), [&](User *U) {
if (Instruction *I = dyn_cast<Instruction>(U))
return I->getFunction() == BA->getFunction();
return false;
}))
continue;
// Otherwise we should go visit the referred to function.
Visited.insert(BA->getFunction());
Worklist.push_back(BA->getFunction());
continue;
}
for (Value *Op : C->operand_values())
if (Visited.insert(cast<Constant>(Op)).second)
Worklist.push_back(cast<Constant>(Op));
}
}
///@}
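// --- Editorial sketch: illustrative client code, not part of this header ---
// Using the static visitReferences helper above to find every defined
// function whose address is reachable from a single seed constant (for
// example a global initializer). `collectReferencedFunctions` is a
// hypothetical helper name.
static void collectReferencedFunctions(Constant &Seed,
                                       SmallVectorImpl<Function *> &Out) {
  SmallVector<Constant *, 16> Worklist;
  SmallPtrSet<Constant *, 16> Visited;
  Worklist.push_back(&Seed);
  Visited.insert(&Seed);
  LazyCallGraph::visitReferences(Worklist, Visited,
                                 [&](Function &F) { Out.push_back(&F); });
}
// ----------------------------------------------------------------------------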
private:
using node_stack_iterator = SmallVectorImpl<Node *>::reverse_iterator;
using node_stack_range = iterator_range<node_stack_iterator>;
/// Allocator that holds all the call graph nodes.
SpecificBumpPtrAllocator<Node> BPA;
/// Maps function->node for fast lookup.
DenseMap<const Function *, Node *> NodeMap;
/// The entry edges into the graph.
///
/// These edges are from "external" sources. Put another way, they
/// escape at the module scope.
EdgeSequence EntryEdges;
/// Allocator that holds all the call graph SCCs.
SpecificBumpPtrAllocator<SCC> SCCBPA;
/// Maps Function -> SCC for fast lookup.
DenseMap<Node *, SCC *> SCCMap;
/// Allocator that holds all the call graph RefSCCs.
SpecificBumpPtrAllocator<RefSCC> RefSCCBPA;
/// The post-order sequence of RefSCCs.
///
/// This list is lazily formed the first time we walk the graph.
SmallVector<RefSCC *, 16> PostOrderRefSCCs;
/// A map from RefSCC to the index for it in the postorder sequence of
/// RefSCCs.
DenseMap<RefSCC *, int> RefSCCIndices;
/// Defined functions that are also known library functions which the
/// optimizer can reason about and therefore might introduce calls to out of
/// thin air.
SmallSetVector<Function *, 4> LibFunctions;
/// Helper to insert a new function, with an already looked-up entry in
/// the NodeMap.
Node &insertInto(Function &F, Node *&MappedN);
/// Helper to initialize a new node created outside of creating SCCs and add
/// it to the NodeMap if necessary. For example, useful when a function is
/// split.
Node &initNode(Function &F);
/// Helper to update pointers back to the graph object during moves.
void updateGraphPtrs();
/// Allocates an SCC and constructs it using the graph allocator.
///
/// The arguments are forwarded to the constructor.
template <typename... Ts> SCC *createSCC(Ts &&... Args) {
return new (SCCBPA.Allocate()) SCC(std::forward<Ts>(Args)...);
}
/// Allocates a RefSCC and constructs it using the graph allocator.
///
/// The arguments are forwarded to the constructor.
template <typename... Ts> RefSCC *createRefSCC(Ts &&... Args) {
return new (RefSCCBPA.Allocate()) RefSCC(std::forward<Ts>(Args)...);
}
/// Common logic for building SCCs from a sequence of roots.
///
/// This is a very generic implementation of the depth-first walk and SCC
/// formation algorithm. It uses a generic sequence of roots and generic
/// callbacks for each step. This is designed to be used to implement both
/// the RefSCC formation and SCC formation with shared logic.
///
/// Currently this is a relatively naive implementation of Tarjan's DFS
/// algorithm to form the SCCs.
///
/// FIXME: We should consider newer variants such as Nuutila.
template <typename RootsT, typename GetBeginT, typename GetEndT,
typename GetNodeT, typename FormSCCCallbackT>
static void buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin,
GetEndT &&GetEnd, GetNodeT &&GetNode,
FormSCCCallbackT &&FormSCC);
/// Build the SCCs for a RefSCC out of a list of nodes.
void buildSCCs(RefSCC &RC, node_stack_range Nodes);
/// Get the index of a RefSCC within the postorder traversal.
///
/// Requires that this RefSCC is a valid one in the (perhaps partial)
/// postorder traversed part of the graph.
int getRefSCCIndex(RefSCC &RC) {
auto IndexIt = RefSCCIndices.find(&RC);
assert(IndexIt != RefSCCIndices.end() && "RefSCC doesn't have an index!");
assert(PostOrderRefSCCs[IndexIt->second] == &RC &&
"Index does not point back at RC!");
return IndexIt->second;
}
};
inline LazyCallGraph::Edge::Edge() : Value() {}
inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {}
inline LazyCallGraph::Edge::operator bool() const {
return Value.getPointer() && !Value.getPointer()->isDead();
}
inline LazyCallGraph::Edge::Kind LazyCallGraph::Edge::getKind() const {
assert(*this && "Queried a null edge!");
return Value.getInt();
}
inline bool LazyCallGraph::Edge::isCall() const {
assert(*this && "Queried a null edge!");
return getKind() == Call;
}
inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode() const {
assert(*this && "Queried a null edge!");
return *Value.getPointer();
}
inline Function &LazyCallGraph::Edge::getFunction() const {
assert(*this && "Queried a null edge!");
return getNode().getFunction();
}
// Provide GraphTraits specializations for call graphs.
template <> struct GraphTraits<LazyCallGraph::Node *> {
using NodeRef = LazyCallGraph::Node *;
using ChildIteratorType = LazyCallGraph::EdgeSequence::iterator;
static NodeRef getEntryNode(NodeRef N) { return N; }
static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); }
static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); }
};
template <> struct GraphTraits<LazyCallGraph *> {
using NodeRef = LazyCallGraph::Node *;
using ChildIteratorType = LazyCallGraph::EdgeSequence::iterator;
static NodeRef getEntryNode(NodeRef N) { return N; }
static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); }
static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); }
};
/// An analysis pass which computes the call graph for a module.
class LazyCallGraphAnalysis : public AnalysisInfoMixin<LazyCallGraphAnalysis> {
friend AnalysisInfoMixin<LazyCallGraphAnalysis>;
static AnalysisKey Key;
public:
/// Inform generic clients of the result type.
using Result = LazyCallGraph;
/// Compute the \c LazyCallGraph for the module \c M.
///
/// This just builds the set of entry points to the call graph. The rest is
/// built lazily as it is walked.
LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
return FAM.getResult<TargetLibraryAnalysis>(F);
};
return LazyCallGraph(M, GetTLI);
}
};
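// --- Editorial sketch: illustrative client code, not part of this header ---
// How a new-pass-manager module pass would typically obtain and walk the
// analysis result declared above. `ExamplePrintSCCsPass` is a hypothetical
// pass; it builds the RefSCC postorder before iterating it, as required by
// postorder_ref_scc_begin(), and assumes the usual pass headers are included
// by the client.
struct ExamplePrintSCCsPass : PassInfoMixin<ExamplePrintSCCsPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
    LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
    CG.buildRefSCCs();
    for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs())
      for (LazyCallGraph::SCC &C : RC)
        errs() << C.getName() << "\n"; // Bottom-up over SCCs in each RefSCC.
    return PreservedAnalyses::all();
  }
};
// ----------------------------------------------------------------------------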
/// A pass which prints the call graph to a \c raw_ostream.
///
/// This is primarily useful for testing the analysis.
class LazyCallGraphPrinterPass
: public PassInfoMixin<LazyCallGraphPrinterPass> {
raw_ostream &OS;
public:
explicit LazyCallGraphPrinterPass(raw_ostream &OS);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
/// A pass which prints the call graph as a DOT file to a \c raw_ostream.
///
/// This is primarily useful for visualization purposes.
class LazyCallGraphDOTPrinterPass
: public PassInfoMixin<LazyCallGraphDOTPrinterPass> {
raw_ostream &OS;
public:
explicit LazyCallGraphDOTPrinterPass(raw_ostream &OS);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
} // end namespace llvm
#endif // LLVM_ANALYSIS_LAZYCALLGRAPH_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
index 164ec50e47bc..5983f98d84cf 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
@@ -1,1356 +1,1356 @@
//===- llvm/Analysis/LoopInfo.h - Natural Loop Calculator -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the LoopInfo class that is used to identify natural loops
// and determine the loop depth of various nodes of the CFG. A natural loop
// has exactly one entry-point, which is called the header. Note that natural
// loops may actually be several loops that share the same header node.
//
// This analysis calculates the nesting structure of loops in a function. For
// each natural loop identified, this analysis identifies natural loops
// contained entirely within the loop and the basic blocks that make up the loop.
//
// It can calculate on the fly various bits of information, for example:
//
// * whether there is a preheader for the loop
// * the number of back edges to the header
// * whether or not a particular block branches out of the loop
// * the successor blocks of the loop
// * the loop depth
// * etc...
//
// Note that this analysis specifically identifies *Loops* not cycles or SCCs
// in the CFG. There can be strongly connected components in the CFG which
// this analysis will not recognize and that will not be represented by a Loop
// instance. In particular, a Loop might be inside such a non-loop SCC, or a
// non-loop SCC might contain a sub-SCC which is a Loop.
//
// For an overview of terminology used in this API (and thus all of our loop
// analyses or transforms), see docs/LoopTerminology.rst.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_LOOPINFO_H
#define LLVM_ANALYSIS_LOOPINFO_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include <algorithm>
#include <utility>
namespace llvm {
class DominatorTree;
class LoopInfo;
class Loop;
class InductionDescriptor;
class MDNode;
class MemorySSAUpdater;
class ScalarEvolution;
class raw_ostream;
template <class N, bool IsPostDom> class DominatorTreeBase;
template <class N, class M> class LoopInfoBase;
template <class N, class M> class LoopBase;
//===----------------------------------------------------------------------===//
/// Instances of this class are used to represent loops that are detected in the
/// flow graph.
///
template <class BlockT, class LoopT> class LoopBase {
LoopT *ParentLoop;
// Loops contained entirely within this one.
std::vector<LoopT *> SubLoops;
// The list of blocks in this loop. First entry is the header node.
std::vector<BlockT *> Blocks;
SmallPtrSet<const BlockT *, 8> DenseBlockSet;
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
/// Indicator that this loop is no longer a valid loop.
bool IsInvalid = false;
#endif
LoopBase(const LoopBase<BlockT, LoopT> &) = delete;
const LoopBase<BlockT, LoopT> &
operator=(const LoopBase<BlockT, LoopT> &) = delete;
public:
/// Return the nesting level of this loop. An outer-most loop has depth 1,
/// for consistency with loop depth values used for basic blocks, where depth
/// 0 is used for blocks not inside any loops.
unsigned getLoopDepth() const {
assert(!isInvalid() && "Loop not in a valid state!");
unsigned D = 1;
for (const LoopT *CurLoop = ParentLoop; CurLoop;
CurLoop = CurLoop->ParentLoop)
++D;
return D;
}
BlockT *getHeader() const { return getBlocks().front(); }
/// Return the parent loop if it exists or nullptr for top
/// level loops.
/// A loop is either top-level in a function (that is, it is not
/// contained in any other loop) or it is entirely enclosed in
/// some other loop.
/// If a loop is top-level, it has no parent, otherwise its
/// parent is the innermost loop in which it is enclosed.
LoopT *getParentLoop() const { return ParentLoop; }
/// This is a raw interface for bypassing addChildLoop.
void setParentLoop(LoopT *L) {
assert(!isInvalid() && "Loop not in a valid state!");
ParentLoop = L;
}
/// Return true if the specified loop is contained within this loop.
bool contains(const LoopT *L) const {
assert(!isInvalid() && "Loop not in a valid state!");
if (L == this)
return true;
if (!L)
return false;
return contains(L->getParentLoop());
}
/// Return true if the specified basic block is in this loop.
bool contains(const BlockT *BB) const {
assert(!isInvalid() && "Loop not in a valid state!");
return DenseBlockSet.count(BB);
}
/// Return true if the specified instruction is in this loop.
template <class InstT> bool contains(const InstT *Inst) const {
return contains(Inst->getParent());
}
/// Return the loops contained entirely within this loop.
const std::vector<LoopT *> &getSubLoops() const {
assert(!isInvalid() && "Loop not in a valid state!");
return SubLoops;
}
std::vector<LoopT *> &getSubLoopsVector() {
assert(!isInvalid() && "Loop not in a valid state!");
return SubLoops;
}
typedef typename std::vector<LoopT *>::const_iterator iterator;
typedef
typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator;
iterator begin() const { return getSubLoops().begin(); }
iterator end() const { return getSubLoops().end(); }
reverse_iterator rbegin() const { return getSubLoops().rbegin(); }
reverse_iterator rend() const { return getSubLoops().rend(); }
// LoopInfo does not detect irreducible control flow, just natural
// loops. That is, it is possible that there is cyclic control
// flow within the "innermost loop" or around the "outermost
// loop".
/// Return true if the loop does not contain any (natural) loops.
bool isInnermost() const { return getSubLoops().empty(); }
/// Return true if the loop does not have a parent (natural) loop
/// (i.e. it is outermost, which is the same as top-level).
bool isOutermost() const { return getParentLoop() == nullptr; }
/// Get a list of the basic blocks which make up this loop.
ArrayRef<BlockT *> getBlocks() const {
assert(!isInvalid() && "Loop not in a valid state!");
return Blocks;
}
typedef typename ArrayRef<BlockT *>::const_iterator block_iterator;
block_iterator block_begin() const { return getBlocks().begin(); }
block_iterator block_end() const { return getBlocks().end(); }
inline iterator_range<block_iterator> blocks() const {
assert(!isInvalid() && "Loop not in a valid state!");
return make_range(block_begin(), block_end());
}
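// --- Editorial sketch: illustrative client code, not part of this header ---
// Walking the accessors above on the concrete llvm::Loop instantiation: the
// header is always the first block, and every block is also a member of all
// enclosing loops. `summarizeLoop` is a hypothetical helper; client code
// would also #include "llvm/Support/raw_ostream.h".
static void summarizeLoop(const Loop &L, raw_ostream &OS) {
  OS << "depth " << L.getLoopDepth() << ", header "
     << L.getHeader()->getName() << ", " << L.getNumBlocks() << " block(s), "
     << L.getSubLoops().size() << " direct subloop(s)\n";
  for (const BasicBlock *BB : L.blocks())
    OS << "  " << BB->getName() << "\n";
}
// ----------------------------------------------------------------------------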
/// Get the number of blocks in this loop in constant time.
unsigned getNumBlocks() const {
assert(!isInvalid() && "Loop not in a valid state!");
return Blocks.size();
}
/// Return a direct, mutable handle to the blocks vector so that we can
/// mutate it efficiently with techniques like `std::remove`.
std::vector<BlockT *> &getBlocksVector() {
assert(!isInvalid() && "Loop not in a valid state!");
return Blocks;
}
/// Return a direct, mutable handle to the blocks set so that we can
/// mutate it efficiently.
SmallPtrSetImpl<const BlockT *> &getBlocksSet() {
assert(!isInvalid() && "Loop not in a valid state!");
return DenseBlockSet;
}
/// Return a direct, immutable handle to the blocks set.
const SmallPtrSetImpl<const BlockT *> &getBlocksSet() const {
assert(!isInvalid() && "Loop not in a valid state!");
return DenseBlockSet;
}
/// Return true if this loop is no longer valid. The only valid use of this
/// helper is "assert(L.isInvalid())" or equivalent, since IsInvalid is set to
/// true by the destructor. In other words, if this accessor returns true,
/// the caller has already triggered UB by calling this accessor; and so it
/// can only be called in a context where a return value of true indicates a
/// programmer error.
bool isInvalid() const {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
return IsInvalid;
#else
return false;
#endif
}
/// True if the terminator in the block can branch to another block that is
/// outside of the current loop. \p BB must be inside the loop.
bool isLoopExiting(const BlockT *BB) const {
assert(!isInvalid() && "Loop not in a valid state!");
assert(contains(BB) && "Exiting block must be part of the loop");
for (const auto *Succ : children<const BlockT *>(BB)) {
if (!contains(Succ))
return true;
}
return false;
}
/// Returns true if \p BB is a loop-latch.
/// A latch block is a block that contains a branch back to the header.
/// This function is useful when there are multiple latches in a loop
/// because \fn getLoopLatch will return nullptr in that case.
bool isLoopLatch(const BlockT *BB) const {
assert(!isInvalid() && "Loop not in a valid state!");
assert(contains(BB) && "block does not belong to the loop");
BlockT *Header = getHeader();
auto PredBegin = GraphTraits<Inverse<BlockT *>>::child_begin(Header);
auto PredEnd = GraphTraits<Inverse<BlockT *>>::child_end(Header);
return std::find(PredBegin, PredEnd, BB) != PredEnd;
}
/// Calculate the number of back edges to the loop header.
unsigned getNumBackEdges() const {
assert(!isInvalid() && "Loop not in a valid state!");
unsigned NumBackEdges = 0;
BlockT *H = getHeader();
for (const auto Pred : children<Inverse<BlockT *>>(H))
if (contains(Pred))
++NumBackEdges;
return NumBackEdges;
}
//===--------------------------------------------------------------------===//
// APIs for simple analysis of the loop.
//
// Note that all of these methods can fail on general loops (ie, there may not
// be a preheader, etc). For best success, the loop simplification and
// induction variable canonicalization pass should be used to normalize loops
// for easy analysis. These methods assume canonical loops.
/// Return all blocks inside the loop that have successors outside of the
/// loop. These are the blocks _inside of the current loop_ which branch out.
/// The returned list is always unique.
void getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const;
/// If getExitingBlocks would return exactly one block, return that block.
/// Otherwise return null.
BlockT *getExitingBlock() const;
/// Return all of the successor blocks of this loop. These are the blocks
/// _outside of the current loop_ which are branched to.
void getExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
/// If getExitBlocks would return exactly one block, return that block.
/// Otherwise return null.
BlockT *getExitBlock() const;
/// Return true if no exit block for the loop has a predecessor that is
/// outside the loop.
bool hasDedicatedExits() const;
/// Return all unique successor blocks of this loop.
/// These are the blocks _outside of the current loop_ which are branched to.
void getUniqueExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
/// Return all unique successor blocks of this loop, except that successors
/// from the latch block are not considered. If an exit block reached from
/// the latch also has a non-latch predecessor inside the loop, it will be
/// added to ExitBlocks.
/// These are the blocks _outside of the current loop_ which are branched to.
void getUniqueNonLatchExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
/// If getUniqueExitBlocks would return exactly one block, return that block.
/// Otherwise return null.
BlockT *getUniqueExitBlock() const;
/// Return true if this loop does not have any exit blocks.
bool hasNoExitBlocks() const;
/// Edge type.
typedef std::pair<BlockT *, BlockT *> Edge;
/// Return all pairs of (_inside_block_,_outside_block_).
void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const;
/// If there is a preheader for this loop, return it. A loop has a preheader
/// if there is only one edge to the header of the loop from outside of the
/// loop. If this is the case, the block branching to the header of the loop
/// is the preheader node.
///
/// This method returns null if there is no preheader for the loop.
BlockT *getLoopPreheader() const;
/// If the given loop's header has exactly one unique predecessor outside the
/// loop, return it. Otherwise return null.
/// This is less strict than the loop "preheader" concept, which requires
/// the predecessor to have exactly one successor.
BlockT *getLoopPredecessor() const;
/// If there is a single latch block for this loop, return it.
/// A latch block is a block that contains a branch back to the header.
BlockT *getLoopLatch() const;
/// Return all loop latch blocks of this loop. A latch block is a block that
/// contains a branch back to the header.
void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const {
assert(!isInvalid() && "Loop not in a valid state!");
BlockT *H = getHeader();
for (const auto Pred : children<Inverse<BlockT *>>(H))
if (contains(Pred))
LoopLatches.push_back(Pred);
}
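// --- Editorial sketch: illustrative client code, not part of this header ---
// Querying the canonical-form helpers documented above. Each of the
// preheader/latch queries may return null for loops that have not been
// simplified, so the sketch checks. `describeLoopShape` is a hypothetical
// helper; client code would also #include "llvm/Support/raw_ostream.h".
static void describeLoopShape(const Loop &L, raw_ostream &OS) {
  if (BasicBlock *Preheader = L.getLoopPreheader())
    OS << "preheader: " << Preheader->getName() << "\n";
  if (BasicBlock *Latch = L.getLoopLatch())
    OS << "single latch: " << Latch->getName() << "\n";
  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L.getExitingBlocks(ExitingBlocks);
  OS << ExitingBlocks.size() << " exiting block(s), "
     << L.getNumBackEdges() << " back edge(s)\n";
}
// ----------------------------------------------------------------------------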
/// Return all inner loops in the loop nest rooted by the loop in preorder,
/// with siblings in forward program order.
template <class Type>
static void getInnerLoopsInPreorder(const LoopT &L,
SmallVectorImpl<Type> &PreOrderLoops) {
SmallVector<LoopT *, 4> PreOrderWorklist;
PreOrderWorklist.append(L.rbegin(), L.rend());
while (!PreOrderWorklist.empty()) {
LoopT *L = PreOrderWorklist.pop_back_val();
// Sub-loops are stored in forward program order, but the worklist is
// processed backwards, so append them in reverse order.
PreOrderWorklist.append(L->rbegin(), L->rend());
PreOrderLoops.push_back(L);
}
}
/// Return all loops in the loop nest rooted by the loop in preorder, with
/// siblings in forward program order.
SmallVector<const LoopT *, 4> getLoopsInPreorder() const {
SmallVector<const LoopT *, 4> PreOrderLoops;
const LoopT *CurLoop = static_cast<const LoopT *>(this);
PreOrderLoops.push_back(CurLoop);
getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
return PreOrderLoops;
}
SmallVector<LoopT *, 4> getLoopsInPreorder() {
SmallVector<LoopT *, 4> PreOrderLoops;
LoopT *CurLoop = static_cast<LoopT *>(this);
PreOrderLoops.push_back(CurLoop);
getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
return PreOrderLoops;
}
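// For example, visiting every loop in the nest rooted at \c L, outermost
// first (a sketch, assuming \c L is a `Loop *` and llvm::errs() is in scope):
//
//   for (Loop *SubL : L->getLoopsInPreorder())
//     errs() << SubL->getName() << " at depth " << SubL->getLoopDepth() << "\n";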
//===--------------------------------------------------------------------===//
// APIs for updating loop information after changing the CFG
//
/// This method is used by other analyses to update loop information.
/// NewBB is set to be a new member of the current loop.
/// Because of this, it is added as a member of all parent loops, and is
/// recorded in the specified LoopInfo object as belonging to the current
/// loop. It is not valid to replace the loop header with this method.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LI);
/// This is used when splitting loops up. It replaces the OldChild entry in
/// our children list with NewChild, and updates the parent pointer of
/// OldChild to null and that of NewChild to this loop.
/// This updates the loop depth of the new child.
void replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild);
/// Add the specified loop to be a child of this loop.
/// This updates the loop depth of the new child.
void addChildLoop(LoopT *NewChild) {
assert(!isInvalid() && "Loop not in a valid state!");
assert(!NewChild->ParentLoop && "NewChild already has a parent!");
NewChild->ParentLoop = static_cast<LoopT *>(this);
SubLoops.push_back(NewChild);
}
/// This removes the specified child from being a subloop of this loop. The
/// loop is not deleted, as it will presumably be inserted into another loop.
LoopT *removeChildLoop(iterator I) {
assert(!isInvalid() && "Loop not in a valid state!");
assert(I != SubLoops.end() && "Cannot remove end iterator!");
LoopT *Child = *I;
assert(Child->ParentLoop == this && "Child is not a child of this loop!");
SubLoops.erase(SubLoops.begin() + (I - begin()));
Child->ParentLoop = nullptr;
return Child;
}
/// This removes the specified child from being a subloop of this loop. The
/// loop is not deleted, as it will presumably be inserted into another loop.
LoopT *removeChildLoop(LoopT *Child) {
return removeChildLoop(llvm::find(*this, Child));
}
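// For instance, re-parenting a subloop; a sketch, assuming \c Parent,
// \c NewParent and \c Child are `Loop *` and \c Child is currently a subloop
// of \c Parent:
//
//   Loop *Detached = Parent->removeChildLoop(Child);
//   NewParent->addChildLoop(Detached);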
/// This adds a basic block directly to the basic block list.
/// This should only be used by transformations that create new loops. Other
/// transformations should use addBasicBlockToLoop.
void addBlockEntry(BlockT *BB) {
assert(!isInvalid() && "Loop not in a valid state!");
Blocks.push_back(BB);
DenseBlockSet.insert(BB);
}
/// Interface to reverse Blocks[from, end of loop] in this loop.
void reverseBlock(unsigned from) {
assert(!isInvalid() && "Loop not in a valid state!");
std::reverse(Blocks.begin() + from, Blocks.end());
}
/// Interface to do reserve() for Blocks.
void reserveBlocks(unsigned size) {
assert(!isInvalid() && "Loop not in a valid state!");
Blocks.reserve(size);
}
/// This method is used to move BB (which must be part of this loop) to be the
/// loop header of the loop (the block that dominates all others).
void moveToHeader(BlockT *BB) {
assert(!isInvalid() && "Loop not in a valid state!");
if (Blocks[0] == BB)
return;
for (unsigned i = 0;; ++i) {
assert(i != Blocks.size() && "Loop does not contain BB!");
if (Blocks[i] == BB) {
Blocks[i] = Blocks[0];
Blocks[0] = BB;
return;
}
}
}
/// This removes the specified basic block from the current loop, updating the
/// Blocks as appropriate. This does not update the mapping in the LoopInfo
/// class.
void removeBlockFromLoop(BlockT *BB) {
assert(!isInvalid() && "Loop not in a valid state!");
auto I = find(Blocks, BB);
assert(I != Blocks.end() && "BB is not in this loop's block list!");
Blocks.erase(I);
DenseBlockSet.erase(BB);
}
/// Verify loop structure
void verifyLoop() const;
/// Verify loop structure of this loop and all nested loops.
void verifyLoopNest(DenseSet<const LoopT *> *Loops) const;
/// Returns true if the loop is annotated parallel.
///
/// Derived classes can override this method using static template
/// polymorphism.
bool isAnnotatedParallel() const { return false; }
/// Print loop with all the BBs inside it.
void print(raw_ostream &OS, bool Verbose = false, bool PrintNested = true,
unsigned Depth = 0) const;
protected:
friend class LoopInfoBase<BlockT, LoopT>;
/// This creates an empty loop.
LoopBase() : ParentLoop(nullptr) {}
explicit LoopBase(BlockT *BB) : ParentLoop(nullptr) {
Blocks.push_back(BB);
DenseBlockSet.insert(BB);
}
// Since loop passes like SCEV are allowed to key analysis results off of
// `Loop` pointers, we cannot re-use pointers within a loop pass manager.
// This means loop passes should not be `delete` ing `Loop` objects directly
// (and risk a later `Loop` allocation re-using the address of a previous one)
// but should be using LoopInfo::markAsRemoved, which keeps around the `Loop`
// pointer till the end of the lifetime of the `LoopInfo` object.
//
// To make it easier to follow this rule, we mark the destructor as
// non-public.
~LoopBase() {
for (auto *SubLoop : SubLoops)
SubLoop->~LoopT();
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
IsInvalid = true;
#endif
SubLoops.clear();
Blocks.clear();
DenseBlockSet.clear();
ParentLoop = nullptr;
}
};
template <class BlockT, class LoopT>
raw_ostream &operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) {
Loop.print(OS);
return OS;
}
// Implementation in LoopInfoImpl.h
extern template class LoopBase<BasicBlock, Loop>;
/// Represents a single loop in the control flow graph. Note that not all SCCs
/// in the CFG are necessarily loops.
-class Loop : public LoopBase<BasicBlock, Loop> {
+class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> {
public:
/// A range representing the start and end location of a loop.
class LocRange {
DebugLoc Start;
DebugLoc End;
public:
LocRange() {}
LocRange(DebugLoc Start) : Start(Start), End(Start) {}
LocRange(DebugLoc Start, DebugLoc End)
: Start(std::move(Start)), End(std::move(End)) {}
const DebugLoc &getStart() const { return Start; }
const DebugLoc &getEnd() const { return End; }
/// Check for null.
///
explicit operator bool() const { return Start && End; }
};
/// Return true if the specified value is loop invariant.
bool isLoopInvariant(const Value *V) const;
/// Return true if all the operands of the specified instruction are loop
/// invariant.
bool hasLoopInvariantOperands(const Instruction *I) const;
/// If the given value is an instruction inside of the loop and it can be
/// hoisted, do so to make it trivially loop-invariant.
/// Return true if the value after any hoisting is loop invariant. This
/// function can be used as a slightly more aggressive replacement for
/// isLoopInvariant.
///
/// If InsertPt is specified, it is the point to hoist instructions to.
/// If null, the terminator of the loop preheader is used.
bool makeLoopInvariant(Value *V, bool &Changed,
Instruction *InsertPt = nullptr,
MemorySSAUpdater *MSSAU = nullptr) const;
/// If the given instruction is inside of the loop and it can be hoisted, do
/// so to make it trivially loop-invariant.
/// Return true if the instruction after any hoisting is loop invariant. This
/// function can be used as a slightly more aggressive replacement for
/// isLoopInvariant.
///
/// If InsertPt is specified, it is the point to hoist instructions to.
/// If null, the terminator of the loop preheader is used.
///
bool makeLoopInvariant(Instruction *I, bool &Changed,
Instruction *InsertPt = nullptr,
MemorySSAUpdater *MSSAU = nullptr) const;
/// Check to see if the loop has a canonical induction variable: an integer
/// recurrence that starts at 0 and increments by one each time through the
/// loop. If so, return the phi node that corresponds to it.
///
/// The IndVarSimplify pass transforms loops to have a canonical induction
/// variable.
///
PHINode *getCanonicalInductionVariable() const;
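// A sketch of typical use (assuming \c L is a `Loop *` and
// \c analyzeInductionUses is a hypothetical helper):
//
//   if (PHINode *IndVar = L->getCanonicalInductionVariable())
//     analyzeInductionUses(IndVar); // IndVar starts at 0 and steps by 1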
/// Get the latch condition instruction.
ICmpInst *getLatchCmpInst() const;
/// Obtain the unique incoming and back edge. Return false if they are
/// non-unique or the loop is dead; otherwise, return true.
bool getIncomingAndBackEdge(BasicBlock *&Incoming,
BasicBlock *&Backedge) const;
/// Below are some utilities to get the loop guard, loop bounds and induction
/// variable, and to check if a given phinode is an auxiliary induction
/// variable, if the loop is guarded, and if the loop is canonical.
///
/// Here is an example:
/// \code
/// for (int i = lb; i < ub; i+=step)
/// <loop body>
/// --- pseudo LLVMIR ---
/// beforeloop:
/// guardcmp = (lb < ub)
/// if (guardcmp) goto preheader; else goto afterloop
/// preheader:
/// loop:
/// i_1 = phi[{lb, preheader}, {i_2, latch}]
/// <loop body>
/// i_2 = i_1 + step
/// latch:
/// cmp = (i_2 < ub)
/// if (cmp) goto loop
/// exit:
/// afterloop:
/// \endcode
///
/// - getBounds
/// - getInitialIVValue --> lb
/// - getStepInst --> i_2 = i_1 + step
/// - getStepValue --> step
/// - getFinalIVValue --> ub
/// - getCanonicalPredicate --> '<'
/// - getDirection --> Increasing
///
/// - getInductionVariable --> i_1
/// - isAuxiliaryInductionVariable(x) --> true if x == i_1
/// - getLoopGuardBranch()
/// --> `if (guardcmp) goto preheader; else goto afterloop`
/// - isGuarded() --> true
/// - isCanonical --> false
struct LoopBounds {
/// Return the LoopBounds object if
/// - the given \p IndVar is an induction variable
/// - the initial value of the induction variable can be found
/// - the step instruction of the induction variable can be found
/// - the final value of the induction variable can be found
///
/// Else None.
static Optional<Loop::LoopBounds> getBounds(const Loop &L, PHINode &IndVar,
ScalarEvolution &SE);
/// Get the initial value of the loop induction variable.
Value &getInitialIVValue() const { return InitialIVValue; }
/// Get the instruction that updates the loop induction variable.
Instruction &getStepInst() const { return StepInst; }
/// Get the step that the loop induction variable gets updated by in each
/// loop iteration. Return nullptr if not found.
Value *getStepValue() const { return StepValue; }
/// Get the final value of the loop induction variable.
Value &getFinalIVValue() const { return FinalIVValue; }
/// Return the canonical predicate for the latch compare instruction, if
/// able to be calculated. Else BAD_ICMP_PREDICATE.
///
/// A predicate is considered as canonical if requirements below are all
/// satisfied:
/// 1. The first successor of the latch branch is the loop header
/// If not, invert the predicate.
/// 2. One of the operands of the latch comparison is StepInst
/// If not, and
/// - if the current calculated predicate is not ne or eq, flip the
/// predicate.
/// - else if the loop is increasing, return slt
/// (notice that it is safe to change from ne or eq to sign compare)
/// - else if the loop is decreasing, return sgt
/// (notice that it is safe to change from ne or eq to sign compare)
///
/// Here is an example when both (1) and (2) are not satisfied:
/// \code
/// loop.header:
/// %iv = phi [%initialiv, %loop.preheader], [%inc, %loop.header]
/// %inc = add %iv, %step
/// %cmp = slt %iv, %finaliv
/// br %cmp, %loop.exit, %loop.header
/// loop.exit:
/// \endcode
/// - The second successor of the latch branch is the loop header instead
/// of the first successor (slt -> sge)
/// - The first operand of the latch comparison (%cmp) is the IndVar (%iv)
/// instead of the StepInst (%inc) (sge -> sgt)
///
/// The predicate would be sgt if both (1) and (2) are satisfied.
/// getCanonicalPredicate() returns sgt for this example.
/// Note: The IR is not changed.
ICmpInst::Predicate getCanonicalPredicate() const;
/// An enum for the direction of the loop
/// - for (int i = 0; i < ub; ++i) --> Increasing
/// - for (int i = ub; i > 0; --i) --> Decreasing
/// - for (int i = x; i != y; i+=z) --> Unknown
enum class Direction { Increasing, Decreasing, Unknown };
/// Get the direction of the loop.
Direction getDirection() const;
private:
LoopBounds(const Loop &Loop, Value &I, Instruction &SI, Value *SV, Value &F,
ScalarEvolution &SE)
: L(Loop), InitialIVValue(I), StepInst(SI), StepValue(SV),
FinalIVValue(F), SE(SE) {}
const Loop &L;
// The initial value of the loop induction variable
Value &InitialIVValue;
// The instruction that updates the loop induction variable
Instruction &StepInst;
// The value that the loop induction variable gets updated by in each loop
// iteration
Value *StepValue;
// The final value of the loop induction variable
Value &FinalIVValue;
ScalarEvolution &SE;
};
/// Return the struct LoopBounds collected if all struct members are found,
/// else None.
Optional<LoopBounds> getBounds(ScalarEvolution &SE) const;
/// Return the loop induction variable if found, else return nullptr.
/// An instruction is considered as the loop induction variable if
/// - it is an induction variable of the loop; and
/// - it is used to determine the condition of the branch in the loop latch
///
/// Note: the induction variable doesn't need to be canonical, i.e. starts at
/// zero and increments by one each time through the loop (but it can be).
PHINode *getInductionVariable(ScalarEvolution &SE) const;
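// For example (a sketch, assuming \c L is a `const Loop *`, \c SE is a
// `ScalarEvolution &`, and llvm::errs() is in scope):
//
//   if (PHINode *IV = L->getInductionVariable(SE))
//     if (Optional<Loop::LoopBounds> Bounds = L->getBounds(SE))
//       errs() << "IV " << IV->getName() << " starts at "
//              << Bounds->getInitialIVValue() << "\n";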
/// Get the loop induction descriptor for the loop induction variable. Return
/// true if the loop induction variable is found.
bool getInductionDescriptor(ScalarEvolution &SE,
InductionDescriptor &IndDesc) const;
/// Return true if the given PHINode \p AuxIndVar is
/// - in the loop header
/// - not used outside of the loop
/// - incremented by a loop invariant step for each loop iteration
/// - step instruction opcode should be add or sub
/// Note: auxiliary induction variable is not required to be used in the
/// conditional branch in the loop latch. (but it can be)
bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
ScalarEvolution &SE) const;
/// Return the loop guard branch, if it exists.
///
/// This currently only works on simplified loops, as it requires a preheader
/// and a latch to identify the guard. It will work on loops of the form:
/// \code
/// GuardBB:
/// br cond1, Preheader, ExitSucc <== GuardBranch
/// Preheader:
/// br Header
/// Header:
/// ...
/// br Latch
/// Latch:
/// br cond2, Header, ExitBlock
/// ExitBlock:
/// br ExitSucc
/// ExitSucc:
/// \endcode
BranchInst *getLoopGuardBranch() const;
/// Return true iff the loop is
/// - in simplified, rotated form, and
/// - guarded by a loop guard branch.
bool isGuarded() const { return (getLoopGuardBranch() != nullptr); }
/// Return true if the loop is in rotated form.
///
/// This does not check whether the loop was rotated by the loop rotation
/// pass; it only checks whether the loop is in rotated form (has a valid
/// latch that exits the loop).
bool isRotatedForm() const {
assert(!isInvalid() && "Loop not in a valid state!");
BasicBlock *Latch = getLoopLatch();
return Latch && isLoopExiting(Latch);
}
/// Return true if the loop induction variable starts at zero and increments
/// by one each time through the loop.
bool isCanonical(ScalarEvolution &SE) const;
/// Return true if the Loop is in LCSSA form.
bool isLCSSAForm(const DominatorTree &DT) const;
/// Return true if this Loop and all inner subloops are in LCSSA form.
bool isRecursivelyLCSSAForm(const DominatorTree &DT,
const LoopInfo &LI) const;
/// Return true if the Loop is in the form that the LoopSimplify form
/// transforms loops to, which is sometimes called normal form.
bool isLoopSimplifyForm() const;
/// Return true if the loop body is safe to clone in practice.
bool isSafeToClone() const;
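// Transformations commonly bail out early when the required forms are
// missing; a sketch, assuming \c L is a `Loop *` and \c DT is a
// `DominatorTree &`:
//
//   if (!L->isLoopSimplifyForm() || !L->isLCSSAForm(DT) || !L->isSafeToClone())
//     return false;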
/// Returns true if the loop is annotated parallel.
///
/// A parallel loop can be assumed to not contain any dependencies between
/// iterations by the compiler. That is, any loop-carried dependency checking
/// can be skipped completely when parallelizing the loop on the target
/// machine. Thus, if the parallel loop information originates from the
/// programmer, e.g. via the OpenMP parallel for pragma, it is the
/// programmer's responsibility to ensure there are no loop-carried
/// dependencies. The final execution order of the instructions across
/// iterations is not guaranteed, thus, the end result might or might not
/// implement actual concurrent execution of instructions across multiple
/// iterations.
bool isAnnotatedParallel() const;
/// Return the llvm.loop loop id metadata node for this loop if it is present.
///
/// If this loop contains the same llvm.loop metadata on each branch to the
/// header then the node is returned. If any latch instruction does not
/// contain llvm.loop or if multiple latches contain different nodes then
/// 0 is returned.
MDNode *getLoopID() const;
/// Set the llvm.loop loop id metadata for this loop.
///
/// The LoopID metadata node will be added to each terminator instruction in
/// the loop that branches to the loop header.
///
/// The LoopID metadata node should have one or more operands and the first
/// operand should be the node itself.
void setLoopID(MDNode *LoopID) const;
/// Add llvm.loop.unroll.disable to this loop's loop id metadata.
///
/// Remove existing unroll metadata and add unroll disable metadata to
/// indicate the loop has already been unrolled. This prevents a loop
/// from being unrolled more than is directed by a pragma if the loop
/// unrolling pass is run more than once (which it generally is).
void setLoopAlreadyUnrolled();
/// Add llvm.loop.mustprogress to this loop's loop id metadata.
void setLoopMustProgress();
void dump() const;
void dumpVerbose() const;
/// Return the debug location of the start of this loop.
/// This looks for a BB terminating instruction with a known debug
/// location by looking at the preheader and header blocks. If it
/// cannot find a terminating instruction with location information,
/// it returns an unknown location.
DebugLoc getStartLoc() const;
/// Return the source code span of the loop.
LocRange getLocRange() const;
StringRef getName() const {
if (BasicBlock *Header = getHeader())
if (Header->hasName())
return Header->getName();
return "<unnamed loop>";
}
private:
Loop() = default;
friend class LoopInfoBase<BasicBlock, Loop>;
friend class LoopBase<BasicBlock, Loop>;
explicit Loop(BasicBlock *BB) : LoopBase<BasicBlock, Loop>(BB) {}
~Loop() = default;
};
//===----------------------------------------------------------------------===//
/// This class builds and contains all of the top-level loop
/// structures in the specified function.
///
template <class BlockT, class LoopT> class LoopInfoBase {
// BBMap - Mapping of basic blocks to the innermost loop they occur in
DenseMap<const BlockT *, LoopT *> BBMap;
std::vector<LoopT *> TopLevelLoops;
BumpPtrAllocator LoopAllocator;
friend class LoopBase<BlockT, LoopT>;
friend class LoopInfo;
void operator=(const LoopInfoBase &) = delete;
LoopInfoBase(const LoopInfoBase &) = delete;
public:
LoopInfoBase() {}
~LoopInfoBase() { releaseMemory(); }
LoopInfoBase(LoopInfoBase &&Arg)
: BBMap(std::move(Arg.BBMap)),
TopLevelLoops(std::move(Arg.TopLevelLoops)),
LoopAllocator(std::move(Arg.LoopAllocator)) {
// We have to clear the argument's top-level loops as we've taken ownership.
Arg.TopLevelLoops.clear();
}
LoopInfoBase &operator=(LoopInfoBase &&RHS) {
BBMap = std::move(RHS.BBMap);
for (auto *L : TopLevelLoops)
L->~LoopT();
TopLevelLoops = std::move(RHS.TopLevelLoops);
LoopAllocator = std::move(RHS.LoopAllocator);
RHS.TopLevelLoops.clear();
return *this;
}
void releaseMemory() {
BBMap.clear();
for (auto *L : TopLevelLoops)
L->~LoopT();
TopLevelLoops.clear();
LoopAllocator.Reset();
}
template <typename... ArgsTy> LoopT *AllocateLoop(ArgsTy &&... Args) {
LoopT *Storage = LoopAllocator.Allocate<LoopT>();
return new (Storage) LoopT(std::forward<ArgsTy>(Args)...);
}
/// iterator/begin/end - The interface to the top-level loops in the current
/// function.
///
typedef typename std::vector<LoopT *>::const_iterator iterator;
typedef
typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator;
iterator begin() const { return TopLevelLoops.begin(); }
iterator end() const { return TopLevelLoops.end(); }
reverse_iterator rbegin() const { return TopLevelLoops.rbegin(); }
reverse_iterator rend() const { return TopLevelLoops.rend(); }
bool empty() const { return TopLevelLoops.empty(); }
/// Return all of the loops in the function in preorder across the loop
/// nests, with siblings in forward program order.
///
/// Note that because loops form a forest of trees, preorder is equivalent to
/// reverse postorder.
SmallVector<LoopT *, 4> getLoopsInPreorder();
/// Return all of the loops in the function in preorder across the loop
/// nests, with siblings in *reverse* program order.
///
/// Note that because loops form a forest of trees, preorder is equivalent to
/// reverse postorder.
///
/// Also note that this is *not* a reverse preorder. Only the siblings are in
/// reverse program order.
SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder();
/// Return the innermost loop that BB lives in. If a basic block is in no
/// loop (for example the entry node), null is returned.
LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); }
/// Same as getLoopFor.
const LoopT *operator[](const BlockT *BB) const { return getLoopFor(BB); }
/// Return the loop nesting level of the specified block. A depth of 0 means
/// the block is not inside any loop.
unsigned getLoopDepth(const BlockT *BB) const {
const LoopT *L = getLoopFor(BB);
return L ? L->getLoopDepth() : 0;
}
// True if the block is a loop header node
bool isLoopHeader(const BlockT *BB) const {
const LoopT *L = getLoopFor(BB);
return L && L->getHeader() == BB;
}
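// Typical queries against this mapping, for the `LoopInfo` instantiation (a
// sketch, assuming \c LI is a `LoopInfo &`, \c BB is a `BasicBlock *`, and
// llvm::errs() is in scope):
//
//   if (Loop *L = LI.getLoopFor(BB))
//     errs() << BB->getName() << " is at depth " << LI.getLoopDepth(BB)
//            << (LI.isLoopHeader(BB) ? " (header)\n" : "\n");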
/// Return the top-level loops.
const std::vector<LoopT *> &getTopLevelLoops() const { return TopLevelLoops; }
/// Return the top-level loops.
std::vector<LoopT *> &getTopLevelLoopsVector() { return TopLevelLoops; }
/// This removes the specified top-level loop from this loop info object.
/// The loop is not deleted, as it will presumably be inserted into
/// another loop.
LoopT *removeLoop(iterator I) {
assert(I != end() && "Cannot remove end iterator!");
LoopT *L = *I;
assert(L->isOutermost() && "Not a top-level loop!");
TopLevelLoops.erase(TopLevelLoops.begin() + (I - begin()));
return L;
}
/// Change the top-level loop that contains BB to the specified loop.
/// This should be used by transformations that restructure the loop hierarchy
/// tree.
void changeLoopFor(BlockT *BB, LoopT *L) {
if (!L) {
BBMap.erase(BB);
return;
}
BBMap[BB] = L;
}
/// Replace the specified loop in the top-level loops list with the indicated
/// loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop) {
auto I = find(TopLevelLoops, OldLoop);
assert(I != TopLevelLoops.end() && "Old loop not at top level!");
*I = NewLoop;
assert(!NewLoop->ParentLoop && !OldLoop->ParentLoop &&
"Loops already embedded into a subloop!");
}
/// This adds the specified loop to the collection of top-level loops.
void addTopLevelLoop(LoopT *New) {
assert(New->isOutermost() && "Loop already in subloop!");
TopLevelLoops.push_back(New);
}
/// This method completely removes BB from all data structures,
/// including all of the Loop objects it is nested in and our mapping from
/// BasicBlocks to loops.
void removeBlock(BlockT *BB) {
auto I = BBMap.find(BB);
if (I != BBMap.end()) {
for (LoopT *L = I->second; L; L = L->getParentLoop())
L->removeBlockFromLoop(BB);
BBMap.erase(I);
}
}
// Internals
static bool isNotAlreadyContainedIn(const LoopT *SubLoop,
const LoopT *ParentLoop) {
if (!SubLoop)
return true;
if (SubLoop == ParentLoop)
return false;
return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop);
}
/// Create the loop forest using a stable algorithm.
void analyze(const DominatorTreeBase<BlockT, false> &DomTree);
// Debugging
void print(raw_ostream &OS) const;
void verify(const DominatorTreeBase<BlockT, false> &DomTree) const;
/// Destroy a loop that has been removed from the `LoopInfo` nest.
///
/// This runs the destructor of the loop object making it invalid to
/// reference afterward. The memory is retained so that the *pointer* to the
/// loop remains valid.
///
/// The caller is responsible for removing this loop from the loop nest and
/// otherwise disconnecting it from the broader `LoopInfo` data structures.
/// Callers that don't naturally handle this themselves should probably call
/// `erase' instead.
void destroy(LoopT *L) {
L->~LoopT();
// Since LoopAllocator is a BumpPtrAllocator, this Deallocate only poisons
// \c L, but the pointer remains valid for non-dereferencing uses.
LoopAllocator.Deallocate(L);
}
};
// Implementation in LoopInfoImpl.h
extern template class LoopInfoBase<BasicBlock, Loop>;
class LoopInfo : public LoopInfoBase<BasicBlock, Loop> {
typedef LoopInfoBase<BasicBlock, Loop> BaseT;
friend class LoopBase<BasicBlock, Loop>;
void operator=(const LoopInfo &) = delete;
LoopInfo(const LoopInfo &) = delete;
public:
LoopInfo() {}
explicit LoopInfo(const DominatorTreeBase<BasicBlock, false> &DomTree);
LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
LoopInfo &operator=(LoopInfo &&RHS) {
BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
return *this;
}
/// Handle invalidation explicitly.
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &);
// Most of the public interface is provided via LoopInfoBase.
/// Update LoopInfo after removing the last backedge from a loop. This updates
/// the loop forest and parent loops for each block so that \c L is no longer
/// referenced, but does not actually delete \c L immediately. The pointer
/// will remain valid until this LoopInfo's memory is released.
void erase(Loop *L);
/// Returns true if replacing From with To everywhere is guaranteed to
/// preserve LCSSA form.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To) {
// Preserving LCSSA form is only problematic if the replacing value is an
// instruction.
Instruction *I = dyn_cast<Instruction>(To);
if (!I)
return true;
// If both instructions are defined in the same basic block then replacement
// cannot break LCSSA form.
if (I->getParent() == From->getParent())
return true;
// If the instruction is not defined in a loop then it can safely replace
// anything.
Loop *ToLoop = getLoopFor(I->getParent());
if (!ToLoop)
return true;
// If the replacing instruction is defined in the same loop as the original
// instruction, or in a loop that contains it as an inner loop, then using
// it as a replacement will not break LCSSA form.
return ToLoop->contains(getLoopFor(From->getParent()));
}
/// Checks if moving a specific instruction can break LCSSA in any loop.
///
/// Return true if moving \p Inst to before \p NewLoc will not break LCSSA,
/// assuming that the function containing \p Inst and \p NewLoc is currently
/// in LCSSA form.
bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc) {
assert(Inst->getFunction() == NewLoc->getFunction() &&
"Can't reason about IPO!");
auto *OldBB = Inst->getParent();
auto *NewBB = NewLoc->getParent();
// Movement within the same loop does not break LCSSA (the equality check is
// to avoid doing a hashtable lookup in case of intra-block movement).
if (OldBB == NewBB)
return true;
auto *OldLoop = getLoopFor(OldBB);
auto *NewLoop = getLoopFor(NewBB);
if (OldLoop == NewLoop)
return true;
// Check if Outer contains Inner; with the null loop counting as the
// "outermost" loop.
auto Contains = [](const Loop *Outer, const Loop *Inner) {
return !Outer || Outer->contains(Inner);
};
// To check that the movement of Inst to before NewLoc does not break LCSSA,
// we need to check two sets of uses for possible LCSSA violations at
// NewLoc: the users of NewInst, and the operands of NewInst.
// If we know we're hoisting Inst out of an inner loop to an outer loop,
// then the uses *of* Inst don't need to be checked.
if (!Contains(NewLoop, OldLoop)) {
for (Use &U : Inst->uses()) {
auto *UI = cast<Instruction>(U.getUser());
auto *UBB = isa<PHINode>(UI) ? cast<PHINode>(UI)->getIncomingBlock(U)
: UI->getParent();
if (UBB != NewBB && getLoopFor(UBB) != NewLoop)
return false;
}
}
// If we know we're sinking Inst from an outer loop into an inner loop, then
// the *operands* of Inst don't need to be checked.
if (!Contains(OldLoop, NewLoop)) {
// See below on why we can't handle phi nodes here.
if (isa<PHINode>(Inst))
return false;
for (Use &U : Inst->operands()) {
auto *DefI = dyn_cast<Instruction>(U.get());
if (!DefI)
return false;
// This would need adjustment if we allow Inst to be a phi node -- the
// new use block won't simply be NewBB.
auto *DefBlock = DefI->getParent();
if (DefBlock != NewBB && getLoopFor(DefBlock) != NewLoop)
return false;
}
}
return true;
}
// Return true if a new use of V added in ExitBB would require an LCSSA PHI
// to be inserted at the beginning of the block. Note that V is assumed to
// dominate ExitBB, and ExitBB must be the exit block of some loop. The
// IR is assumed to be in LCSSA form before the planned insertion.
bool wouldBeOutOfLoopUseRequiringLCSSA(const Value *V,
const BasicBlock *ExitBB) const;
};
// Allow clients to walk the list of nested loops...
template <> struct GraphTraits<const Loop *> {
typedef const Loop *NodeRef;
typedef LoopInfo::iterator ChildIteratorType;
static NodeRef getEntryNode(const Loop *L) { return L; }
static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
template <> struct GraphTraits<Loop *> {
typedef Loop *NodeRef;
typedef LoopInfo::iterator ChildIteratorType;
static NodeRef getEntryNode(Loop *L) { return L; }
static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
/// Analysis pass that exposes the \c LoopInfo for a function.
class LoopAnalysis : public AnalysisInfoMixin<LoopAnalysis> {
friend AnalysisInfoMixin<LoopAnalysis>;
static AnalysisKey Key;
public:
typedef LoopInfo Result;
LoopInfo run(Function &F, FunctionAnalysisManager &AM);
};
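// With the new pass manager the result is fetched from the analysis manager;
// a sketch, assuming \c F is a `Function &`, \c AM is a
// `FunctionAnalysisManager &`, and llvm::errs() is in scope:
//
//   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
//   for (Loop *TopLevel : LI)
//     errs() << "top-level loop: " << TopLevel->getName() << "\n";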
/// Printer pass for the \c LoopAnalysis results.
class LoopPrinterPass : public PassInfoMixin<LoopPrinterPass> {
raw_ostream &OS;
public:
explicit LoopPrinterPass(raw_ostream &OS) : OS(OS) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// Verifier pass for the \c LoopAnalysis results.
struct LoopVerifierPass : public PassInfoMixin<LoopVerifierPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// The legacy pass manager's analysis pass to compute loop information.
class LoopInfoWrapperPass : public FunctionPass {
LoopInfo LI;
public:
static char ID; // Pass identification, replacement for typeid
LoopInfoWrapperPass();
LoopInfo &getLoopInfo() { return LI; }
const LoopInfo &getLoopInfo() const { return LI; }
/// Calculate the natural loop information for a given function.
bool runOnFunction(Function &F) override;
void verifyAnalysis() const override;
void releaseMemory() override { LI.releaseMemory(); }
void print(raw_ostream &O, const Module *M = nullptr) const override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
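// Legacy pass manager clients declare the dependency and then fetch the
// result from the wrapper pass; a sketch of the relevant overrides inside a
// hypothetical FunctionPass:
//
//   void getAnalysisUsage(AnalysisUsage &AU) const override {
//     AU.addRequired<LoopInfoWrapperPass>();
//   }
//   bool runOnFunction(Function &F) override {
//     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
//     errs() << "found " << LI.getTopLevelLoops().size() << " top-level loops\n";
//     return false; // analysis-only: the IR is not modified
//   }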
/// Function to print a loop's contents as LLVM's text IR assembly.
void printLoop(Loop &L, raw_ostream &OS, const std::string &Banner = "");
/// Find and return the loop attribute node for the attribute @p Name in
/// @p LoopID. Return nullptr if there is no such attribute.
MDNode *findOptionMDForLoopID(MDNode *LoopID, StringRef Name);
/// Find string metadata for a loop.
///
/// Returns the MDNode where the first operand is the metadata's name. The
/// following operands are the metadata's values. If no metadata with @p Name is
/// found, return nullptr.
MDNode *findOptionMDForLoop(const Loop *TheLoop, StringRef Name);
Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
StringRef Name);
/// Returns true if Name is applied to TheLoop and enabled.
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name);
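// For example (a sketch, assuming \c L is a `const Loop *`):
//
//   if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
//     return false; // the user asked for this loop not to be unrolled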
/// Find named metadata for a loop with an integer value.
llvm::Optional<int>
getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name);
/// Find string metadata for loop
///
/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
StringRef Name);
/// Look for the loop attribute that requires progress within the loop.
/// Note: Most consumers probably want "isMustProgress" which checks
/// the containing function attribute too.
bool hasMustProgress(const Loop *L);
/// Return true if this loop can be assumed to make progress (i.e. it cannot
/// be an infinite, side-effect-free loop without also invoking undefined
/// behavior).
bool isMustProgress(const Loop *L);
/// Return whether an MDNode might represent an access group.
///
/// Access group metadata nodes have to be distinct and empty. Being
/// always-empty ensures that it never needs to be changed (which -- because
/// MDNodes are designed immutable -- would require creating a new MDNode). Note
/// that this is not a sufficient condition: not every distinct and empty MDNode
/// is representing an access group.
bool isValidAsAccessGroup(MDNode *AccGroup);
/// Create a new LoopID after the loop has been transformed.
///
/// This can be used when no follow-up loop attributes are defined
/// (llvm::makeFollowupLoopID returning None) to stop transformations from
/// being applied again.
///
/// @param Context The LLVMContext in which to create the new LoopID.
/// @param OrigLoopID The original LoopID; can be nullptr if the original
/// loop has no LoopID.
/// @param RemovePrefixes Remove all loop attributes that have these prefixes.
/// Use to remove metadata of the transformation that has
/// been applied.
/// @param AddAttrs Add these loop attributes to the new LoopID.
///
/// @return A new LoopID that can be applied using Loop::setLoopID().
llvm::MDNode *
makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID,
llvm::ArrayRef<llvm::StringRef> RemovePrefixes,
llvm::ArrayRef<llvm::MDNode *> AddAttrs);
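// For example, dropping any unroll-related attributes after unrolling a loop;
// a sketch, assuming \c L is a `Loop *` and using "llvm.loop.unroll." as an
// illustrative prefix:
//
//   if (MDNode *OrigID = L->getLoopID())
//     L->setLoopID(makePostTransformationMetadata(
//         L->getHeader()->getContext(), OrigID, {"llvm.loop.unroll."}, {}));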
} // End llvm namespace
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h
index 9a749a1c8eae..df10e126c31a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h
@@ -1,181 +1,181 @@
//===- llvm/Analysis/LoopNestAnalysis.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the interface for the loop nest analysis.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_LOOPNESTANALYSIS_H
#define LLVM_ANALYSIS_LOOPNESTANALYSIS_H
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
namespace llvm {
using LoopVectorTy = SmallVector<Loop *, 8>;
class LPMUpdater;
/// This class represents a loop nest and can be used to query its properties.
-class LoopNest {
+class LLVM_EXTERNAL_VISIBILITY LoopNest {
public:
/// Construct a loop nest rooted by loop \p Root.
LoopNest(Loop &Root, ScalarEvolution &SE);
LoopNest() = delete;
/// Construct a LoopNest object.
static std::unique_ptr<LoopNest> getLoopNest(Loop &Root, ScalarEvolution &SE);
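// For example (a sketch, assuming \c Root is a `Loop &`, \c SE is a
// `ScalarEvolution &`, and llvm::errs() is in scope):
//
//   std::unique_ptr<LoopNest> LN = LoopNest::getLoopNest(Root, SE);
//   if (LN && LN->areAllLoopsSimplifyForm())
//     errs() << "nest " << LN->getName() << " has depth "
//            << LN->getNestDepth() << "\n";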
/// Return true if the given loops \p OuterLoop and \p InnerLoop are
/// perfectly nested with respect to each other, and false otherwise.
/// Example:
/// \code
/// for(i)
/// for(j)
/// for(k)
/// \endcode
/// arePerfectlyNested(loop_i, loop_j, SE) would return true.
/// arePerfectlyNested(loop_j, loop_k, SE) would return true.
/// arePerfectlyNested(loop_i, loop_k, SE) would return false.
static bool arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
ScalarEvolution &SE);
/// Return the maximum nesting depth of the loop nest rooted by loop \p Root.
/// For example given the loop nest:
/// \code
/// for(i) // loop at level 1 and Root of the nest
/// for(j) // loop at level 2
/// <code>
/// for(k) // loop at level 3
/// \endcode
/// getMaxPerfectDepth(Loop_i) would return 2.
static unsigned getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE);
/// Recursively traverse all empty 'single successor' basic blocks of \p From
/// (if there are any). When \p CheckUniquePred is set to true, check if
/// each of the empty single successors has a unique predecessor. Return
/// the last basic block found or \p End if it was reached during the search.
static const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From,
const BasicBlock *End,
bool CheckUniquePred = false);
/// Return the outermost loop in the loop nest.
Loop &getOutermostLoop() const { return *Loops.front(); }
/// Return the innermost loop in the loop nest if the nest has only one
/// innermost loop, and a nullptr otherwise.
/// Note: the innermost loop returned is not necessarily perfectly nested.
Loop *getInnermostLoop() const {
if (Loops.size() == 1)
return Loops.back();
// The loops in the 'Loops' vector have been collected in breadth first
// order, therefore if the last 2 loops in it have the same nesting depth
// there isn't a unique innermost loop in the nest.
Loop *LastLoop = Loops.back();
auto SecondLastLoopIter = ++Loops.rbegin();
return (LastLoop->getLoopDepth() == (*SecondLastLoopIter)->getLoopDepth())
? nullptr
: LastLoop;
}
/// Return the loop at the given \p Index.
Loop *getLoop(unsigned Index) const {
assert(Index < Loops.size() && "Index is out of bounds");
return Loops[Index];
}
/// Return the number of loops in the nest.
size_t getNumLoops() const { return Loops.size(); }
/// Get the loops in the nest.
ArrayRef<Loop *> getLoops() const { return Loops; }
/// Retrieve a vector of perfect loop nests contained in the current loop
/// nest. For example, given the following nest containing 4 loops, this
/// member function would return {{L1,L2},{L3,L4}}.
/// \code
/// for(i) // L1
/// for(j) // L2
/// <code>
/// for(k) // L3
/// for(l) // L4
/// \endcode
SmallVector<LoopVectorTy, 4> getPerfectLoops(ScalarEvolution &SE) const;
/// Return the loop nest depth (i.e. the loop depth of the 'deepest' loop)
/// For example given the loop nest:
/// \code
/// for(i) // loop at level 1 and Root of the nest
/// for(j1) // loop at level 2
/// for(k) // loop at level 3
/// for(j2) // loop at level 2
/// \endcode
/// getNestDepth() would return 3.
unsigned getNestDepth() const {
int NestDepth =
Loops.back()->getLoopDepth() - Loops.front()->getLoopDepth() + 1;
assert(NestDepth > 0 && "Expecting NestDepth to be at least 1");
return NestDepth;
}
/// Return the maximum perfect nesting depth.
unsigned getMaxPerfectDepth() const { return MaxPerfectDepth; }
/// Return true if all loops in the loop nest are in simplify form.
bool areAllLoopsSimplifyForm() const {
return all_of(Loops, [](const Loop *L) { return L->isLoopSimplifyForm(); });
}
/// Return true if all loops in the loop nest are in rotated form.
bool areAllLoopsRotatedForm() const {
return all_of(Loops, [](const Loop *L) { return L->isRotatedForm(); });
}
/// Return the function to which the loop-nest belongs.
Function *getParent() const {
return Loops.front()->getHeader()->getParent();
}
StringRef getName() const { return Loops.front()->getName(); }
protected:
const unsigned MaxPerfectDepth; // maximum perfect nesting depth level.
LoopVectorTy Loops; // the loops in the nest (in breadth first order).
};
raw_ostream &operator<<(raw_ostream &, const LoopNest &);
/// This analysis provides information for a loop nest. The analysis runs on
/// demand and can be initiated via AM.getResult<LoopNestAnalysis>.
class LoopNestAnalysis : public AnalysisInfoMixin<LoopNestAnalysis> {
friend AnalysisInfoMixin<LoopNestAnalysis>;
static AnalysisKey Key;
public:
using Result = LoopNest;
Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
};
/// Printer pass for the \c LoopNest results.
class LoopNestPrinterPass : public PassInfoMixin<LoopNestPrinterPass> {
raw_ostream &OS;
public:
explicit LoopNestPrinterPass(raw_ostream &OS) : OS(OS) {}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
} // namespace llvm
#endif // LLVM_ANALYSIS_LOOPNESTANALYSIS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
index da9e00e0e8e1..5ab58ca0646a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1,2394 +1,2393 @@
//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
/// information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>
namespace llvm {
namespace Intrinsic {
typedef unsigned ID;
}
class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
class BranchInst;
class CallBase;
class ExtractElementInst;
class Function;
class GlobalValue;
class InstCombiner;
class IntrinsicInst;
class LoadInst;
class LoopAccessInfo;
class Loop;
class LoopInfo;
class ProfileSummaryInfo;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;
class VPIntrinsic;
struct KnownBits;
template <typename T> class Optional;
/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
/// This is the pointer that the intrinsic is loading from or storing to.
/// If this is non-null, then analysis/optimization passes can assume that
/// this intrinsic is functionally equivalent to a load/store from this
/// pointer.
Value *PtrVal = nullptr;
// Ordering for atomic operations.
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
// Same Id is set by the target for corresponding load/store intrinsics.
unsigned short MatchingId = 0;
bool ReadMem = false;
bool WriteMem = false;
bool IsVolatile = false;
bool isUnordered() const {
return (Ordering == AtomicOrdering::NotAtomic ||
Ordering == AtomicOrdering::Unordered) &&
!IsVolatile;
}
};
/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
HardwareLoopInfo() = delete;
HardwareLoopInfo(Loop *L) : L(L) {}
Loop *L = nullptr;
BasicBlock *ExitBlock = nullptr;
BranchInst *ExitBranch = nullptr;
const SCEV *ExitCount = nullptr;
- const SCEV *TripCount = nullptr;
IntegerType *CountType = nullptr;
Value *LoopDecrement = nullptr; // Decrement the loop counter by this
// value in every iteration.
bool IsNestingLegal = false; // Can a hardware loop be a parent to
// another hardware loop?
bool CounterInReg = false; // Should loop counter be updated in
// the loop via a phi?
bool PerformEntryTest = false; // Generate the intrinsic which also performs
// icmp ne zero on the loop counter value and
// produces an i1 to guard the loop entry.
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
DominatorTree &DT, bool ForceNestedLoop = false,
bool ForceHardwareLoopPHI = false);
bool canAnalyze(LoopInfo &LI);
};
class IntrinsicCostAttributes {
const IntrinsicInst *II = nullptr;
Type *RetTy = nullptr;
Intrinsic::ID IID;
SmallVector<Type *, 4> ParamTys;
SmallVector<const Value *, 4> Arguments;
FastMathFlags FMF;
// If ScalarizationCost is UINT_MAX, the cost of scalarizing the
// arguments and the return value will be computed based on types.
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
public:
IntrinsicCostAttributes(
Intrinsic::ID Id, const CallBase &CI,
InstructionCost ScalarCost = InstructionCost::getInvalid());
IntrinsicCostAttributes(
Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
InstructionCost ScalarCost = InstructionCost::getInvalid());
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<const Value *> Args);
IntrinsicCostAttributes(
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
const IntrinsicInst *I = nullptr,
InstructionCost ScalarCost = InstructionCost::getInvalid());
Intrinsic::ID getID() const { return IID; }
const IntrinsicInst *getInst() const { return II; }
Type *getReturnType() const { return RetTy; }
FastMathFlags getFlags() const { return FMF; }
InstructionCost getScalarizationCost() const { return ScalarizationCost; }
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
bool isTypeBasedOnly() const {
return Arguments.empty();
}
bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
};
class TargetTransformInfo;
typedef TargetTransformInfo TTI;
/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
/// Construct a TTI object using a type implementing the \c Concept
/// API below.
///
/// This is used by targets to construct a TTI wrapping their target-specific
/// implementation that encodes appropriate costs for their target.
template <typename T> TargetTransformInfo(T Impl);
/// Construct a baseline TTI object using a minimal implementation of
/// the \c Concept API below.
///
/// The TTI implementation will reflect the information in the DataLayout
/// provided if non-null.
explicit TargetTransformInfo(const DataLayout &DL);
// Provide move semantics.
TargetTransformInfo(TargetTransformInfo &&Arg);
TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
// We need to define the destructor out-of-line to define our sub-classes
// out-of-line.
~TargetTransformInfo();
/// Handle the invalidation of this information.
///
/// When used as a result of \c TargetIRAnalysis this method will be called
/// when the function this was computed for changes. When it returns false,
/// the information is preserved across those changes.
bool invalidate(Function &, const PreservedAnalyses &,
FunctionAnalysisManager::Invalidator &) {
// FIXME: We should probably in some way ensure that the subtarget
// information for a function hasn't changed.
return false;
}
/// \name Generic Target Information
/// @{
/// The kind of cost model.
///
/// There are several different cost models that can be customized by the
/// target. The normalization of each cost model may be target specific.
enum TargetCostKind {
TCK_RecipThroughput, ///< Reciprocal throughput.
TCK_Latency, ///< The latency of instruction.
TCK_CodeSize, ///< Instruction code size.
TCK_SizeAndLatency ///< The weighted sum of size and latency.
};
/// Query the cost of a specified instruction.
///
/// Clients should use this interface to query the cost of an existing
/// instruction. The instruction must have a valid parent (basic block).
///
/// Note, this method does not cache the cost calculation and it
/// can be expensive in some cases.
InstructionCost getInstructionCost(const Instruction *I,
enum TargetCostKind kind) const {
InstructionCost Cost;
switch (kind) {
case TCK_RecipThroughput:
Cost = getInstructionThroughput(I);
break;
case TCK_Latency:
Cost = getInstructionLatency(I);
break;
case TCK_CodeSize:
case TCK_SizeAndLatency:
Cost = getUserCost(I, kind);
break;
}
return Cost;
}
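// For example (a sketch, assuming \c TTI is a `const TargetTransformInfo &`,
// \c I is an `Instruction *` with a parent basic block, and llvm::errs() is
// in scope):
//
//   InstructionCost Cost =
//       TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
//   if (Cost.isValid())
//     errs() << "estimated cost: " << *Cost.getValue() << "\n";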
/// Underlying constants for 'cost' values in this interface.
///
/// Many APIs in this interface return a cost. This enum defines the
/// fundamental values that should be used to interpret (and produce) those
/// costs. The costs are returned as an int rather than a member of this
/// enumeration because it is expected that the cost of one IR instruction
/// may have a multiplicative factor to it or otherwise won't fit directly
/// into the enum. Moreover, it is common to sum or average costs which works
/// better as simple integral values. Thus this enum only provides constants.
/// Also note that the returned costs are signed integers to make it natural
/// to add, subtract, and test with zero (a common boundary condition). It is
/// not expected that 2^32 is a realistic cost to be modeling at any point.
///
/// Note that these costs should usually reflect the intersection of code-size
/// cost and execution cost. A free instruction is typically one that folds
/// into another instruction. For example, reg-to-reg moves can often be
/// skipped by renaming the registers in the CPU, but they still are encoded
/// and thus wouldn't be considered 'free' here.
enum TargetCostConstants {
TCC_Free = 0, ///< Expected to fold away in lowering.
TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
};
/// Estimate the cost of a GEP operation when lowered.
InstructionCost
getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
TargetCostKind CostKind = TCK_SizeAndLatency) const;
/// \returns A value by which our inlining threshold should be multiplied.
/// This is primarily used to bump up the inlining threshold wholesale on
/// targets where calls are unusually expensive.
///
/// TODO: This is a rather blunt instrument. Perhaps altering the costs of
/// individual classes of instructions would be better.
unsigned getInliningThresholdMultiplier() const;
/// \returns A value to be added to the inlining threshold.
unsigned adjustInliningThreshold(const CallBase *CB) const;
/// \returns Vector bonus in percent.
///
/// Vector bonuses: We want to more aggressively inline vector-dense kernels
/// and apply this bonus based on the percentage of vector instructions. A
/// bonus is applied if the vector instructions exceed 50% and half that
/// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
/// arbitrary and evolved over time by accident as much as because they are
/// principled bonuses.
/// FIXME: It would be nice to base the bonus values on something more
/// scientific. A target may have no bonus on vector instructions.
int getInlinerVectorBonusPercent() const;
/// \return the expected cost of a memcpy, which could e.g. depend on the
/// source/destination type and alignment and the number of bytes copied.
InstructionCost getMemcpyCost(const Instruction *I) const;
/// \return The estimated number of case clusters when lowering \p 'SI'.
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump
/// table.
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) const;
/// Estimate the cost of a given IR user when lowered.
///
/// This can estimate the cost of either a ConstantExpr or Instruction when
/// lowered.
///
/// \p Operands is a list of operands which can be a result of transformations
/// of the current operands. The number of operands on the list must equal the
/// number of operands the IR user currently has, and their order must match
/// the order of the user's current operands.
///
/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.
InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
TargetCostKind CostKind) const;
/// This is a helper function which calls the two-argument getUserCost
/// with \p Operands which are the current operands U has.
InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const {
SmallVector<const Value *, 4> Operands(U->operand_values());
return getUserCost(U, Operands, CostKind);
}
/// If a branch or a select condition is skewed in one direction by more than
/// this factor, it is very likely to be predicted correctly.
BranchProbability getPredictableBranchThreshold() const;
/// Return true if branch divergence exists.
///
/// Branch divergence has a significantly negative impact on GPU performance
/// when threads in the same wavefront take different paths due to conditional
/// branches.
bool hasBranchDivergence() const;
/// Return true if the target prefers to use GPU divergence analysis to
/// replace the legacy version.
bool useGPUDivergenceAnalysis() const;
/// Returns whether V is a source of divergence.
///
/// This function provides the target-dependent information for
/// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
/// first builds the dependency graph, and then runs the reachability
/// algorithm starting with the sources of divergence.
bool isSourceOfDivergence(const Value *V) const;
// Returns true for the target specific
// set of operations which produce uniform result
// even taking non-uniform arguments
bool isAlwaysUniform(const Value *V) const;
/// Returns the address space ID for a target's 'flat' address space. Note
/// this is not necessarily the same as addrspace(0), which LLVM sometimes
/// refers to as the generic address space. The flat address space is a
/// generic address space that can be used to access multiple segments of memory
/// with different address spaces. Access of a memory location through a
/// pointer with this address space is expected to be legal but slower
/// compared to the same memory location accessed through a pointer with a
/// different address space.
///
/// This is for targets with different pointer representations which can
/// be converted with the addrspacecast instruction. If a pointer is converted
/// to this address space, optimizations should attempt to replace the access
/// with the source address space.
///
/// \returns ~0u if the target does not have such a flat address space to
/// optimize away.
unsigned getFlatAddressSpace() const;
/// Return any intrinsic address operand indexes which may be rewritten if
/// they use a flat address space pointer.
///
/// \returns true if the intrinsic was handled.
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const;
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
unsigned getAssumedAddrSpace(const Value *V) const;
/// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
/// NewV, which has a different address space. This should happen for every
/// operand index that collectFlatAddressOperands returned for the intrinsic.
/// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
/// new value (which may be the original \p II with modified operands).
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const;
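// Illustrative sketch of the intended flow (names such as OldPtr/NewPtr are
// placeholders, not part of this interface; assumes TTI and an
// IntrinsicInst *II are in scope):
//
//   SmallVector<int, 4> OpIndexes;
//   if (TTI.collectFlatAddressOperands(OpIndexes, II->getIntrinsicID()))
//     if (Value *Rewritten =
//             TTI.rewriteIntrinsicWithAddressSpace(II, OldPtr, NewPtr))
//       ...; // use Rewritten in place of II
//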
/// Test whether calls to a function lower to actual program function
/// calls.
///
/// The idea is to test whether the program is likely to require a 'call'
/// instruction or equivalent in order to call the given function.
///
/// FIXME: It's not clear that this is a good or useful query API. Clients
/// should probably move to simpler cost metrics using the above.
/// Alternatively, we could split the cost interface into distinct code-size
/// and execution-speed costs. This would allow modelling the core of this
/// query more accurately as a call is a single small instruction, but
/// incurs significant execution cost.
bool isLoweredToCall(const Function *F) const;
struct LSRCost {
/// TODO: Some of these could be merged. Also, a lexical ordering
/// isn't always optimal.
unsigned Insns;
unsigned NumRegs;
unsigned AddRecCost;
unsigned NumIVMuls;
unsigned NumBaseAdds;
unsigned ImmCost;
unsigned SetupCost;
unsigned ScaleCost;
};
/// Parameters that control the generic loop unrolling transformation.
struct UnrollingPreferences {
/// The cost threshold for the unrolled loop. Should be relative to the
/// getUserCost values returned by this API, and the expectation is that
/// the unrolled loop's instructions when run through that interface should
/// not exceed this cost. However, this is only an estimate. Also, specific
/// loops may be unrolled even with a cost above this threshold if deemed
/// profitable. Set this to UINT_MAX to disable the loop body cost
/// restriction.
unsigned Threshold;
/// If complete unrolling will reduce the cost of the loop, we will boost
/// the Threshold by a certain percent to allow more aggressive complete
/// unrolling. This value provides the maximum boost percentage that we
/// can apply to Threshold (The value should be no less than 100).
/// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
/// MaxPercentThresholdBoost / 100)
/// E.g. if complete unrolling reduces the loop execution time by 50%
/// then we boost the threshold by the factor of 2x. If unrolling is not
/// expected to reduce the running time, then we do not increase the
/// threshold.
unsigned MaxPercentThresholdBoost;
/// The cost threshold for the unrolled loop when optimizing for size (set
/// to UINT_MAX to disable).
unsigned OptSizeThreshold;
/// The cost threshold for the unrolled loop, like Threshold, but used
/// for partial/runtime unrolling (set to UINT_MAX to disable).
unsigned PartialThreshold;
/// The cost threshold for the unrolled loop when optimizing for size, like
/// OptSizeThreshold, but used for partial/runtime unrolling (set to
/// UINT_MAX to disable).
unsigned PartialOptSizeThreshold;
/// A forced unrolling factor (the number of concatenated bodies of the
/// original loop in the unrolled loop body). When set to 0, the unrolling
/// transformation will select an unrolling factor based on the current cost
/// threshold and other factors.
unsigned Count;
/// Default unroll count for loops with run-time trip count.
unsigned DefaultUnrollRuntimeCount;
/// Set the maximum unrolling factor. The unrolling factor may be selected
/// using the appropriate cost threshold, but may not exceed this number
/// (set to UINT_MAX to disable). This does not apply in cases where the
/// loop is being fully unrolled.
unsigned MaxCount;
/// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
/// applies even if full unrolling is selected. This allows a target to fall
/// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
unsigned FullUnrollMaxCount;
/// The number of instructions optimized when the "back edge"
/// becomes a "fall through" in the unrolled loop.
/// For now we count a conditional branch on a backedge and a comparison
/// feeding it.
unsigned BEInsns;
/// Allow partial unrolling (unrolling of loops to expand the size of the
/// loop body, not only to eliminate small constant-trip-count loops).
bool Partial;
/// Allow runtime unrolling (unrolling of loops to expand the size of the
/// loop body even when the number of loop iterations is not known at
/// compile time).
bool Runtime;
/// Allow generation of a loop remainder (extra iterations after unroll).
bool AllowRemainder;
/// Allow emitting expensive instructions (such as divisions) when computing
/// the trip count of a loop for runtime unrolling.
bool AllowExpensiveTripCount;
/// Apply loop unroll on any kind of loop
/// (mainly to loops that fail runtime unrolling).
bool Force;
/// Allow using trip count upper bound to unroll loops.
bool UpperBound;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollAndJam;
/// Threshold for unroll and jam, for inner loop size. The 'Threshold'
/// value above is used during unroll and jam for the outer loop size.
/// This value is used in the same manner to limit the size of the inner
/// loop.
unsigned UnrollAndJamInnerLoopThreshold;
/// Don't allow loop unrolling to simulate more than this number of
/// iterations when checking full unroll profitability
unsigned MaxIterationsCountToAnalyze;
};
/// Get target-customized preferences for the generic loop unrolling
/// transformation. The caller will initialize UP with the current
/// target-independent defaults.
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) const;
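// A minimal sketch of a target-side override (MyTTIImpl is a hypothetical
// target implementation and the values are arbitrary, for illustration only):
//
//   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
//                                           TTI::UnrollingPreferences &UP) {
//     UP.Partial = true;          // allow partial unrolling
//     UP.Runtime = true;          // allow runtime unrolling
//     UP.MaxCount = 4;            // cap the unroll factor at 4
//     UP.PartialThreshold = 200;  // loosen the partial-unroll size budget
//   }
//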
/// Query the target whether it would be profitable to convert the given loop
/// into a hardware loop.
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) const;
/// Query the target whether it would be preferred to create a predicated
/// vector loop, which can avoid the need to emit a scalar epilogue loop.
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI) const;
/// Query the target whether lowering of the llvm.get.active.lane.mask
/// intrinsic is supported.
bool emitGetActiveLaneMask() const;
/// Parameters that control the loop peeling transformation.
struct PeelingPreferences {
/// A forced peeling factor (the number of bodies of the original loop
/// that should be peeled off before the loop body). When set to 0, a
/// peeling factor based on profile information and other factors is used.
unsigned PeelCount;
/// Allow peeling off loop iterations.
bool AllowPeeling;
/// Allow peeling off loop iterations for loop nests.
bool AllowLoopNestsPeeling;
/// Allow peeling based on profile. Used to enable peeling off all
/// iterations based on the provided profile.
/// If the value is true, the peeling cost model can decide to peel only
/// some iterations, in which case it will set this to false.
bool PeelProfiledIterations;
};
/// Get target-customized preferences for the generic loop peeling
/// transformation. The caller will initialize \p PP with the current
/// target-independent defaults with information from \p L and \p SE.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const;
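// A minimal sketch of a target-side override (MyTTIImpl is hypothetical;
// the values are illustrative only):
//
//   void MyTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
//                                         TTI::PeelingPreferences &PP) {
//     PP.PeelCount = 0;        // let the cost model pick a peel count
//     PP.AllowPeeling = true;  // permit peeling in general
//   }
//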
/// Targets can implement their own combinations for target-specific
/// intrinsics. This function will be called from the InstCombine pass every
/// time a target-specific intrinsic is encountered.
///
/// \returns None to not do anything target specific, or a value that will be
/// returned from the InstCombiner. Returning nullptr stops further
/// processing of the intrinsic.
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
/// Can be used to implement target-specific instruction combining.
/// \see instCombineIntrinsic
Optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) const;
/// Can be used to implement target-specific instruction combining.
/// \see instCombineIntrinsic
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const;
/// @}
/// \name Scalar Target Information
/// @{
/// Flags indicating the kind of support for population count.
///
/// Compared to the SW implementation, HW support is supposed to
/// significantly boost the performance when the population is dense, and it
/// may or may not degrade performance if the population is sparse. HW
/// support is considered "Fast" if it can outperform, or is on a par
/// with, the SW implementation when the population is sparse; otherwise,
/// it is considered "Slow".
enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
/// Return true if the specified immediate is a legal add immediate, that
/// is the target has add instructions which can add a register with the
/// immediate without having to materialize the immediate into a register.
bool isLegalAddImmediate(int64_t Imm) const;
/// Return true if the specified immediate is a legal icmp immediate,
/// that is the target has icmp instructions which can compare a register
/// against the immediate without having to materialize the immediate into a
/// register.
bool isLegalICmpImmediate(int64_t Imm) const;
/// Return true if the addressing mode represented by AM is legal for
/// this target, for a load/store of the specified type.
/// The type may be VoidTy, in which case only return true if the addressing
/// mode is legal for a load/store of any legal type.
/// If target returns true in LSRWithInstrQueries(), I may be valid.
/// TODO: Handle pre/postinc as well.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace = 0,
Instruction *I = nullptr) const;
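// Illustrative query (assumes TTI and an LLVMContext &Ctx are in scope): ask
// whether an address of the form "base + 4*index + 16" is legal for an i32
// load/store in address space 0.
//
//   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
//                                          /*BaseGV=*/nullptr,
//                                          /*BaseOffset=*/16,
//                                          /*HasBaseReg=*/true,
//                                          /*Scale=*/4);
//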
/// Return true if the LSR cost of C1 is lower than that of C2.
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) const;
/// Return true if LSR major cost is number of registers. Targets which
/// implement their own isLSRCostLess and unset number of registers as major
/// cost should return false, otherwise return true.
bool isNumRegsMajorCostOfLSR() const;
/// \returns true if LSR should not optimize a chain that includes \p I.
bool isProfitableLSRChainElement(Instruction *I) const;
/// Return true if the target can fuse a compare and branch.
/// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
/// calculation for the instructions in a loop.
bool canMacroFuseCmp() const;
/// Return true if the target can save a compare for loop count, for example
/// a hardware loop saves the compare.
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) const;
enum AddressingModeKind {
AMK_PreIndexed,
AMK_PostIndexed,
AMK_None
};
/// Return the preferred addressing mode LSR should make efforts to generate.
AddressingModeKind getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const;
/// Return true if the target supports masked store.
bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked load.
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
/// Return true if the target supports nontemporal store.
bool isLegalNTStore(Type *DataType, Align Alignment) const;
/// Return true if the target supports nontemporal load.
bool isLegalNTLoad(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked scatter.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked gather.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked compress store.
bool isLegalMaskedCompressStore(Type *DataType) const;
/// Return true if the target supports masked expand load.
bool isLegalMaskedExpandLoad(Type *DataType) const;
/// Return true if the target has a unified operation to calculate division
/// and remainder. If so, the additional implicit multiplication and
/// subtraction required to calculate a remainder from division are free. This
/// can enable more aggressive transformations for division and remainder than
/// would typically be allowed using throughput or size cost models.
bool hasDivRemOp(Type *DataType, bool IsSigned) const;
/// Return true if the given instruction (assumed to be a memory access
/// instruction) has a volatile variant. If that's the case then we can avoid
/// addrspacecast to generic AS for volatile loads/stores. Default
/// implementation returns false, which prevents address space inference for
/// volatile loads/stores.
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
/// Return true if the target doesn't mind addresses in vectors.
bool prefersVectorizedAddressing() const;
/// Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
/// TODO: Handle pre/postinc as well.
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace = 0) const;
/// Return true if the loop strength reduce pass should make
/// Instruction* based TTI queries to isLegalAddressingMode(). This is
/// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
/// immediate offset and no index register.
bool LSRWithInstrQueries() const;
/// Return true if it's free to truncate a value of type Ty1 to type
/// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
/// by referencing its sub-register AX.
bool isTruncateFree(Type *Ty1, Type *Ty2) const;
/// Return true if it is profitable to hoist an instruction from the
/// then/else blocks to before the if.
bool isProfitableToHoist(Instruction *I) const;
bool useAA() const;
/// Return true if this type is legal.
bool isTypeLegal(Type *Ty) const;
/// Returns the estimated number of registers required to represent \p Ty.
InstructionCost getRegUsageForType(Type *Ty) const;
/// Return true if switches should be turned into lookup tables for the
/// target.
bool shouldBuildLookupTables() const;
/// Return true if switches should be turned into lookup tables
/// containing this constant value for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const;
/// Return true if lookup tables should be turned into relative lookup tables.
bool shouldBuildRelLookupTables() const;
/// Return true if the input function, which is cold at all call sites,
/// should use the coldcc calling convention.
bool useColdCCForColdCall(Function &F) const;
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the demanded result elements need to be inserted and/or
/// extracted from vectors.
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract) const;
/// Estimate the overhead of scalarizing an instruction's unique
/// non-constant operands. The (potentially vector) types to use for each
/// argument are passed via Tys.
InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys) const;
/// If target has efficient vector element load/store instructions, it can
/// return true here so that insertion/extraction costs are not added to
/// the scalarization cost of a load/store.
bool supportsEfficientVectorElementLoadStore() const;
/// Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
/// Returns options for expansion of memcmp. IsZeroCmp is
/// true if this is the expansion of memcmp(p1, p2, s) == 0.
struct MemCmpExpansionOptions {
// Return true if memcmp expansion is enabled.
operator bool() const { return MaxNumLoads > 0; }
// Maximum number of load operations.
unsigned MaxNumLoads = 0;
// The list of available load sizes (in bytes), sorted in decreasing order.
SmallVector<unsigned, 8> LoadSizes;
// For memcmp expansion when the memcmp result is only compared equal or
// not-equal to 0, allow up to this number of load pairs per block. As an
// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
// a0 = load2bytes &a[0]
// b0 = load2bytes &b[0]
// a2 = load1byte &a[2]
// b2 = load1byte &b[2]
// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
unsigned NumLoadsPerBlock = 1;
// Set to true to allow overlapping loads. For example, 7-byte compares can
// be done with two 4-byte compares instead of 4+2+1-byte compares. This
// requires all loads in LoadSizes to be doable in an unaligned way.
bool AllowOverlappingLoads = false;
};
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
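// A minimal target-side sketch (MyTTIImpl and the values are illustrative
// assumptions, not a recommendation): allow up to 4 loads, using 8/4/2/1-byte
// accesses.
//
//   TTI::MemCmpExpansionOptions
//   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
//     TTI::MemCmpExpansionOptions Options;
//     Options.MaxNumLoads = 4;
//     Options.LoadSizes.append({8, 4, 2, 1}); // decreasing order, in bytes
//     if (IsZeroCmp)
//       Options.NumLoadsPerBlock = 2; // allow load pairs for ==0 compares
//     return Options;
//   }
//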
/// Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
/// Enable matching of interleaved access groups that contain predicated
/// accesses or gaps and therefore vectorized using masked
/// vector loads/stores.
bool enableMaskedInterleavedAccessVectorization() const;
/// Indicate that it is potentially unsafe to automatically vectorize
/// floating-point operations because vector and scalar floating-point
/// semantics may differ. For example, ARM NEON v7 SIMD math
/// does not support IEEE-754 denormal numbers, while depending on the
/// platform, scalar floating-point math does.
/// This applies to floating-point math operations and calls, not memory
/// operations, shuffles, or casts.
bool isFPVectorizationPotentiallyUnsafe() const;
/// Determine if the target supports unaligned memory accesses.
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace = 0,
Align Alignment = Align(1),
bool *Fast = nullptr) const;
/// Return hardware support for population count.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
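// Illustrative check (assumes TTI is in scope): prefer a popcount-based
// sequence only when hardware support is fast for the given width.
//
//   if (TTI.getPopcntSupport(32) == TargetTransformInfo::PSK_FastHardware)
//     ...; // emit a ctpop-based sequence
//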
/// Return true if the hardware has a fast square-root instruction.
bool haveFastSqrt(Type *Ty) const;
/// Return true if it is faster to check if a floating-point value is NaN
/// (or not-NaN) versus a comparison against a constant FP zero value.
/// Targets should override this if materializing a 0.0 for comparison is
/// generally as cheap as checking for ordered/unordered.
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
/// Return the expected cost of supporting the floating point operation
/// of the specified type.
InstructionCost getFPOpCost(Type *Ty) const;
/// Return the expected cost of materialization for the given integer
/// immediate of the specified type.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TargetCostKind CostKind) const;
/// Return the expected cost of materialization for the given integer
/// immediate of the specified type for a given instruction. The cost can be
/// zero if the immediate can be folded into the specified instruction.
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind,
Instruction *Inst = nullptr) const;
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind) const;
/// Return the expected cost for the given integer when optimising
/// for size. This is different than the other integer immediate cost
/// functions in that it is subtarget agnostic. This is useful when you e.g.
/// target one ISA such as AArch32 but smaller encodings could be possible
/// with another such as Thumb. This return value is used as a penalty when
/// the total cost for a constant is calculated (the bigger the cost, the
/// more beneficial constant hoisting is).
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty) const;
/// @}
/// \name Vector Target Information
/// @{
/// The various kinds of shuffle patterns for vector queries.
enum ShuffleKind {
SK_Broadcast, ///< Broadcast element 0 to all other elements.
SK_Reverse, ///< Reverse the order of the vector.
SK_Select, ///< Selects elements from the corresponding lane of
///< either source operand. This is equivalent to a
///< vector select with a constant condition operand.
SK_Transpose, ///< Transpose two vectors.
SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
///< with any shuffle mask.
SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
///< shuffle mask.
SK_Splice ///< Concatenates elements from the first input vector
///< with elements of the second input vector, returning
///< a vector of the same type as the input vectors.
};
/// Additional information about an operand's possible values.
enum OperandValueKind {
OK_AnyValue, // Operand can have any value.
OK_UniformValue, // Operand is uniform (splat of a value).
OK_UniformConstantValue, // Operand is uniform constant.
OK_NonUniformConstantValue // Operand is a non-uniform constant value.
};
/// Additional properties of an operand's values.
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
/// \return the number of registers in the target-provided register class.
unsigned getNumberOfRegisters(unsigned ClassID) const;
/// \return the target-provided register class ID for the provided type,
/// accounting for type promotion and other type-legalization techniques that
/// the target might apply. However, it specifically does not account for the
/// scalarization or splitting of vector types. Should a vector type require
/// scalarization or splitting into multiple underlying vector registers, that
/// type should be mapped to a register class containing no registers.
/// Specifically, this is designed to provide a simple, high-level view of the
/// register allocation later performed by the backend. These register classes
/// don't necessarily map onto the register classes used by the backend.
/// FIXME: It's not currently possible to determine how many registers
/// are used by the provided type.
unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
/// \return the target-provided register class name
const char *getRegisterClassName(unsigned ClassID) const;
enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
/// \return The width of the largest scalar or vector register type.
TypeSize getRegisterBitWidth(RegisterKind K) const;
/// \return The width of the smallest vector register type.
unsigned getMinVectorRegisterBitWidth() const;
/// \return The maximum value of vscale if the target specifies an
/// architectural maximum vector length, and None otherwise.
Optional<unsigned> getMaxVScale() const;
/// \return True if the vectorization factor should be chosen to
/// make the vector of the smallest element type match the size of a
/// vector register. For wider element types, this could result in
/// creating vectors that span multiple vector registers.
/// If false, the vectorization factor will be chosen based on the
/// size of the widest element type.
bool shouldMaximizeVectorBandwidth() const;
/// \return The minimum vectorization factor for types of given element
/// bit width, or 0 if there is no minimum VF. The returned value only
/// applies when shouldMaximizeVectorBandwidth returns true.
/// If IsScalable is true, the returned ElementCount must be a scalable VF.
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
/// \return The maximum vectorization factor for types of given element
/// bit width and opcode, or 0 if there is no maximum VF.
/// Currently only used by the SLP vectorizer.
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
/// \return True if it should be considered for address type promotion.
/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
/// profitable without finding other extensions fed by the same input.
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
/// \return The size of a cache line in bytes.
unsigned getCacheLineSize() const;
/// The possible cache levels
enum class CacheLevel {
L1D, // The L1 data cache
L2D, // The L2 data cache
// We currently do not model L3 caches, as their sizes differ widely between
// microarchitectures. Also, we currently do not have a use for L3 cache
// size modeling yet.
};
/// \return The size of the cache level in bytes, if available.
Optional<unsigned> getCacheSize(CacheLevel Level) const;
/// \return The associativity of the cache level, if available.
Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
/// \return How much before a load we should place the prefetch
/// instruction. This is currently measured in number of
/// instructions.
unsigned getPrefetchDistance() const;
/// Some HW prefetchers can handle accesses up to a certain constant stride.
/// Sometimes prefetching is beneficial even below the HW prefetcher limit,
/// and the arguments provided are meant to serve as a basis for deciding this
/// for a particular loop.
///
/// \param NumMemAccesses Number of memory accesses in the loop.
/// \param NumStridedMemAccesses Number of the memory accesses that
/// ScalarEvolution could find a known stride
/// for.
/// \param NumPrefetches Number of software prefetches that will be
/// emitted as determined by the addresses
/// involved and the cache line size.
/// \param HasCall True if the loop contains a call.
///
/// \return This is the minimum stride in bytes where it makes sense to start
/// adding SW prefetches. The default is 1, i.e. prefetch with any
/// stride.
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
unsigned NumStridedMemAccesses,
unsigned NumPrefetches, bool HasCall) const;
/// \return The maximum number of iterations to prefetch ahead. If
/// the required number of iterations is more than this number, no
/// prefetching is performed.
unsigned getMaxPrefetchIterationsAhead() const;
/// \return True if prefetching should also be done for writes.
bool enableWritePrefetching() const;
/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
unsigned getMaxInterleaveFactor(unsigned VF) const;
/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
static OperandValueKind getOperandInfo(const Value *V,
OperandValueProperties &OpProps);
/// This is an approximation of reciprocal throughput of a math/logic op.
/// A higher cost indicates less expected throughput.
/// From Agner Fog's guides, reciprocal throughput is "the average number of
/// clock cycles per instruction when the instructions are not part of a
/// limiting dependency chain."
/// Therefore, costs should be scaled to account for multiple execution units
/// on the target that can process this type of instruction. For example, if
/// there are 5 scalar integer units and 2 vector integer units that can
/// calculate an 'add' in a single cycle, this model should indicate that the
/// cost of the vector add instruction is 2.5 times the cost of the scalar
/// add instruction.
/// \p Args is an optional argument which holds the instruction operands
/// values so the TTI can analyze those values searching for special
/// cases or optimizations based on those values.
/// \p CxtI is the optional original context instruction, if one exists, to
/// provide even more information.
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
OperandValueKind Opd1Info = OK_AnyValue,
OperandValueKind Opd2Info = OK_AnyValue,
OperandValueProperties Opd1PropInfo = OP_None,
OperandValueProperties Opd2PropInfo = OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr) const;
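// Illustrative sketch (assumes TTI and a BinaryOperator *BO are in scope):
// derive operand info first, then feed it into the arithmetic cost query.
//
//   TargetTransformInfo::OperandValueProperties P1, P2;
//   TargetTransformInfo::OperandValueKind K1 =
//       TargetTransformInfo::getOperandInfo(BO->getOperand(0), P1);
//   TargetTransformInfo::OperandValueKind K2 =
//       TargetTransformInfo::getOperandInfo(BO->getOperand(1), P2);
//   InstructionCost Cost = TTI.getArithmeticInstrCost(
//       BO->getOpcode(), BO->getType(),
//       TargetTransformInfo::TCK_RecipThroughput, K1, K2, P1, P2);
//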
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The exact mask may be passed as Mask, or else the array will be empty.
/// The index and subtype parameters are used by the subvector insertion and
/// extraction shuffle kinds to show the insert/extract point and the type of
/// the subvector being inserted/extracted.
/// NOTE: For subvector extractions Tp represents the source type.
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask = None, int Index = 0,
VectorType *SubTp = nullptr) const;
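// Illustrative query (assumes TTI and a VectorType *VecTy are in scope):
// the cost of broadcasting lane 0 across the vector.
//
//   InstructionCost Cost =
//       TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
//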
/// Represents a hint about the context in which a cast is used.
///
/// For zext/sext, the context of the cast is the operand, which must be a
/// load of some kind. For trunc, the context of the cast is the single
/// user of the instruction, which must be a store of some kind.
///
/// This enum allows the vectorizer to give getCastInstrCost an idea of the
/// type of cast it's dealing with, as not every cast is equal. For instance,
/// the zext of a load may be free, but the zext of an interleaving load can
/// be (very) expensive!
///
/// See \c getCastContextHint to compute a CastContextHint from a cast
/// Instruction*. Callers can use it if they don't need to override the
/// context and just want it to be calculated from the instruction.
///
/// FIXME: This handles the types of load/store that the vectorizer can
/// produce, which are the cases where the context instruction is most
/// likely to be incorrect. There are other situations where that can happen
/// too, which might be handled here but in the long run a more general
/// solution of costing multiple instructions at the same time may be better.
enum class CastContextHint : uint8_t {
None, ///< The cast is not used with a load/store of any kind.
Normal, ///< The cast is used with a normal load/store.
Masked, ///< The cast is used with a masked load/store.
GatherScatter, ///< The cast is used with a gather/scatter.
Interleave, ///< The cast is used with an interleaved load/store.
Reversed, ///< The cast is used with a reversed load/store.
};
/// Calculates a CastContextHint from \p I.
/// This should be used by callers of getCastInstrCost if they wish to
/// determine the context from some instruction.
/// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
/// or if it's another type of cast.
static CastContextHint getCastContextHint(const Instruction *I);
/// \return The expected cost of cast instructions, such as bitcast, trunc,
/// zext, etc. If there is an existing instruction that holds Opcode, it
/// may be passed in the 'I' parameter.
InstructionCost
getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
const Instruction *I = nullptr) const;
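// Illustrative sketch (assumes TTI and a CastInst *CI are in scope): compute
// the context hint from the instruction and pass it along with the cast.
//
//   TargetTransformInfo::CastContextHint CCH =
//       TargetTransformInfo::getCastContextHint(CI);
//   InstructionCost Cost = TTI.getCastInstrCost(
//       CI->getOpcode(), CI->getDestTy(), CI->getSrcTy(), CCH,
//       TargetTransformInfo::TCK_RecipThroughput, CI);
//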
/// \return The expected cost of a sign- or zero-extended vector extract. Use
/// -1 to indicate that there is no information about the index value.
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy,
unsigned Index = -1) const;
/// \return The expected cost of control-flow related instructions such as
/// Phi, Ret, Br, Switch.
InstructionCost
getCFInstrCost(unsigned Opcode,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
const Instruction *I = nullptr) const;
/// \returns The expected cost of compare and select instructions. If there
/// is an existing instruction that holds Opcode, it may be passed in the
/// 'I' parameter. The \p VecPred parameter can be used to indicate the select
/// is using a compare with the specified predicate as condition. When vector
/// types are passed, \p VecPred must be used for all lanes.
InstructionCost
getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index = -1) const;
/// \return The cost of Load and Store instructions.
InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions.
InstructionCost getMaskedMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// \return The cost of Gather or Scatter operation
/// \p Opcode - is a type of memory access Load or Store
/// \p DataTy - a vector type of the data to be loaded or stored
/// \p Ptr - pointer [or vector of pointers] - address[es] in memory
/// \p VariableMask - true when the memory access is predicated with a mask
/// that is not a compile-time constant
/// \p Alignment - alignment of single element
/// \p I - the optional original context instruction, if one exists, e.g. the
/// load/store to transform or the call to the gather/scatter intrinsic
InstructionCost getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
/// \p VecTy is the vector type of the interleaved access.
/// \p Factor is the interleave factor
/// \p Indices is the indices for interleaved load members (as interleaved
/// load allows gaps)
/// \p Alignment is the alignment of the memory operation
/// \p AddressSpace is address space of the pointer.
/// \p UseMaskForCond indicates if the memory access is predicated.
/// \p UseMaskForGaps indicates if gaps should be masked.
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
/// A helper function to determine the type of reduction algorithm used
/// for a given \p Opcode and set of FastMathFlags \p FMF.
static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) {
return FMF != None && !(*FMF).allowReassoc();
}
/// Calculate the cost of vector reduction intrinsics.
///
/// This is the cost of reducing the vector value of type \p Ty to a scalar
/// value using the operation denoted by \p Opcode. The FastMathFlags
/// parameter \p FMF indicates what type of reduction we are performing:
/// 1. Tree-wise. This is the typical 'fast' reduction performed that
/// involves successively splitting a vector into half and doing the
/// operation on the pair of halves until you have a scalar value. For
/// example:
/// (v0, v1, v2, v3)
/// ((v0+v2), (v1+v3), undef, undef)
/// ((v0+v2+v1+v3), undef, undef, undef)
/// This is the default behaviour for integer operations, whereas for
/// floating point we only do this if \p FMF indicates that
/// reassociation is allowed.
/// 2. Ordered. For a vector with N elements this involves performing N
/// operations in lane order, starting with an initial scalar value, i.e.
/// result = InitVal + v0
/// result = result + v1
/// result = result + v2
/// result = result + v3
/// This is only the case for FP operations and when reassociation is not
/// allowed.
///
InstructionCost getArithmeticReductionCost(
unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
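// Illustrative sketch (assumes TTI, a VectorType *VTy, and FastMathFlags FMF
// are in scope): query the reduction cost; requiresOrderedReduction tells
// whether the ordered (in-lane-order) form will be costed.
//
//   bool Ordered = TargetTransformInfo::requiresOrderedReduction(FMF);
//   InstructionCost Cost = TTI.getArithmeticReductionCost(
//       Instruction::FAdd, VTy, FMF,
//       TargetTransformInfo::TCK_RecipThroughput);
//   (void)Ordered; // e.g. pick a different strategy for ordered reductions
//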
InstructionCost getMinMaxReductionCost(
VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// Calculate the cost of an extended reduction pattern, similar to
/// getArithmeticReductionCost of an Add reduction with an extension and
/// optional multiply. This is the cost of:
/// ResTy vecreduce.add(ext(Ty A)), or if the IsMLA flag is set then:
/// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). The reduction happens
/// on a VectorType with ResTy elements and Ty lanes.
InstructionCost getExtendedAddReductionCost(
bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
/// Three cases are handled: 1. scalar instruction 2. vector instruction
/// 3. scalar instruction which is to be vectorized.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const;
/// \returns The cost of Call instructions.
InstructionCost getCallInstrCost(
Function *F, Type *RetTy, ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
/// \returns The number of pieces into which the provided type must be
/// split during legalization. Zero is returned when the answer is unknown.
unsigned getNumberOfParts(Type *Tp) const;
/// \returns The cost of the address computation. For most targets this can be
/// merged into the instruction indexing mode. Some targets might want to
/// distinguish between address computation for memory operations on vector
/// types and scalar types. Such targets should override this function.
/// The 'SE' parameter holds a pointer to the scalar evolution object, which
/// is used to get the step value of 'Ptr' in the case of a constant stride.
/// The 'Ptr' parameter holds SCEV of the access pointer.
InstructionCost getAddressComputationCost(Type *Ty,
ScalarEvolution *SE = nullptr,
const SCEV *Ptr = nullptr) const;
/// \returns The cost, if any, of keeping values of the given types alive
/// over a callsite.
///
/// Some types may require the use of register classes that do not have
/// any callee-saved registers, so would require a spill and fill.
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
/// \returns True if the intrinsic is a supported memory intrinsic. Info
/// will contain additional information - whether the intrinsic may read
/// or write memory, its volatility, and the pointer. Info is undefined
/// if false is returned.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
/// \returns The maximum element size, in bytes, for an element
/// unordered-atomic memory intrinsic.
unsigned getAtomicMemIntrinsicMaxElementSize() const;
/// \returns A value which is the result of the given memory intrinsic. New
/// instructions may be created to extract the result from the given intrinsic
/// memory operation. Returns nullptr if the target cannot create a result
/// from the given intrinsic.
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) const;
/// \returns The type to use in a loop expansion of a memcpy call.
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const;
/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
/// \param RemainingBytes The number of bytes to copy.
///
/// Calculates the operand types to use when copying \p RemainingBytes of
/// memory, where source and destination alignments are \p SrcAlign and
/// \p DestAlign respectively.
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const;
/// \returns True if the two functions have compatible attributes for inlining
/// purposes.
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
/// \returns True if the caller and callee agree on how \p Args will be passed
/// to the callee.
/// \param[out] Args The list of compatible arguments. The implementation may
/// filter out any incompatible args from this list.
bool areFunctionArgsABICompatible(const Function *Caller,
const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const;
/// The type of load/store indexing.
enum MemIndexedMode {
MIM_Unindexed, ///< No indexing.
MIM_PreInc, ///< Pre-incrementing.
MIM_PreDec, ///< Pre-decrementing.
MIM_PostInc, ///< Post-incrementing.
MIM_PostDec ///< Post-decrementing.
};
/// \returns True if the specified indexed load for the given type is legal.
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
/// \returns True if the specified indexed store for the given type is legal.
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
/// \returns The bitwidth of the largest vector type that should be used to
/// load/store in the given address space.
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
/// \returns True if the load instruction is legal to vectorize.
bool isLegalToVectorizeLoad(LoadInst *LI) const;
/// \returns True if the store instruction is legal to vectorize.
bool isLegalToVectorizeStore(StoreInst *SI) const;
/// \returns True if it is legal to vectorize the given load chain.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
/// \returns True if it is legal to vectorize the given store chain.
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
/// \returns True if it is legal to vectorize the given reduction kind.
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
/// \returns True if the given type is supported for scalable vectors
bool isElementTypeLegalForScalableVector(Type *Ty) const;
/// \returns The new vector factor value if the target doesn't support \p
/// SizeInBytes loads or has a better vector factor.
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const;
/// \returns The new vector factor value if the target doesn't support \p
/// SizeInBytes stores or has a better vector factor.
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const;
/// Flags describing the kind of vector reduction.
struct ReductionFlags {
ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
bool IsSigned; ///< Whether the operation is a signed int reduction.
bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
};
/// \returns True if the target prefers in-loop reductions.
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const;
/// \returns True if the target prefers the reduction select to be kept in the
/// loop when tail folding, i.e.
/// loop:
/// p = phi (0, s)
/// a = add (p, x)
/// s = select (mask, a, p)
/// vecreduce.add(s)
///
/// As opposed to the normal scheme of p = phi (0, a) which allows the select
/// to be pulled out of the loop. If the select(.., add, ..) can be predicated
/// by the target, this can lead to cleaner code generation.
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const;
/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
bool shouldExpandReduction(const IntrinsicInst *II) const;
/// \returns the size cost of rematerializing a GlobalValue address relative
/// to a stack reload.
unsigned getGISelRematGlobalCost() const;
/// \returns True if the target supports scalable vectors.
bool supportsScalableVectors() const;
/// \name Vector Predication Information
/// @{
/// Whether the target supports the %evl parameter of VP intrinsics efficiently
/// in hardware (see LLVM Language Reference - "Vector Predication
/// Intrinsics"). Use of %evl is discouraged when that is not the case.
bool hasActiveVectorLength() const;
struct VPLegalization {
enum VPTransform {
// keep the predicating parameter
Legal = 0,
// where legal, discard the predicate parameter
Discard = 1,
// transform into something else that is also predicating
Convert = 2
};
// How to transform the EVL parameter.
// Legal: keep the EVL parameter as it is.
// Discard: Ignore the EVL parameter where it is safe to do so.
// Convert: Fold the EVL into the mask parameter.
VPTransform EVLParamStrategy;
// How to transform the operator.
// Legal: The target supports this operator.
// Convert: Convert this to a non-VP operation.
// The 'Discard' strategy is invalid.
VPTransform OpStrategy;
bool shouldDoNothing() const {
return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
}
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
: EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
};
/// \returns How the target needs this vector-predicated operation to be
/// transformed.
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
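// Illustrative sketch (assumes TTI and a VPIntrinsic &VPI are in scope):
//
//   TargetTransformInfo::VPLegalization VPL =
//       TTI.getVPLegalizationStrategy(VPI);
//   if (!VPL.shouldDoNothing())
//     ...; // expand or convert the VP intrinsic as the strategy requires
//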
/// @}
/// @}
private:
/// Estimate the latency of specified instruction.
/// Returns 1 as the default value.
InstructionCost getInstructionLatency(const Instruction *I) const;
/// Returns the expected throughput cost of the instruction.
/// Returns -1 if the cost is unknown.
InstructionCost getInstructionThroughput(const Instruction *I) const;
/// The abstract base class used to type erase specific TTI
/// implementations.
class Concept;
/// The template model for the base class which wraps a concrete
/// implementation in a type erased interface.
template <typename T> class Model;
std::unique_ptr<Concept> TTIImpl;
};
class TargetTransformInfo::Concept {
public:
virtual ~Concept() = 0;
virtual const DataLayout &getDataLayout() const = 0;
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
virtual int getInlinerVectorBonusPercent() = 0;
virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
virtual unsigned
getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) = 0;
virtual InstructionCost getUserCost(const User *U,
ArrayRef<const Value *> Operands,
TargetCostKind CostKind) = 0;
virtual BranchProbability getPredictableBranchThreshold() = 0;
virtual bool hasBranchDivergence() = 0;
virtual bool useGPUDivergenceAnalysis() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
virtual unsigned getFlatAddressSpace() = 0;
virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const = 0;
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Value *OldV,
Value *NewV) const = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) = 0;
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) = 0;
virtual bool
preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
virtual bool emitGetActiveLaneMask() = 0;
virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) = 0;
virtual Optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) = 0;
virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace,
Instruction *I) = 0;
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;
virtual bool isNumRegsMajorCostOfLSR() = 0;
virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
virtual bool canMacroFuseCmp() = 0;
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) = 0;
virtual AddressingModeKind
getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) = 0;
virtual bool LSRWithInstrQueries() = 0;
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool useAA() = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
virtual InstructionCost getRegUsageForType(Type *Ty) = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool shouldBuildRelLookupTables() = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert,
bool Extract) = 0;
virtual InstructionCost
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual MemCmpExpansionOptions
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool enableMaskedInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
unsigned AddressSpace,
Align Alignment,
bool *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
virtual InstructionCost getFPOpCost(Type *Ty) = 0;
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty) = 0;
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TargetCostKind CostKind) = 0;
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind,
Instruction *Inst = nullptr) = 0;
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind) = 0;
virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
virtual unsigned getRegisterClassForType(bool Vector,
Type *Ty = nullptr) const = 0;
virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() const = 0;
virtual Optional<unsigned> getMaxVScale() const = 0;
virtual bool shouldMaximizeVectorBandwidth() const = 0;
virtual ElementCount getMinimumVF(unsigned ElemWidth,
bool IsScalable) const = 0;
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
virtual unsigned getCacheLineSize() const = 0;
virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
/// \return How much before a load we should place the prefetch
/// instruction. This is currently measured in number of
/// instructions.
virtual unsigned getPrefetchDistance() const = 0;
/// \return Some HW prefetchers can handle accesses up to a certain
/// constant stride. This is the minimum stride in bytes where it
/// makes sense to start adding SW prefetches. The default is 1,
/// i.e. prefetch with any stride. Sometimes prefetching is beneficial
/// even below the HW prefetcher limit, and the arguments provided are
/// meant to serve as a basis for deciding this for a particular loop.
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
unsigned NumStridedMemAccesses,
unsigned NumPrefetches,
bool HasCall) const = 0;
/// \return The maximum number of iterations to prefetch ahead. If
/// the required number of iterations is more than this number, no
/// prefetching is performed.
virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
/// \return True if prefetching should also be done for writes.
virtual bool enableWritePrefetching() const = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
OperandValueKind Opd1Info, OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
VectorType *SubTp) = 0;
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src, CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy,
unsigned Index) = 0;
virtual InstructionCost getCFInstrCost(unsigned Opcode,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) = 0;
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual InstructionCost
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) = 0;
virtual InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
virtual InstructionCost
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost
getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost getExtendedAddReductionCost(
bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
virtual InstructionCost
getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
virtual InstructionCost
getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
virtual InstructionCost
getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) = 0;
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) = 0;
virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign) const = 0;
virtual void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
virtual bool
areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const = 0;
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
Align Alignment,
unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
Align Alignment,
unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const = 0;
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
virtual unsigned getGISelRematGlobalCost() const = 0;
virtual bool supportsScalableVectors() const = 0;
virtual bool hasActiveVectorLength() const = 0;
virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
virtual VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
};
template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
T Impl;
public:
Model(T Impl) : Impl(std::move(Impl)) {}
~Model() override {}
const DataLayout &getDataLayout() const override {
return Impl.getDataLayout();
}
InstructionCost
getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
enum TargetTransformInfo::TargetCostKind CostKind) override {
return Impl.getGEPCost(PointeeType, Ptr, Operands);
}
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
}
unsigned adjustInliningThreshold(const CallBase *CB) override {
return Impl.adjustInliningThreshold(CB);
}
int getInlinerVectorBonusPercent() override {
return Impl.getInlinerVectorBonusPercent();
}
InstructionCost getMemcpyCost(const Instruction *I) override {
return Impl.getMemcpyCost(I);
}
InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
TargetCostKind CostKind) override {
return Impl.getUserCost(U, Operands, CostKind);
}
BranchProbability getPredictableBranchThreshold() override {
return Impl.getPredictableBranchThreshold();
}
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
bool useGPUDivergenceAnalysis() override {
return Impl.useGPUDivergenceAnalysis();
}
bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);
}
bool isAlwaysUniform(const Value *V) override {
return Impl.isAlwaysUniform(V);
}
unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const override {
return Impl.collectFlatAddressOperands(OpIndexes, IID);
}
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
}
unsigned getAssumedAddrSpace(const Value *V) const override {
return Impl.getAssumedAddrSpace(V);
}
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const override {
return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}
bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);
}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UnrollingPreferences &UP) override {
return Impl.getUnrollingPreferences(L, SE, UP);
}
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) override {
return Impl.getPeelingPreferences(L, SE, PP);
}
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) override {
return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI) override {
return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
}
bool emitGetActiveLaneMask() override {
return Impl.emitGetActiveLaneMask();
}
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) override {
return Impl.instCombineIntrinsic(IC, II);
}
Optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) override {
return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
KnownBitsComputed);
}
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) override {
return Impl.simplifyDemandedVectorEltsIntrinsic(
IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
SimplifyAndSetOp);
}
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
bool isLegalICmpImmediate(int64_t Imm) override {
return Impl.isLegalICmpImmediate(Imm);
}
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
Instruction *I) override {
return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
AddrSpace, I);
}
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) override {
return Impl.isLSRCostLess(C1, C2);
}
bool isNumRegsMajorCostOfLSR() override {
return Impl.isNumRegsMajorCostOfLSR();
}
bool isProfitableLSRChainElement(Instruction *I) override {
return Impl.isProfitableLSRChainElement(I);
}
bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) override {
return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}
AddressingModeKind
getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const override {
return Impl.getPreferredAddressingMode(L, SE);
}
bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedStore(DataType, Alignment);
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedLoad(DataType, Alignment);
}
bool isLegalNTStore(Type *DataType, Align Alignment) override {
return Impl.isLegalNTStore(DataType, Alignment);
}
bool isLegalNTLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalNTLoad(DataType, Alignment);
}
bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedScatter(DataType, Alignment);
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedGather(DataType, Alignment);
}
bool isLegalMaskedCompressStore(Type *DataType) override {
return Impl.isLegalMaskedCompressStore(DataType);
}
bool isLegalMaskedExpandLoad(Type *DataType) override {
return Impl.isLegalMaskedExpandLoad(DataType);
}
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
return Impl.hasDivRemOp(DataType, IsSigned);
}
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
return Impl.hasVolatileVariant(I, AddrSpace);
}
bool prefersVectorizedAddressing() override {
return Impl.prefersVectorizedAddressing();
}
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace) override {
return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
AddrSpace);
}
bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
bool isTruncateFree(Type *Ty1, Type *Ty2) override {
return Impl.isTruncateFree(Ty1, Ty2);
}
bool isProfitableToHoist(Instruction *I) override {
return Impl.isProfitableToHoist(I);
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
InstructionCost getRegUsageForType(Type *Ty) override {
return Impl.getRegUsageForType(Ty);
}
bool shouldBuildLookupTables() override {
return Impl.shouldBuildLookupTables();
}
bool shouldBuildLookupTablesForConstant(Constant *C) override {
return Impl.shouldBuildLookupTablesForConstant(C);
}
bool shouldBuildRelLookupTables() override {
return Impl.shouldBuildRelLookupTables();
}
bool useColdCCForColdCall(Function &F) override {
return Impl.useColdCCForColdCall(F);
}
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract) override {
return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}
InstructionCost
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys) override {
return Impl.getOperandsScalarizationOverhead(Args, Tys);
}
bool supportsEfficientVectorElementLoadStore() override {
return Impl.supportsEfficientVectorElementLoadStore();
}
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const override {
return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
}
bool enableMaskedInterleavedAccessVectorization() override {
return Impl.enableMaskedInterleavedAccessVectorization();
}
bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();
}
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,
bool *Fast) override {
return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
}
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
return Impl.getPopcntSupport(IntTyWidthInBit);
}
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
}
InstructionCost getFPOpCost(Type *Ty) override {
return Impl.getFPOpCost(Ty);
}
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty) override {
return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
}
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TargetCostKind CostKind) override {
return Impl.getIntImmCost(Imm, Ty, CostKind);
}
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind,
Instruction *Inst = nullptr) override {
return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
}
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind) override {
return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
}
unsigned getNumberOfRegisters(unsigned ClassID) const override {
return Impl.getNumberOfRegisters(ClassID);
}
unsigned getRegisterClassForType(bool Vector,
Type *Ty = nullptr) const override {
return Impl.getRegisterClassForType(Vector, Ty);
}
const char *getRegisterClassName(unsigned ClassID) const override {
return Impl.getRegisterClassName(ClassID);
}
TypeSize getRegisterBitWidth(RegisterKind K) const override {
return Impl.getRegisterBitWidth(K);
}
unsigned getMinVectorRegisterBitWidth() const override {
return Impl.getMinVectorRegisterBitWidth();
}
Optional<unsigned> getMaxVScale() const override {
return Impl.getMaxVScale();
}
bool shouldMaximizeVectorBandwidth() const override {
return Impl.shouldMaximizeVectorBandwidth();
}
ElementCount getMinimumVF(unsigned ElemWidth,
bool IsScalable) const override {
return Impl.getMinimumVF(ElemWidth, IsScalable);
}
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
return Impl.getMaximumVF(ElemWidth, Opcode);
}
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
return Impl.shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
Optional<unsigned> getCacheSize(CacheLevel Level) const override {
return Impl.getCacheSize(Level);
}
Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
return Impl.getCacheAssociativity(Level);
}
/// Return the preferred prefetch distance in terms of instructions.
///
unsigned getPrefetchDistance() const override {
return Impl.getPrefetchDistance();
}
/// Return the minimum stride necessary to trigger software
/// prefetching.
///
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
unsigned NumStridedMemAccesses,
unsigned NumPrefetches,
bool HasCall) const override {
return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
NumPrefetches, HasCall);
}
/// Return the maximum prefetch distance in terms of loop
/// iterations.
///
unsigned getMaxPrefetchIterationsAhead() const override {
return Impl.getMaxPrefetchIterationsAhead();
}
/// \return True if prefetching should also be done for writes.
bool enableWritePrefetching() const override {
return Impl.enableWritePrefetching();
}
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) override {
return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
OperandValueKind Opd1Info, OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args,
const Instruction *CxtI = nullptr) override {
return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
VectorType *SubTp) override {
return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) override {
return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy,
unsigned Index) override {
return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
}
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) override {
return Impl.getCFInstrCost(Opcode, CostKind, I);
}
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) override {
return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) override {
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind, I);
}
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind) override {
return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
}
InstructionCost
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) override {
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
}
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) override {
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
}
InstructionCost
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) override {
return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
}
InstructionCost
getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
TTI::TargetCostKind CostKind) override {
return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
}
InstructionCost getExtendedAddReductionCost(
bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
CostKind);
}
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) override {
return Impl.getIntrinsicInstrCost(ICA, CostKind);
}
InstructionCost getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind) override {
return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
}
unsigned getNumberOfParts(Type *Tp) override {
return Impl.getNumberOfParts(Tp);
}
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
const SCEV *Ptr) override {
return Impl.getAddressComputationCost(Ty, SE, Ptr);
}
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
return Impl.getCostOfKeepingLiveOverCall(Tys);
}
bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) override {
return Impl.getTgtMemIntrinsic(Inst, Info);
}
unsigned getAtomicMemIntrinsicMaxElementSize() const override {
return Impl.getAtomicMemIntrinsicMaxElementSize();
}
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) override {
return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign) const override {
return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
DestAddrSpace, SrcAlign, DestAlign);
}
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const override {
Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
SrcAddrSpace, DestAddrSpace,
SrcAlign, DestAlign);
}
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const override {
return Impl.areInlineCompatible(Caller, Callee);
}
bool areFunctionArgsABICompatible(
const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const override {
return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
}
bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
}
bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
}
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
}
bool isLegalToVectorizeLoad(LoadInst *LI) const override {
return Impl.isLegalToVectorizeLoad(LI);
}
bool isLegalToVectorizeStore(StoreInst *SI) const override {
return Impl.isLegalToVectorizeStore(SI);
}
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const override {
return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const override {
return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const override {
return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
}
bool isElementTypeLegalForScalableVector(Type *Ty) const override {
return Impl.isElementTypeLegalForScalableVector(Ty);
}
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const override {
return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const override {
return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const override {
return Impl.preferInLoopReduction(Opcode, Ty, Flags);
}
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const override {
return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
}
bool shouldExpandReduction(const IntrinsicInst *II) const override {
return Impl.shouldExpandReduction(II);
}
unsigned getGISelRematGlobalCost() const override {
return Impl.getGISelRematGlobalCost();
}
bool supportsScalableVectors() const override {
return Impl.supportsScalableVectors();
}
bool hasActiveVectorLength() const override {
return Impl.hasActiveVectorLength();
}
InstructionCost getInstructionLatency(const Instruction *I) override {
return Impl.getInstructionLatency(I);
}
VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
return Impl.getVPLegalizationStrategy(PI);
}
};
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
: TTIImpl(new Model<T>(Impl)) {}
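// The Concept/Model pair above is a type-erasure wrapper: any type providing
// the expected methods can be handed to this constructor, which stores it
// behind the abstract Concept interface. A minimal sketch, assuming a
// hypothetical implementation type `MyTTIImpl`:
//
//   MyTTIImpl Impl(DL);
//   TargetTransformInfo TTI(std::move(Impl)); // wraps Impl in Model<MyTTIImpl>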
/// Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
typedef TargetTransformInfo Result;
/// Default construct a target IR analysis.
///
/// This will use the module's datalayout to construct a baseline
/// conservative TTI result.
TargetIRAnalysis();
/// Construct an IR analysis pass around a target-provide callback.
///
/// The callback will be called with a particular function for which the TTI
/// is needed and must return a TTI object for that function.
TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
// Value semantics. We spell out the constructors for MSVC.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
: TTICallback(Arg.TTICallback) {}
TargetIRAnalysis(TargetIRAnalysis &&Arg)
: TTICallback(std::move(Arg.TTICallback)) {}
TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
TTICallback = RHS.TTICallback;
return *this;
}
TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
TTICallback = std::move(RHS.TTICallback);
return *this;
}
Result run(const Function &F, FunctionAnalysisManager &);
private:
friend AnalysisInfoMixin<TargetIRAnalysis>;
static AnalysisKey Key;
/// The callback used to produce a result.
///
/// We use a completely opaque callback so that targets can provide whatever
/// mechanism they desire for constructing the TTI for a given function.
///
/// FIXME: Should we really use std::function? It's relatively inefficient.
/// It might be possible to arrange for even stateful callbacks to outlive
/// the analysis and thus use a function_ref which would be lighter weight.
/// This may also be less error prone as the callback is likely to reference
/// the external TargetMachine, and that reference needs to never dangle.
std::function<Result(const Function &)> TTICallback;
/// Helper function used as the callback in the default constructor.
static Result getDefaultTTI(const Function &F);
};
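// A minimal usage sketch with the new pass manager, assuming a TargetMachine
// `TM` and a Function `F` are available (targets normally supply the callback
// themselves):
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([&] {
//     return TargetIRAnalysis(
//         [&](const Function &Fn) { return TM->getTargetTransformInfo(Fn); });
//   });
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);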
/// Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
TargetIRAnalysis TIRA;
Optional<TargetTransformInfo> TTI;
virtual void anchor();
public:
static char ID;
/// We must provide a default constructor for the pass but it should
/// never be used.
///
/// Use the constructor below or call one of the creation routines.
TargetTransformInfoWrapperPass();
explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
TargetTransformInfo &getTTI(const Function &F);
};
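// With the legacy pass manager, the wrapper pass is queried instead; a typical
// sketch from inside a FunctionPass, assuming the pass declared the
// TargetTransformInfoWrapperPass dependency in getAnalysisUsage:
//
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);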
/// Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
index 786fe908f68f..c63a5d42e9b3 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -1,1287 +1,1287 @@
//===- llvm/CodeGen/MachineFunction.h ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Collect native machine code for a function. This class contains a list of
// MachineBasicBlock instances that make up the current compiled function.
//
// This class also contains pointers to various classes which hold
// target-specific information about the generated code.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_MACHINEFUNCTION_H
#define LLVM_CODEGEN_MACHINEFUNCTION_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Recycler.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
namespace llvm {
class BasicBlock;
class BlockAddress;
class DataLayout;
class DebugLoc;
struct DenormalMode;
class DIExpression;
class DILocalVariable;
class DILocation;
class Function;
class GISelChangeObserver;
class GlobalValue;
class LLVMTargetMachine;
class MachineConstantPool;
class MachineFrameInfo;
class MachineFunction;
class MachineJumpTableInfo;
class MachineModuleInfo;
class MachineRegisterInfo;
class MCContext;
class MCInstrDesc;
class MCSymbol;
class MCSection;
class Pass;
class PseudoSourceValueManager;
class raw_ostream;
class SlotIndexes;
class StringRef;
class TargetRegisterClass;
class TargetSubtargetInfo;
struct WasmEHFuncInfo;
struct WinEHFuncInfo;
template <> struct ilist_alloc_traits<MachineBasicBlock> {
void deleteNode(MachineBasicBlock *MBB);
};
template <> struct ilist_callback_traits<MachineBasicBlock> {
void addNodeToList(MachineBasicBlock* N);
void removeNodeFromList(MachineBasicBlock* N);
template <class Iterator>
void transferNodesFromList(ilist_callback_traits &OldList, Iterator, Iterator) {
assert(this == &OldList && "never transfer MBBs between functions");
}
};
/// MachineFunctionInfo - This class can be derived from and used by targets to
/// hold private target-specific information for each MachineFunction. Objects
/// of this type are accessed/created with MF::getInfo and destroyed when the
/// MachineFunction is destroyed.
struct MachineFunctionInfo {
virtual ~MachineFunctionInfo();
/// Factory function: default behavior is to call new using the
/// supplied allocator.
///
/// This function can be overridden in a derived class.
template<typename Ty>
static Ty *create(BumpPtrAllocator &Allocator, MachineFunction &MF) {
return new (Allocator.Allocate<Ty>()) Ty(MF);
}
};
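// A minimal sketch of the intended use, with a hypothetical target-specific
// subclass `MyTargetFunctionInfo`; MF.getInfo (declared further down in
// MachineFunction) lazily creates it through the factory above:
//
//   struct MyTargetFunctionInfo : MachineFunctionInfo {
//     int VarArgsFrameIndex = 0;
//     explicit MyTargetFunctionInfo(MachineFunction &MF) {}
//   };
//   ...
//   auto *FI = MF.getInfo<MyTargetFunctionInfo>();
//   FI->VarArgsFrameIndex = FrameIdx; // FrameIdx is an assumed stack slot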
/// Properties which a MachineFunction may have at a given point in time.
/// Each of these has checking code in the MachineVerifier, and passes can
/// require that a property be set.
class MachineFunctionProperties {
// Possible TODO: Allow targets to extend this (perhaps by allowing the
// constructor to specify the size of the bit vector)
// Possible TODO: Allow requiring the negative (e.g. VRegsAllocated could be
// stated as the negative of "has vregs")
public:
// The properties are stated in "positive" form; i.e. a pass could require
// that the property hold, but not that it does not hold.
// Property descriptions:
// IsSSA: True when the machine function is in SSA form and virtual registers
// have a single def.
// NoPHIs: The machine function does not contain any PHI instruction.
// TracksLiveness: True when tracking register liveness accurately.
// While this property is set, register liveness information in basic block
// live-in lists and machine instruction operands (e.g. implicit defs) is
// accurate, kill flags are conservatively accurate (kill flag correctly
// indicates the last use of a register, an operand without kill flag may or
// may not be the last use of a register). This means it can be used to
// change the code in ways that affect the values in registers, for example
// by the register scavenger.
// When this property is cleared at a very late time, liveness is no longer
// reliable.
// NoVRegs: The machine function does not use any virtual registers.
// Legalized: In GlobalISel: the MachineLegalizer ran and all pre-isel generic
// instructions have been legalized; i.e., all instructions are now one of:
// - generic and always legal (e.g., COPY)
// - target-specific
// - legal pre-isel generic instructions.
// RegBankSelected: In GlobalISel: the RegBankSelect pass ran and all generic
// virtual registers have been assigned to a register bank.
// Selected: In GlobalISel: the InstructionSelect pass ran and all pre-isel
// generic instructions have been eliminated; i.e., all instructions are now
// target-specific or non-pre-isel generic instructions (e.g., COPY).
// Since only pre-isel generic instructions can have generic virtual register
// operands, this also means that all generic virtual registers have been
// constrained to virtual registers (assigned to register classes) and that
// all sizes attached to them have been eliminated.
// TiedOpsRewritten: The TwoAddressInstruction pass will set this flag; it
// means that tied defs have been rewritten to meet the RegConstraint.
enum class Property : unsigned {
IsSSA,
NoPHIs,
TracksLiveness,
NoVRegs,
FailedISel,
Legalized,
RegBankSelected,
Selected,
TiedOpsRewritten,
LastProperty = TiedOpsRewritten,
};
bool hasProperty(Property P) const {
return Properties[static_cast<unsigned>(P)];
}
MachineFunctionProperties &set(Property P) {
Properties.set(static_cast<unsigned>(P));
return *this;
}
MachineFunctionProperties &reset(Property P) {
Properties.reset(static_cast<unsigned>(P));
return *this;
}
/// Reset all the properties.
MachineFunctionProperties &reset() {
Properties.reset();
return *this;
}
MachineFunctionProperties &set(const MachineFunctionProperties &MFP) {
Properties |= MFP.Properties;
return *this;
}
MachineFunctionProperties &reset(const MachineFunctionProperties &MFP) {
Properties.reset(MFP.Properties);
return *this;
}
// Returns true if all properties set in V (i.e. required by a pass) are set
// in this.
bool verifyRequiredProperties(const MachineFunctionProperties &V) const {
return !V.Properties.test(Properties);
}
/// Print the MachineFunctionProperties in human-readable form.
void print(raw_ostream &OS) const;
private:
BitVector Properties =
BitVector(static_cast<unsigned>(Property::LastProperty)+1);
};
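// For example, a late pass that has eliminated all virtual registers might
// record that fact, and another pass can query the current state (a sketch
// using only the accessors above, with `MF` an assumed MachineFunction):
//
//   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
//   ...
//   if (MF.getProperties().hasProperty(
//           MachineFunctionProperties::Property::TracksLiveness)) {
//     // Live-in lists and kill flags can be relied upon here.
//   }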
struct SEHHandler {
/// Filter or finally function. Null indicates a catch-all.
const Function *FilterOrFinally;
/// Address of block to recover at. Null for a finally handler.
const BlockAddress *RecoverBA;
};
/// This structure is used to retain landing pad info for the current function.
struct LandingPadInfo {
MachineBasicBlock *LandingPadBlock; // Landing pad block.
SmallVector<MCSymbol *, 1> BeginLabels; // Labels prior to invoke.
SmallVector<MCSymbol *, 1> EndLabels; // Labels after invoke.
SmallVector<SEHHandler, 1> SEHHandlers; // SEH handlers active at this lpad.
MCSymbol *LandingPadLabel = nullptr; // Label at beginning of landing pad.
std::vector<int> TypeIds; // List of type ids (filters negative).
explicit LandingPadInfo(MachineBasicBlock *MBB)
: LandingPadBlock(MBB) {}
};
-class MachineFunction {
+class LLVM_EXTERNAL_VISIBILITY MachineFunction {
Function &F;
const LLVMTargetMachine &Target;
const TargetSubtargetInfo *STI;
MCContext &Ctx;
MachineModuleInfo &MMI;
// RegInfo - Information about each register in use in the function.
MachineRegisterInfo *RegInfo;
// Used to keep track of target-specific per-machine function information for
// the target implementation.
MachineFunctionInfo *MFInfo;
// Keep track of objects allocated on the stack.
MachineFrameInfo *FrameInfo;
// Keep track of constants which are spilled to memory
MachineConstantPool *ConstantPool;
// Keep track of jump tables for switch instructions
MachineJumpTableInfo *JumpTableInfo;
// Keep track of the function section.
MCSection *Section = nullptr;
// Keeps track of Wasm exception handling related data. This will be null for
// functions that aren't using a wasm EH personality.
WasmEHFuncInfo *WasmEHInfo = nullptr;
// Keeps track of Windows exception handling related data. This will be null
// for functions that aren't using a funclet-based EH personality.
WinEHFuncInfo *WinEHInfo = nullptr;
// Function-level unique numbering for MachineBasicBlocks. When a
// MachineBasicBlock is inserted into a MachineFunction, it is automatically
// numbered, and this vector keeps track of the mapping from IDs to MBBs.
std::vector<MachineBasicBlock*> MBBNumbering;
// Unary encoding of basic block symbols is used to reduce size of ".strtab".
// Basic block number 'i' gets a prefix of length 'i'. The ith character also
// denotes the type of basic block number 'i'. Return blocks are marked with
// 'r', landing pads with 'l' and regular blocks with 'a'.
std::vector<char> BBSectionsSymbolPrefix;
// Pool-allocate MachineFunction-lifetime and IR objects.
BumpPtrAllocator Allocator;
// Allocation management for instructions in function.
Recycler<MachineInstr> InstructionRecycler;
// Allocation management for operand arrays on instructions.
ArrayRecycler<MachineOperand> OperandRecycler;
// Allocation management for basic blocks in function.
Recycler<MachineBasicBlock> BasicBlockRecycler;
// List of machine basic blocks in function
using BasicBlockListType = ilist<MachineBasicBlock>;
BasicBlockListType BasicBlocks;
/// FunctionNumber - This provides a unique ID for each function emitted in
/// this translation unit.
///
unsigned FunctionNumber;
/// Alignment - The alignment of the function.
Align Alignment;
/// ExposesReturnsTwice - True if the function calls setjmp or related
/// functions with attribute "returns twice", but doesn't have
/// the attribute itself.
/// This is used to limit optimizations which cannot reason
/// about the control flow of such functions.
bool ExposesReturnsTwice = false;
/// True if the function includes any inline assembly.
bool HasInlineAsm = false;
/// True if any WinCFI instruction have been emitted in this function.
bool HasWinCFI = false;
/// Current high-level properties of the IR of the function (e.g. is in SSA
/// form or whether registers have been allocated)
MachineFunctionProperties Properties;
// Allocation management for pseudo source values.
std::unique_ptr<PseudoSourceValueManager> PSVManager;
/// List of moves done by a function's prolog. Used to construct frame maps
/// by debug and exception handling consumers.
std::vector<MCCFIInstruction> FrameInstructions;
/// List of basic blocks immediately following calls to _setjmp. Used to
/// construct a table of valid longjmp targets for Windows Control Flow Guard.
std::vector<MCSymbol *> LongjmpTargets;
/// List of basic blocks that are the target of catchrets. Used to construct
/// a table of valid targets for Windows EHCont Guard.
std::vector<MCSymbol *> CatchretTargets;
/// \name Exception Handling
/// \{
/// List of LandingPadInfo describing the landing pad information.
std::vector<LandingPadInfo> LandingPads;
/// Map a landing pad's EH symbol to the call site indexes.
DenseMap<MCSymbol*, SmallVector<unsigned, 4>> LPadToCallSiteMap;
/// Map a landing pad to its index.
DenseMap<const MachineBasicBlock *, unsigned> WasmLPadToIndexMap;
/// Map of invoke call site index values to associated begin EH_LABEL.
DenseMap<MCSymbol*, unsigned> CallSiteMap;
/// CodeView label annotations.
std::vector<std::pair<MCSymbol *, MDNode *>> CodeViewAnnotations;
bool CallsEHReturn = false;
bool CallsUnwindInit = false;
bool HasEHCatchret = false;
bool HasEHScopes = false;
bool HasEHFunclets = false;
/// Section Type for basic blocks, only relevant with basic block sections.
BasicBlockSection BBSectionsType = BasicBlockSection::None;
/// List of C++ TypeInfo used.
std::vector<const GlobalValue *> TypeInfos;
/// List of typeids encoding filters used.
std::vector<unsigned> FilterIds;
/// List of the indices in FilterIds corresponding to filter terminators.
std::vector<unsigned> FilterEnds;
EHPersonality PersonalityTypeCache = EHPersonality::Unknown;
/// \}
/// Clear all the members of this MachineFunction, except the ones used
/// to initialize the MachineFunction again.
/// More specifically, this deallocates all the dynamically allocated
/// objects and gets rid of all the XXXInfo data structures, but keeps
/// the references to Fn, Target, MMI, and FunctionNumber unchanged.
void clear();
/// Allocate and initialize the different members.
/// In particular, the XXXInfo data structure.
/// \pre Fn, Target, MMI, and FunctionNumber are properly set.
void init();
public:
struct VariableDbgInfo {
const DILocalVariable *Var;
const DIExpression *Expr;
// The Slot can be negative for fixed stack objects.
int Slot;
const DILocation *Loc;
VariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
int Slot, const DILocation *Loc)
: Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {}
};
class Delegate {
virtual void anchor();
public:
virtual ~Delegate() = default;
/// Callback after an insertion. This should not modify the MI directly.
virtual void MF_HandleInsertion(MachineInstr &MI) = 0;
/// Callback before a removal. This should not modify the MI directly.
virtual void MF_HandleRemoval(MachineInstr &MI) = 0;
};
/// Structure used to represent a pair of an argument number (after call
/// lowering) and the register used to transfer that argument.
/// For now we support only cases where the argument is transferred through
/// one register.
struct ArgRegPair {
Register Reg;
uint16_t ArgNo;
ArgRegPair(Register R, unsigned Arg) : Reg(R), ArgNo(Arg) {
assert(Arg < (1 << 16) && "Arg out of range");
}
};
/// Vector of call argument and its forwarding register.
using CallSiteInfo = SmallVector<ArgRegPair, 1>;
using CallSiteInfoImpl = SmallVectorImpl<ArgRegPair>;
private:
Delegate *TheDelegate = nullptr;
GISelChangeObserver *Observer = nullptr;
using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
/// Map a call instruction to call site arguments forwarding info.
CallSiteInfoMap CallSitesInfo;
/// A helper function that returns call site info for a given call
/// instruction if debug entry value support is enabled.
CallSiteInfoMap::iterator getCallSiteInfo(const MachineInstr *MI);
// Callbacks for insertion and removal.
void handleInsertion(MachineInstr &MI);
void handleRemoval(MachineInstr &MI);
friend struct ilist_traits<MachineInstr>;
public:
using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>;
VariableDbgInfoMapTy VariableDbgInfos;
/// A count of how many instructions in the function have had numbers
/// assigned to them. Used for debug value tracking, to determine the
/// next instruction number.
unsigned DebugInstrNumberingCount = 0;
/// Set value of DebugInstrNumberingCount field. Avoid using this unless
/// you're deserializing this data.
void setDebugInstrNumberingCount(unsigned Num);
/// Pair of instruction number and operand number.
using DebugInstrOperandPair = std::pair<unsigned, unsigned>;
/// Replacement definition for a debug instruction reference. Made up of a
/// source instruction / operand pair, destination pair, and a qualifying
/// subregister indicating what bits in the operand make up the substitution.
/// For example, a debug user
/// of %1:
/// %0:gr32 = someinst, debug-instr-number 1
/// %1:gr16 = %0.some_16_bit_subreg, debug-instr-number 2
/// Would receive the substitution {{2, 0}, {1, 0}, $subreg}, where $subreg is
/// the subregister number for some_16_bit_subreg.
class DebugSubstitution {
public:
DebugInstrOperandPair Src; ///< Source instruction / operand pair.
DebugInstrOperandPair Dest; ///< Replacement instruction / operand pair.
unsigned Subreg; ///< Qualifier for which part of Dest is read.
DebugSubstitution(const DebugInstrOperandPair &Src,
const DebugInstrOperandPair &Dest, unsigned Subreg)
: Src(Src), Dest(Dest), Subreg(Subreg) {}
/// Order only by source instruction / operand pair: there should never
/// be duplicate entries for the same source in any collection.
bool operator<(const DebugSubstitution &Other) const {
return Src < Other.Src;
}
};
/// Debug value substitutions: a collection of DebugSubstitution objects,
/// recording changes in where a value is defined. For example, when one
/// instruction is substituted for another. Keeping a record allows recovery
/// of variable locations after compilation finishes.
SmallVector<DebugSubstitution, 8> DebugValueSubstitutions;
/// Location of a PHI instruction that is also a debug-info variable value,
/// for the duration of register allocation. Loaded by the PHI-elimination
/// pass, and emitted as DBG_PHI instructions during VirtRegRewriter, with
/// maintenance applied by intermediate passes that edit registers (such as
/// coalescing and the allocator passes).
class DebugPHIRegallocPos {
public:
MachineBasicBlock *MBB; ///< Block where this PHI was originally located.
Register Reg; ///< VReg where the control-flow-merge happens.
unsigned SubReg; ///< Optional subreg qualifier within Reg.
DebugPHIRegallocPos(MachineBasicBlock *MBB, Register Reg, unsigned SubReg)
: MBB(MBB), Reg(Reg), SubReg(SubReg) {}
};
/// Map of debug instruction numbers to the position of their PHI instructions
/// during register allocation. See DebugPHIRegallocPos.
DenseMap<unsigned, DebugPHIRegallocPos> DebugPHIPositions;
/// Create a substitution from one <instr,operand> value to a different,
/// new value.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair,
unsigned SubReg = 0);
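// For the DebugSubstitution example above, the substitution
// {{2, 0}, {1, 0}, $subreg} would be recorded roughly as follows (a sketch;
// `SubregIdx` is an assumed subregister index):
//
//   MF.makeDebugValueSubstitution({2, 0}, {1, 0}, SubregIdx);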
/// Create substitutions for any tracked values in \p Old, to point at
/// \p New. Needed when we re-create an instruction during optimization,
/// which has the same signature (i.e., def operands in the same place) but
/// a modified instruction type, flags, or otherwise. An example: X86 moves
/// are sometimes transformed into equivalent LEAs.
/// If the two instructions are not the same opcode, limit which operands to
/// examine for substitutions to the first N operands by setting
/// \p MaxOperand.
void substituteDebugValuesForInst(const MachineInstr &Old, MachineInstr &New,
unsigned MaxOperand = UINT_MAX);
/// Find the underlying defining instruction / operand for a COPY instruction
/// while in SSA form. Copies do not actually define values -- they move them
/// between registers. Labelling a COPY-like instruction with an instruction
/// number is to be avoided as it makes value numbers non-unique later in
/// compilation. This method follows the definition chain for any sequence of
/// COPY-like instructions to find whatever non-COPY-like instruction defines
/// the copied value; or for parameters, creates a DBG_PHI on entry.
/// May insert instructions into the entry block!
/// \p MI The copy-like instruction to salvage.
/// \returns An instruction/operand pair identifying the defining value.
DebugInstrOperandPair salvageCopySSA(MachineInstr &MI);
/// Finalise any partially emitted debug instructions. These are DBG_INSTR_REF
/// instructions where we only knew the vreg of the value they use, not the
/// instruction that defines that vreg. Once isel finishes, we should have
/// enough information for every DBG_INSTR_REF to point at an instruction
/// (or DBG_PHI).
void finalizeDebugInstrRefs();
MachineFunction(Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI, unsigned FunctionNum,
MachineModuleInfo &MMI);
MachineFunction(const MachineFunction &) = delete;
MachineFunction &operator=(const MachineFunction &) = delete;
~MachineFunction();
/// Reset the instance as if it was just created.
void reset() {
clear();
init();
}
/// Reset the currently registered delegate - otherwise assert.
void resetDelegate(Delegate *delegate) {
assert(TheDelegate == delegate &&
"Only the current delegate can perform reset!");
TheDelegate = nullptr;
}
/// Set the delegate. resetDelegate must be called before attempting
/// to set.
void setDelegate(Delegate *delegate) {
assert(delegate && !TheDelegate &&
"Attempted to set delegate to null, or to change it without "
"first resetting it!");
TheDelegate = delegate;
}
void setObserver(GISelChangeObserver *O) { Observer = O; }
GISelChangeObserver *getObserver() const { return Observer; }
MachineModuleInfo &getMMI() const { return MMI; }
MCContext &getContext() const { return Ctx; }
/// Returns the Section this function belongs to.
MCSection *getSection() const { return Section; }
/// Indicates the Section this function belongs to.
void setSection(MCSection *S) { Section = S; }
PseudoSourceValueManager &getPSVManager() const { return *PSVManager; }
/// Return the DataLayout attached to the Module associated to this MF.
const DataLayout &getDataLayout() const;
/// Return the LLVM function that this machine code represents
Function &getFunction() { return F; }
/// Return the LLVM function that this machine code represents
const Function &getFunction() const { return F; }
/// getName - Return the name of the corresponding LLVM function.
StringRef getName() const;
/// getFunctionNumber - Return a unique ID for the current function.
unsigned getFunctionNumber() const { return FunctionNumber; }
/// Returns true if this function has basic block sections enabled.
bool hasBBSections() const {
return (BBSectionsType == BasicBlockSection::All ||
BBSectionsType == BasicBlockSection::List ||
BBSectionsType == BasicBlockSection::Preset);
}
/// Returns true if basic block labels are to be generated for this function.
bool hasBBLabels() const {
return BBSectionsType == BasicBlockSection::Labels;
}
void setBBSectionsType(BasicBlockSection V) { BBSectionsType = V; }
/// Assign the IsBeginSection and IsEndSection fields for basic blocks in this
/// function.
void assignBeginEndSections();
/// getTarget - Return the target machine this machine code is compiled with
const LLVMTargetMachine &getTarget() const { return Target; }
/// getSubtarget - Return the subtarget for which this machine code is being
/// compiled.
const TargetSubtargetInfo &getSubtarget() const { return *STI; }
/// getSubtarget - This method returns a reference to the specified type of
/// TargetSubtargetInfo. In debug builds, it verifies that the object being
/// returned is of the correct type.
template<typename STC> const STC &getSubtarget() const {
return *static_cast<const STC *>(STI);
}
/// getRegInfo - Return information about the registers currently in use.
MachineRegisterInfo &getRegInfo() { return *RegInfo; }
const MachineRegisterInfo &getRegInfo() const { return *RegInfo; }
/// getFrameInfo - Return the frame info object for the current function.
/// This object contains information about objects allocated on the stack
/// frame of the current function in an abstract way.
MachineFrameInfo &getFrameInfo() { return *FrameInfo; }
const MachineFrameInfo &getFrameInfo() const { return *FrameInfo; }
/// getJumpTableInfo - Return the jump table info object for the current
/// function. This object contains information about jump tables in the
/// current function. If the current function has no jump tables, this will
/// return null.
const MachineJumpTableInfo *getJumpTableInfo() const { return JumpTableInfo; }
MachineJumpTableInfo *getJumpTableInfo() { return JumpTableInfo; }
/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it
/// does not already exist, allocate one.
MachineJumpTableInfo *getOrCreateJumpTableInfo(unsigned JTEntryKind);
/// getConstantPool - Return the constant pool object for the current
/// function.
MachineConstantPool *getConstantPool() { return ConstantPool; }
const MachineConstantPool *getConstantPool() const { return ConstantPool; }
/// getWasmEHFuncInfo - Return information about how the current function uses
/// Wasm exception handling. Returns null for functions that don't use wasm
/// exception handling.
const WasmEHFuncInfo *getWasmEHFuncInfo() const { return WasmEHInfo; }
WasmEHFuncInfo *getWasmEHFuncInfo() { return WasmEHInfo; }
/// getWinEHFuncInfo - Return information about how the current function uses
/// Windows exception handling. Returns null for functions that don't use
/// funclets for exception handling.
const WinEHFuncInfo *getWinEHFuncInfo() const { return WinEHInfo; }
WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; }
/// getAlignment - Return the alignment of the function.
Align getAlignment() const { return Alignment; }
/// setAlignment - Set the alignment of the function.
void setAlignment(Align A) { Alignment = A; }
/// ensureAlignment - Make sure the function is at least A bytes aligned.
void ensureAlignment(Align A) {
if (Alignment < A)
Alignment = A;
}
/// exposesReturnsTwice - Returns true if the function calls setjmp or
/// any other similar functions with attribute "returns twice" without
/// having the attribute itself.
bool exposesReturnsTwice() const {
return ExposesReturnsTwice;
}
/// setExposesReturnsTwice - Set a flag that indicates if there's a call to
/// a "returns twice" function.
void setExposesReturnsTwice(bool B) {
ExposesReturnsTwice = B;
}
/// Returns true if the function contains any inline assembly.
bool hasInlineAsm() const {
return HasInlineAsm;
}
/// Set a flag that indicates that the function contains inline assembly.
void setHasInlineAsm(bool B) {
HasInlineAsm = B;
}
bool hasWinCFI() const {
return HasWinCFI;
}
void setHasWinCFI(bool v) { HasWinCFI = v; }
/// True if this function needs frame moves for debug or exceptions.
bool needsFrameMoves() const;
/// Get the function properties
const MachineFunctionProperties &getProperties() const { return Properties; }
MachineFunctionProperties &getProperties() { return Properties; }
/// getInfo - Keep track of various per-function pieces of information for
/// backends that would like to do so.
///
template<typename Ty>
Ty *getInfo() {
if (!MFInfo)
MFInfo = Ty::template create<Ty>(Allocator, *this);
return static_cast<Ty*>(MFInfo);
}
template<typename Ty>
const Ty *getInfo() const {
return const_cast<MachineFunction*>(this)->getInfo<Ty>();
}
/// Returns the denormal handling type for the default rounding mode of the
/// function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const;
/// getBlockNumbered - MachineBasicBlocks are automatically numbered when they
/// are inserted into the machine function. The block number for a machine
/// basic block can be found by using the MBB::getNumber method; this method
/// provides the inverse mapping.
MachineBasicBlock *getBlockNumbered(unsigned N) const {
assert(N < MBBNumbering.size() && "Illegal block number");
assert(MBBNumbering[N] && "Block was removed from the machine function!");
return MBBNumbering[N];
}
/// Should we be emitting segmented stack support for this function?
bool shouldSplitStack() const;
/// getNumBlockIDs - Return the number of MBB ID's allocated.
unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); }
/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
/// recomputes them. This guarantees that the MBB numbers are sequential,
/// dense, and match the ordering of the blocks within the function. If a
/// specific MachineBasicBlock is specified, only that block and those after
/// it are renumbered.
void RenumberBlocks(MachineBasicBlock *MBBFrom = nullptr);
/// print - Print out the MachineFunction in a format suitable for debugging
/// to the specified stream.
void print(raw_ostream &OS, const SlotIndexes* = nullptr) const;
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
/// program, displaying the CFG of the current function with the code for each
/// basic block inside. This depends on there being a 'dot' and 'gv' program
/// in your path.
void viewCFG() const;
/// viewCFGOnly - This function is meant for use from the debugger. It works
/// just like viewCFG, but it does not include the contents of basic blocks
/// into the nodes, just the label. If you are only interested in the CFG
/// this can make the graph smaller.
///
void viewCFGOnly() const;
/// dump - Print the current MachineFunction to cerr, useful for debugger use.
void dump() const;
/// Run the current MachineFunction through the machine code verifier, useful
/// for debugger use.
/// \returns true if no problems were found.
bool verify(Pass *p = nullptr, const char *Banner = nullptr,
bool AbortOnError = true) const;
// Provide accessors for the MachineBasicBlock list...
using iterator = BasicBlockListType::iterator;
using const_iterator = BasicBlockListType::const_iterator;
using const_reverse_iterator = BasicBlockListType::const_reverse_iterator;
using reverse_iterator = BasicBlockListType::reverse_iterator;
/// Support for MachineBasicBlock::getNextNode().
static BasicBlockListType MachineFunction::*
getSublistAccess(MachineBasicBlock *) {
return &MachineFunction::BasicBlocks;
}
/// addLiveIn - Add the specified physical register as a live-in value and
/// create a corresponding virtual register for it.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC);
//===--------------------------------------------------------------------===//
// BasicBlock accessor functions.
//
iterator begin() { return BasicBlocks.begin(); }
const_iterator begin() const { return BasicBlocks.begin(); }
iterator end () { return BasicBlocks.end(); }
const_iterator end () const { return BasicBlocks.end(); }
reverse_iterator rbegin() { return BasicBlocks.rbegin(); }
const_reverse_iterator rbegin() const { return BasicBlocks.rbegin(); }
reverse_iterator rend () { return BasicBlocks.rend(); }
const_reverse_iterator rend () const { return BasicBlocks.rend(); }
unsigned size() const { return (unsigned)BasicBlocks.size();}
bool empty() const { return BasicBlocks.empty(); }
const MachineBasicBlock &front() const { return BasicBlocks.front(); }
MachineBasicBlock &front() { return BasicBlocks.front(); }
const MachineBasicBlock & back() const { return BasicBlocks.back(); }
MachineBasicBlock & back() { return BasicBlocks.back(); }
void push_back (MachineBasicBlock *MBB) { BasicBlocks.push_back (MBB); }
void push_front(MachineBasicBlock *MBB) { BasicBlocks.push_front(MBB); }
void insert(iterator MBBI, MachineBasicBlock *MBB) {
BasicBlocks.insert(MBBI, MBB);
}
void splice(iterator InsertPt, iterator MBBI) {
BasicBlocks.splice(InsertPt, BasicBlocks, MBBI);
}
void splice(iterator InsertPt, MachineBasicBlock *MBB) {
BasicBlocks.splice(InsertPt, BasicBlocks, MBB);
}
void splice(iterator InsertPt, iterator MBBI, iterator MBBE) {
BasicBlocks.splice(InsertPt, BasicBlocks, MBBI, MBBE);
}
void remove(iterator MBBI) { BasicBlocks.remove(MBBI); }
void remove(MachineBasicBlock *MBBI) { BasicBlocks.remove(MBBI); }
void erase(iterator MBBI) { BasicBlocks.erase(MBBI); }
void erase(MachineBasicBlock *MBBI) { BasicBlocks.erase(MBBI); }
template <typename Comp>
void sort(Comp comp) {
BasicBlocks.sort(comp);
}
/// Return the number of \p MachineInstrs in this \p MachineFunction.
unsigned getInstructionCount() const {
unsigned InstrCount = 0;
for (const MachineBasicBlock &MBB : BasicBlocks)
InstrCount += MBB.size();
return InstrCount;
}
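/// A typical traversal over the blocks and instructions of a function, e.g.
/// from within a pass (sketch, assuming an existing MachineFunction &MF):
/// \code
///   for (MachineBasicBlock &MBB : MF)
///     for (MachineInstr &MI : MBB)
///       (void)MI; // inspect or transform MI here
/// \endcode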
//===--------------------------------------------------------------------===//
// Internal functions used to automatically number MachineBasicBlocks
/// Adds the MBB to the internal numbering. Returns the unique number
/// assigned to the MBB.
unsigned addToMBBNumbering(MachineBasicBlock *MBB) {
MBBNumbering.push_back(MBB);
return (unsigned)MBBNumbering.size()-1;
}
/// removeFromMBBNumbering - Remove the specified machine basic block from our
/// tracker; this is only intended to be used by the MachineBasicBlock
/// implementation.
void removeFromMBBNumbering(unsigned N) {
assert(N < MBBNumbering.size() && "Illegal basic block #");
MBBNumbering[N] = nullptr;
}
/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
/// of `new MachineInstr'.
MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL,
bool NoImplicit = false);
/// Create a new MachineInstr which is a copy of \p Orig, identical in all
/// ways except the instruction has no parent, prev, or next. Bundling flags
/// are reset.
///
/// Note: Clones a single instruction, not whole instruction bundles.
/// Does not perform target specific adjustments; consider using
/// TargetInstrInfo::duplicate() instead.
MachineInstr *CloneMachineInstr(const MachineInstr *Orig);
/// Clones the instruction or the whole instruction bundle \p Orig and inserts
/// it into \p MBB before \p InsertBefore.
///
/// Note: Does not perform target specific adjustments; consider using
/// TargetInstrInfo::duplicate() instead.
MachineInstr &CloneMachineInstrBundle(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig);
/// DeleteMachineInstr - Delete the given MachineInstr.
void DeleteMachineInstr(MachineInstr *MI);
/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
/// instead of `new MachineBasicBlock'.
MachineBasicBlock *CreateMachineBasicBlock(const BasicBlock *bb = nullptr);
/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
void DeleteMachineBasicBlock(MachineBasicBlock *MBB);
/// getMachineMemOperand - Allocate a new MachineMemOperand.
/// MachineMemOperands are owned by the MachineFunction and need not be
/// explicitly deallocated.
MachineMemOperand *getMachineMemOperand(
MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
Align base_alignment, const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr, SyncScope::ID SSID = SyncScope::System,
AtomicOrdering Ordering = AtomicOrdering::NotAtomic,
AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic);
MachineMemOperand *getMachineMemOperand(
MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy,
Align base_alignment, const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr, SyncScope::ID SSID = SyncScope::System,
AtomicOrdering Ordering = AtomicOrdering::NotAtomic,
AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic);
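/// A sketch of creating a memory operand for a 4-byte, 4-byte-aligned stack
/// load (hypothetical frame index FI; assumes an existing MachineFunction &MF):
/// \code
///   MachineMemOperand *MMO = MF.getMachineMemOperand(
///       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
///       4, Align(4));
/// \endcode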
/// getMachineMemOperand - Allocate a new MachineMemOperand by copying
/// an existing one, adjusting by an offset and using the given size.
/// MachineMemOperands are owned by the MachineFunction and need not be
/// explicitly deallocated.
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, LLT Ty);
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
return getMachineMemOperand(MMO, Offset, LLT::scalar(8 * Size));
}
/// getMachineMemOperand - Allocate a new MachineMemOperand by copying
/// an existing one, replacing only the MachinePointerInfo and size.
/// MachineMemOperands are owned by the MachineFunction and need not be
/// explicitly deallocated.
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
const MachinePointerInfo &PtrInfo,
uint64_t Size);
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
const MachinePointerInfo &PtrInfo,
LLT Ty);
/// Allocate a new MachineMemOperand by copying an existing one,
/// replacing only AliasAnalysis information. MachineMemOperands are owned
/// by the MachineFunction and need not be explicitly deallocated.
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
const AAMDNodes &AAInfo);
/// Allocate a new MachineMemOperand by copying an existing one,
/// replacing the flags. MachineMemOperands are owned
/// by the MachineFunction and need not be explicitly deallocated.
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
MachineMemOperand::Flags Flags);
using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
/// Allocate an array of MachineOperands. This is only intended for use by
/// internal MachineInstr functions.
MachineOperand *allocateOperandArray(OperandCapacity Cap) {
return OperandRecycler.allocate(Cap, Allocator);
}
/// Deallocate an array of MachineOperands and recycle the memory. This is
/// only intended for use by internal MachineInstr functions.
/// Cap must be the same capacity that was used to allocate the array.
void deallocateOperandArray(OperandCapacity Cap, MachineOperand *Array) {
OperandRecycler.deallocate(Cap, Array);
}
/// Allocate and initialize a register mask with a bit for each physical
/// register.
uint32_t *allocateRegMask();
ArrayRef<int> allocateShuffleMask(ArrayRef<int> Mask);
/// Allocate and construct an extra info structure for a `MachineInstr`.
///
/// This is allocated on the function's allocator and so lives the life of
/// the function.
MachineInstr::ExtraInfo *createMIExtraInfo(
ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol = nullptr,
MCSymbol *PostInstrSymbol = nullptr, MDNode *HeapAllocMarker = nullptr);
/// Allocate a string and populate it with the given external symbol name.
const char *createExternalSymbolName(StringRef Name);
//===--------------------------------------------------------------------===//
// Label Manipulation.
/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
/// If isLinkerPrivate is specified, an 'l' label is returned; otherwise a
/// normal 'L' label is returned.
MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate = false) const;
/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
/// base.
MCSymbol *getPICBaseSymbol() const;
/// Returns a reference to a list of cfi instructions in the function's
/// prologue. Used to construct frame maps for debug and exception handling
/// consumers.
const std::vector<MCCFIInstruction> &getFrameInstructions() const {
return FrameInstructions;
}
LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst);
/// Returns a reference to a list of symbols immediately following calls to
/// _setjmp in the function. Used to construct the longjmp target table used
/// by Windows Control Flow Guard.
const std::vector<MCSymbol *> &getLongjmpTargets() const {
return LongjmpTargets;
}
/// Add the specified symbol to the list of valid longjmp targets for Windows
/// Control Flow Guard.
void addLongjmpTarget(MCSymbol *Target) { LongjmpTargets.push_back(Target); }
/// Returns a reference to a list of symbols for which we have catchrets.
/// Used to construct the catchret target table used by Windows EHCont Guard.
const std::vector<MCSymbol *> &getCatchretTargets() const {
return CatchretTargets;
}
/// Add the specified symbol to the list of valid catchret targets for Windows
/// EHCont Guard.
void addCatchretTarget(MCSymbol *Target) {
CatchretTargets.push_back(Target);
}
/// \name Exception Handling
/// \{
bool callsEHReturn() const { return CallsEHReturn; }
void setCallsEHReturn(bool b) { CallsEHReturn = b; }
bool callsUnwindInit() const { return CallsUnwindInit; }
void setCallsUnwindInit(bool b) { CallsUnwindInit = b; }
bool hasEHCatchret() const { return HasEHCatchret; }
void setHasEHCatchret(bool V) { HasEHCatchret = V; }
bool hasEHScopes() const { return HasEHScopes; }
void setHasEHScopes(bool V) { HasEHScopes = V; }
bool hasEHFunclets() const { return HasEHFunclets; }
void setHasEHFunclets(bool V) { HasEHFunclets = V; }
/// Find or create a LandingPadInfo for the specified MachineBasicBlock.
LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);
/// Remap landing pad labels and remove any deleted landing pads.
void tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap = nullptr,
bool TidyIfNoBeginLabels = true);
/// Return a reference to the landing pad info for the current function.
const std::vector<LandingPadInfo> &getLandingPads() const {
return LandingPads;
}
/// Provide the begin and end labels of an invoke style call and associate it
/// with a try landing pad block.
void addInvoke(MachineBasicBlock *LandingPad,
MCSymbol *BeginLabel, MCSymbol *EndLabel);
/// Add a new landing pad, and extract the exception handling information from
/// the landingpad instruction. Returns the label ID for the landing pad
/// entry.
MCSymbol *addLandingPad(MachineBasicBlock *LandingPad);
/// Provide the catch typeinfo for a landing pad.
void addCatchTypeInfo(MachineBasicBlock *LandingPad,
ArrayRef<const GlobalValue *> TyInfo);
/// Provide the filter typeinfo for a landing pad.
void addFilterTypeInfo(MachineBasicBlock *LandingPad,
ArrayRef<const GlobalValue *> TyInfo);
/// Add a cleanup action for a landing pad.
void addCleanup(MachineBasicBlock *LandingPad);
void addSEHCatchHandler(MachineBasicBlock *LandingPad, const Function *Filter,
const BlockAddress *RecoverBA);
void addSEHCleanupHandler(MachineBasicBlock *LandingPad,
const Function *Cleanup);
/// Return the type id for the specified typeinfo. This is function wide.
unsigned getTypeIDFor(const GlobalValue *TI);
/// Return the id of the filter encoded by TyIds. This is function wide.
int getFilterIDFor(std::vector<unsigned> &TyIds);
/// Map the landing pad's EH symbol to the call site indexes.
void setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites);
/// Map the landing pad to its index. Used for Wasm exception handling.
void setWasmLandingPadIndex(const MachineBasicBlock *LPad, unsigned Index) {
WasmLPadToIndexMap[LPad] = Index;
}
/// Returns true if the landing pad has an associated index in wasm EH.
bool hasWasmLandingPadIndex(const MachineBasicBlock *LPad) const {
return WasmLPadToIndexMap.count(LPad);
}
/// Get the index in wasm EH for a given landing pad.
unsigned getWasmLandingPadIndex(const MachineBasicBlock *LPad) const {
assert(hasWasmLandingPadIndex(LPad));
return WasmLPadToIndexMap.lookup(LPad);
}
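/// A sketch of recording and querying a Wasm EH index (assuming an existing
/// MachineFunction &MF and a landing-pad block LPad):
/// \code
///   MF.setWasmLandingPadIndex(LPad, 0);
///   if (MF.hasWasmLandingPadIndex(LPad)) {
///     unsigned Idx = MF.getWasmLandingPadIndex(LPad);
///     (void)Idx;
///   }
/// \endcode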
/// Get the call site indexes for a landing pad EH symbol.
SmallVectorImpl<unsigned> &getCallSiteLandingPad(MCSymbol *Sym) {
assert(hasCallSiteLandingPad(Sym) &&
"missing call site number for landing pad!");
return LPadToCallSiteMap[Sym];
}
/// Return true if the landing pad EH symbol has an associated call site.
bool hasCallSiteLandingPad(MCSymbol *Sym) {
return !LPadToCallSiteMap[Sym].empty();
}
/// Map the begin label for a call site.
void setCallSiteBeginLabel(MCSymbol *BeginLabel, unsigned Site) {
CallSiteMap[BeginLabel] = Site;
}
/// Get the call site number for a begin label.
unsigned getCallSiteBeginLabel(MCSymbol *BeginLabel) const {
assert(hasCallSiteBeginLabel(BeginLabel) &&
"Missing call site number for EH_LABEL!");
return CallSiteMap.lookup(BeginLabel);
}
/// Return true if the begin label has a call site number associated with it.
bool hasCallSiteBeginLabel(MCSymbol *BeginLabel) const {
return CallSiteMap.count(BeginLabel);
}
/// Record annotations associated with a particular label.
void addCodeViewAnnotation(MCSymbol *Label, MDNode *MD) {
CodeViewAnnotations.push_back({Label, MD});
}
ArrayRef<std::pair<MCSymbol *, MDNode *>> getCodeViewAnnotations() const {
return CodeViewAnnotations;
}
/// Return a reference to the C++ typeinfo for the current function.
const std::vector<const GlobalValue *> &getTypeInfos() const {
return TypeInfos;
}
/// Return a reference to the typeids encoding filters used in the current
/// function.
const std::vector<unsigned> &getFilterIds() const {
return FilterIds;
}
/// \}
/// Collect information used to emit debugging information of a variable.
void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
int Slot, const DILocation *Loc) {
VariableDbgInfos.emplace_back(Var, Expr, Slot, Loc);
}
VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfos; }
const VariableDbgInfoMapTy &getVariableDbgInfo() const {
return VariableDbgInfos;
}
/// Start tracking the arguments passed to the call \p CallI.
void addCallArgsForwardingRegs(const MachineInstr *CallI,
CallSiteInfoImpl &&CallInfo) {
assert(CallI->isCandidateForCallSiteEntry());
bool Inserted =
CallSitesInfo.try_emplace(CallI, std::move(CallInfo)).second;
(void)Inserted;
assert(Inserted && "Call site info not unique");
}
const CallSiteInfoMap &getCallSitesInfo() const {
return CallSitesInfo;
}
/// The following functions update call site info. They should be called
/// before removing, replacing, or copying a call instruction.
/// Erase the call site info for \p MI. It is used to remove a call
/// instruction from the instruction stream.
void eraseCallSiteInfo(const MachineInstr *MI);
/// Copy the call site info from \p Old to \p New. This is used when making a
/// copy of an instruction that will be inserted at a different point of the
/// instruction stream.
void copyCallSiteInfo(const MachineInstr *Old,
const MachineInstr *New);
const std::vector<char> &getBBSectionsSymbolPrefix() const {
return BBSectionsSymbolPrefix;
}
/// Move the call site info from \p Old to \p New. This function is used when
/// replacing one call instruction with another one to the same callee.
void moveCallSiteInfo(const MachineInstr *Old,
const MachineInstr *New);
unsigned getNewDebugInstrNum() {
return ++DebugInstrNumberingCount;
}
};
//===--------------------------------------------------------------------===//
// GraphTraits specializations for function basic block graphs (CFGs)
//===--------------------------------------------------------------------===//
// Provide specializations of GraphTraits to be able to treat a
// machine function as a graph of machine basic blocks... these are
// the same as the machine basic block iterators, except that the root
// node is implicitly the first node of the function.
//
template <> struct GraphTraits<MachineFunction*> :
public GraphTraits<MachineBasicBlock*> {
static NodeRef getEntryNode(MachineFunction *F) { return &F->front(); }
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
using nodes_iterator = pointer_iterator<MachineFunction::iterator>;
static nodes_iterator nodes_begin(MachineFunction *F) {
return nodes_iterator(F->begin());
}
static nodes_iterator nodes_end(MachineFunction *F) {
return nodes_iterator(F->end());
}
static unsigned size (MachineFunction *F) { return F->size(); }
};
template <> struct GraphTraits<const MachineFunction*> :
public GraphTraits<const MachineBasicBlock*> {
static NodeRef getEntryNode(const MachineFunction *F) { return &F->front(); }
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>;
static nodes_iterator nodes_begin(const MachineFunction *F) {
return nodes_iterator(F->begin());
}
static nodes_iterator nodes_end (const MachineFunction *F) {
return nodes_iterator(F->end());
}
static unsigned size (const MachineFunction *F) {
return F->size();
}
};
// Provide specializations of GraphTraits to be able to treat a function as a
// graph of basic blocks... and to walk it in inverse order. Inverse order for
// a function is considered to be when traversing the predecessor edges of a BB
// instead of the successor edges.
//
template <> struct GraphTraits<Inverse<MachineFunction*>> :
public GraphTraits<Inverse<MachineBasicBlock*>> {
static NodeRef getEntryNode(Inverse<MachineFunction *> G) {
return &G.Graph->front();
}
};
template <> struct GraphTraits<Inverse<const MachineFunction*>> :
public GraphTraits<Inverse<const MachineBasicBlock*>> {
static NodeRef getEntryNode(Inverse<const MachineFunction *> G) {
return &G.Graph->front();
}
};
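// With these specializations in place, the generic graph algorithms apply
// directly to machine functions. A minimal sketch (assumes
// "llvm/ADT/DepthFirstIterator.h" is included and MachineFunction &MF exists):
//
//   for (MachineBasicBlock *MBB : depth_first(&MF))
//     (void)MBB; // blocks visited in depth-first order from the entry block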
class MachineFunctionAnalysisManager;
void verifyMachineFunction(MachineFunctionAnalysisManager *,
const std::string &Banner,
const MachineFunction &MF);
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEFUNCTION_H
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Function.h b/contrib/llvm-project/llvm/include/llvm/IR/Function.h
index e0094e2afff2..c33e8e94b467 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Function.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Function.h
@@ -1,971 +1,972 @@
//===- llvm/Function.h - Class to represent a single function ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the Function class, which represents a
// single function/procedure in LLVM.
//
// A function basically consists of a list of basic blocks, a list of arguments,
// and a symbol table.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_FUNCTION_H
#define LLVM_IR_FUNCTION_H
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/SymbolTableListTraits.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
namespace llvm {
namespace Intrinsic {
typedef unsigned ID;
}
class AssemblyAnnotationWriter;
class Constant;
class DISubprogram;
class LLVMContext;
class Module;
template <typename T> class Optional;
class raw_ostream;
class Type;
class User;
class BranchProbabilityInfo;
class BlockFrequencyInfo;
-class Function : public GlobalObject, public ilist_node<Function> {
+class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
+ public ilist_node<Function> {
public:
using BasicBlockListType = SymbolTableList<BasicBlock>;
// BasicBlock iterators...
using iterator = BasicBlockListType::iterator;
using const_iterator = BasicBlockListType::const_iterator;
using arg_iterator = Argument *;
using const_arg_iterator = const Argument *;
private:
// Important things that make up a function!
BasicBlockListType BasicBlocks; ///< The basic blocks
mutable Argument *Arguments = nullptr; ///< The formal arguments
size_t NumArgs;
std::unique_ptr<ValueSymbolTable>
SymTab; ///< Symbol table of args/instructions
AttributeList AttributeSets; ///< Parameter attributes
/*
* Value::SubclassData
*
* bit 0 : HasLazyArguments
* bit 1 : HasPrefixData
* bit 2 : HasPrologueData
* bit 3 : HasPersonalityFn
* bits 4-13 : CallingConvention
* bits 14 : HasGC
* bits 15 : [reserved]
*/
/// Bits from GlobalObject::GlobalObjectSubclassData.
enum {
/// Whether this function is materializable.
IsMaterializableBit = 0,
};
friend class SymbolTableListTraits<Function>;
/// hasLazyArguments/CheckLazyArguments - The argument list of a function is
/// built on demand, so that the list isn't allocated until the first client
/// needs it. The hasLazyArguments predicate returns true if the arg list
/// hasn't been set up yet.
public:
bool hasLazyArguments() const {
return getSubclassDataFromValue() & (1<<0);
}
private:
void CheckLazyArguments() const {
if (hasLazyArguments())
BuildLazyArguments();
}
void BuildLazyArguments() const;
void clearArguments();
/// Function ctor - If the (optional) Module argument is specified, the
/// function is automatically inserted into the end of the function list for
/// the module.
///
Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
const Twine &N = "", Module *M = nullptr);
public:
Function(const Function&) = delete;
void operator=(const Function&) = delete;
~Function();
// This is here to help easily convert from FunctionT * (Function * or
// MachineFunction *) in BlockFrequencyInfoImpl to Function * by calling
// FunctionT->getFunction().
const Function &getFunction() const { return *this; }
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
unsigned AddrSpace, const Twine &N = "",
Module *M = nullptr) {
return new Function(Ty, Linkage, AddrSpace, N, M);
}
// TODO: remove this once all users have been updated to pass an AddrSpace
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
const Twine &N = "", Module *M = nullptr) {
return new Function(Ty, Linkage, static_cast<unsigned>(-1), N, M);
}
/// Creates a new function and attaches it to a module.
///
/// Places the function in the program address space as specified
/// by the module's data layout.
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
const Twine &N, Module &M);
/// Creates a function with some attributes recorded in llvm.module.flags
/// applied.
///
/// Use this when synthesizing new functions that need attributes that would
/// have been set by command line options.
static Function *createWithDefaultAttr(FunctionType *Ty, LinkageTypes Linkage,
unsigned AddrSpace,
const Twine &N = "",
Module *M = nullptr);
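/// A minimal creation sketch (assuming an existing Module &M and its
/// LLVMContext &Ctx):
/// \code
///   FunctionType *FT =
///       FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
///   Function *F = Function::Create(FT, Function::ExternalLinkage, "callee", M);
/// \endcode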
// Provide fast operand accessors.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
/// Returns the number of non-debug IR instructions in this function.
/// This is equivalent to the sum of the sizes of each basic block contained
/// within this function.
unsigned getInstructionCount() const;
/// Returns the FunctionType for me.
FunctionType *getFunctionType() const {
return cast<FunctionType>(getValueType());
}
/// Returns the type of the ret val.
Type *getReturnType() const { return getFunctionType()->getReturnType(); }
/// getContext - Return a reference to the LLVMContext associated with this
/// function.
LLVMContext &getContext() const;
/// isVarArg - Return true if this function takes a variable number of
/// arguments.
bool isVarArg() const { return getFunctionType()->isVarArg(); }
bool isMaterializable() const {
return getGlobalObjectSubClassData() & (1 << IsMaterializableBit);
}
void setIsMaterializable(bool V) {
unsigned Mask = 1 << IsMaterializableBit;
setGlobalObjectSubClassData((~Mask & getGlobalObjectSubClassData()) |
(V ? Mask : 0u));
}
/// getIntrinsicID - This method returns the ID number of the specified
/// function, or Intrinsic::not_intrinsic if the function is not an
/// intrinsic, or if the pointer is null. This value is always defined to be
/// zero to allow easy checking for whether a function is intrinsic or not.
/// The particular intrinsic functions which correspond to this value are
/// defined in llvm/Intrinsics.h.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY { return IntID; }
/// isIntrinsic - Returns true if the function's name starts with "llvm.".
/// It's possible for this function to return true while getIntrinsicID()
/// returns Intrinsic::not_intrinsic!
bool isIntrinsic() const { return HasLLVMReservedName; }
/// isTargetIntrinsic - Returns true if IID is an intrinsic specific to a
/// certain target. If it is a generic intrinsic false is returned.
static bool isTargetIntrinsic(Intrinsic::ID IID);
/// isTargetIntrinsic - Returns true if this function is an intrinsic and the
/// intrinsic is specific to a certain target. If this is not an intrinsic
/// or a generic intrinsic, false is returned.
bool isTargetIntrinsic() const;
/// Returns true if the function is one of the "Constrained Floating-Point
/// Intrinsics". Returns false if not, and returns false when
/// getIntrinsicID() returns Intrinsic::not_intrinsic.
bool isConstrainedFPIntrinsic() const;
static Intrinsic::ID lookupIntrinsicID(StringRef Name);
/// Recalculate the ID for this function if it is an Intrinsic defined
/// in llvm/Intrinsics.h. Sets the intrinsic ID to Intrinsic::not_intrinsic
/// if the name of this function does not match an intrinsic in that header.
/// Note, this method does not need to be called directly, as it is called
/// from Value::setName() whenever the name of this function changes.
void recalculateIntrinsicID();
/// getCallingConv()/setCallingConv(CC) - These methods get and set the
/// calling convention of this function. The enum values for the known
/// calling conventions are defined in CallingConv.h.
CallingConv::ID getCallingConv() const {
return static_cast<CallingConv::ID>((getSubclassDataFromValue() >> 4) &
CallingConv::MaxID);
}
void setCallingConv(CallingConv::ID CC) {
auto ID = static_cast<unsigned>(CC);
assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4));
}
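/// For example (sketch, assuming an existing Function *F):
/// \code
///   F->setCallingConv(CallingConv::Fast);
///   assert(F->getCallingConv() == CallingConv::Fast);
/// \endcode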
/// Return the attribute list for this Function.
AttributeList getAttributes() const { return AttributeSets; }
/// Set the attribute list for this Function.
void setAttributes(AttributeList Attrs) { AttributeSets = Attrs; }
/// Add function attributes to this function.
void addFnAttr(Attribute::AttrKind Kind) {
addAttribute(AttributeList::FunctionIndex, Kind);
}
/// Add function attributes to this function.
void addFnAttr(StringRef Kind, StringRef Val = StringRef()) {
addAttribute(AttributeList::FunctionIndex,
Attribute::get(getContext(), Kind, Val));
}
/// Add function attributes to this function.
void addFnAttr(Attribute Attr) {
addAttribute(AttributeList::FunctionIndex, Attr);
}
/// Remove function attributes from this function.
void removeFnAttr(Attribute::AttrKind Kind) {
removeAttribute(AttributeList::FunctionIndex, Kind);
}
/// Remove function attribute from this function.
void removeFnAttr(StringRef Kind) {
setAttributes(getAttributes().removeAttribute(
getContext(), AttributeList::FunctionIndex, Kind));
}
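/// A sketch of adding, querying, and removing function attributes (assuming an
/// existing Function *F):
/// \code
///   F->addFnAttr(Attribute::NoInline);
///   F->addFnAttr("frame-pointer", "all");
///   if (F->hasFnAttribute(Attribute::NoInline))
///     F->removeFnAttr(Attribute::NoInline);
/// \endcode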
/// A function will have the "coroutine.presplit" attribute if it's
/// a coroutine and has not yet gone through the full CoroSplit pass.
bool isPresplitCoroutine() const {
return hasFnAttribute("coroutine.presplit");
}
enum ProfileCountType { PCT_Invalid, PCT_Real, PCT_Synthetic };
/// Class to represent profile counts.
///
/// This class represents both real and synthetic profile counts.
class ProfileCount {
private:
uint64_t Count;
ProfileCountType PCT;
static ProfileCount Invalid;
public:
ProfileCount() : Count(-1), PCT(PCT_Invalid) {}
ProfileCount(uint64_t Count, ProfileCountType PCT)
: Count(Count), PCT(PCT) {}
bool hasValue() const { return PCT != PCT_Invalid; }
uint64_t getCount() const { return Count; }
ProfileCountType getType() const { return PCT; }
bool isSynthetic() const { return PCT == PCT_Synthetic; }
explicit operator bool() { return hasValue(); }
bool operator!() const { return !hasValue(); }
// Update the count retaining the same profile count type.
ProfileCount &setCount(uint64_t C) {
Count = C;
return *this;
}
static ProfileCount getInvalid() { return ProfileCount(-1, PCT_Invalid); }
};
/// Set the entry count for this function.
///
/// Entry count is the number of times this function was executed based on
/// pgo data. \p Imports points to a set of GUIDs that need to
/// be imported by the function for sample PGO, to enable the same inlines as
/// the profiled optimized binary.
void setEntryCount(ProfileCount Count,
const DenseSet<GlobalValue::GUID> *Imports = nullptr);
/// A convenience wrapper for setting entry count
void setEntryCount(uint64_t Count, ProfileCountType Type = PCT_Real,
const DenseSet<GlobalValue::GUID> *Imports = nullptr);
/// Get the entry count for this function.
///
/// Entry count is the number of times the function was executed.
/// When AllowSynthetic is false, only pgo_data will be returned.
ProfileCount getEntryCount(bool AllowSynthetic = false) const;
/// Return true if the function is annotated with profile data.
///
/// Presence of entry counts from a profile run implies the function has
/// profile annotations. If IncludeSynthetic is false, only return true
/// when the profile data is real.
bool hasProfileData(bool IncludeSynthetic = false) const {
return getEntryCount(IncludeSynthetic).hasValue();
}
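/// For example (sketch, assuming an existing Function *F):
/// \code
///   F->setEntryCount(100, Function::PCT_Real);
///   if (F->hasProfileData()) {
///     uint64_t Count = F->getEntryCount().getCount();
///     (void)Count;
///   }
/// \endcode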
/// Returns the set of GUIDs that need to be imported to the function for
/// sample PGO, to enable the same inlines as the profiled optimized binary.
DenseSet<GlobalValue::GUID> getImportGUIDs() const;
/// Set the section prefix for this function.
void setSectionPrefix(StringRef Prefix);
/// Get the section prefix for this function.
Optional<StringRef> getSectionPrefix() const;
/// Return true if the function has the attribute.
bool hasFnAttribute(Attribute::AttrKind Kind) const {
return AttributeSets.hasFnAttribute(Kind);
}
/// Return true if the function has the attribute.
bool hasFnAttribute(StringRef Kind) const {
return AttributeSets.hasFnAttribute(Kind);
}
/// Return the attribute for the given attribute kind.
Attribute getFnAttribute(Attribute::AttrKind Kind) const {
return getAttribute(AttributeList::FunctionIndex, Kind);
}
/// Return the attribute for the given attribute kind.
Attribute getFnAttribute(StringRef Kind) const {
return getAttribute(AttributeList::FunctionIndex, Kind);
}
/// Return the stack alignment for the function.
unsigned getFnStackAlignment() const {
if (!hasFnAttribute(Attribute::StackAlignment))
return 0;
if (const auto MA =
AttributeSets.getStackAlignment(AttributeList::FunctionIndex))
return MA->value();
return 0;
}
/// Return the stack alignment for the function.
MaybeAlign getFnStackAlign() const {
if (!hasFnAttribute(Attribute::StackAlignment))
return None;
return AttributeSets.getStackAlignment(AttributeList::FunctionIndex);
}
/// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
/// to use during code generation.
bool hasGC() const {
return getSubclassDataFromValue() & (1<<14);
}
const std::string &getGC() const;
void setGC(std::string Str);
void clearGC();
/// Returns true if the function has ssp, sspstrong, or sspreq fn attrs.
bool hasStackProtectorFnAttr() const;
/// adds the attribute to the list of attributes.
void addAttribute(unsigned i, Attribute::AttrKind Kind);
/// adds the attribute to the list of attributes.
void addAttribute(unsigned i, Attribute Attr);
/// adds the attributes to the list of attributes.
void addAttributes(unsigned i, const AttrBuilder &Attrs);
/// adds the attribute to the list of attributes for the given arg.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
/// adds the attribute to the list of attributes for the given arg.
void addParamAttr(unsigned ArgNo, Attribute Attr);
/// adds the attributes to the list of attributes for the given arg.
void addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs);
/// removes the attribute from the list of attributes.
void removeAttribute(unsigned i, Attribute::AttrKind Kind);
/// removes the attribute from the list of attributes.
void removeAttribute(unsigned i, StringRef Kind);
/// removes the attributes from the list of attributes.
void removeAttributes(unsigned i, const AttrBuilder &Attrs);
/// removes the attribute from the list of attributes.
void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
/// removes the attribute from the list of attributes.
void removeParamAttr(unsigned ArgNo, StringRef Kind);
/// removes the attribute from the list of attributes.
void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs);
/// removes, from the list of attributes, noundef and other attributes that
/// imply undefined behavior if an `undef` or `poison` value is passed.
void removeParamUndefImplyingAttrs(unsigned ArgNo);
/// check if an attribute is in the list of attributes.
bool hasAttribute(unsigned i, Attribute::AttrKind Kind) const {
return getAttributes().hasAttribute(i, Kind);
}
/// check if an attribute is in the list of attributes.
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
return getAttributes().hasParamAttribute(ArgNo, Kind);
}
/// gets the specified attribute from the list of attributes.
Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
return getAttributes().getParamAttr(ArgNo, Kind);
}
/// gets the attribute from the list of attributes.
Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
return AttributeSets.getAttribute(i, Kind);
}
/// gets the attribute from the list of attributes.
Attribute getAttribute(unsigned i, StringRef Kind) const {
return AttributeSets.getAttribute(i, Kind);
}
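/// A sketch of working with parameter attributes (assuming an existing
/// Function *F with at least one argument):
/// \code
///   F->addParamAttr(0, Attribute::NoAlias);
///   assert(F->hasParamAttribute(0, Attribute::NoAlias));
/// \endcode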
/// adds the dereferenceable attribute to the list of attributes.
void addDereferenceableAttr(unsigned i, uint64_t Bytes);
/// adds the dereferenceable attribute to the list of attributes for
/// the given arg.
void addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes);
/// adds the dereferenceable_or_null attribute to the list of
/// attributes.
void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
/// adds the dereferenceable_or_null attribute to the list of
/// attributes for the given arg.
void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes);
/// Extract the alignment for a call or parameter (0=unknown).
/// FIXME: Remove this function once transition to Align is over.
/// Use getParamAlign() instead.
unsigned getParamAlignment(unsigned ArgNo) const {
if (const auto MA = getParamAlign(ArgNo))
return MA->value();
return 0;
}
MaybeAlign getParamAlign(unsigned ArgNo) const {
return AttributeSets.getParamAlignment(ArgNo);
}
MaybeAlign getParamStackAlign(unsigned ArgNo) const {
return AttributeSets.getParamStackAlignment(ArgNo);
}
/// Extract the byval type for a parameter.
Type *getParamByValType(unsigned ArgNo) const {
return AttributeSets.getParamByValType(ArgNo);
}
/// Extract the sret type for a parameter.
Type *getParamStructRetType(unsigned ArgNo) const {
return AttributeSets.getParamStructRetType(ArgNo);
}
/// Extract the inalloca type for a parameter.
Type *getParamInAllocaType(unsigned ArgNo) const {
return AttributeSets.getParamInAllocaType(ArgNo);
}
/// Extract the byref type for a parameter.
Type *getParamByRefType(unsigned ArgNo) const {
return AttributeSets.getParamByRefType(ArgNo);
}
/// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
/// @param i AttributeList index, referring to a return value or argument.
uint64_t getDereferenceableBytes(unsigned i) const {
return AttributeSets.getDereferenceableBytes(i);
}
/// Extract the number of dereferenceable bytes for a parameter.
/// @param ArgNo Index of an argument, with 0 being the first function arg.
uint64_t getParamDereferenceableBytes(unsigned ArgNo) const {
return AttributeSets.getParamDereferenceableBytes(ArgNo);
}
/// Extract the number of dereferenceable_or_null bytes for a call or
/// parameter (0=unknown).
/// @param i AttributeList index, referring to a return value or argument.
uint64_t getDereferenceableOrNullBytes(unsigned i) const {
return AttributeSets.getDereferenceableOrNullBytes(i);
}
/// Extract the number of dereferenceable_or_null bytes for a
/// parameter.
/// @param ArgNo AttributeList ArgNo, referring to an argument.
uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const {
return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo);
}
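/// For example (sketch, assuming an existing Function *F whose first argument
/// is a pointer):
/// \code
///   F->addDereferenceableParamAttr(0, 8);
///   assert(F->getParamDereferenceableBytes(0) == 8);
/// \endcode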
/// Determine if the function does not access memory.
bool doesNotAccessMemory() const {
return hasFnAttribute(Attribute::ReadNone);
}
void setDoesNotAccessMemory() {
addFnAttr(Attribute::ReadNone);
}
/// Determine if the function does not access or only reads memory.
bool onlyReadsMemory() const {
return doesNotAccessMemory() || hasFnAttribute(Attribute::ReadOnly);
}
void setOnlyReadsMemory() {
addFnAttr(Attribute::ReadOnly);
}
/// Determine if the function does not access or only writes memory.
bool doesNotReadMemory() const {
return doesNotAccessMemory() || hasFnAttribute(Attribute::WriteOnly);
}
void setDoesNotReadMemory() {
addFnAttr(Attribute::WriteOnly);
}
/// Determine if the call can access memory only using pointers based
/// on its arguments.
bool onlyAccessesArgMemory() const {
return hasFnAttribute(Attribute::ArgMemOnly);
}
void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); }
/// Determine if the function may only access memory that is
/// inaccessible from the IR.
bool onlyAccessesInaccessibleMemory() const {
return hasFnAttribute(Attribute::InaccessibleMemOnly);
}
void setOnlyAccessesInaccessibleMemory() {
addFnAttr(Attribute::InaccessibleMemOnly);
}
/// Determine if the function may only access memory that is
/// either inaccessible from the IR or pointed to by its arguments.
bool onlyAccessesInaccessibleMemOrArgMem() const {
return hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly);
}
void setOnlyAccessesInaccessibleMemOrArgMem() {
addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
}
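/// A sketch of setting and querying the memory-access attributes (assuming an
/// existing Function *F):
/// \code
///   F->setOnlyReadsMemory();
///   assert(F->onlyReadsMemory());
/// \endcode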
/// Determine if the function cannot return.
bool doesNotReturn() const {
return hasFnAttribute(Attribute::NoReturn);
}
void setDoesNotReturn() {
addFnAttr(Attribute::NoReturn);
}
/// Determine if the function should not perform indirect branch tracking.
bool doesNoCfCheck() const { return hasFnAttribute(Attribute::NoCfCheck); }
/// Determine if the function cannot unwind.
bool doesNotThrow() const {
return hasFnAttribute(Attribute::NoUnwind);
}
void setDoesNotThrow() {
addFnAttr(Attribute::NoUnwind);
}
/// Determine if the call cannot be duplicated.
bool cannotDuplicate() const {
return hasFnAttribute(Attribute::NoDuplicate);
}
void setCannotDuplicate() {
addFnAttr(Attribute::NoDuplicate);
}
/// Determine if the call is convergent.
bool isConvergent() const {
return hasFnAttribute(Attribute::Convergent);
}
void setConvergent() {
addFnAttr(Attribute::Convergent);
}
void setNotConvergent() {
removeFnAttr(Attribute::Convergent);
}
/// Determine if the call is speculatable (free of side effects and safe to
/// execute speculatively).
bool isSpeculatable() const {
return hasFnAttribute(Attribute::Speculatable);
}
void setSpeculatable() {
addFnAttr(Attribute::Speculatable);
}
/// Determine if the call cannot deallocate (free) memory.
bool doesNotFreeMemory() const {
return onlyReadsMemory() || hasFnAttribute(Attribute::NoFree);
}
void setDoesNotFreeMemory() {
addFnAttr(Attribute::NoFree);
}
/// Determine if the call does not synchronize with other threads.
bool hasNoSync() const {
return hasFnAttribute(Attribute::NoSync);
}
void setNoSync() {
addFnAttr(Attribute::NoSync);
}
/// Determine if the function is known not to recurse, directly or
/// indirectly.
bool doesNotRecurse() const {
return hasFnAttribute(Attribute::NoRecurse);
}
void setDoesNotRecurse() {
addFnAttr(Attribute::NoRecurse);
}
/// Determine if the function is required to make forward progress.
bool mustProgress() const {
return hasFnAttribute(Attribute::MustProgress) ||
hasFnAttribute(Attribute::WillReturn);
}
void setMustProgress() { addFnAttr(Attribute::MustProgress); }
/// Determine if the function will return.
bool willReturn() const { return hasFnAttribute(Attribute::WillReturn); }
void setWillReturn() { addFnAttr(Attribute::WillReturn); }
/// True if the ABI mandates (or the user requested) that this
/// function be in an unwind table.
bool hasUWTable() const {
return hasFnAttribute(Attribute::UWTable);
}
void setHasUWTable() {
addFnAttr(Attribute::UWTable);
}
/// True if this function needs an unwind table.
bool needsUnwindTableEntry() const {
return hasUWTable() || !doesNotThrow() || hasPersonalityFn();
}
/// Determine if the function returns a structure through first
/// or second pointer argument.
bool hasStructRetAttr() const {
return AttributeSets.hasParamAttribute(0, Attribute::StructRet) ||
AttributeSets.hasParamAttribute(1, Attribute::StructRet);
}
/// Determine if the parameter or return value is marked with NoAlias
/// attribute.
bool returnDoesNotAlias() const {
return AttributeSets.hasAttribute(AttributeList::ReturnIndex,
Attribute::NoAlias);
}
void setReturnDoesNotAlias() {
addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
}
/// Do not optimize this function (-O0).
bool hasOptNone() const { return hasFnAttribute(Attribute::OptimizeNone); }
/// Optimize this function for minimum size (-Oz).
bool hasMinSize() const { return hasFnAttribute(Attribute::MinSize); }
/// Optimize this function for size (-Os) or minimum size (-Oz).
bool hasOptSize() const {
return hasFnAttribute(Attribute::OptimizeForSize) || hasMinSize();
}
/// Returns the denormal handling type for the default rounding mode of the
/// function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const;
/// copyAttributesFrom - copy all additional attributes (those not needed to
/// create a Function) from the Function Src to this one.
void copyAttributesFrom(const Function *Src);
/// deleteBody - This method deletes the body of the function, and converts
/// the linkage to external.
///
void deleteBody() {
dropAllReferences();
setLinkage(ExternalLinkage);
}
/// removeFromParent - This method unlinks 'this' from the containing module,
/// but does not delete it.
///
void removeFromParent();
/// eraseFromParent - This method unlinks 'this' from the containing module
/// and deletes it.
///
void eraseFromParent();
/// Steal arguments from another function.
///
/// Drop this function's arguments and splice in the ones from \c Src.
/// Requires that this has no function body.
void stealArgumentListFrom(Function &Src);
/// Get the underlying elements of the Function... the basic block list is
/// empty for external functions.
///
const BasicBlockListType &getBasicBlockList() const { return BasicBlocks; }
BasicBlockListType &getBasicBlockList() { return BasicBlocks; }
static BasicBlockListType Function::*getSublistAccess(BasicBlock*) {
return &Function::BasicBlocks;
}
const BasicBlock &getEntryBlock() const { return front(); }
BasicBlock &getEntryBlock() { return front(); }
//===--------------------------------------------------------------------===//
// Symbol Table Accessing functions...
/// getSymbolTable() - Return the symbol table if any, otherwise nullptr.
///
inline ValueSymbolTable *getValueSymbolTable() { return SymTab.get(); }
inline const ValueSymbolTable *getValueSymbolTable() const {
return SymTab.get();
}
//===--------------------------------------------------------------------===//
// BasicBlock iterator forwarding functions
//
iterator begin() { return BasicBlocks.begin(); }
const_iterator begin() const { return BasicBlocks.begin(); }
iterator end () { return BasicBlocks.end(); }
const_iterator end () const { return BasicBlocks.end(); }
size_t size() const { return BasicBlocks.size(); }
bool empty() const { return BasicBlocks.empty(); }
const BasicBlock &front() const { return BasicBlocks.front(); }
BasicBlock &front() { return BasicBlocks.front(); }
const BasicBlock &back() const { return BasicBlocks.back(); }
BasicBlock &back() { return BasicBlocks.back(); }
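/// A typical traversal over the blocks and instructions of a function body
/// (sketch, assuming an existing Function &F that is not a declaration):
/// \code
///   for (BasicBlock &BB : F)
///     for (Instruction &I : BB)
///       (void)I; // inspect or transform I here
/// \endcode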
/// @name Function Argument Iteration
/// @{
arg_iterator arg_begin() {
CheckLazyArguments();
return Arguments;
}
const_arg_iterator arg_begin() const {
CheckLazyArguments();
return Arguments;
}
arg_iterator arg_end() {
CheckLazyArguments();
return Arguments + NumArgs;
}
const_arg_iterator arg_end() const {
CheckLazyArguments();
return Arguments + NumArgs;
}
Argument* getArg(unsigned i) const {
assert (i < NumArgs && "getArg() out of range!");
CheckLazyArguments();
return Arguments + i;
}
iterator_range<arg_iterator> args() {
return make_range(arg_begin(), arg_end());
}
iterator_range<const_arg_iterator> args() const {
return make_range(arg_begin(), arg_end());
}
/// @}
size_t arg_size() const { return NumArgs; }
bool arg_empty() const { return arg_size() == 0; }
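/// For example, counting pointer arguments (sketch, assuming an existing
/// Function &F):
/// \code
///   unsigned NumPtrArgs = 0;
///   for (const Argument &A : F.args())
///     if (A.getType()->isPointerTy())
///       ++NumPtrArgs;
/// \endcode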
/// Check whether this function has a personality function.
bool hasPersonalityFn() const {
return getSubclassDataFromValue() & (1<<3);
}
/// Get the personality function associated with this function.
Constant *getPersonalityFn() const;
void setPersonalityFn(Constant *Fn);
/// Check whether this function has prefix data.
bool hasPrefixData() const {
return getSubclassDataFromValue() & (1<<1);
}
/// Get the prefix data associated with this function.
Constant *getPrefixData() const;
void setPrefixData(Constant *PrefixData);
/// Check whether this function has prologue data.
bool hasPrologueData() const {
return getSubclassDataFromValue() & (1<<2);
}
/// Get the prologue data associated with this function.
Constant *getPrologueData() const;
void setPrologueData(Constant *PrologueData);
/// Print the function to an output stream with an optional
/// AssemblyAnnotationWriter.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr,
bool ShouldPreserveUseListOrder = false,
bool IsForDebug = false) const;
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
/// program, displaying the CFG of the current function with the code for each
/// basic block inside. This depends on there being a 'dot' and 'gv' program
/// in your path.
///
void viewCFG() const;
/// Extended form to print edge weights.
void viewCFG(bool ViewCFGOnly, const BlockFrequencyInfo *BFI,
const BranchProbabilityInfo *BPI) const;
/// viewCFGOnly - This function is meant for use from the debugger. It works
/// just like viewCFG, but it does not include the contents of basic blocks
/// into the nodes, just the label. If you are only interested in the CFG
/// this can make the graph smaller.
///
void viewCFGOnly() const;
/// Extended form to print edge weights.
void viewCFGOnly(const BlockFrequencyInfo *BFI,
const BranchProbabilityInfo *BPI) const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
return V->getValueID() == Value::FunctionVal;
}
/// dropAllReferences() - This method causes all the subinstructions to "let
/// go" of all references that they are maintaining. This allows one to
/// 'delete' a whole module at a time, even though there may be circular
/// references... first all references are dropped, and all use counts go to
/// zero. Then everything is deleted for real. Note that no operations are
/// valid on an object that has "dropped all references", except operator
/// delete.
///
/// Since no other object in the module can have references into the body of a
/// function, dropping all references deletes the entire body of the function,
/// including any contained basic blocks.
///
void dropAllReferences();
/// hasAddressTaken - returns true if there are any uses of this function
/// other than direct calls or invokes to it, or blockaddress expressions.
/// Optionally passes back an offending user for diagnostic purposes,
/// ignores callback uses, assume-like pointer annotation calls, and
/// references in llvm.used and llvm.compiler.used variables.
///
bool hasAddressTaken(const User ** = nullptr,
bool IgnoreCallbackUses = false,
bool IgnoreAssumeLikeCalls = true,
bool IgnoreLLVMUsed = false) const;
/// isDefTriviallyDead - Return true if it is trivially safe to remove
/// this function definition from the module (because it isn't externally
/// visible, does not have its address taken, and has no callers). To make
/// this more accurate, call removeDeadConstantUsers first.
bool isDefTriviallyDead() const;
/// callsFunctionThatReturnsTwice - Return true if the function has a call to
/// setjmp or other function that gcc recognizes as "returning twice".
bool callsFunctionThatReturnsTwice() const;
/// Set the attached subprogram.
///
/// Calls \a setMetadata() with \a LLVMContext::MD_dbg.
void setSubprogram(DISubprogram *SP);
/// Get the attached subprogram.
///
/// Calls \a getMetadata() with \a LLVMContext::MD_dbg and casts the result
/// to \a DISubprogram.
DISubprogram *getSubprogram() const;
/// Returns true if we should emit debug info for profiling.
bool isDebugInfoForProfiling() const;
/// Check if null pointer dereferencing is considered undefined behavior for
/// the function.
/// Return value: false => null pointer dereference is undefined.
/// Return value: true => null pointer dereference is not undefined.
bool nullPointerIsDefined() const;
private:
void allocHungoffUselist();
template<int Idx> void setHungoffOperand(Constant *C);
/// Shadow Value::setValueSubclassData with a private forwarding method so
/// that subclasses cannot accidentally use it.
void setValueSubclassData(unsigned short D) {
Value::setValueSubclassData(D);
}
void setValueSubclassDataBit(unsigned Bit, bool On);
};
/// Check whether null pointer dereferencing is considered undefined behavior
/// for a given function or an address space.
/// Null pointer access in non-zero address space is not considered undefined.
/// Return value: false => null pointer dereference is undefined.
/// Return value: true => null pointer dereference is not undefined.
bool NullPointerIsDefined(const Function *F, unsigned AS = 0);
template <>
struct OperandTraits<Function> : public HungoffOperandTraits<3> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(Function, Value)
} // end namespace llvm
#endif // LLVM_IR_FUNCTION_H
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Module.h b/contrib/llvm-project/llvm/include/llvm/IR/Module.h
index 97aea5aedf22..bd3a196c7181 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Module.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Module.h
@@ -1,964 +1,964 @@
//===- llvm/Module.h - C++ class to represent a VM module -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// @file
/// Module.h This file contains the declarations for the Module class.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_MODULE_H
#define LLVM_IR_MODULE_H
#include "llvm-c/Types.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/IR/SymbolTableListTraits.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/CodeGen.h"
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <string>
#include <vector>
namespace llvm {
class Error;
class FunctionType;
class GVMaterializer;
class LLVMContext;
class MemoryBuffer;
class ModuleSummaryIndex;
class Pass;
class RandomNumberGenerator;
template <class PtrType> class SmallPtrSetImpl;
class StructType;
class VersionTuple;
/// A Module instance is used to store all the information related to an
/// LLVM module. Modules are the top level container of all other LLVM
/// Intermediate Representation (IR) objects. Each module directly contains a
/// list of global variables, a list of functions, a list of libraries (or
/// other modules) this module depends on, a symbol table, and various data
/// about the target's characteristics.
///
/// A module maintains a GlobalValRefMap object that is used to hold all
/// constant references to global variables in the module. When a global
/// variable is destroyed, it should have no entries in the GlobalValueRefMap.
/// The main container class for the LLVM Intermediate Representation.
-class Module {
-/// @name Types And Enumerations
-/// @{
+class LLVM_EXTERNAL_VISIBILITY Module {
+ /// @name Types And Enumerations
+ /// @{
public:
/// The type for the list of global variables.
using GlobalListType = SymbolTableList<GlobalVariable>;
/// The type for the list of functions.
using FunctionListType = SymbolTableList<Function>;
/// The type for the list of aliases.
using AliasListType = SymbolTableList<GlobalAlias>;
/// The type for the list of ifuncs.
using IFuncListType = SymbolTableList<GlobalIFunc>;
/// The type for the list of named metadata.
using NamedMDListType = ilist<NamedMDNode>;
/// The type of the comdat "symbol" table.
using ComdatSymTabType = StringMap<Comdat>;
/// The type for mapping names to named metadata.
using NamedMDSymTabType = StringMap<NamedMDNode *>;
/// The Global Variable iterator.
using global_iterator = GlobalListType::iterator;
/// The Global Variable constant iterator.
using const_global_iterator = GlobalListType::const_iterator;
/// The Function iterators.
using iterator = FunctionListType::iterator;
/// The Function constant iterator
using const_iterator = FunctionListType::const_iterator;
/// The Function reverse iterator.
using reverse_iterator = FunctionListType::reverse_iterator;
/// The Function constant reverse iterator.
using const_reverse_iterator = FunctionListType::const_reverse_iterator;
/// The Global Alias iterators.
using alias_iterator = AliasListType::iterator;
/// The Global Alias constant iterator
using const_alias_iterator = AliasListType::const_iterator;
/// The Global IFunc iterators.
using ifunc_iterator = IFuncListType::iterator;
/// The Global IFunc constant iterator
using const_ifunc_iterator = IFuncListType::const_iterator;
/// The named metadata iterators.
using named_metadata_iterator = NamedMDListType::iterator;
/// The named metadata constant iterators.
using const_named_metadata_iterator = NamedMDListType::const_iterator;
/// This enumeration defines the supported behaviors of module flags.
enum ModFlagBehavior {
/// Emits an error if two values disagree, otherwise the resulting value is
/// that of the operands.
Error = 1,
/// Emits a warning if two values disagree. The result value will be the
/// operand for the flag from the first module being linked.
Warning = 2,
/// Adds a requirement that another module flag be present and have a
/// specified value after linking is performed. The value must be a metadata
/// pair, where the first element of the pair is the ID of the module flag
/// to be restricted, and the second element of the pair is the value the
/// module flag should be restricted to. This behavior can be used to
/// restrict the allowable results (via triggering of an error) of linking
/// IDs with the **Override** behavior.
Require = 3,
/// Uses the specified value, regardless of the behavior or value of the
/// other module. If both modules specify **Override**, but the values
/// differ, an error will be emitted.
Override = 4,
/// Appends the two values, which are required to be metadata nodes.
Append = 5,
/// Appends the two values, which are required to be metadata
/// nodes. However, duplicate entries in the second list are dropped
/// during the append operation.
AppendUnique = 6,
/// Takes the max of the two values, which are required to be integers.
Max = 7,
// Markers:
ModFlagBehaviorFirstVal = Error,
ModFlagBehaviorLastVal = Max
};
/// Checks if Metadata represents a valid ModFlagBehavior, and stores the
/// converted result in MFB.
static bool isValidModFlagBehavior(Metadata *MD, ModFlagBehavior &MFB);
/// Check if the given module flag metadata represents a valid module flag,
/// and store the flag behavior, the key string and the value metadata.
static bool isValidModuleFlag(const MDNode &ModFlag, ModFlagBehavior &MFB,
MDString *&Key, Metadata *&Val);
struct ModuleFlagEntry {
ModFlagBehavior Behavior;
MDString *Key;
Metadata *Val;
ModuleFlagEntry(ModFlagBehavior B, MDString *K, Metadata *V)
: Behavior(B), Key(K), Val(V) {}
};
/// @}
/// @name Member Variables
/// @{
private:
LLVMContext &Context; ///< The LLVMContext from which types and
///< constants are allocated.
GlobalListType GlobalList; ///< The Global Variables in the module
FunctionListType FunctionList; ///< The Functions in the module
AliasListType AliasList; ///< The Aliases in the module
IFuncListType IFuncList; ///< The IFuncs in the module
NamedMDListType NamedMDList; ///< The named metadata in the module
std::string GlobalScopeAsm; ///< Inline Asm at global scope.
std::unique_ptr<ValueSymbolTable> ValSymTab; ///< Symbol table for values
ComdatSymTabType ComdatSymTab; ///< Symbol table for COMDATs
std::unique_ptr<MemoryBuffer>
OwnedMemoryBuffer; ///< Memory buffer directly owned by this
///< module, for legacy clients only.
std::unique_ptr<GVMaterializer>
Materializer; ///< Used to materialize GlobalValues
std::string ModuleID; ///< Human readable identifier for the module
std::string SourceFileName; ///< Original source file name for module,
///< recorded in bitcode.
std::string TargetTriple; ///< Platform target triple Module compiled on
///< Format: (arch)(sub)-(vendor)-(sys)-(abi)
NamedMDSymTabType NamedMDSymTab; ///< NamedMDNode names.
DataLayout DL; ///< DataLayout associated with the module
StringMap<unsigned>
CurrentIntrinsicIds; ///< Keep track of the current unique id count for
///< the specified intrinsic basename.
DenseMap<std::pair<Intrinsic::ID, const FunctionType *>, unsigned>
UniquedIntrinsicNames; ///< Keep track of uniqued names of intrinsics
///< based on unnamed types. The combination of
///< ID and FunctionType maps to the extension that
///< is used to make the intrinsic name unique.
friend class Constant;
/// @}
/// @name Constructors
/// @{
public:
/// The Module constructor. Note that there is no default constructor. You
/// must provide a name for the module upon construction.
explicit Module(StringRef ModuleID, LLVMContext& C);
/// The module destructor. This will dropAllReferences.
~Module();
/// @}
/// @name Module Level Accessors
/// @{
/// Get the module identifier which is, essentially, the name of the module.
/// @returns the module identifier as a string
const std::string &getModuleIdentifier() const { return ModuleID; }
/// Returns the number of non-debug IR instructions in the module.
/// This is equivalent to the sum of the IR instruction counts of each
/// function contained in the module.
unsigned getInstructionCount() const;
/// Get the module's original source file name. When compiling from
/// bitcode, this is taken from a bitcode record where it was recorded.
/// For other compiles it is the same as the ModuleID, which would
/// contain the source file name.
const std::string &getSourceFileName() const { return SourceFileName; }
/// Get a short "name" for the module.
///
/// This is useful for debugging or logging. It is essentially a convenience
/// wrapper around getModuleIdentifier().
StringRef getName() const { return ModuleID; }
/// Get the data layout string for the module's target platform. This is
/// equivalent to getDataLayout().getStringRepresentation().
const std::string &getDataLayoutStr() const {
return DL.getStringRepresentation();
}
/// Get the data layout for the module's target platform.
const DataLayout &getDataLayout() const;
/// Get the target triple which is a string describing the target host.
/// @returns a string containing the target triple.
const std::string &getTargetTriple() const { return TargetTriple; }
/// Get the global data context.
/// @returns LLVMContext - a container for LLVM's global information
LLVMContext &getContext() const { return Context; }
/// Get any module-scope inline assembly blocks.
/// @returns a string containing the module-scope inline assembly blocks.
const std::string &getModuleInlineAsm() const { return GlobalScopeAsm; }
/// Get a RandomNumberGenerator salted for use with this module. The
/// RNG can be seeded via -rng-seed=<uint64> and is salted with the
/// ModuleID and the provided pass salt. The returned RNG should not
/// be shared across threads or passes.
///
/// A unique RNG per pass ensures a reproducible random stream even
/// when other randomness consuming passes are added or removed. In
/// addition, the random stream will be reproducible across LLVM
/// versions when the pass does not change.
std::unique_ptr<RandomNumberGenerator> createRNG(const StringRef Name) const;
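// Illustrative sketch (hypothetical usage; "MyPass" is an assumed pass name):
// a per-pass, reproducible random stream salted with the module identifier.
//   std::unique_ptr<RandomNumberGenerator> RNG = M.createRNG("MyPass");
//   uint64_t Bits = (*RNG)(); // RandomNumberGenerator is a uniform bit generator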
/// Return true if size-info optimization remark is enabled, false
/// otherwise.
bool shouldEmitInstrCountChangedRemark() {
return getContext().getDiagHandlerPtr()->isAnalysisRemarkEnabled(
"size-info");
}
/// @}
/// @name Module Level Mutators
/// @{
/// Set the module identifier.
void setModuleIdentifier(StringRef ID) { ModuleID = std::string(ID); }
/// Set the module's original source file name.
void setSourceFileName(StringRef Name) { SourceFileName = std::string(Name); }
/// Set the data layout
void setDataLayout(StringRef Desc);
void setDataLayout(const DataLayout &Other);
/// Set the target triple.
void setTargetTriple(StringRef T) { TargetTriple = std::string(T); }
/// Set the module-scope inline assembly blocks.
/// A trailing newline is added if the input doesn't have one.
void setModuleInlineAsm(StringRef Asm) {
GlobalScopeAsm = std::string(Asm);
if (!GlobalScopeAsm.empty() && GlobalScopeAsm.back() != '\n')
GlobalScopeAsm += '\n';
}
/// Append to the module-scope inline assembly blocks.
/// A trailing newline is added if the input doesn't have one.
void appendModuleInlineAsm(StringRef Asm) {
GlobalScopeAsm += Asm;
if (!GlobalScopeAsm.empty() && GlobalScopeAsm.back() != '\n')
GlobalScopeAsm += '\n';
}
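// Illustrative sketch (hypothetical symbol names): both calls get a trailing
// '\n' appended, so successive appends stay line-separated in the final
// module-level asm string.
//   M.setModuleInlineAsm(".globl my_sym");
//   M.appendModuleInlineAsm(".set my_alias, my_sym");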
/// @}
/// @name Generic Value Accessors
/// @{
/// Return the global value in the module with the specified name, of
/// arbitrary type. This method returns null if a global with the specified
/// name is not found.
GlobalValue *getNamedValue(StringRef Name) const;
/// Return the number of global values in the module.
unsigned getNumNamedValues() const;
/// Return a unique non-zero ID for the specified metadata kind. This ID is
/// uniqued across modules in the current LLVMContext.
unsigned getMDKindID(StringRef Name) const;
/// Populate the client-supplied SmallVector with the names of the custom
/// metadata IDs registered in this LLVMContext.
void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
/// Populate client supplied SmallVector with the bundle tags registered in
/// this LLVMContext. The bundle tags are ordered by increasing bundle IDs.
/// \see LLVMContext::getOperandBundleTagID
void getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const;
std::vector<StructType *> getIdentifiedStructTypes() const;
/// Return a unique name for an intrinsic whose mangling is based on an
/// unnamed type. The Proto represents the function prototype.
std::string getUniqueIntrinsicName(StringRef BaseName, Intrinsic::ID Id,
const FunctionType *Proto);
/// @}
/// @name Function Accessors
/// @{
/// Look up the specified function in the module symbol table. Three
/// possibilities:
/// 1. If it does not exist, add a prototype for the function and return it.
/// 2. Otherwise, if the existing function has the correct prototype, return
/// the existing function.
/// 3. Finally, if the function exists but has the wrong prototype, return the
/// function with a constantexpr cast to the right prototype.
///
/// In all cases, the returned value is a FunctionCallee wrapper around the
/// 'FunctionType *T' passed in, as well as a 'Value *' that is either the
/// Function or a bitcast of it to the requested type.
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T,
AttributeList AttributeList);
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T);
/// Look up the specified function in the module symbol table. If it does not
/// exist, add a prototype for the function and return it. This overload is
/// guaranteed to return a constant pointer to the specified function type, or
/// a ConstantExpr BitCast to that type if the named function has a different
/// type. This version of the method takes a list of function argument types,
/// which makes it easier for clients to use.
template <typename... ArgsTy>
FunctionCallee getOrInsertFunction(StringRef Name,
AttributeList AttributeList, Type *RetTy,
ArgsTy... Args) {
SmallVector<Type*, sizeof...(ArgsTy)> ArgTys{Args...};
return getOrInsertFunction(Name,
FunctionType::get(RetTy, ArgTys, false),
AttributeList);
}
/// Same as above, but without the attributes.
template <typename... ArgsTy>
FunctionCallee getOrInsertFunction(StringRef Name, Type *RetTy,
ArgsTy... Args) {
return getOrInsertFunction(Name, AttributeList{}, RetTy, Args...);
}
// Avoid an incorrect ordering that'd otherwise compile incorrectly.
template <typename... ArgsTy>
FunctionCallee
getOrInsertFunction(StringRef Name, AttributeList AttributeList,
FunctionType *Invalid, ArgsTy... Args) = delete;
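// Illustrative sketch (hypothetical usage; M is an assumed Module): declare or
// reuse an i32(i8*) prototype and inspect the resulting FunctionCallee.
//   LLVMContext &Ctx = M.getContext();
//   FunctionCallee Puts = M.getOrInsertFunction(
//       "puts", Type::getInt32Ty(Ctx), Type::getInt8PtrTy(Ctx));
//   // Puts.getFunctionType() is the i32(i8*) type built above;
//   // Puts.getCallee() is the existing Function or a bitcast of it.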
/// Look up the specified function in the module symbol table. If it does not
/// exist, return null.
Function *getFunction(StringRef Name) const;
/// @}
/// @name Global Variable Accessors
/// @{
/// Look up the specified global variable in the module symbol table. If it
/// does not exist, return null. If AllowInternal is set to true, this
/// function will also return globals that have InternalLinkage. By default,
/// such globals are not returned.
GlobalVariable *getGlobalVariable(StringRef Name) const {
return getGlobalVariable(Name, false);
}
GlobalVariable *getGlobalVariable(StringRef Name, bool AllowInternal) const;
GlobalVariable *getGlobalVariable(StringRef Name,
bool AllowInternal = false) {
return static_cast<const Module *>(this)->getGlobalVariable(Name,
AllowInternal);
}
/// Return the global variable in the module with the specified name, of
/// arbitrary type. This method returns null if a global with the specified
/// name is not found.
const GlobalVariable *getNamedGlobal(StringRef Name) const {
return getGlobalVariable(Name, true);
}
GlobalVariable *getNamedGlobal(StringRef Name) {
return const_cast<GlobalVariable *>(
static_cast<const Module *>(this)->getNamedGlobal(Name));
}
/// Look up the specified global in the module symbol table.
/// If it does not exist, invoke a callback to create a declaration of the
/// global and return it. The global is constantexpr casted to the expected
/// type if necessary.
Constant *
getOrInsertGlobal(StringRef Name, Type *Ty,
function_ref<GlobalVariable *()> CreateGlobalCallback);
/// Look up the specified global in the module symbol table. If required, this
/// overload constructs the global variable using its constructor's defaults.
Constant *getOrInsertGlobal(StringRef Name, Type *Ty);
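// Illustrative sketch (hypothetical usage; M is an assumed Module): create the
// global "counter" lazily with a specific linkage and initializer.
//   Type *I64 = Type::getInt64Ty(M.getContext());
//   Constant *Counter = M.getOrInsertGlobal("counter", I64, [&] {
//     return new GlobalVariable(M, I64, /*isConstant=*/false,
//                               GlobalValue::InternalLinkage,
//                               ConstantInt::get(I64, 0), "counter");
//   });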
/// @}
/// @name Global Alias Accessors
/// @{
/// Return the global alias in the module with the specified name, of
/// arbitrary type. This method returns null if a global with the specified
/// name is not found.
GlobalAlias *getNamedAlias(StringRef Name) const;
/// @}
/// @name Global IFunc Accessors
/// @{
/// Return the global ifunc in the module with the specified name, of
/// arbitrary type. This method returns null if a global with the specified
/// name is not found.
GlobalIFunc *getNamedIFunc(StringRef Name) const;
/// @}
/// @name Named Metadata Accessors
/// @{
/// Return the first NamedMDNode in the module with the specified name. This
/// method returns null if a NamedMDNode with the specified name is not found.
NamedMDNode *getNamedMetadata(const Twine &Name) const;
/// Return the named MDNode in the module with the specified name. This method
/// creates a new NamedMDNode and returns it if a NamedMDNode with the
/// specified name is not found.
NamedMDNode *getOrInsertNamedMetadata(StringRef Name);
/// Remove the given NamedMDNode from this module and delete it.
void eraseNamedMetadata(NamedMDNode *NMD);
/// @}
/// @name Comdat Accessors
/// @{
/// Return the Comdat in the module with the specified name. It is created
/// if it didn't already exist.
Comdat *getOrInsertComdat(StringRef Name);
/// @}
/// @name Module Flags Accessors
/// @{
/// Returns the module flags in the provided vector.
void getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const;
/// Return the corresponding value if Key appears in module flags, otherwise
/// return null.
Metadata *getModuleFlag(StringRef Key) const;
/// Returns the NamedMDNode in the module that represents module-level flags.
/// This method returns null if there are no module-level flags.
NamedMDNode *getModuleFlagsMetadata() const;
/// Returns the NamedMDNode in the module that represents module-level flags.
/// If module-level flags aren't found, it creates the named metadata that
/// contains them.
NamedMDNode *getOrInsertModuleFlagsMetadata();
/// Add a module-level flag to the module-level flags metadata. It will create
/// the module-level flags named metadata if it doesn't already exist.
void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, Metadata *Val);
void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, Constant *Val);
void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, uint32_t Val);
void addModuleFlag(MDNode *Node);
/// Like addModuleFlag but replaces the old module flag if it already exists.
void setModuleFlag(ModFlagBehavior Behavior, StringRef Key, Metadata *Val);
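// Illustrative sketch (hypothetical keys and values; M is an assumed Module):
// record flags and read one back. Merge semantics at link time follow the
// ModFlagBehavior documented above.
//   M.addModuleFlag(Module::Warning, "Dwarf Version", 4);
//   M.addModuleFlag(Module::Error, "wchar_size", 4);
//   if (auto *CI = mdconst::extract_or_null<ConstantInt>(
//           M.getModuleFlag("wchar_size")))
//     errs() << "wchar_size = " << CI->getZExtValue() << "\n";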
/// @}
/// @name Materialization
/// @{
/// Sets the GVMaterializer to GVM. This module must not yet have a
/// Materializer. To reset the materializer for a module that already has one,
/// call materializeAll first. Destroying this module will destroy
/// its materializer without materializing any more GlobalValues. Without
/// destroying the Module, there is no way to detach or destroy a materializer
/// without materializing all the GVs it controls, to avoid leaving orphan
/// unmaterialized GVs.
void setMaterializer(GVMaterializer *GVM);
/// Retrieves the GVMaterializer, if any, for this Module.
GVMaterializer *getMaterializer() const { return Materializer.get(); }
bool isMaterialized() const { return !getMaterializer(); }
/// Make sure the GlobalValue is fully read.
llvm::Error materialize(GlobalValue *GV);
/// Make sure all GlobalValues in this Module are fully read and clear the
/// Materializer.
llvm::Error materializeAll();
llvm::Error materializeMetadata();
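// Illustrative sketch (hypothetical usage; M is an assumed lazily-loaded
// Module): make sure everything is read before the module is used.
//   if (!M.isMaterialized())
//     if (llvm::Error Err = M.materializeAll())
//       return Err; // propagate, assuming the caller returns llvm::Error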
/// @}
/// @name Direct access to the globals list, functions list, and symbol table
/// @{
/// Get the Module's list of global variables (constant).
const GlobalListType &getGlobalList() const { return GlobalList; }
/// Get the Module's list of global variables.
GlobalListType &getGlobalList() { return GlobalList; }
static GlobalListType Module::*getSublistAccess(GlobalVariable*) {
return &Module::GlobalList;
}
/// Get the Module's list of functions (constant).
const FunctionListType &getFunctionList() const { return FunctionList; }
/// Get the Module's list of functions.
FunctionListType &getFunctionList() { return FunctionList; }
static FunctionListType Module::*getSublistAccess(Function*) {
return &Module::FunctionList;
}
/// Get the Module's list of aliases (constant).
const AliasListType &getAliasList() const { return AliasList; }
/// Get the Module's list of aliases.
AliasListType &getAliasList() { return AliasList; }
static AliasListType Module::*getSublistAccess(GlobalAlias*) {
return &Module::AliasList;
}
/// Get the Module's list of ifuncs (constant).
const IFuncListType &getIFuncList() const { return IFuncList; }
/// Get the Module's list of ifuncs.
IFuncListType &getIFuncList() { return IFuncList; }
static IFuncListType Module::*getSublistAccess(GlobalIFunc*) {
return &Module::IFuncList;
}
/// Get the Module's list of named metadata (constant).
const NamedMDListType &getNamedMDList() const { return NamedMDList; }
/// Get the Module's list of named metadata.
NamedMDListType &getNamedMDList() { return NamedMDList; }
static NamedMDListType Module::*getSublistAccess(NamedMDNode*) {
return &Module::NamedMDList;
}
/// Get the Module's symbol table of global variable and function identifiers (constant).
const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; }
/// Get the Module's symbol table of global variable and function identifiers.
ValueSymbolTable &getValueSymbolTable() { return *ValSymTab; }
/// Get the Module's symbol table for COMDATs (constant).
const ComdatSymTabType &getComdatSymbolTable() const { return ComdatSymTab; }
/// Get the Module's symbol table for COMDATs.
ComdatSymTabType &getComdatSymbolTable() { return ComdatSymTab; }
/// @}
/// @name Global Variable Iteration
/// @{
global_iterator global_begin() { return GlobalList.begin(); }
const_global_iterator global_begin() const { return GlobalList.begin(); }
global_iterator global_end () { return GlobalList.end(); }
const_global_iterator global_end () const { return GlobalList.end(); }
size_t global_size () const { return GlobalList.size(); }
bool global_empty() const { return GlobalList.empty(); }
iterator_range<global_iterator> globals() {
return make_range(global_begin(), global_end());
}
iterator_range<const_global_iterator> globals() const {
return make_range(global_begin(), global_end());
}
/// @}
/// @name Function Iteration
/// @{
iterator begin() { return FunctionList.begin(); }
const_iterator begin() const { return FunctionList.begin(); }
iterator end () { return FunctionList.end(); }
const_iterator end () const { return FunctionList.end(); }
reverse_iterator rbegin() { return FunctionList.rbegin(); }
const_reverse_iterator rbegin() const{ return FunctionList.rbegin(); }
reverse_iterator rend() { return FunctionList.rend(); }
const_reverse_iterator rend() const { return FunctionList.rend(); }
size_t size() const { return FunctionList.size(); }
bool empty() const { return FunctionList.empty(); }
iterator_range<iterator> functions() {
return make_range(begin(), end());
}
iterator_range<const_iterator> functions() const {
return make_range(begin(), end());
}
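// Illustrative sketch (hypothetical usage; M is an assumed Module): walk the
// module with the range accessors above.
//   unsigned NumDefinedFunctions = 0;
//   for (const Function &F : M.functions())
//     if (!F.isDeclaration())
//       ++NumDefinedFunctions;
//   for (const GlobalVariable &GV : M.globals())
//     if (GV.hasInitializer())
//       ++NumInitializedGlobals; // NumInitializedGlobals: an assumed counter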
/// @}
/// @name Alias Iteration
/// @{
alias_iterator alias_begin() { return AliasList.begin(); }
const_alias_iterator alias_begin() const { return AliasList.begin(); }
alias_iterator alias_end () { return AliasList.end(); }
const_alias_iterator alias_end () const { return AliasList.end(); }
size_t alias_size () const { return AliasList.size(); }
bool alias_empty() const { return AliasList.empty(); }
iterator_range<alias_iterator> aliases() {
return make_range(alias_begin(), alias_end());
}
iterator_range<const_alias_iterator> aliases() const {
return make_range(alias_begin(), alias_end());
}
/// @}
/// @name IFunc Iteration
/// @{
ifunc_iterator ifunc_begin() { return IFuncList.begin(); }
const_ifunc_iterator ifunc_begin() const { return IFuncList.begin(); }
ifunc_iterator ifunc_end () { return IFuncList.end(); }
const_ifunc_iterator ifunc_end () const { return IFuncList.end(); }
size_t ifunc_size () const { return IFuncList.size(); }
bool ifunc_empty() const { return IFuncList.empty(); }
iterator_range<ifunc_iterator> ifuncs() {
return make_range(ifunc_begin(), ifunc_end());
}
iterator_range<const_ifunc_iterator> ifuncs() const {
return make_range(ifunc_begin(), ifunc_end());
}
/// @}
/// @name Convenience iterators
/// @{
using global_object_iterator =
concat_iterator<GlobalObject, iterator, global_iterator>;
using const_global_object_iterator =
concat_iterator<const GlobalObject, const_iterator,
const_global_iterator>;
iterator_range<global_object_iterator> global_objects();
iterator_range<const_global_object_iterator> global_objects() const;
using global_value_iterator =
concat_iterator<GlobalValue, iterator, global_iterator, alias_iterator,
ifunc_iterator>;
using const_global_value_iterator =
concat_iterator<const GlobalValue, const_iterator, const_global_iterator,
const_alias_iterator, const_ifunc_iterator>;
iterator_range<global_value_iterator> global_values();
iterator_range<const_global_value_iterator> global_values() const;
/// @}
/// @name Named Metadata Iteration
/// @{
named_metadata_iterator named_metadata_begin() { return NamedMDList.begin(); }
const_named_metadata_iterator named_metadata_begin() const {
return NamedMDList.begin();
}
named_metadata_iterator named_metadata_end() { return NamedMDList.end(); }
const_named_metadata_iterator named_metadata_end() const {
return NamedMDList.end();
}
size_t named_metadata_size() const { return NamedMDList.size(); }
bool named_metadata_empty() const { return NamedMDList.empty(); }
iterator_range<named_metadata_iterator> named_metadata() {
return make_range(named_metadata_begin(), named_metadata_end());
}
iterator_range<const_named_metadata_iterator> named_metadata() const {
return make_range(named_metadata_begin(), named_metadata_end());
}
/// An iterator for DICompileUnits that skips those marked NoDebug.
class debug_compile_units_iterator {
NamedMDNode *CUs;
unsigned Idx;
void SkipNoDebugCUs();
public:
using iterator_category = std::input_iterator_tag;
using value_type = DICompileUnit *;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
using reference = value_type &;
explicit debug_compile_units_iterator(NamedMDNode *CUs, unsigned Idx)
: CUs(CUs), Idx(Idx) {
SkipNoDebugCUs();
}
debug_compile_units_iterator &operator++() {
++Idx;
SkipNoDebugCUs();
return *this;
}
debug_compile_units_iterator operator++(int) {
debug_compile_units_iterator T(*this);
++Idx;
return T;
}
bool operator==(const debug_compile_units_iterator &I) const {
return Idx == I.Idx;
}
bool operator!=(const debug_compile_units_iterator &I) const {
return Idx != I.Idx;
}
DICompileUnit *operator*() const;
DICompileUnit *operator->() const;
};
debug_compile_units_iterator debug_compile_units_begin() const {
auto *CUs = getNamedMetadata("llvm.dbg.cu");
return debug_compile_units_iterator(CUs, 0);
}
debug_compile_units_iterator debug_compile_units_end() const {
auto *CUs = getNamedMetadata("llvm.dbg.cu");
return debug_compile_units_iterator(CUs, CUs ? CUs->getNumOperands() : 0);
}
/// Return an iterator range over all DICompileUnits listed in this Module's
/// llvm.dbg.cu named metadata node that aren't explicitly marked as
/// NoDebug.
iterator_range<debug_compile_units_iterator> debug_compile_units() const {
auto *CUs = getNamedMetadata("llvm.dbg.cu");
return make_range(
debug_compile_units_iterator(CUs, 0),
debug_compile_units_iterator(CUs, CUs ? CUs->getNumOperands() : 0));
}
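// Illustrative sketch (hypothetical usage; M is an assumed Module): visit each
// compile unit that is not marked NoDebug.
//   for (DICompileUnit *CU : M.debug_compile_units())
//     errs() << CU->getFilename() << "\n";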
/// @}
/// Destroy ConstantArrays in LLVMContext if they are not used.
/// ConstantArrays constructed during linking can cause quadratic memory
/// explosion. Releasing all unused constants can cause a 20% LTO compile-time
/// slowdown for a large application.
///
/// NOTE: Constants are currently owned by LLVMContext. Therefore this can
/// only be called where all uses of the LLVMContext are understood.
void dropTriviallyDeadConstantArrays();
/// @name Utility functions for printing and dumping Module objects
/// @{
/// Print the module to an output stream with an optional
/// AssemblyAnnotationWriter. If \c ShouldPreserveUseListOrder, then include
/// uselistorder directives so that use-lists can be recreated when reading
/// the assembly.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW,
bool ShouldPreserveUseListOrder = false,
bool IsForDebug = false) const;
/// Dump the module to stderr (for debugging).
void dump() const;
/// This function causes all the subinstructions to "let go" of all references
/// that they are maintaining. This allows one to 'delete' a whole module at
/// a time, even though there may be circular references... first all
/// references are dropped, and all use counts go to zero. Then everything
/// is delete'd for real. Note that no operations are valid on an object
/// that has "dropped all references", except operator delete.
void dropAllReferences();
/// @}
/// @name Utility functions for querying Debug information.
/// @{
/// Returns the number of register parameters by checking
/// module flags.
unsigned getNumberRegisterParameters() const;
/// Returns the Dwarf Version by checking module flags.
unsigned getDwarfVersion() const;
/// Returns the DWARF format by checking module flags.
bool isDwarf64() const;
/// Returns the CodeView Version by checking module flags.
/// Returns zero if not present in module.
unsigned getCodeViewFlag() const;
/// @}
/// @name Utility functions for querying and setting PIC level
/// @{
/// Returns the PIC level (small or large model)
PICLevel::Level getPICLevel() const;
/// Set the PIC level (small or large model)
void setPICLevel(PICLevel::Level PL);
/// @}
/// @name Utility functions for querying and setting PIE level
/// @{
/// Returns the PIE level (small or large model)
PIELevel::Level getPIELevel() const;
/// Set the PIE level (small or large model)
void setPIELevel(PIELevel::Level PL);
/// @}
/// @name Utility functions for querying and setting code model
/// @{
/// Returns the code model (tiny, small, kernel, medium or large model)
Optional<CodeModel::Model> getCodeModel() const;
/// Set the code model (tiny, small, kernel, medium or large)
void setCodeModel(CodeModel::Model CL);
/// @}
/// @name Utility functions for querying and setting PGO summary
/// @{
/// Attach profile summary metadata to this module.
void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind);
/// Returns profile summary metadata. When IsCS is true, use the context
/// sensitive profile summary.
Metadata *getProfileSummary(bool IsCS) const;
/// @}
/// Returns whether semantic interposition is to be respected.
bool getSemanticInterposition() const;
/// Set whether semantic interposition is to be respected.
void setSemanticInterposition(bool);
/// Returns true if PLT should be avoided for RTLib calls.
bool getRtLibUseGOT() const;
/// Set that the PLT should be avoided for RTLib calls.
void setRtLibUseGOT();
/// Get/set whether synthesized functions should get the uwtable attribute.
bool getUwtable() const;
void setUwtable();
/// Get/set whether synthesized functions should get the "frame-pointer"
/// attribute.
FramePointerKind getFramePointer() const;
void setFramePointer(FramePointerKind Kind);
/// Get/set what kind of stack protector guard to use.
StringRef getStackProtectorGuard() const;
void setStackProtectorGuard(StringRef Kind);
/// Get/set which register to use as the stack protector guard register. The
/// empty string is equivalent to "global". Other values may be "tls" or
/// "sysreg".
StringRef getStackProtectorGuardReg() const;
void setStackProtectorGuardReg(StringRef Reg);
/// Get/set what offset from the stack protector to use.
int getStackProtectorGuardOffset() const;
void setStackProtectorGuardOffset(int Offset);
/// Get/set the stack alignment overridden from the default.
unsigned getOverrideStackAlignment() const;
void setOverrideStackAlignment(unsigned Align);
/// @name Utility functions for querying and setting the build SDK version
/// @{
/// Attach a build SDK version metadata to this module.
void setSDKVersion(const VersionTuple &V);
/// Get the build SDK version metadata.
///
/// An empty version is returned if no such metadata is attached.
VersionTuple getSDKVersion() const;
/// @}
/// Take ownership of the given memory buffer.
void setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB);
/// Set the partial sample profile ratio in the profile summary module flag,
/// if applicable.
void setPartialSampleProfileRatio(const ModuleSummaryIndex &Index);
};
/// Given "llvm.used" or "llvm.compiler.used" as a global name, collect the
/// initializer elements of that global in a SmallVector and return the global
/// itself.
GlobalVariable *collectUsedGlobalVariables(const Module &M,
SmallVectorImpl<GlobalValue *> &Vec,
bool CompilerUsed);
/// A raw_ostream inserter for modules.
inline raw_ostream &operator<<(raw_ostream &O, const Module &M) {
M.print(O, nullptr);
return O;
}
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Module, LLVMModuleRef)
/* LLVMModuleProviderRef exists for historical reasons, but now just holds a
* Module.
*/
inline Module *unwrap(LLVMModuleProviderRef MP) {
return reinterpret_cast<Module*>(MP);
}
} // end namespace llvm
#endif // LLVM_IR_MODULE_H
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
index f22d834b5e57..2d980e6935b3 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1,14093 +1,14093 @@
//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution analysis
// engine, which is used primarily to analyze expressions involving induction
// variables in loops.
//
// There are several aspects to this library. First is the representation of
// scalar expressions, which are represented as subclasses of the SCEV class.
// These classes are used to represent certain types of subexpressions that we
// can handle. We only create one SCEV of a particular shape, so
// pointer-comparisons for equality are legal.
//
// One important aspect of the SCEV objects is that they are never cyclic, even
// if there is a cycle in the dataflow for an expression (ie, a PHI node). If
// the PHI node is one of the idioms that we can represent (e.g., a polynomial
// recurrence) then we represent it directly as a recurrence node, otherwise we
// represent it as a SCEVUnknown node.
//
// In addition to being able to represent expressions of various types, we also
// have folders that are used to build the *canonical* representation for a
// particular expression. These folders are capable of using a variety of
// rewrite rules to simplify the expressions.
//
// Once the folders are defined, we can implement the more interesting
// higher-level code, such as the code that recognizes PHI nodes of various
// types, computes the execution count of a loop, etc.
//
// TODO: We should use these routines and value representations to implement
// dependence analysis!
//
//===----------------------------------------------------------------------===//
//
// There are several good references for the techniques used in this analysis.
//
// Chains of recurrences -- a method to expedite the evaluation
// of closed-form functions
// Olaf Bachmann, Paul S. Wang, Eugene V. Zima
//
// On computational properties of chains of recurrences
// Eugene V. Zima
//
// Symbolic Evaluation of Chains of Recurrences for Loop Optimization
// Robert A. van Engelen
//
// Efficient Symbolic Analysis for Optimizing Compilers
// Robert A. van Engelen
//
// Using the chains of recurrences algebra for data dependence testing and
// induction variable substitution
// MS Thesis, Johnie Birch
//
//===----------------------------------------------------------------------===//
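// Illustrative note (hypothetical usage; SE and Int64Ty are an assumed
// ScalarEvolution& and IntegerType*): because only one SCEV node is created
// per distinct shape, clients can compare expressions by pointer:
//   const SCEV *A = SE.getConstant(Int64Ty, 42);
//   const SCEV *B = SE.getConstant(Int64Ty, 42);
//   bool Same = (A == B); // true: both requests return the same uniqued node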
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionDivision.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <map>
#include <memory>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "scalar-evolution"
STATISTIC(NumArrayLenItCounts,
"Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
"Number of loops with predictable loop counts");
STATISTIC(NumTripCountsNotComputed,
"Number of loops without predictable loop counts");
STATISTIC(NumBruteForceTripCountsComputed,
"Number of loops with trip counts computed by force");
static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
cl::ZeroOrMore,
cl::desc("Maximum number of iterations SCEV will "
"symbolically execute a constant "
"derived loop"),
cl::init(100));
// FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean.
static cl::opt<bool> VerifySCEV(
"verify-scev", cl::Hidden,
cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
static cl::opt<bool> VerifySCEVStrict(
"verify-scev-strict", cl::Hidden,
cl::desc("Enable stricter verification with -verify-scev is passed"));
static cl::opt<bool>
VerifySCEVMap("verify-scev-maps", cl::Hidden,
cl::desc("Verify no dangling value in ScalarEvolution's "
"ExprValueMap (slow)"));
static cl::opt<bool> VerifyIR(
"scev-verify-ir", cl::Hidden,
cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"),
cl::init(false));
static cl::opt<unsigned> MulOpsInlineThreshold(
"scev-mulops-inline-threshold", cl::Hidden,
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
cl::init(32));
static cl::opt<unsigned> AddOpsInlineThreshold(
"scev-addops-inline-threshold", cl::Hidden,
cl::desc("Threshold for inlining addition operands into a SCEV"),
cl::init(500));
static cl::opt<unsigned> MaxSCEVCompareDepth(
"scalar-evolution-max-scev-compare-depth", cl::Hidden,
cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
cl::init(32));
static cl::opt<unsigned> MaxSCEVOperationsImplicationDepth(
"scalar-evolution-max-scev-operations-implication-depth", cl::Hidden,
cl::desc("Maximum depth of recursive SCEV operations implication analysis"),
cl::init(2));
static cl::opt<unsigned> MaxValueCompareDepth(
"scalar-evolution-max-value-compare-depth", cl::Hidden,
cl::desc("Maximum depth of recursive value complexity comparisons"),
cl::init(2));
static cl::opt<unsigned>
MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden,
cl::desc("Maximum depth of recursive arithmetics"),
cl::init(32));
static cl::opt<unsigned> MaxConstantEvolvingDepth(
"scalar-evolution-max-constant-evolving-depth", cl::Hidden,
cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));
static cl::opt<unsigned>
MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden,
cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"),
cl::init(8));
static cl::opt<unsigned>
MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
cl::desc("Max coefficients in AddRec during evolving"),
cl::init(8));
static cl::opt<unsigned>
HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden,
cl::desc("Size of the expression which is considered huge"),
cl::init(4096));
static cl::opt<bool>
ClassifyExpressions("scalar-evolution-classify-expressions",
cl::Hidden, cl::init(true),
cl::desc("When printing analysis, include information on every instruction"));
static cl::opt<bool> UseExpensiveRangeSharpening(
"scalar-evolution-use-expensive-range-sharpening", cl::Hidden,
cl::init(false),
cl::desc("Use more powerful methods of sharpening expression ranges. May "
"be costly in terms of compile time"));
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Implementation of the SCEV class.
//
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SCEV::dump() const {
print(dbgs());
dbgs() << '\n';
}
#endif
void SCEV::print(raw_ostream &OS) const {
switch (getSCEVType()) {
case scConstant:
cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
return;
case scPtrToInt: {
const SCEVPtrToIntExpr *PtrToInt = cast<SCEVPtrToIntExpr>(this);
const SCEV *Op = PtrToInt->getOperand();
OS << "(ptrtoint " << *Op->getType() << " " << *Op << " to "
<< *PtrToInt->getType() << ")";
return;
}
case scTruncate: {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
const SCEV *Op = Trunc->getOperand();
OS << "(trunc " << *Op->getType() << " " << *Op << " to "
<< *Trunc->getType() << ")";
return;
}
case scZeroExtend: {
const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
const SCEV *Op = ZExt->getOperand();
OS << "(zext " << *Op->getType() << " " << *Op << " to "
<< *ZExt->getType() << ")";
return;
}
case scSignExtend: {
const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
const SCEV *Op = SExt->getOperand();
OS << "(sext " << *Op->getType() << " " << *Op << " to "
<< *SExt->getType() << ")";
return;
}
case scAddRecExpr: {
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
OS << "{" << *AR->getOperand(0);
for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
OS << ",+," << *AR->getOperand(i);
OS << "}<";
if (AR->hasNoUnsignedWrap())
OS << "nuw><";
if (AR->hasNoSignedWrap())
OS << "nsw><";
if (AR->hasNoSelfWrap() &&
!AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
OS << "nw><";
AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ">";
return;
}
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
const char *OpStr = nullptr;
switch (NAry->getSCEVType()) {
case scAddExpr: OpStr = " + "; break;
case scMulExpr: OpStr = " * "; break;
case scUMaxExpr: OpStr = " umax "; break;
case scSMaxExpr: OpStr = " smax "; break;
case scUMinExpr:
OpStr = " umin ";
break;
case scSMinExpr:
OpStr = " smin ";
break;
default:
llvm_unreachable("There are no other nary expression types.");
}
OS << "(";
ListSeparator LS(OpStr);
for (const SCEV *Op : NAry->operands())
OS << LS << *Op;
OS << ")";
switch (NAry->getSCEVType()) {
case scAddExpr:
case scMulExpr:
if (NAry->hasNoUnsignedWrap())
OS << "<nuw>";
if (NAry->hasNoSignedWrap())
OS << "<nsw>";
break;
default:
// Nothing to print for other nary expressions.
break;
}
return;
}
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
return;
}
case scUnknown: {
const SCEVUnknown *U = cast<SCEVUnknown>(this);
Type *AllocTy;
if (U->isSizeOf(AllocTy)) {
OS << "sizeof(" << *AllocTy << ")";
return;
}
if (U->isAlignOf(AllocTy)) {
OS << "alignof(" << *AllocTy << ")";
return;
}
Type *CTy;
Constant *FieldNo;
if (U->isOffsetOf(CTy, FieldNo)) {
OS << "offsetof(" << *CTy << ", ";
FieldNo->printAsOperand(OS, false);
OS << ")";
return;
}
// Otherwise just print it normally.
U->getValue()->printAsOperand(OS, false);
return;
}
case scCouldNotCompute:
OS << "***COULDNOTCOMPUTE***";
return;
}
llvm_unreachable("Unknown SCEV kind!");
}
Type *SCEV::getType() const {
switch (getSCEVType()) {
case scConstant:
return cast<SCEVConstant>(this)->getType();
case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
return cast<SCEVCastExpr>(this)->getType();
case scAddRecExpr:
return cast<SCEVAddRecExpr>(this)->getType();
case scMulExpr:
return cast<SCEVMulExpr>(this)->getType();
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr:
return cast<SCEVMinMaxExpr>(this)->getType();
case scAddExpr:
return cast<SCEVAddExpr>(this)->getType();
case scUDivExpr:
return cast<SCEVUDivExpr>(this)->getType();
case scUnknown:
return cast<SCEVUnknown>(this)->getType();
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
}
llvm_unreachable("Unknown SCEV kind!");
}
bool SCEV::isZero() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isZero();
return false;
}
bool SCEV::isOne() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isOne();
return false;
}
bool SCEV::isAllOnesValue() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isMinusOne();
return false;
}
bool SCEV::isNonConstantNegative() const {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
if (!Mul) return false;
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getAPInt().isNegative();
}
SCEVCouldNotCompute::SCEVCouldNotCompute() :
SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {}
bool SCEVCouldNotCompute::classof(const SCEV *S) {
return S->getSCEVType() == scCouldNotCompute;
}
const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
FoldingSetNodeID ID;
ID.AddInteger(scConstant);
ID.AddPointer(V);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
return getConstant(ConstantInt::get(getContext(), Val));
}
const SCEV *
ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
return getConstant(ConstantInt::get(ITy, V, isSigned));
}
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
const SCEV *op, Type *ty)
: SCEV(ID, SCEVTy, computeExpressionSize(op)), Ty(ty) {
Operands[0] = op;
}
SCEVPtrToIntExpr::SCEVPtrToIntExpr(const FoldingSetNodeIDRef ID, const SCEV *Op,
Type *ITy)
: SCEVCastExpr(ID, scPtrToInt, Op, ITy) {
assert(getOperand()->getType()->isPointerTy() && Ty->isIntegerTy() &&
"Must be a non-bit-width-changing pointer-to-integer cast!");
}
SCEVIntegralCastExpr::SCEVIntegralCastExpr(const FoldingSetNodeIDRef ID,
SCEVTypes SCEVTy, const SCEV *op,
Type *ty)
: SCEVCastExpr(ID, SCEVTy, op, ty) {}
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op,
Type *ty)
: SCEVIntegralCastExpr(ID, scTruncate, op, ty) {
assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate non-integer value!");
}
SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
: SCEVIntegralCastExpr(ID, scZeroExtend, op, ty) {
assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot zero extend non-integer value!");
}
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
: SCEVIntegralCastExpr(ID, scSignExtend, op, ty) {
assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot sign extend non-integer value!");
}
void SCEVUnknown::deleted() {
// Clear this SCEVUnknown from various maps.
SE->forgetMemoizedResults(this);
// Remove this SCEVUnknown from the uniquing map.
SE->UniqueSCEVs.RemoveNode(this);
// Release the value.
setValPtr(nullptr);
}
void SCEVUnknown::allUsesReplacedWith(Value *New) {
// Remove this SCEVUnknown from the uniquing map.
SE->UniqueSCEVs.RemoveNode(this);
// Update this SCEVUnknown to point to the new value. This is needed
// because there may still be outstanding SCEVs which still point to
// this SCEVUnknown.
setValPtr(New);
}
bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue() &&
CE->getNumOperands() == 2)
if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
if (CI->isOne()) {
AllocTy = cast<GEPOperator>(CE)->getSourceElementType();
return true;
}
return false;
}
bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue()) {
Type *Ty = cast<GEPOperator>(CE)->getSourceElementType();
if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked() &&
CE->getNumOperands() == 3 &&
CE->getOperand(1)->isNullValue()) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
if (CI->isOne() &&
STy->getNumElements() == 2 &&
STy->getElementType(0)->isIntegerTy(1)) {
AllocTy = STy->getElementType(1);
return true;
}
}
}
return false;
}
bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getNumOperands() == 3 &&
CE->getOperand(0)->isNullValue() &&
CE->getOperand(1)->isNullValue()) {
Type *Ty = cast<GEPOperator>(CE)->getSourceElementType();
// Ignore vector types here so that ScalarEvolutionExpander doesn't
// emit getelementptrs that index into vectors.
if (Ty->isStructTy() || Ty->isArrayTy()) {
CTy = Ty;
FieldNo = CE->getOperand(2);
return true;
}
}
return false;
}
//===----------------------------------------------------------------------===//
// SCEV Utilities
//===----------------------------------------------------------------------===//
/// Compare the two values \p LV and \p RV in terms of their "complexity" where
/// "complexity" is a partial (and somewhat ad-hoc) relation used to order
/// operands in SCEV expressions. \p EqCache is a set of pairs of values that
/// have been previously deemed to be "equally complex" by this routine. It is
/// intended to avoid exponential time complexity in cases like:
///
/// %a = f(%x, %y)
/// %b = f(%a, %a)
/// %c = f(%b, %b)
///
/// %d = f(%x, %y)
/// %e = f(%d, %d)
/// %f = f(%e, %e)
///
/// CompareValueComplexity(%f, %c)
///
/// Since we do not continue running this routine on expression trees once we
/// have seen unequal values, there is no need to track them in the cache.
static int
CompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue,
const LoopInfo *const LI, Value *LV, Value *RV,
unsigned Depth) {
if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV))
return 0;
// Order pointer values after integer values. This helps SCEVExpander form
// GEPs.
bool LIsPointer = LV->getType()->isPointerTy(),
RIsPointer = RV->getType()->isPointerTy();
if (LIsPointer != RIsPointer)
return (int)LIsPointer - (int)RIsPointer;
// Compare getValueID values.
unsigned LID = LV->getValueID(), RID = RV->getValueID();
if (LID != RID)
return (int)LID - (int)RID;
// Sort arguments by their position.
if (const auto *LA = dyn_cast<Argument>(LV)) {
const auto *RA = cast<Argument>(RV);
unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
return (int)LArgNo - (int)RArgNo;
}
if (const auto *LGV = dyn_cast<GlobalValue>(LV)) {
const auto *RGV = cast<GlobalValue>(RV);
const auto IsGVNameSemantic = [&](const GlobalValue *GV) {
auto LT = GV->getLinkage();
return !(GlobalValue::isPrivateLinkage(LT) ||
GlobalValue::isInternalLinkage(LT));
};
// Use the names to distinguish the two values, but only if the
// names are semantically important.
if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV))
return LGV->getName().compare(RGV->getName());
}
// For instructions, compare their loop depth, and their operand count. This
// is pretty loose.
if (const auto *LInst = dyn_cast<Instruction>(LV)) {
const auto *RInst = cast<Instruction>(RV);
// Compare loop depths.
const BasicBlock *LParent = LInst->getParent(),
*RParent = RInst->getParent();
if (LParent != RParent) {
unsigned LDepth = LI->getLoopDepth(LParent),
RDepth = LI->getLoopDepth(RParent);
if (LDepth != RDepth)
return (int)LDepth - (int)RDepth;
}
// Compare the number of operands.
unsigned LNumOps = LInst->getNumOperands(),
RNumOps = RInst->getNumOperands();
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
for (unsigned Idx : seq(0u, LNumOps)) {
int Result =
CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx),
RInst->getOperand(Idx), Depth + 1);
if (Result != 0)
return Result;
}
}
EqCacheValue.unionSets(LV, RV);
return 0;
}
// Return negative, zero, or positive, if LHS is less than, equal to, or greater
// than RHS, respectively. A three-way result allows recursive comparisons to be
// more efficient.
// If the max analysis depth was reached, return None to indicate that we do
// not know whether they are equivalent.
static Optional<int>
CompareSCEVComplexity(EquivalenceClasses<const SCEV *> &EqCacheSCEV,
EquivalenceClasses<const Value *> &EqCacheValue,
const LoopInfo *const LI, const SCEV *LHS,
const SCEV *RHS, DominatorTree &DT, unsigned Depth = 0) {
// Fast-path: SCEVs are uniqued so we can do a quick equality check.
if (LHS == RHS)
return 0;
// Primarily, sort the SCEVs by their getSCEVType().
SCEVTypes LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
if (LType != RType)
return (int)LType - (int)RType;
if (EqCacheSCEV.isEquivalent(LHS, RHS))
return 0;
if (Depth > MaxSCEVCompareDepth)
return None;
// Aside from the getSCEVType() ordering, the particular ordering
// isn't very important except that it's beneficial to be consistent,
// so that (a + b) and (b + a) don't end up as different expressions.
switch (LType) {
case scUnknown: {
const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(),
RU->getValue(), Depth + 1);
if (X == 0)
EqCacheSCEV.unionSets(LHS, RHS);
return X;
}
case scConstant: {
const SCEVConstant *LC = cast<SCEVConstant>(LHS);
const SCEVConstant *RC = cast<SCEVConstant>(RHS);
// Compare constant values.
const APInt &LA = LC->getAPInt();
const APInt &RA = RC->getAPInt();
unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
if (LBitWidth != RBitWidth)
return (int)LBitWidth - (int)RBitWidth;
return LA.ult(RA) ? -1 : 1;
}
case scAddRecExpr: {
const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
// There is always a dominance relationship between two recs that are used by
// one SCEV, so we can safely sort recs by loop header dominance. We require
// such an order in getAddExpr.
const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
if (LLoop != RLoop) {
const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader();
assert(LHead != RHead && "Two loops share the same header?");
if (DT.dominates(LHead, RHead))
return 1;
else
assert(DT.dominates(RHead, LHead) &&
"No dominance between recurrences used by one SCEV?");
return -1;
}
// Addrec complexity grows with operand count.
unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
// Lexicographically compare.
for (unsigned i = 0; i != LNumOps; ++i) {
auto X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
LA->getOperand(i), RA->getOperand(i), DT,
Depth + 1);
if (X != 0)
return X;
}
EqCacheSCEV.unionSets(LHS, RHS);
return 0;
}
case scAddExpr:
case scMulExpr:
case scSMaxExpr:
case scUMaxExpr:
case scSMinExpr:
case scUMinExpr: {
const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
// Lexicographically compare n-ary expressions.
unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
for (unsigned i = 0; i != LNumOps; ++i) {
auto X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
LC->getOperand(i), RC->getOperand(i), DT,
Depth + 1);
if (X != 0)
return X;
}
EqCacheSCEV.unionSets(LHS, RHS);
return 0;
}
case scUDivExpr: {
const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
// Lexicographically compare udiv expressions.
auto X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(),
RC->getLHS(), DT, Depth + 1);
if (X != 0)
return X;
X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(),
RC->getRHS(), DT, Depth + 1);
if (X == 0)
EqCacheSCEV.unionSets(LHS, RHS);
return X;
}
case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend: {
const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
// Compare cast expressions by operand.
auto X =
CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getOperand(),
RC->getOperand(), DT, Depth + 1);
if (X == 0)
EqCacheSCEV.unionSets(LHS, RHS);
return X;
}
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
}
llvm_unreachable("Unknown SCEV kind!");
}
/// Given a list of SCEV objects, order them by their complexity, and group
/// objects of the same complexity together by value. When this routine is
/// finished, we know that any duplicates in the vector are consecutive and that
/// complexity is monotonically increasing.
///
/// Note that we take special precautions to ensure that we get deterministic
/// results from this routine. In other words, we don't want the results of
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
LoopInfo *LI, DominatorTree &DT) {
if (Ops.size() < 2) return; // Noop
EquivalenceClasses<const SCEV *> EqCacheSCEV;
EquivalenceClasses<const Value *> EqCacheValue;
// Whether LHS has provably less complexity than RHS.
auto IsLessComplex = [&](const SCEV *LHS, const SCEV *RHS) {
auto Complexity =
CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT);
return Complexity && *Complexity < 0;
};
if (Ops.size() == 2) {
// This is the common case, which also happens to be trivially simple.
// Special case it.
const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
if (IsLessComplex(RHS, LHS))
std::swap(LHS, RHS);
return;
}
// Do the rough sort by complexity.
llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) {
return IsLessComplex(LHS, RHS);
});
// Now that we are sorted by complexity, group elements of the same
// complexity. Note that this is, at worst, N^2, but the vector is likely to
// be extremely short in practice. Note that we take this approach because we
// do not want to depend on the addresses of the objects we are grouping.
for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
const SCEV *S = Ops[i];
unsigned Complexity = S->getSCEVType();
// If there are any objects of the same complexity and same value as this
// one, group them.
for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
if (Ops[j] == S) { // Found a duplicate.
// Move it to immediately after i'th element.
std::swap(Ops[i+1], Ops[j]);
++i; // no need to rescan it.
if (i == e-2) return; // Done!
}
}
}
}
/// Returns true if \p Ops contains a huge SCEV (an expression whose subtree
/// contains at least HugeExprThreshold nodes).
static bool hasHugeExpression(ArrayRef<const SCEV *> Ops) {
return any_of(Ops, [](const SCEV *S) {
return S->getExpressionSize() >= HugeExprThreshold;
});
}
//===----------------------------------------------------------------------===//
// Simple SCEV method implementations
//===----------------------------------------------------------------------===//
/// Compute BC(It, K). The result has width W. Assume K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
ScalarEvolution &SE,
Type *ResultTy) {
// Handle the simplest case efficiently.
if (K == 1)
return SE.getTruncateOrZeroExtend(It, ResultTy);
// We are using the following formula for BC(It, K):
//
// BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
//
// Suppose, W is the bitwidth of the return value. We must be prepared for
// overflow. Hence, we must ensure that the result of our computation is
// equal to the accurate one modulo 2^W. Unfortunately, division isn't
// safe in modular arithmetic.
//
// However, this code doesn't use exactly that formula; the formula it uses
// is something like the following, where T is the number of factors of 2 in
// K! (i.e. trailing zeros in the binary representation of K!), and ^ is
// exponentiation:
//
// BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
//
// This formula is trivially equivalent to the previous formula. However,
// this formula can be implemented much more efficiently. The trick is that
// K! / 2^T is odd, and exact division by an odd number *is* safe in modular
// arithmetic. To do exact division in modular arithmetic, all we have
// to do is multiply by the inverse. Therefore, this step can be done at
// width W.
//
// The next issue is how to safely do the division by 2^T. The way this
// is done is by doing the multiplication step at a width of at least W + T
// bits. This way, the bottom W+T bits of the product are accurate. Then,
// when we perform the division by 2^T (which is equivalent to a right shift
// by T), the bottom W bits are accurate. Extra bits are okay; they'll get
// truncated out after the division by 2^T.
//
// In comparison to just directly using the first formula, this technique
// is much more efficient; using the first formula requires W * K bits,
// but this formula requires less than W + K bits. Also, the first formula requires
// a division step, whereas this formula only requires multiplies and shifts.
//
// It doesn't matter whether the subtraction step is done in the calculation
// width or the input iteration count's width; if the subtraction overflows,
// the result must be zero anyway. We prefer here to do it in the width of
// the induction variable because it helps a lot for certain cases; CodeGen
// isn't smart enough to ignore the overflow, which leads to much less
// efficient code if the width of the subtraction is wider than the native
// register width.
//
// (It's possible to not widen at all by pulling out factors of 2 before
// the multiplication; for example, K=2 can be calculated as
// It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
// extra arithmetic, so it's not an obvious win, and it gets
// much more complicated for K > 3.)
// Protection from insane SCEVs; this bound is conservative,
// but it probably doesn't matter.
if (K > 1000)
return SE.getCouldNotCompute();
unsigned W = SE.getTypeSizeInBits(ResultTy);
// Calculate K! / 2^T and T; we divide out the factors of two before
// multiplying for calculating K! / 2^T to avoid overflow.
// Other overflow doesn't matter because we only care about the bottom
// W bits of the result.
APInt OddFactorial(W, 1);
unsigned T = 1;
for (unsigned i = 3; i <= K; ++i) {
APInt Mult(W, i);
unsigned TwoFactors = Mult.countTrailingZeros();
T += TwoFactors;
Mult.lshrInPlace(TwoFactors);
OddFactorial *= Mult;
}
// We need at least W + T bits for the multiplication step
unsigned CalculationBits = W + T;
// Calculate 2^T, at width T+W.
APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
// Calculate the multiplicative inverse of K! / 2^T;
// this multiplication factor will perform the exact division by
// K! / 2^T.
APInt Mod = APInt::getSignedMinValue(W+1);
APInt MultiplyFactor = OddFactorial.zext(W+1);
MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
MultiplyFactor = MultiplyFactor.trunc(W);
// Calculate the product, at width T+W
IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
CalculationBits);
const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
for (unsigned i = 1; i != K; ++i) {
const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
Dividend = SE.getMulExpr(Dividend,
SE.getTruncateOrZeroExtend(S, CalculationTy));
}
// Divide by 2^T
const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
// Truncate the result, and divide by K! / 2^T.
return SE.getMulExpr(SE.getConstant(MultiplyFactor),
SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}
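// Worked example (illustrative, not part of the original source) for K = 3 and
// a result width of W = 8:
//   K! = 6, so T = 1 and OddFactorial = K!/2^T = 3.
//   The multiplicative inverse of 3 modulo 2^8 is 171 (3 * 171 = 513 = 1 mod 256).
//   The product It*(It-1)*(It-2) is formed at W + T = 9 bits, udiv'd by
//   2^T = 2, truncated back to 8 bits, and multiplied by 171.
//   For It = 5: 5*4*3 = 60, 60 / 2 = 30, 30 * 171 = 5130 = 10 (mod 256),
//   which matches BC(5, 3) = C(5, 3) = 10.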
/// Return the value of this chain of recurrences at the specified iteration
/// number. We can evaluate this recurrence by multiplying each element in the
/// chain by the binomial coefficient corresponding to it. In other words, we
/// can evaluate {A,+,B,+,C,+,D} as:
///
/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
ScalarEvolution &SE) const {
return evaluateAtIteration(makeArrayRef(op_begin(), op_end()), It, SE);
}
const SCEV *
SCEVAddRecExpr::evaluateAtIteration(ArrayRef<const SCEV *> Operands,
const SCEV *It, ScalarEvolution &SE) {
assert(Operands.size() > 0);
const SCEV *Result = Operands[0];
for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
// The computation is correct in the face of overflow provided that the
// multiplication is performed _after_ the evaluation of the binomial
// coefficient.
const SCEV *Coeff = BinomialCoefficient(It, i, SE, Result->getType());
if (isa<SCEVCouldNotCompute>(Coeff))
return Coeff;
Result = SE.getAddExpr(Result, SE.getMulExpr(Operands[i], Coeff));
}
return Result;
}
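// Illustrative sketch (editorial, not part of the original source): for the
// quadratic recurrence {0,+,1,+,1} (the triangular numbers 0, 1, 3, 6, 10, ...),
// the loop above computes
//   0*BC(It,0) + 1*BC(It,1) + 1*BC(It,2) = It + It*(It-1)/2,
// so evaluating at It = 4 yields 4 + 4*3/2 = 10, the value of the recurrence
// after four backedge iterations.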
//===----------------------------------------------------------------------===//
// SCEV Expression folder implementations
//===----------------------------------------------------------------------===//
const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op,
unsigned Depth) {
assert(Depth <= 1 &&
"getLosslessPtrToIntExpr() should self-recurse at most once.");
// We could be called with an integer-typed operand during SCEV rewrites.
// Since the operand is an integer already, just perform zext/trunc/self cast.
if (!Op->getType()->isPointerTy())
return Op;
// What would be an ID for such a SCEV cast expression?
FoldingSetNodeID ID;
ID.AddInteger(scPtrToInt);
ID.AddPointer(Op);
void *IP = nullptr;
// Is there already an expression for such a cast?
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;
// It isn't legal for optimizations to construct new ptrtoint expressions
// for non-integral pointers.
if (getDataLayout().isNonIntegralPointerType(Op->getType()))
return getCouldNotCompute();
Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType());
// We can only trivially model ptrtoint if SCEV's effective (integer) type
// is sufficiently wide to represent all possible pointer values.
// We could theoretically teach SCEV to truncate wider pointers, but
// that isn't implemented for now.
if (getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(Op->getType())) !=
getDataLayout().getTypeSizeInBits(IntPtrTy))
return getCouldNotCompute();
// If not, is this expression something we can't reduce any further?
if (auto *U = dyn_cast<SCEVUnknown>(Op)) {
// Perform some basic constant folding. If the operand of the ptr2int cast
// is a null pointer, don't create a ptr2int SCEV expression (that will be
// left as-is), but produce a zero constant.
// NOTE: We could handle a more general case, but lack motivational cases.
if (isa<ConstantPointerNull>(U->getValue()))
return getZero(IntPtrTy);
// Create an explicit cast node.
// We can reuse the existing insert position since if we get here,
// we won't have made any changes which would invalidate it.
SCEV *S = new (SCEVAllocator)
SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
assert(Depth == 0 && "getLosslessPtrToIntExpr() should not self-recurse for "
"non-SCEVUnknown's.");
// Otherwise, we've got some expression that is more complex than just a
// single SCEVUnknown. But we don't want to have a SCEVPtrToIntExpr of an
// arbitrary expression; we only want a SCEVPtrToIntExpr of a SCEVUnknown,
// and the rest of the expression must be integer-typed.
// So sink the cast down to the SCEVUnknown's.
/// The SCEVPtrToIntSinkingRewriter takes a scalar evolution expression,
/// which computes a pointer-typed value, and rewrites the whole expression
/// tree so that *all* the computations are done on integers, and the only
/// pointer-typed operands in the expression are SCEVUnknown.
class SCEVPtrToIntSinkingRewriter
: public SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter> {
using Base = SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter>;
public:
SCEVPtrToIntSinkingRewriter(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {}
static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE) {
SCEVPtrToIntSinkingRewriter Rewriter(SE);
return Rewriter.visit(Scev);
}
const SCEV *visit(const SCEV *S) {
Type *STy = S->getType();
// If the expression is not pointer-typed, just keep it as-is.
if (!STy->isPointerTy())
return S;
// Else, recursively sink the cast down into it.
return Base::visit(S);
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
bool Changed = false;
for (auto *Op : Expr->operands()) {
Operands.push_back(visit(Op));
Changed |= Op != Operands.back();
}
return !Changed ? Expr : SE.getAddExpr(Operands, Expr->getNoWrapFlags());
}
const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
bool Changed = false;
for (auto *Op : Expr->operands()) {
Operands.push_back(visit(Op));
Changed |= Op != Operands.back();
}
return !Changed ? Expr : SE.getMulExpr(Operands, Expr->getNoWrapFlags());
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
assert(Expr->getType()->isPointerTy() &&
"Should only reach pointer-typed SCEVUnknown's.");
return SE.getLosslessPtrToIntExpr(Expr, /*Depth=*/1);
}
};
// And actually perform the cast sinking.
const SCEV *IntOp = SCEVPtrToIntSinkingRewriter::rewrite(Op, *this);
assert(IntOp->getType()->isIntegerTy() &&
"We must have succeeded in sinking the cast, "
"and ending up with an integer-typed expression!");
return IntOp;
}
const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty) {
assert(Ty->isIntegerTy() && "Target type must be an integer type!");
const SCEV *IntOp = getLosslessPtrToIntExpr(Op);
if (isa<SCEVCouldNotCompute>(IntOp))
return IntOp;
return getTruncateOrZeroExtend(IntOp, Ty);
}
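// Usage sketch (illustrative, not part of the original source; "PtrVal" is a
// hypothetical pointer-typed llvm::Value already covered by this analysis):
//
//   const SCEV *P = SE.getSCEV(PtrVal);               // pointer-typed SCEV
//   Type *I64 = Type::getInt64Ty(SE.getContext());
//   const SCEV *PAsInt = SE.getPtrToIntExpr(P, I64);
//   if (!isa<SCEVCouldNotCompute>(PAsInt)) {
//     // All arithmetic is now expressed on integers; the only remaining
//     // pointer-typed nodes are SCEVUnknown bases under scPtrToInt casts.
//   }
//
// getPtrToIntExpr() gives up (returns SCEVCouldNotCompute) for non-integral
// pointer types, or when the pointer's width does not match SCEV's effective
// integer type for it.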
const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
"This is not a truncating conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
assert(!Op->getType()->isPointerTy() && "Can't truncate pointer!");
Ty = getEffectiveSCEVType(Ty);
FoldingSetNodeID ID;
ID.AddInteger(scTruncate);
ID.AddPointer(Op);
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
// trunc(trunc(x)) --> trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
return getTruncateExpr(ST->getOperand(), Ty, Depth + 1);
// trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1);
// trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1);
if (Depth > MaxCastDepth) {
SCEV *S =
new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
// trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and
// trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN),
// if after transforming we have at most one truncate, not counting truncates
// that replace other casts.
if (isa<SCEVAddExpr>(Op) || isa<SCEVMulExpr>(Op)) {
auto *CommOp = cast<SCEVCommutativeExpr>(Op);
SmallVector<const SCEV *, 4> Operands;
unsigned numTruncs = 0;
for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
++i) {
const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1);
if (!isa<SCEVIntegralCastExpr>(CommOp->getOperand(i)) &&
isa<SCEVTruncateExpr>(S))
numTruncs++;
Operands.push_back(S);
}
if (numTruncs < 2) {
if (isa<SCEVAddExpr>(Op))
return getAddExpr(Operands);
else if (isa<SCEVMulExpr>(Op))
return getMulExpr(Operands);
else
llvm_unreachable("Unexpected SCEV type for Op.");
}
// Although we checked at the beginning that ID is not in the cache, the
// recursion above (or some other modification) may have inserted ID into the
// cache in the meantime. So if we find it now, just return it.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;
}
// If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : AddRec->operands())
Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1));
return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
// Return zero if truncating to known zeros.
uint32_t MinTrailingZeros = GetMinTrailingZeros(Op);
if (MinTrailingZeros >= getTypeSizeInBits(Ty))
return getZero(Ty);
// The cast wasn't folded; create an explicit cast node. We can reuse
// the existing insert position since if we get here, we won't have
// made any changes which would invalidate it.
SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
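// Illustrative sketch (editorial, not part of the original source): truncating
// (zext i8 %x to i32) to i16 hits the trunc(zext(x)) rule above and folds to
// (zext i8 %x to i16) instead of stacking a truncate on top of an extend.
// Likewise, truncating (4 * %y) to i2 folds to the constant 0, since the two
// low bits of any multiple of 4 are zero.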
// Get the limit of a recurrence such that incrementing by Step cannot cause
// signed overflow as long as the value of the recurrence within the
// loop does not exceed this limit before incrementing.
static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
ICmpInst::Predicate *Pred,
ScalarEvolution *SE) {
unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
if (SE->isKnownPositive(Step)) {
*Pred = ICmpInst::ICMP_SLT;
return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
SE->getSignedRangeMax(Step));
}
if (SE->isKnownNegative(Step)) {
*Pred = ICmpInst::ICMP_SGT;
return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
SE->getSignedRangeMin(Step));
}
return nullptr;
}
// Get the limit of a recurrence such that incrementing by Step cannot cause
// unsigned overflow as long as the value of the recurrence within the loop does
// not exceed this limit before incrementing.
static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
ICmpInst::Predicate *Pred,
ScalarEvolution *SE) {
unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
*Pred = ICmpInst::ICMP_ULT;
return SE->getConstant(APInt::getMinValue(BitWidth) -
SE->getUnsignedRangeMax(Step));
}
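// Worked example (illustrative, not part of the original source): for an i8
// recurrence whose step has an unsigned-range maximum of 3, the limit above is
// 0 - 3 == 253 (mod 256) with predicate ULT: as long as the current value of
// the recurrence is ULT 253 (i.e. at most 252), adding a step of at most 3
// stays within [0, 255] and cannot wrap.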
namespace {
struct ExtendOpTraitsBase {
typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *,
unsigned);
};
// Used to make code generic over signed and unsigned overflow.
template <typename ExtendOp> struct ExtendOpTraits {
// Members present:
//
// static const SCEV::NoWrapFlags WrapType;
//
// static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
//
// static const SCEV *getOverflowLimitForStep(const SCEV *Step,
// ICmpInst::Predicate *Pred,
// ScalarEvolution *SE);
};
template <>
struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
static const GetExtendExprTy GetExtendExpr;
static const SCEV *getOverflowLimitForStep(const SCEV *Step,
ICmpInst::Predicate *Pred,
ScalarEvolution *SE) {
return getSignedOverflowLimitForStep(Step, Pred, SE);
}
};
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
static const GetExtendExprTy GetExtendExpr;
static const SCEV *getOverflowLimitForStep(const SCEV *Step,
ICmpInst::Predicate *Pred,
ScalarEvolution *SE) {
return getUnsignedOverflowLimitForStep(Step, Pred, SE);
}
};
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
} // end anonymous namespace
// The recurrence AR has been shown to have no signed/unsigned wrap or something
// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
// easily prove NSW/NUW for its preincrement or postincrement sibling. This
// allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step +
// Start),+,Step} => {Step + sext/zext(Start),+,Step}. As a result, the
// expression "Step + sext/zext(PreIncAR)" is congruent with
// "sext/zext(PostIncAR)"
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
ScalarEvolution *SE, unsigned Depth) {
auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
const Loop *L = AR->getLoop();
const SCEV *Start = AR->getStart();
const SCEV *Step = AR->getStepRecurrence(*SE);
// Check for a simple looking step prior to loop entry.
const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
if (!SA)
return nullptr;
// Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
// subtraction is expensive. For this purpose, perform a quick and dirty
// difference, by checking for Step in the operand list.
SmallVector<const SCEV *, 4> DiffOps;
for (const SCEV *Op : SA->operands())
if (Op != Step)
DiffOps.push_back(Op);
if (DiffOps.size() == SA->getNumOperands())
return nullptr;
// Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
// `Step`:
// 1. NSW/NUW flags on the step increment.
auto PreStartFlags =
ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
// "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
// "S+X does not sign/unsign-overflow".
//
const SCEV *BECount = SE->getBackedgeTakenCount(L);
if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
!isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
return PreStart;
// 2. Direct overflow check on the step operation's expression.
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
const SCEV *OperandExtendedStart =
SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth),
(SE->*GetExtendExpr)(Step, WideTy, Depth));
if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) {
if (PreAR && AR->getNoWrapFlags(WrapType)) {
// If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
// or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
// `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
SE->setNoWrapFlags(const_cast<SCEVAddRecExpr *>(PreAR), WrapType);
}
return PreStart;
}
// 3. Loop precondition.
ICmpInst::Predicate Pred;
const SCEV *OverflowLimit =
ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
if (OverflowLimit &&
SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
return PreStart;
return nullptr;
}
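// Illustrative sketch (editorial, not part of the original source): for
// AR = {2 + %n,+,2}, the code above forms PreStart = %n by dropping the Step
// operand from the start expression (SCEVs are uniqued, so the pointer
// comparison against Step suffices). If {%n,+,2} is already known <nuw> and
// the backedge is known to be taken at least once (rule 1), PreStart is
// returned, and getExtendAddRecStart() below can rebuild the extended start
// as 2 + zext(%n) instead of zext(2 + %n).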
// Get the normalized zero or sign extended expression for this AddRec's Start.
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
ScalarEvolution *SE,
unsigned Depth) {
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth);
if (!PreStart)
return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth);
return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty,
Depth),
(SE->*GetExtendExpr)(PreStart, Ty, Depth));
}
// Try to prove away overflow by looking at "nearby" add recurrences. A
// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
//
// Formally:
//
// {S,+,X} == {S-T,+,X} + T
// => Ext({S,+,X}) == Ext({S-T,+,X} + T)
//
// If ({S-T,+,X} + T) does not overflow ... (1)
//
// RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
//
// If {S-T,+,X} does not overflow ... (2)
//
// RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
// == {Ext(S-T)+Ext(T),+,Ext(X)}
//
// If (S-T)+T does not overflow ... (3)
//
// RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
// == {Ext(S),+,Ext(X)} == LHS
//
// Thus, if (1), (2) and (3) are true for some T, then
// Ext({S,+,X}) == {Ext(S),+,Ext(X)}
//
// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
// does not overflow" restricted to the 0th iteration. Therefore we only need
// to check for (1) and (2).
//
// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
// is `Delta` (defined below).
template <typename ExtendOpTy>
bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
const SCEV *Step,
const Loop *L) {
auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
// We restrict `Start` to a constant to prevent SCEV from spending too much
// time here. It is correct (but more expensive) to continue with a
// non-constant `Start` and do a general SCEV subtraction to compute
// `PreStart` below.
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
if (!StartC)
return false;
APInt StartAI = StartC->getAPInt();
for (unsigned Delta : {-2, -1, 1, 2}) {
const SCEV *PreStart = getConstant(StartAI - Delta);
FoldingSetNodeID ID;
ID.AddInteger(scAddRecExpr);
ID.AddPointer(PreStart);
ID.AddPointer(Step);
ID.AddPointer(L);
void *IP = nullptr;
const auto *PreAR =
static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
// Give up if we don't already have the add recurrence we need because
// actually constructing an add recurrence is relatively expensive.
if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
DeltaS, &Pred, this);
if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1)
return true;
}
}
return false;
}
// Finds an integer D for an expression (C + x + y + ...) such that the top
// level addition in (D + (C - D + x + y + ...)) would not wrap (signed or
// unsigned) and the number of trailing zeros of (C - D + x + y + ...) is
// maximized, where C is the \p ConstantTerm, x, y, ... are arbitrary SCEVs, and
// the (C + x + y + ...) expression is \p WholeAddExpr.
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
const SCEVConstant *ConstantTerm,
const SCEVAddExpr *WholeAddExpr) {
const APInt &C = ConstantTerm->getAPInt();
const unsigned BitWidth = C.getBitWidth();
// Find number of trailing zeros of (x + y + ...) w/o the C first:
uint32_t TZ = BitWidth;
for (unsigned I = 1, E = WholeAddExpr->getNumOperands(); I < E && TZ; ++I)
TZ = std::min(TZ, SE.GetMinTrailingZeros(WholeAddExpr->getOperand(I)));
if (TZ) {
// Set D to be as many least significant bits of C as possible while still
// guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap:
return TZ < BitWidth ? C.trunc(TZ).zext(BitWidth) : C;
}
return APInt(BitWidth, 0);
}
// Finds an integer D for an affine AddRec expression {C,+,x} such that the top
// level addition in (D + {C-D,+,x}) would not wrap (signed or unsigned) and the
// number of trailing zeros of (C - D + x * n) is maximized, where C is the \p
// ConstantStart, x is an arbitrary \p Step, and n is the loop trip count.
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
const APInt &ConstantStart,
const SCEV *Step) {
const unsigned BitWidth = ConstantStart.getBitWidth();
const uint32_t TZ = SE.GetMinTrailingZeros(Step);
if (TZ)
return TZ < BitWidth ? ConstantStart.trunc(TZ).zext(BitWidth)
: ConstantStart;
return APInt(BitWidth, 0);
}
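// Worked example (illustrative, not part of the original source): with
// ConstantStart C = 7 (0b111) and a Step known to be a multiple of 4
// (TZ = 2), D = C.trunc(2).zext(W) = 3. The caller can then rewrite
// {7,+,Step} as 3 + {4,+,Step}: every value of {4,+,Step} has its two low
// bits clear, so adding 3 cannot carry out of those bits and the outer
// addition cannot wrap, signed or unsigned.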
const SCEV *
ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
assert(!Op->getType()->isPointerTy() && "Can't extend pointer!");
Ty = getEffectiveSCEVType(Ty);
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
FoldingSetNodeID ID;
ID.AddInteger(scZeroExtend);
ID.AddPointer(Op);
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
if (Depth > MaxCastDepth) {
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
// zext(trunc(x)) --> zext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
// It's possible the bits taken off by the truncate were all zero bits. If
// so, we should be able to simplify this further.
const SCEV *X = ST->getOperand();
ConstantRange CR = getUnsignedRange(X);
unsigned TruncBits = getTypeSizeInBits(ST->getType());
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
CR.zextOrTrunc(NewBits)))
return getTruncateOrZeroExtend(X, Ty, Depth);
}
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can zero extend all of the
// operands (often constants). This allows analysis of something like
// this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
if (AR->isAffine()) {
const SCEV *Start = AR->getStart();
const SCEV *Step = AR->getStepRecurrence(*this);
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();
if (!AR->hasNoUnsignedWrap()) {
auto NewFlags = proveNoWrapViaConstantRanges(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
}
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->hasNoUnsignedWrap())
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for overflow.
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
const SCEV *CastedMaxBECount =
getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
CastedMaxBECount, MaxBECount->getType(), Depth);
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step,
SCEV::FlagAnyWrap, Depth + 1);
const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul,
SCEV::FlagAnyWrap,
Depth + 1),
WideTy, Depth + 1);
const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1);
const SCEV *WideMaxBECount =
getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
const SCEV *OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getSignExtendExpr(Step, WideTy, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NW, which is propagated to this AddRec.
// Negative step causes unsigned wrap, but it still can't self-wrap.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
}
}
// Normally, in the cases we can prove no-overflow via a
// backedge guarding condition, we can also compute a backedge
// taken count for the loop. The exceptions are assumptions and
// guards present in the loop -- SCEV is not great at exploiting
// these to compute max backedge taken counts, but can still use
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.
if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
!AC.assumptions().empty()) {
auto NewFlags = proveNoUnsignedWrapViaInduction(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
if (AR->hasNoUnsignedWrap()) {
// Same as nuw case above - duplicated here to avoid a compile time
// issue. It's not clear that the order of checks matters, but
// it's one of two possible causes for a change which was
// reverted. Be conservative for the moment.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
// For a negative step, we can extend the operands iff doing so only
// traverses values in the range zext([0,UINT_MAX]).
if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
getSignedRangeMin(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)) {
// Cache knowledge of AR NW, which is propagated to this
// AddRec. Negative step causes unsigned wrap, but it
// still can't self-wrap.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
}
}
// zext({C,+,Step}) --> (zext(D) + zext({C-D,+,Step}))<nuw><nsw>
// if D + (C - D + Step * n) could be proven to not unsigned wrap
// where D maximizes the number of trailing zeros of (C - D + Step * n)
if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
const APInt &C = SC->getAPInt();
const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
if (D != 0) {
const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
const SCEV *SResidual =
getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
return getAddExpr(SZExtD, SZExtR,
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
Depth + 1);
}
}
if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
// zext(A % B) --> zext(A) % zext(B)
{
const SCEV *LHS;
const SCEV *RHS;
if (matchURem(Op, LHS, RHS))
return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1),
getZeroExtendExpr(RHS, Ty, Depth + 1));
}
// zext(A / B) --> zext(A) / zext(B).
if (auto *Div = dyn_cast<SCEVUDivExpr>(Op))
return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1),
getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1));
if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
// zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
if (SA->hasNoUnsignedWrap()) {
// If the addition does not unsign overflow then we can, by definition,
// commute the zero extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
}
// zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...))
// if D + (C - D + x + y + ...) could be proven to not unsigned wrap
// where D maximizes the number of trailing zeros of (C - D + x + y + ...)
//
// Often address arithmetic contains expressions like
// (zext (add (shl X, C1), C2)), for instance, (zext (5 + (4 * X))).
// This transformation is useful while proving that such expressions are
// equal or differ by a small constant amount, see LoadStoreVectorizer pass.
if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
const APInt &D = extractConstantWithoutWrapping(*this, SC, SA);
if (D != 0) {
const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
const SCEV *SResidual =
getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
return getAddExpr(SZExtD, SZExtR,
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
Depth + 1);
}
}
}
if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) {
// zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw>
if (SM->hasNoUnsignedWrap()) {
// If the multiply does not unsign overflow then we can, by definition,
// commute the zero extension with the multiply operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SM->operands())
Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
return getMulExpr(Ops, SCEV::FlagNUW, Depth + 1);
}
// zext(2^K * (trunc X to iN)) to iM ->
// 2^K * (zext(trunc X to i{N-K}) to iM)<nuw>
//
// Proof:
//
// zext(2^K * (trunc X to iN)) to iM
// = zext((trunc X to iN) << K) to iM
// = zext((trunc X to i{N-K}) << K)<nuw> to iM
// (because shl removes the top K bits)
// = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM
// = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>.
//
if (SM->getNumOperands() == 2)
if (auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0)))
if (MulLHS->getAPInt().isPowerOf2())
if (auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) {
int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) -
MulLHS->getAPInt().logBase2();
Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits);
return getMulExpr(
getZeroExtendExpr(MulLHS, Ty),
getZeroExtendExpr(
getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty),
SCEV::FlagNUW, Depth + 1);
}
}
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
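// End-to-end illustration (editorial sketch, not part of the original source):
// for the loop used as the example earlier in getZeroExtendExpr(),
//   for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
// X is the i8 addrec {0,+,1} with a constant max backedge-taken count of 99.
// The overflow check above shows that Start + Step*MaxBECount == 99 still
// fits in i8, so zext of the i8 addrec to i32 is folded to the i32 addrec
// {0,+,1}<nuw>, letting later analyses reason about Y as a plain induction
// variable rather than as a zext of one.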
const SCEV *
ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
assert(!Op->getType()->isPointerTy() && "Can't extend pointer!");
Ty = getEffectiveSCEVType(Ty);
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1);
// sext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
FoldingSetNodeID ID;
ID.AddInteger(scSignExtend);
ID.AddPointer(Op);
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// Limit recursion depth.
if (Depth > MaxCastDepth) {
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
// sext(trunc(x)) --> sext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
// It's possible the bits taken off by the truncate were all sign bits. If
// so, we should be able to simplify this further.
const SCEV *X = ST->getOperand();
ConstantRange CR = getSignedRange(X);
unsigned TruncBits = getTypeSizeInBits(ST->getType());
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).signExtend(NewBits).contains(
CR.sextOrTrunc(NewBits)))
return getTruncateOrSignExtend(X, Ty, Depth);
}
if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
// sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
if (SA->hasNoSignedWrap()) {
// If the addition does not sign overflow then we can, by definition,
// commute the sign extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1));
return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
}
// sext(C + x + y + ...) --> (sext(D) + sext((C - D) + x + y + ...))
// if D + (C - D + x + y + ...) could be proven to not signed wrap
// where D maximizes the number of trailing zeros of (C - D + x + y + ...)
//
// For instance, this will bring two seemingly different expressions:
// 1 + sext(5 + 20 * %x + 24 * %y) and
// sext(6 + 20 * %x + 24 * %y)
// to the same form:
// 2 + sext(4 + 20 * %x + 24 * %y)
if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
const APInt &D = extractConstantWithoutWrapping(*this, SC, SA);
if (D != 0) {
const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
const SCEV *SResidual =
getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1);
return getAddExpr(SSExtD, SSExtR,
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
Depth + 1);
}
}
}
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
// operands (often constants). This allows analysis of something like
// this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
if (AR->isAffine()) {
const SCEV *Start = AR->getStart();
const SCEV *Step = AR->getStepRecurrence(*this);
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();
if (!AR->hasNoSignedWrap()) {
auto NewFlags = proveNoWrapViaConstantRanges(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
}
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->hasNoSignedWrap())
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
const SCEV *CastedMaxBECount =
getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
CastedMaxBECount, MaxBECount->getType(), Depth);
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
const SCEV *SMul = getMulExpr(CastedMaxBECount, Step,
SCEV::FlagAnyWrap, Depth + 1);
const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul,
SCEV::FlagAnyWrap,
Depth + 1),
WideTy, Depth + 1);
const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1);
const SCEV *WideMaxBECount =
getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
const SCEV *OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getSignExtendExpr(Step, WideTy, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// If AR wraps around then
//
// abs(Step) * MaxBECount > unsigned-max(AR->getType())
// => SAdd != OperandExtendedAdd
//
// Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
// (SAdd == OperandExtendedAdd => AR is NW)
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
}
}
auto NewFlags = proveNoSignedWrapViaInduction(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
if (AR->hasNoSignedWrap()) {
// Same as nsw case above - duplicated here to avoid a compile time
// issue. It's not clear that the order of checks matters, but
// it's one of two possible causes for a change which was
// reverted. Be conservative for the moment.
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
// sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw>
// if D + (C - D + Step * n) could be proven to not signed wrap
// where D maximizes the number of trailing zeros of (C - D + Step * n)
if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
const APInt &C = SC->getAPInt();
const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
if (D != 0) {
const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
const SCEV *SResidual =
getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1);
return getAddExpr(SSExtD, SSExtR,
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
Depth + 1);
}
}
if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
// If the input value is provably positive and we could not simplify
// away the sext build a zext instead.
if (isKnownNonNegative(Op))
return getZeroExtendExpr(Op, Ty, Depth + 1);
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
Ty = getEffectiveSCEVType(Ty);
// Sign-extend negative constants.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
if (SC->getAPInt().isNegative())
return getSignExtendExpr(Op, Ty);
// Peel off a truncate cast.
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
const SCEV *NewOp = T->getOperand();
if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
return getAnyExtendExpr(NewOp, Ty);
return getTruncateOrNoop(NewOp, Ty);
}
// Next try a zext cast. If the cast is folded, use it.
const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
if (!isa<SCEVZeroExtendExpr>(ZExt))
return ZExt;
// Next try a sext cast. If the cast is folded, use it.
const SCEV *SExt = getSignExtendExpr(Op, Ty);
if (!isa<SCEVSignExtendExpr>(SExt))
return SExt;
// Force the cast to be folded into the operands of an addrec.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Ops;
for (const SCEV *Op : AR->operands())
Ops.push_back(getAnyExtendExpr(Op, Ty));
return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
}
// If the expression is obviously signed, use the sext cast value.
if (isa<SCEVSMaxExpr>(Op))
return SExt;
// Absent any other information, use the zext cast value.
return ZExt;
}
/// Process the given Ops list, which is a list of operands to be added under
/// the given scale, update the given map. This is a helper function for
/// getAddRecExpr. As an example of what it does, given a sequence of operands
/// that would form an add expression like this:
///
/// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
///
/// where A and B are constants, update the map with these values:
///
/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
///
/// and add 13 + A*B*29 to AccumulatedConstant.
/// This will allow getAddRecExpr to produce this:
///
/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
///
/// This form often exposes folding opportunities that are hidden in
/// the original operand list.
///
/// Return true iff it appears that any interesting folding opportunities
/// may be exposed. This helps getAddRecExpr short-circuit extra work in
/// the common case where no interesting opportunities are present, and
/// is also used as a check to avoid infinite recursion.
static bool
CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
SmallVectorImpl<const SCEV *> &NewOps,
APInt &AccumulatedConstant,
const SCEV *const *Ops, size_t NumOperands,
const APInt &Scale,
ScalarEvolution &SE) {
bool Interesting = false;
// Iterate over the add operands. They are sorted, with constants first.
unsigned i = 0;
while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
++i;
// Pull a buried constant out to the outside.
if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
Interesting = true;
AccumulatedConstant += Scale * C->getAPInt();
}
// Next comes everything else. We're especially interested in multiplies
// here, but they're in the middle, so just visit the rest with one loop.
for (; i != NumOperands; ++i) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
APInt NewScale =
Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();
if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
// A multiplication of a constant with another add; recurse.
const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
Interesting |=
CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
Add->op_begin(), Add->getNumOperands(),
NewScale, SE);
} else {
// A multiplication of a constant with some other value. Update
// the map.
SmallVector<const SCEV *, 4> MulOps(drop_begin(Mul->operands()));
const SCEV *Key = SE.getMulExpr(MulOps);
auto Pair = M.insert({Key, NewScale});
if (Pair.second) {
NewOps.push_back(Pair.first->first);
} else {
Pair.first->second += NewScale;
// The map already had an entry for this value, which may indicate
// a folding opportunity.
Interesting = true;
}
}
} else {
// An ordinary operand. Update the map.
std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
M.insert({Ops[i], Scale});
if (Pair.second) {
NewOps.push_back(Pair.first->first);
} else {
Pair.first->second += Scale;
// The map already had an entry for this value, which may indicate
// a folding opportunity.
Interesting = true;
}
}
}
return Interesting;
}
bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
const SCEV *LHS, const SCEV *RHS) {
const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
SCEV::NoWrapFlags, unsigned);
switch (BinOp) {
default:
llvm_unreachable("Unsupported binary op");
case Instruction::Add:
Operation = &ScalarEvolution::getAddExpr;
break;
case Instruction::Sub:
Operation = &ScalarEvolution::getMinusSCEV;
break;
case Instruction::Mul:
Operation = &ScalarEvolution::getMulExpr;
break;
}
const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) =
Signed ? &ScalarEvolution::getSignExtendExpr
: &ScalarEvolution::getZeroExtendExpr;
// Check ext(LHS op RHS) == ext(LHS) op ext(RHS)
auto *NarrowTy = cast<IntegerType>(LHS->getType());
auto *WideTy =
IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
const SCEV *A = (this->*Extension)(
(this->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), WideTy, 0);
const SCEV *B = (this->*Operation)((this->*Extension)(LHS, WideTy, 0),
(this->*Extension)(RHS, WideTy, 0),
SCEV::FlagAnyWrap, 0);
return A == B;
}
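// Illustrative sketch (editorial, not part of the original source): for an i8
// add with Signed == false, the code above compares
//   A = zext i8 (LHS + RHS) to i16
//   B = (zext i8 LHS to i16) + (zext i8 RHS to i16)
// These denote the same value exactly when the narrow addition cannot wrap
// unsigned. Because SCEV expressions are uniqued, the final "A == B" pointer
// comparison succeeds precisely when SCEV manages to fold both sides to the
// same node, so the result is conservative: "false" only means the absence of
// overflow could not be proven.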
std::pair<SCEV::NoWrapFlags, bool /*Deduced*/>
ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp(
const OverflowingBinaryOperator *OBO) {
SCEV::NoWrapFlags Flags = SCEV::NoWrapFlags::FlagAnyWrap;
if (OBO->hasNoUnsignedWrap())
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
if (OBO->hasNoSignedWrap())
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
bool Deduced = false;
if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap())
return {Flags, Deduced};
if (OBO->getOpcode() != Instruction::Add &&
OBO->getOpcode() != Instruction::Sub &&
OBO->getOpcode() != Instruction::Mul)
return {Flags, Deduced};
const SCEV *LHS = getSCEV(OBO->getOperand(0));
const SCEV *RHS = getSCEV(OBO->getOperand(1));
if (!OBO->hasNoUnsignedWrap() &&
willNotOverflow((Instruction::BinaryOps)OBO->getOpcode(),
/* Signed */ false, LHS, RHS)) {
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
Deduced = true;
}
if (!OBO->hasNoSignedWrap() &&
willNotOverflow((Instruction::BinaryOps)OBO->getOpcode(),
/* Signed */ true, LHS, RHS)) {
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
Deduced = true;
}
return {Flags, Deduced};
}
// We're trying to construct a SCEV of type `Type' with `Ops' as operands and
// `OldFlags' as can't-wrap behavior. Infer a more aggressive set of
// can't-overflow flags for the operation if possible.
static SCEV::NoWrapFlags
StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
const ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
using namespace std::placeholders;
using OBO = OverflowingBinaryOperator;
bool CanAnalyze =
Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
(void)CanAnalyze;
assert(CanAnalyze && "don't call from other places!");
int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
SCEV::NoWrapFlags SignOrUnsignWrap =
ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
// If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
auto IsKnownNonNegative = [&](const SCEV *S) {
return SE->isKnownNonNegative(S);
};
if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative))
Flags =
ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
if (SignOrUnsignWrap != SignOrUnsignMask &&
(Type == scAddExpr || Type == scMulExpr) && Ops.size() == 2 &&
isa<SCEVConstant>(Ops[0])) {
auto Opcode = [&] {
switch (Type) {
case scAddExpr:
return Instruction::Add;
case scMulExpr:
return Instruction::Mul;
default:
llvm_unreachable("Unexpected SCEV op.");
}
}();
const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
// (A <opcode> C) --> (A <opcode> C)<nsw> if the op doesn't sign overflow.
if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
Opcode, C, OBO::NoSignedWrap);
if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
}
// (A <opcode> C) --> (A <opcode> C)<nuw> if the op doesn't unsign overflow.
if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
Opcode, C, OBO::NoUnsignedWrap);
if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
}
}
return Flags;
}
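// Worked example (illustrative, not part of the original source): strengthening
// (1 + %x) on i8. For Opcode == Instruction::Add and C == 1, the guaranteed
// no-signed-wrap region is the set of values whose signed sum with 1 cannot
// overflow, i.e. [-128, 127). If getSignedRange(%x) is, say, [0, 100), it is
// contained in that region and FlagNSW is added; the analogous unsigned
// region [0, 255) is used for FlagNUW.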
bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) {
return isLoopInvariant(S, L) && properlyDominates(S, L->getHeader());
}
/// Get a canonical add expression, or something simpler if possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags OrigFlags,
unsigned Depth) {
assert(!(OrigFlags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVAddExpr operand types don't match!");
unsigned NumPtrs = count_if(
Ops, [](const SCEV *Op) { return Op->getType()->isPointerTy(); });
assert(NumPtrs <= 1 && "add has at most one pointer operand");
#endif
// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
if (Ops.size() == 2) return Ops[0];
Ops.erase(Ops.begin()+1); // Erase the folded element
LHSC = cast<SCEVConstant>(Ops[0]);
}
// If we are left with a constant zero being added, strip it off.
if (LHSC->getValue()->isZero()) {
Ops.erase(Ops.begin());
--Idx;
}
if (Ops.size() == 1) return Ops[0];
}
// Delay expensive flag strengthening until necessary.
auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) {
return StrengthenNoWrapFlags(this, scAddExpr, Ops, OrigFlags);
};
// Limit recursion calls depth.
if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
if (SCEV *S = std::get<0>(findExistingSCEVInCache(scAddExpr, Ops))) {
// Don't strengthen flags if we have no new information.
SCEVAddExpr *Add = static_cast<SCEVAddExpr *>(S);
if (Add->getNoWrapFlags(OrigFlags) != OrigFlags)
Add->setNoWrapFlags(ComputeFlags(Ops));
return S;
}
// Okay, check to see if the same value occurs in the operand list more than
// once. If so, merge them together into a multiply expression. Since we
// sorted the list, these values are required to be adjacent.
Type *Ty = Ops[0]->getType();
bool FoundMatch = false;
for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
// Scan ahead to count how many equal operands there are.
unsigned Count = 2;
while (i+Count != e && Ops[i+Count] == Ops[i])
++Count;
// Merge the values into a multiply.
const SCEV *Scale = getConstant(Ty, Count);
const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == Count)
return Mul;
Ops[i] = Mul;
Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
--i; e -= Count - 1;
FoundMatch = true;
}
if (FoundMatch)
return getAddExpr(Ops, OrigFlags, Depth + 1);
// Check for truncates. If all the operands are truncated from the same
// type, see if factoring out the truncate would permit the result to be
// folded. e.g., n*trunc(x) + m*trunc(y) --> trunc(trunc(n)*x + trunc(m)*y)
// if the contents of the resulting outer trunc fold to something simple.
auto FindTruncSrcType = [&]() -> Type * {
// We're ultimately looking to fold an addrec of truncs and muls of only
// constants and truncs, so if we find any other types of SCEV
// as operands of the addrec then we bail and return nullptr here.
// Otherwise, we return the type of the operand of a trunc that we find.
if (auto *T = dyn_cast<SCEVTruncateExpr>(Ops[Idx]))
return T->getOperand()->getType();
if (const auto *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1);
if (const auto *T = dyn_cast<SCEVTruncateExpr>(LastOp))
return T->getOperand()->getType();
}
return nullptr;
};
if (auto *SrcType = FindTruncSrcType()) {
SmallVector<const SCEV *, 8> LargeOps;
bool Ok = true;
// Check all the operands to see if they can be represented in the
// source type of the truncate.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
if (T->getOperand()->getType() != SrcType) {
Ok = false;
break;
}
LargeOps.push_back(T->getOperand());
} else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
LargeOps.push_back(getAnyExtendExpr(C, SrcType));
} else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
SmallVector<const SCEV *, 8> LargeMulOps;
for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
if (const SCEVTruncateExpr *T =
dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
if (T->getOperand()->getType() != SrcType) {
Ok = false;
break;
}
LargeMulOps.push_back(T->getOperand());
} else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
} else {
Ok = false;
break;
}
}
if (Ok)
LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1));
} else {
Ok = false;
break;
}
}
if (Ok) {
// Evaluate the expression in the larger type.
const SCEV *Fold = getAddExpr(LargeOps, SCEV::FlagAnyWrap, Depth + 1);
// If it folds to something simple, use it. Otherwise, don't.
if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
return getTruncateExpr(Fold, Ty);
}
}
if (Ops.size() == 2) {
// Check if we have an expression of the form ((X + C1) - C2), where C1 and
// C2 can be folded in a way that allows retaining wrapping flags of (X +
// C1).
const SCEV *A = Ops[0];
const SCEV *B = Ops[1];
auto *AddExpr = dyn_cast<SCEVAddExpr>(B);
auto *C = dyn_cast<SCEVConstant>(A);
if (AddExpr && C && isa<SCEVConstant>(AddExpr->getOperand(0))) {
auto C1 = cast<SCEVConstant>(AddExpr->getOperand(0))->getAPInt();
auto C2 = C->getAPInt();
SCEV::NoWrapFlags PreservedFlags = SCEV::FlagAnyWrap;
APInt ConstAdd = C1 + C2;
auto AddFlags = AddExpr->getNoWrapFlags();
// Adding a smaller constant is NUW if the original AddExpr was NUW.
if (ScalarEvolution::maskFlags(AddFlags, SCEV::FlagNUW) ==
SCEV::FlagNUW &&
ConstAdd.ule(C1)) {
PreservedFlags =
ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNUW);
}
// Adding a constant with the same sign and small magnitude is NSW, if the
// original AddExpr was NSW.
if (ScalarEvolution::maskFlags(AddFlags, SCEV::FlagNSW) ==
SCEV::FlagNSW &&
C1.isSignBitSet() == ConstAdd.isSignBitSet() &&
ConstAdd.abs().ule(C1.abs())) {
PreservedFlags =
ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNSW);
}
if (PreservedFlags != SCEV::FlagAnyWrap) {
SmallVector<const SCEV *, 4> NewOps(AddExpr->op_begin(),
AddExpr->op_end());
NewOps[0] = getConstant(ConstAdd);
return getAddExpr(NewOps, PreservedFlags);
}
}
}
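// An illustrative example of the flag-preservation rules above (hypothetical
// operands): for (5 + X)<nuw> plus the constant -2, ConstAdd = 3 and 3 ule 5,
// so the result may be rebuilt as (3 + X)<nuw>; for (5 + X)<nuw> plus +2,
// ConstAdd = 7 exceeds 5 unsigned and the nuw rule does not fire.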
// Skip past any other cast SCEVs.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
++Idx;
// If there are add operands they would be next.
if (Idx < Ops.size()) {
bool DeletedAdd = false;
// If the original flags and all inlined SCEVAddExprs are NUW, use the
// common NUW flag for expression after inlining. Other flags cannot be
// preserved, because they may depend on the original order of operations.
SCEV::NoWrapFlags CommonFlags = maskFlags(OrigFlags, SCEV::FlagNUW);
while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
if (Ops.size() > AddOpsInlineThreshold ||
Add->getNumOperands() > AddOpsInlineThreshold)
break;
// If we have an add, expand the add operands onto the end of the operands
// list.
Ops.erase(Ops.begin()+Idx);
Ops.append(Add->op_begin(), Add->op_end());
DeletedAdd = true;
CommonFlags = maskFlags(CommonFlags, Add->getNoWrapFlags());
}
// If we deleted at least one add, we added operands to the end of the list,
// and they are not necessarily sorted. Recurse to resort and resimplify
// any operands we just acquired.
if (DeletedAdd)
return getAddExpr(Ops, CommonFlags, Depth + 1);
}
// Skip over the add expression until we get to a multiply.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
++Idx;
// Check to see if there are any folding opportunities present with
// operands multiplied by constant values.
if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
uint64_t BitWidth = getTypeSizeInBits(Ty);
DenseMap<const SCEV *, APInt> M;
SmallVector<const SCEV *, 8> NewOps;
APInt AccumulatedConstant(BitWidth, 0);
if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
Ops.data(), Ops.size(),
APInt(BitWidth, 1), *this)) {
struct APIntCompare {
bool operator()(const APInt &LHS, const APInt &RHS) const {
return LHS.ult(RHS);
}
};
// Some interesting folding opportunity is present, so it's worthwhile to
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
for (const SCEV *NewOp : NewOps)
MulOpLists[M.find(NewOp)->second].push_back(NewOp);
// Re-generate the operands list.
Ops.clear();
if (AccumulatedConstant != 0)
Ops.push_back(getConstant(AccumulatedConstant));
for (auto &MulOp : MulOpLists) {
if (MulOp.first == 1) {
Ops.push_back(getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1));
} else if (MulOp.first != 0) {
Ops.push_back(getMulExpr(
getConstant(MulOp.first),
getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1));
}
}
if (Ops.empty())
return getZero(Ty);
if (Ops.size() == 1)
return Ops[0];
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
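// An illustrative sketch of the regrouping above (hypothetical operands; the
// exact scales come from CollectAddOperandsWithScales): an input such as
//   3*A + 3*B + 5*C
// is grouped by constant scale and re-emitted as
//   3*(A + B) + 5*C
// so each scale is multiplied in only once.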
// If we are adding something to a multiply expression, make sure the
// something is not already an operand of the multiply. If so, merge it into
// the multiply.
for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
if (isa<SCEVConstant>(MulOpSCEV))
continue;
for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
if (MulOpSCEV == Ops[AddOp]) {
// Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
// If the multiply has more than two operands, we must get the
// Y*Z term.
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_begin()+MulOp);
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul};
const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV,
SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == 2) return OuterMul;
if (AddOp < Idx) {
Ops.erase(Ops.begin()+AddOp);
Ops.erase(Ops.begin()+Idx-1);
} else {
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+AddOp-1);
}
Ops.push_back(OuterMul);
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Check this multiply against other multiplies being added together.
for (unsigned OtherMulIdx = Idx+1;
OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
++OtherMulIdx) {
const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
// If MulOp occurs in OtherMul, we can fold the two multiplies
// together.
for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
OMulOp != e; ++OMulOp)
if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
// Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_begin()+MulOp);
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
OtherMul->op_begin()+OMulOp);
MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2};
const SCEV *InnerMulSum =
getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum,
SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == 2) return OuterMul;
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+OtherMulIdx-1);
Ops.push_back(OuterMul);
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
}
}
// If there are any add recurrences in the operands list, see if any other
// added values are loop invariant. If so, we can fold them into the
// recurrence.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
++Idx;
// Scan over all recurrences, trying to fold loop invariants into them.
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this add and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
}
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// Compute nowrap flags for the addition of the loop-invariant ops and
// the addrec. Temporarily push it as an operand for that purpose.
LIOps.push_back(AddRec);
SCEV::NoWrapFlags Flags = ComputeFlags(LIOps);
LIOps.pop_back();
// NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
LIOps.push_back(AddRec->getStart());
SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
// This follows from the fact that the no-wrap flags on the outer add
// expression are applicable on the 0th iteration, when the add recurrence
// will be equal to its start value.
AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1);
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer add and the inner addrec are guaranteed to have no overflow.
// Always propagate NW.
Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
// Otherwise, add the folded AddRec by the non-invariant parts.
for (unsigned i = 0;; ++i)
if (Ops[i] == AddRec) {
Ops[i] = NewRec;
break;
}
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
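// An illustrative example of the fold above (hypothetical operands): adding
// a loop-invariant 5 to {3,+,1}<L> yields {8,+,1}<L>; the invariant value is
// folded into the start only, and the step is left unchanged.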
// Okay, if there weren't any loop invariants to be folded, check to see if
// there are multiple AddRec's with the same loop induction variable being
// added together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
// We expect the AddRecExpr's to be sorted in reverse dominance order,
// so that the 1st found AddRecExpr is dominated by all others.
assert(DT.dominates(
cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(),
AddRec->getLoop()->getHeader()) &&
"AddRecExprs are not sorted in reverse dominance order?");
if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
// Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
if (OtherAddRec->getLoop() == AddRecLoop) {
for (unsigned i = 0, e = OtherAddRec->getNumOperands();
i != e; ++i) {
if (i >= AddRecOps.size()) {
AddRecOps.append(OtherAddRec->op_begin()+i,
OtherAddRec->op_end());
break;
}
SmallVector<const SCEV *, 2> TwoOps = {
AddRecOps[i], OtherAddRec->getOperand(i)};
AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
}
Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
}
}
// Step size has changed, so we cannot guarantee no self-wraparound.
Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
}
// Okay, it looks like we really DO need an add expr. Check to see if we
// already have one, otherwise create a new one.
return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
}
const SCEV *
ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddExpr);
for (const SCEV *Op : Ops)
ID.AddPointer(Op);
void *IP = nullptr;
SCEVAddExpr *S =
static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
S = new (SCEVAllocator)
SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
}
S->setNoWrapFlags(Flags);
return S;
}
const SCEV *
ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
const Loop *L, SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddRecExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
ID.AddPointer(L);
void *IP = nullptr;
SCEVAddRecExpr *S =
static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
S = new (SCEVAllocator)
SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
}
setNoWrapFlags(S, Flags);
return S;
}
const SCEV *
ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scMulExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = nullptr;
SCEVMulExpr *S =
static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
}
S->setNoWrapFlags(Flags);
return S;
}
static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
uint64_t k = i*j;
if (j > 1 && k / j != i) Overflow = true;
return k;
}
/// Compute the result of "n choose k", the binomial coefficient. If an
/// intermediate computation overflows, Overflow will be set and the return will
/// be garbage. Overflow is not cleared on absence of overflow.
static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
// We use the multiplicative formula:
// n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
// At each iteration, we take the i-th term of the numerator and divide by the
// i-th smallest term of the denominator. This division will always produce an
// integral result, and helps reduce the chance of overflow in the
// intermediate computations. However, we can still overflow even when the
// final result would fit.
if (n == 0 || n == k) return 1;
if (k > n) return 0;
if (k > n/2)
k = n-k;
uint64_t r = 1;
for (uint64_t i = 1; i <= k; ++i) {
r = umul_ov(r, n-(i-1), Overflow);
r /= i;
}
return r;
}
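// A worked example (hypothetical inputs): Choose(6, 2) performs
//   r = umul_ov(1, 6, Overflow) / 1 == 6
//   r = umul_ov(6, 5, Overflow) / 2 == 15
// matching C(6,2) = 15, and leaves Overflow untouched because no product
// exceeded 64 bits.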
/// Determine if any of the operands in this SCEV are a constant or if
/// any of the add or multiply expressions in this SCEV contain a constant.
static bool containsConstantInAddMulChain(const SCEV *StartExpr) {
struct FindConstantInAddMulChain {
bool FoundConstant = false;
bool follow(const SCEV *S) {
FoundConstant |= isa<SCEVConstant>(S);
return isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S);
}
bool isDone() const {
return FoundConstant;
}
};
FindConstantInAddMulChain F;
SCEVTraversal<FindConstantInAddMulChain> ST(F);
ST.visitAll(StartExpr);
return F.FoundConstant;
}
/// Get a canonical multiply expression, or something simpler if possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags OrigFlags,
unsigned Depth) {
assert(OrigFlags == maskFlags(OrigFlags, SCEV::FlagNUW | SCEV::FlagNSW) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty mul!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = Ops[0]->getType();
assert(!ETy->isPointerTy());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(Ops[i]->getType() == ETy &&
"SCEVMulExpr operand types don't match!");
#endif
// Sort by complexity; this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
Ops[0] = getConstant(LHSC->getAPInt() * RHSC->getAPInt());
if (Ops.size() == 2) return Ops[0];
Ops.erase(Ops.begin()+1); // Erase the folded element
LHSC = cast<SCEVConstant>(Ops[0]);
}
// If we have a multiply of zero, it will always be zero.
if (LHSC->getValue()->isZero())
return LHSC;
// If we are left with a constant one being multiplied, strip it off.
if (LHSC->getValue()->isOne()) {
Ops.erase(Ops.begin());
--Idx;
}
if (Ops.size() == 1)
return Ops[0];
}
// Delay expensive flag strengthening until necessary.
auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) {
return StrengthenNoWrapFlags(this, scMulExpr, Ops, OrigFlags);
};
// Limit the recursion depth.
if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) {
// Don't strengthen flags if we have no new information.
SCEVMulExpr *Mul = static_cast<SCEVMulExpr *>(S);
if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags)
Mul->setNoWrapFlags(ComputeFlags(Ops));
return S;
}
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
if (Ops.size() == 2) {
// C1*(C2+V) -> C1*C2 + C1*V
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
// If any of Add's ops are Adds or Muls with a constant, apply this
// transformation as well.
//
// TODO: There are some cases where this transformation is not
// profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of
// this transformation should be narrowed down.
if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add))
return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
SCEV::FlagAnyWrap, Depth + 1),
getMulExpr(LHSC, Add->getOperand(1),
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
if (Ops[0]->isAllOnesValue()) {
// If we have a mul by -1 of an add, try distributing the -1 among the
// add operands.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
SmallVector<const SCEV *, 4> NewOps;
bool AnyFolded = false;
for (const SCEV *AddOp : Add->operands()) {
const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap,
Depth + 1);
if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
NewOps.push_back(Mul);
}
if (AnyFolded)
return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1);
} else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
// Negation preserves a recurrence's no self-wrap property.
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *AddRecOp : AddRec->operands())
Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap,
Depth + 1));
return getAddRecExpr(Operands, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));
}
}
}
}
// Skip over the add expression until we get to a multiply.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
++Idx;
// If there are mul operands inline them all into this expression.
if (Idx < Ops.size()) {
bool DeletedMul = false;
while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
if (Ops.size() > MulOpsInlineThreshold)
break;
// If we have a mul, expand the mul operands onto the end of the
// operands list.
Ops.erase(Ops.begin()+Idx);
Ops.append(Mul->op_begin(), Mul->op_end());
DeletedMul = true;
}
// If we deleted at least one mul, we added operands to the end of the
// list, and they are not necessarily sorted. Recurse to resort and
// resimplify any operands we just acquired.
if (DeletedMul)
return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// If there are any add recurrences in the operands list, see if any other
// added values are loop invariant. If so, we can fold them into the
// recurrence.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
++Idx;
// Scan over all recurrences, trying to fold loop invariants into them.
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this mul and add them to the vector
// if they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
}
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1);
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i),
SCEV::FlagAnyWrap, Depth + 1));
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer mul and the inner addrec are guaranteed to have no overflow.
//
// The no-self-wrap property cannot be guaranteed after changing the step
// size, but it will be inferred if either NUW or NSW is true.
SCEV::NoWrapFlags Flags = ComputeFlags({Scale, AddRec});
const SCEV *NewRec = getAddRecExpr(
NewOps, AddRecLoop, AddRec->getNoWrapFlags(Flags));
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
// Otherwise, multiply the folded AddRec by the non-invariant parts.
for (unsigned i = 0;; ++i)
if (Ops[i] == AddRec) {
Ops[i] = NewRec;
break;
}
return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Okay, if there weren't any loop invariants to be folded, check to see
// if there are multiple AddRec's with the same loop induction variable
// being multiplied together. If so, we can fold them.
// {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
// = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
// choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
// ]]],+,...up to x=2n}.
// Note that the arguments to choose() are always integers with values
// known at compile time, never SCEV objects.
//
// The implementation avoids pointless extra computations when the two
// addrecs are of different length (mathematically, it's equivalent to
// an infinite stream of zeros on the right).
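// For the common case of two affine recurrences, the general formula above
// reduces to the illustrative identity
//   {A,+,B}<L> * {C,+,D}<L> = {A*C,+,A*D+B*C+B*D,+,2*B*D}<L>
// which is what the nested loops below compute for x = 0, 1, 2.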
bool OpsModified = false;
for (unsigned OtherIdx = Idx+1;
OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
const SCEVAddRecExpr *OtherAddRec =
dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
continue;
// Limit max number of arguments to avoid creation of unreasonably big
// SCEVAddRecs with very complex operands.
if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 >
MaxAddRecSize || hasHugeExpression({AddRec, OtherAddRec}))
continue;
bool Overflow = false;
Type *Ty = AddRec->getType();
bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
SmallVector<const SCEV*, 7> AddRecOps;
for (int x = 0, xe = AddRec->getNumOperands() +
OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
SmallVector <const SCEV *, 7> SumOps;
for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
z < ze && !Overflow; ++z) {
uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
uint64_t Coeff;
if (LargerThan64Bits)
Coeff = umul_ov(Coeff1, Coeff2, Overflow);
else
Coeff = Coeff1*Coeff2;
const SCEV *CoeffTerm = getConstant(Ty, Coeff);
const SCEV *Term1 = AddRec->getOperand(y-z);
const SCEV *Term2 = OtherAddRec->getOperand(z);
SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2,
SCEV::FlagAnyWrap, Depth + 1));
}
}
if (SumOps.empty())
SumOps.push_back(getZero(Ty));
AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
}
if (!Overflow) {
const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop,
SCEV::FlagAnyWrap);
if (Ops.size() == 2) return NewAddRec;
Ops[Idx] = NewAddRec;
Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
OpsModified = true;
AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
if (!AddRec)
break;
}
}
if (OpsModified)
return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
}
// Okay, it looks like we really DO need a mul expr. Check to see if we
// already have one, otherwise create a new one.
return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
}
/// Represents an unsigned remainder expression based on unsigned division.
const SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS,
const SCEV *RHS) {
assert(getEffectiveSCEVType(LHS->getType()) ==
getEffectiveSCEVType(RHS->getType()) &&
"SCEVURemExpr operand types don't match!");
// Short-circuit easy cases
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
// If constant is one, the result is trivial
if (RHSC->getValue()->isOne())
return getZero(LHS->getType()); // X urem 1 --> 0
// If constant is a power of two, fold into a zext(trunc(LHS)).
if (RHSC->getAPInt().isPowerOf2()) {
Type *FullTy = LHS->getType();
Type *TruncTy =
IntegerType::get(getContext(), RHSC->getAPInt().logBase2());
return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy);
}
}
// Fallback to %a == %x urem %y == %x -<nuw> ((%x udiv %y) *<nuw> %y)
const SCEV *UDiv = getUDivExpr(LHS, RHS);
const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW);
return getMinusSCEV(LHS, Mult, SCEV::FlagNUW);
}
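// An illustrative example of the power-of-two case above (hypothetical
// 32-bit %x): (%x urem 8) becomes zext(trunc %x to i3) to i32, i.e. the low
// three bits of %x; a non-power-of-two divisor falls through to the generic
// %x - ((%x udiv %y) * %y) form.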
/// Get a canonical unsigned division expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
const SCEV *RHS) {
assert(!LHS->getType()->isPointerTy() &&
"SCEVUDivExpr operand can't be pointer!");
assert(LHS->getType() == RHS->getType() &&
"SCEVUDivExpr operand types don't match!");
FoldingSetNodeID ID;
ID.AddInteger(scUDivExpr);
ID.AddPointer(LHS);
ID.AddPointer(RHS);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;
// 0 udiv Y == 0
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS))
if (LHSC->getValue()->isZero())
return LHS;
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
if (RHSC->getValue()->isOne())
return LHS; // X udiv 1 --> x
// If the denominator is zero, the result of the udiv is undefined. Don't
// try to analyze it, because the resolution chosen here may differ from
// the resolution chosen in other parts of the compiler.
if (!RHSC->getValue()->isZero()) {
// Determine if the division can be folded into the operands of its LHS.
// TODO: Generalize this to non-constants by using known-bits information.
Type *Ty = LHS->getType();
unsigned LZ = RHSC->getAPInt().countLeadingZeros();
unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
// For non-power-of-two values, effectively round the value up to the
// nearest power of two.
if (!RHSC->getAPInt().isPowerOf2())
++MaxShiftAmt;
IntegerType *ExtTy =
IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
if (const SCEVConstant *Step =
dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
// {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
const APInt &StepInt = Step->getAPInt();
const APInt &DivInt = RHSC->getAPInt();
if (!StepInt.urem(DivInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : AR->operands())
Operands.push_back(getUDivExpr(Op, RHS));
return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);
}
/// Get a canonical UDivExpr for a recurrence.
/// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
// We can currently only fold X%N if X is constant.
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
if (StartC && !DivInt.urem(StepInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
const APInt &StartInt = StartC->getAPInt();
const APInt &StartRem = StartInt.urem(StepInt);
if (StartRem != 0) {
const SCEV *NewLHS =
getAddRecExpr(getConstant(StartInt - StartRem), Step,
AR->getLoop(), SCEV::FlagNW);
if (LHS != NewLHS) {
LHS = NewLHS;
// Reset the ID to include the new LHS, and check if it is
// already cached.
ID.clear();
ID.AddInteger(scUDivExpr);
ID.AddPointer(LHS);
ID.AddPointer(RHS);
IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;
}
}
}
}
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : M->operands())
Operands.push_back(getZeroExtendExpr(Op, ExtTy));
if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
// Find an operand that's safely divisible.
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
const SCEV *Op = M->getOperand(i);
const SCEV *Div = getUDivExpr(Op, RHSC);
if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
Operands = SmallVector<const SCEV *, 4>(M->operands());
Operands[i] = Div;
return getMulExpr(Operands);
}
}
}
// (A/B)/C --> A/(B*C) if safe and B*C can be folded.
if (const SCEVUDivExpr *OtherDiv = dyn_cast<SCEVUDivExpr>(LHS)) {
if (auto *DivisorConstant =
dyn_cast<SCEVConstant>(OtherDiv->getRHS())) {
bool Overflow = false;
APInt NewRHS =
DivisorConstant->getAPInt().umul_ov(RHSC->getAPInt(), Overflow);
if (Overflow) {
return getConstant(RHSC->getType(), 0, false);
}
return getUDivExpr(OtherDiv->getLHS(), getConstant(NewRHS));
}
}
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : A->operands())
Operands.push_back(getZeroExtendExpr(Op, ExtTy));
if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
Operands.clear();
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
if (isa<SCEVUDivExpr>(Op) ||
getMulExpr(Op, RHS) != A->getOperand(i))
break;
Operands.push_back(Op);
}
if (Operands.size() == A->getNumOperands())
return getAddExpr(Operands);
}
}
// Fold if both operands are constant.
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
Constant *LHSCV = LHSC->getValue();
Constant *RHSCV = RHSC->getValue();
return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
RHSCV)));
}
}
}
// The Insertion Point (IP) might be invalid by now (due to UniqueSCEVs
// changes). Make sure we get a new one.
IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
LHS, RHS);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
APInt A = C1->getAPInt().abs();
APInt B = C2->getAPInt().abs();
uint32_t ABW = A.getBitWidth();
uint32_t BBW = B.getBitWidth();
if (ABW > BBW)
B = B.zext(ABW);
else if (ABW < BBW)
A = A.zext(BBW);
return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B));
}
/// Get a canonical unsigned division expression, or something simpler if
/// possible. There is no representation for an exact udiv in SCEV IR, but we
/// can attempt to remove factors from the LHS and RHS. We can't do this when
/// it's not exact because the udiv may be clearing bits.
const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
const SCEV *RHS) {
// TODO: we could try to find factors in all sorts of things, but for now we
// just deal with u/exact (multiply, constant). See SCEVDivision towards the
// end of this file for inspiration.
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
if (!Mul || !Mul->hasNoUnsignedWrap())
return getUDivExpr(LHS, RHS);
if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
// If the mulexpr multiplies by a constant, then that constant must be the
// first element of the mulexpr.
if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
if (LHSCst == RHSCst) {
SmallVector<const SCEV *, 2> Operands(drop_begin(Mul->operands()));
return getMulExpr(Operands);
}
// We can't just assume that LHSCst divides RHSCst cleanly; it could be
// that there's a factor provided by one of the other terms. We need to
// check.
APInt Factor = gcd(LHSCst, RHSCst);
if (!Factor.isIntN(1)) {
LHSCst =
cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
RHSCst =
cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
SmallVector<const SCEV *, 2> Operands;
Operands.push_back(LHSCst);
Operands.append(Mul->op_begin() + 1, Mul->op_end());
LHS = getMulExpr(Operands);
RHS = RHSCst;
Mul = dyn_cast<SCEVMulExpr>(LHS);
if (!Mul)
return getUDivExactExpr(LHS, RHS);
}
}
}
for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
if (Mul->getOperand(i) == RHS) {
SmallVector<const SCEV *, 2> Operands;
Operands.append(Mul->op_begin(), Mul->op_begin() + i);
Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
return getMulExpr(Operands);
}
}
return getUDivExpr(LHS, RHS);
}
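// Illustrative examples for the exact-division factoring above (hypothetical
// operands): (4 * X)<nuw> /u 4 drops the matching constant and returns X,
// while (6 * X)<nuw> /u 4 uses gcd(6, 4) = 2 to reduce the query to
// (3 * X) /u 2 before falling back to an ordinary udiv.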
/// Get an add recurrence expression for the specified loop. Simplify the
/// expression as much as possible.
const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
const Loop *L,
SCEV::NoWrapFlags Flags) {
SmallVector<const SCEV *, 4> Operands;
Operands.push_back(Start);
if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
if (StepChrec->getLoop() == L) {
Operands.append(StepChrec->op_begin(), StepChrec->op_end());
return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
}
Operands.push_back(Step);
return getAddRecExpr(Operands, L, Flags);
}
/// Get an add recurrence expression for the specified loop. Simplify the
/// expression as much as possible.
const SCEV *
ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
const Loop *L, SCEV::NoWrapFlags Flags) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
"SCEVAddRecExpr operand types don't match!");
assert(!Operands[i]->getType()->isPointerTy() && "Step must be integer");
}
for (unsigned i = 0, e = Operands.size(); i != e; ++i)
assert(isLoopInvariant(Operands[i], L) &&
"SCEVAddRecExpr operand is not loop-invariant!");
#endif
if (Operands.back()->isZero()) {
Operands.pop_back();
return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
}
// It's tempting to call getConstantMaxBackedgeTakenCount here and
// use that information to infer NUW and NSW flags. However, computing a
// BE count requires calling getAddRecExpr, so we may not yet have a
// meaningful BE count at this point (and if we don't, we'd be stuck
// with a SCEVCouldNotCompute as the cached BE count).
Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
// Canonicalize nested AddRecs by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop *NestedLoop = NestedAR->getLoop();
if (L->contains(NestedLoop)
? (L->getLoopDepth() < NestedLoop->getLoopDepth())
: (!NestedLoop->contains(L) &&
DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
SmallVector<const SCEV *, 4> NestedOperands(NestedAR->operands());
Operands[0] = NestedAR->getStart();
// AddRecs require their operands be loop-invariant with respect to their
// loops. Don't perform this transformation if it would break this
// requirement.
bool AllInvariant = all_of(
Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });
if (AllInvariant) {
// Create a recurrence for the outer loop with the same step size.
//
// The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
// inner recurrence has the same property.
SCEV::NoWrapFlags OuterFlags =
maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
return isLoopInvariant(Op, NestedLoop);
});
if (AllInvariant) {
// Ok, both add recurrences are valid after the transformation.
//
// The inner recurrence keeps its NW flag but only keeps NUW/NSW if
// the outer recurrence has the same property.
SCEV::NoWrapFlags InnerFlags =
maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
}
}
// Reset Operands to its original state.
Operands[0] = NestedAR;
}
}
// Okay, it looks like we really DO need an addrec expr. Check to see if we
// already have one, otherwise create a new one.
return getOrCreateAddRecExpr(Operands, L, Flags);
}
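// An illustrative sketch of the nesting canonicalization above (hypothetical
// loops Lo and Li, with Lo containing the deeper Li): a request for
//   {{S,+,X}<Li>,+,Y}<Lo>
// is re-nested as
//   {{S,+,Y}<Lo>,+,X}<Li>
// provided every operand stays invariant in its respective loop; otherwise
// the operands are restored and the original form is kept.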
const SCEV *
ScalarEvolution::getGEPExpr(GEPOperator *GEP,
const SmallVectorImpl<const SCEV *> &IndexExprs) {
const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
// getSCEV(Base)->getType() has the same address space as Base->getType()
// because SCEV::getType() preserves the address space.
Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType());
// FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
// instruction to its SCEV, because the Instruction may be guarded by control
// flow and the no-overflow bits may not be valid for the expression in any
// context. This can be fixed similarly to how these flags are handled for
// adds.
SCEV::NoWrapFlags OffsetWrap =
GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
Type *CurTy = GEP->getType();
bool FirstIter = true;
SmallVector<const SCEV *, 4> Offsets;
for (const SCEV *IndexExpr : IndexExprs) {
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(CurTy)) {
// For a struct, add the member offset.
ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
unsigned FieldNo = Index->getZExtValue();
const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo);
Offsets.push_back(FieldOffset);
// Update CurTy to the type of the field at Index.
CurTy = STy->getTypeAtIndex(Index);
} else {
// Update CurTy to its element type.
if (FirstIter) {
assert(isa<PointerType>(CurTy) &&
"The first index of a GEP indexes a pointer");
CurTy = GEP->getSourceElementType();
FirstIter = false;
} else {
CurTy = GetElementPtrInst::getTypeAtIndex(CurTy, (uint64_t)0);
}
// For an array, add the element offset, explicitly scaled.
const SCEV *ElementSize = getSizeOfExpr(IntIdxTy, CurTy);
// Getelementptr indices are signed.
IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy);
// Multiply the index by the element size to compute the element offset.
const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap);
Offsets.push_back(LocalOffset);
}
}
// Handle degenerate case of GEP without offsets.
if (Offsets.empty())
return BaseExpr;
// Add the offsets together, assuming nsw if inbounds.
const SCEV *Offset = getAddExpr(Offsets, OffsetWrap);
// Add the base address and the offset. We cannot use the nsw flag, as the
// base address is unsigned. However, if we know that the offset is
// non-negative, we can use nuw.
SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset)
? SCEV::FlagNUW : SCEV::FlagAnyWrap;
return getAddExpr(BaseExpr, Offset, BaseWrap);
}
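// An illustrative example of the offset computation above (hypothetical IR):
// for "getelementptr inbounds i32, i32* %p, i64 %i" the single array index
// contributes (%i sign-extended or truncated to the index type) * 4, built
// with nsw because of inbounds; the final base-plus-offset add only gets nuw
// when the offset is additionally known to be non-negative.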
std::tuple<SCEV *, FoldingSetNodeID, void *>
ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
ArrayRef<const SCEV *> Ops) {
FoldingSetNodeID ID;
void *IP = nullptr;
ID.AddInteger(SCEVType);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
return std::tuple<SCEV *, FoldingSetNodeID, void *>(
UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP);
}
const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) {
SCEV::NoWrapFlags Flags = IsNSW ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
return getSMaxExpr(Op, getNegativeSCEV(Op, Flags));
}
const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"Operand types don't match!");
assert(Ops[0]->getType()->isPointerTy() ==
Ops[i]->getType()->isPointerTy() &&
"min/max should be consistently pointerish");
}
#endif
bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr;
bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr;
// Sort by complexity; this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);
// Check if we have created the same expression before.
if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) {
return S;
}
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
auto FoldOp = [&](const APInt &LHS, const APInt &RHS) {
if (Kind == scSMaxExpr)
return APIntOps::smax(LHS, RHS);
else if (Kind == scSMinExpr)
return APIntOps::smin(LHS, RHS);
else if (Kind == scUMaxExpr)
return APIntOps::umax(LHS, RHS);
else if (Kind == scUMinExpr)
return APIntOps::umin(LHS, RHS);
llvm_unreachable("Unknown SCEV min/max opcode");
};
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
ConstantInt *Fold = ConstantInt::get(
getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt()));
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
LHSC = cast<SCEVConstant>(Ops[0]);
}
bool IsMinV = LHSC->getValue()->isMinValue(IsSigned);
bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned);
if (IsMax ? IsMinV : IsMaxV) {
// If we are left with a constant minimum(/maximum)-int, strip it off.
Ops.erase(Ops.begin());
--Idx;
} else if (IsMax ? IsMaxV : IsMinV) {
// If we have a max(/min) with a constant maximum(/minimum)-int,
// it will always be the extremum.
return LHSC;
}
if (Ops.size() == 1) return Ops[0];
}
// Find the first operation of the same kind
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind)
++Idx;
// Check to see if one of the operands is of the same kind. If so, expand its
// operands onto our operand list, and recurse to simplify.
if (Idx < Ops.size()) {
bool DeletedAny = false;
while (Ops[Idx]->getSCEVType() == Kind) {
const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]);
Ops.erase(Ops.begin()+Idx);
Ops.append(SMME->op_begin(), SMME->op_end());
DeletedAny = true;
}
if (DeletedAny)
return getMinMaxExpr(Kind, Ops);
}
// Okay, check to see if the same value occurs in the operand list twice. If
// so, delete one. Since we sorted the list, these values are required to
// be adjacent.
llvm::CmpInst::Predicate GEPred =
IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
llvm::CmpInst::Predicate LEPred =
IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred;
llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred;
for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
if (Ops[i] == Ops[i + 1] ||
isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
// X op Y op Y --> X op Y
// X op Y --> X, if we know X, Y are ordered appropriately
Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
--i;
--e;
} else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
Ops[i + 1])) {
// X op Y --> Y, if we know X, Y are ordered appropriately
Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
--i;
--e;
}
}
if (Ops.size() == 1) return Ops[0];
assert(!Ops.empty() && "Reduced smax down to nothing!");
// Okay, it looks like we really DO need an expr. Check to see if we
// already have one, otherwise create a new one.
const SCEV *ExistingSCEV;
FoldingSetNodeID ID;
void *IP;
std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops);
if (ExistingSCEV)
return ExistingSCEV;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
SCEV *S = new (SCEVAllocator)
SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
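// An illustrative example combining the steps above (hypothetical operands):
// smax(5, smax(X, 3)) has the nested smax inlined and, via the recursive
// call, its constants folded, leaving smax(5, X); the known-ordering checks
// could reduce it further to a single operand if 5 and X were provably
// ordered.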
const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
return getSMaxExpr(Ops);
}
const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
return getMinMaxExpr(scSMaxExpr, Ops);
}
const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
return getUMaxExpr(Ops);
}
const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
return getMinMaxExpr(scUMaxExpr, Ops);
}
const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
return getSMinExpr(Ops);
}
const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
return getMinMaxExpr(scSMinExpr, Ops);
}
const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
return getUMinExpr(Ops);
}
const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
return getMinMaxExpr(scUMinExpr, Ops);
}
const SCEV *
ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy,
ScalableVectorType *ScalableTy) {
Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo());
Constant *One = ConstantInt::get(IntTy, 1);
Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One);
// Note that the expression we created is the final expression; we don't
// want to simplify it any further. Also, if we call a normal getSCEV(),
// we'll end up in an endless recursion. So just create an SCEVUnknown.
return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy));
}
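// Illustrative note: for a hypothetical <vscale x 4 x i32>, the expression
// built above is ptrtoint(gep(<vscale x 4 x i32>* null, 1)), a SCEVUnknown
// standing for vscale * 16 bytes, i.e. a size that is only known at run time.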
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
if (auto *ScalableAllocTy = dyn_cast<ScalableVectorType>(AllocTy))
return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy);
// We can bypass creating a target-independent constant expression and then
// folding it back into a ConstantInt. This is just a compile-time
// optimization.
return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
}
const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) {
if (auto *ScalableStoreTy = dyn_cast<ScalableVectorType>(StoreTy))
return getSizeOfScalableVectorExpr(IntTy, ScalableStoreTy);
// We can bypass creating a target-independent constant expression and then
// folding it back into a ConstantInt. This is just a compile-time
// optimization.
return getConstant(IntTy, getDataLayout().getTypeStoreSize(StoreTy));
}
const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
StructType *STy,
unsigned FieldNo) {
// We can bypass creating a target-independent constant expression and then
// folding it back into a ConstantInt. This is just a compile-time
// optimization.
return getConstant(
IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
}
const SCEV *ScalarEvolution::getUnknown(Value *V) {
// Don't attempt to do anything other than create a SCEVUnknown object
// here. createSCEV only calls getUnknown after checking for all other
// interesting possibilities, and any other code that calls getUnknown
// is doing so in order to hide a value from SCEV canonicalization.
FoldingSetNodeID ID;
ID.AddInteger(scUnknown);
ID.AddPointer(V);
void *IP = nullptr;
if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
assert(cast<SCEVUnknown>(S)->getValue() == V &&
"Stale SCEVUnknown in uniquing map!");
return S;
}
SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
FirstUnknown);
FirstUnknown = cast<SCEVUnknown>(S);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
//===----------------------------------------------------------------------===//
// Basic SCEV Analysis and PHI Idiom Recognition Code
//
/// Test if values of the given type are analyzable within the SCEV
/// framework. This primarily includes integer types, and it can optionally
/// include pointer types if the ScalarEvolution class has access to
/// target-specific information.
bool ScalarEvolution::isSCEVable(Type *Ty) const {
// Integers and pointers are always SCEVable.
return Ty->isIntOrPtrTy();
}
/// Return the size in bits of the specified type, for which isSCEVable must
/// return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
if (Ty->isPointerTy())
return getDataLayout().getIndexTypeSizeInBits(Ty);
return getDataLayout().getTypeSizeInBits(Ty);
}
/// Return a type with the same bitwidth as the given type and which represents
/// how SCEV will treat the given type, for which isSCEVable must return
/// true. For pointer types, this is the pointer index sized integer type.
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
if (Ty->isIntegerTy())
return Ty;
// The only other supported type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
return getDataLayout().getIndexType(Ty);
}
Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
}
const SCEV *ScalarEvolution::getCouldNotCompute() {
return CouldNotCompute.get();
}
bool ScalarEvolution::checkValidity(const SCEV *S) const {
bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) {
auto *SU = dyn_cast<SCEVUnknown>(S);
return SU && SU->getValue() == nullptr;
});
return !ContainsNulls;
}
bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
HasRecMapType::iterator I = HasRecMap.find(S);
if (I != HasRecMap.end())
return I->second;
bool FoundAddRec =
SCEVExprContains(S, [](const SCEV *S) { return isa<SCEVAddRecExpr>(S); });
HasRecMap.insert({S, FoundAddRec});
return FoundAddRec;
}
/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
/// offset I, then return {S', I}, else return {\p S, nullptr}.
static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
const auto *Add = dyn_cast<SCEVAddExpr>(S);
if (!Add)
return {S, nullptr};
if (Add->getNumOperands() != 2)
return {S, nullptr};
auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
if (!ConstOp)
return {S, nullptr};
return {Add->getOperand(1), ConstOp->getValue()};
}
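// An illustrative example (hypothetical SCEVs): splitAddExpr applied to
// (4 + %x) returns the pair {%x, 4}, while anything that is not a two-operand
// add with a leading constant, e.g. (%x * %y), comes back as {S, nullptr}.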
/// Return the ValueOffsetPair set for \p S. \p S can be represented
/// by the value and offset from any ValueOffsetPair in the set.
ScalarEvolution::ValueOffsetPairSetVector *
ScalarEvolution::getSCEVValues(const SCEV *S) {
ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
if (SI == ExprValueMap.end())
return nullptr;
#ifndef NDEBUG
if (VerifySCEVMap) {
// Check there is no dangling Value in the set returned.
for (const auto &VE : SI->second)
assert(ValueExprMap.count(VE.first));
}
#endif
return &SI->second;
}
/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
/// cannot be used separately. eraseValueFromMap should be used to remove
/// V from ValueExprMap and ExprValueMap at the same time.
void ScalarEvolution::eraseValueFromMap(Value *V) {
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
// Remove {V, 0} from the set of ExprValueMap[S]
if (auto *SV = getSCEVValues(S))
SV->remove({V, nullptr});
// Remove {V, Offset} from the set of ExprValueMap[Stripped]
const SCEV *Stripped;
ConstantInt *Offset;
std::tie(Stripped, Offset) = splitAddExpr(S);
if (Offset != nullptr) {
if (auto *SV = getSCEVValues(Stripped))
SV->remove({V, Offset});
}
ValueExprMap.erase(V);
}
}
/// Check whether value has nuw/nsw/exact set but SCEV does not.
/// TODO: Ideally we would check for poison recursively, but this is
/// better than nothing.
static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) {
if (auto *I = dyn_cast<Instruction>(V)) {
if (isa<OverflowingBinaryOperator>(I)) {
if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
return true;
if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
return true;
}
} else if (isa<PossiblyExactOperator>(I) && I->isExact())
return true;
}
return false;
}
/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
const SCEV *S = getExistingSCEV(V);
if (S == nullptr) {
S = createSCEV(V);
// During PHI resolution, it is possible to create two SCEVs for the same
// V, so we need to double-check whether V->S was inserted into
// ValueExprMap before inserting S->{V, 0} into ExprValueMap.
std::pair<ValueExprMapType::iterator, bool> Pair =
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
if (Pair.second && !SCEVLostPoisonFlags(S, V)) {
ExprValueMap[S].insert({V, nullptr});
// If S == Stripped + Offset, add Stripped -> {V, Offset} into
// ExprValueMap.
const SCEV *Stripped = S;
ConstantInt *Offset = nullptr;
std::tie(Stripped, Offset) = splitAddExpr(S);
// If stripped is SCEVUnknown, don't bother to save
// Stripped -> {V, offset}. It doesn't simplify and sometimes even
// increases the complexity of the expansion code.
// If V is GetElementPtrInst, don't save Stripped -> {V, offset}
// because it may generate add/sub instead of GEP in SCEV expansion.
if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
!isa<GetElementPtrInst>(V))
ExprValueMap[Stripped].insert({V, Offset});
}
}
return S;
}
const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
if (checkValidity(S))
return S;
eraseValueFromMap(V);
forgetMemoizedResults(S);
}
return nullptr;
}
/// Return a SCEV corresponding to -V = -1*V
const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
SCEV::NoWrapFlags Flags) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
return getMulExpr(V, getMinusOne(Ty), Flags);
}
/// If Expr computes ~A, return A else return nullptr
static const SCEV *MatchNotExpr(const SCEV *Expr) {
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
if (!Add || Add->getNumOperands() != 2 ||
!Add->getOperand(0)->isAllOnesValue())
return nullptr;
const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
if (!AddRHS || AddRHS->getNumOperands() != 2 ||
!AddRHS->getOperand(0)->isAllOnesValue())
return nullptr;
return AddRHS->getOperand(1);
}
/// Return a SCEV corresponding to ~V = -1-V
const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
// Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y)
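// (This holds because x -> ~x is an order-reversing bijection; e.g.
// ~smax(~x, ~y) == smin(x, y), and likewise for the unsigned variants.)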
if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) {
auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) {
SmallVector<const SCEV *, 2> MatchedOperands;
for (const SCEV *Operand : MME->operands()) {
const SCEV *Matched = MatchNotExpr(Operand);
if (!Matched)
return (const SCEV *)nullptr;
MatchedOperands.push_back(Matched);
}
return getMinMaxExpr(SCEVMinMaxExpr::negate(MME->getSCEVType()),
MatchedOperands);
};
if (const SCEV *Replaced = MatchMinMaxNegation(MME))
return Replaced;
}
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
return getMinusSCEV(getMinusOne(Ty), V);
}
/// Compute an expression equivalent to S - getPointerBase(S).
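/// For example, for a pointer expression (%ptr + 4 + %i) whose pointer base
/// is %ptr, the result is (4 + %i).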
static const SCEV *removePointerBase(ScalarEvolution *SE, const SCEV *P) {
assert(P->getType()->isPointerTy());
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(P)) {
// The base of an AddRec is the first operand.
SmallVector<const SCEV *> Ops{AddRec->operands()};
Ops[0] = removePointerBase(SE, Ops[0]);
// Don't try to transfer nowrap flags for now. We could in some cases
// (for example, if the pointer operand of the AddRec is a SCEVUnknown).
return SE->getAddRecExpr(Ops, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
if (auto *Add = dyn_cast<SCEVAddExpr>(P)) {
// The base of an Add is the pointer operand.
SmallVector<const SCEV *> Ops{Add->operands()};
const SCEV **PtrOp = nullptr;
for (const SCEV *&AddOp : Ops) {
if (AddOp->getType()->isPointerTy()) {
// If we find an Add with multiple pointer operands, treat it as a
// pointer base to be consistent with getPointerBase. Eventually
// we should be able to assert this is impossible.
if (PtrOp)
return SE->getZero(P->getType());
PtrOp = &AddOp;
}
}
*PtrOp = removePointerBase(SE, *PtrOp);
// Don't try to transfer nowrap flags for now. We could in some cases
// (for example, if the pointer operand of the Add is a SCEVUnknown).
return SE->getAddExpr(Ops);
}
// Any other expression must be a pointer base.
return SE->getZero(P->getType());
}
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags,
unsigned Depth) {
// Fast path: X - X --> 0.
if (LHS == RHS)
return getZero(LHS->getType());
// If we subtract two pointers with different pointer bases, bail.
// Eventually, we're going to add an assertion to getMulExpr that we
// can't multiply by a pointer.
if (RHS->getType()->isPointerTy()) {
if (!LHS->getType()->isPointerTy() ||
getPointerBase(LHS) != getPointerBase(RHS))
return getCouldNotCompute();
LHS = removePointerBase(this, LHS);
RHS = removePointerBase(this, RHS);
}
// We represent LHS - RHS as LHS + (-1)*RHS. This transformation
// makes it so that we cannot make much use of NUW.
auto AddFlags = SCEV::FlagAnyWrap;
const bool RHSIsNotMinSigned =
!getSignedRangeMin(RHS).isMinSignedValue();
if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
// Let M be the minimum representable signed value. Then (-1)*RHS
// signed-wraps if and only if RHS is M. That can happen even for
// a NSW subtraction because e.g. (-1)*M signed-wraps even though
// -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
// (-1)*RHS, we need to prove that RHS != M.
//
// If LHS is non-negative and we know that LHS - RHS does not
// signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
// either by proving that RHS > M or that LHS >= 0.
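// Concretely, for i8: M == -128, and (-1) * (-128) wraps back to -128,
// while -1 - (-128) == 127 does not wrap.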
if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
AddFlags = SCEV::FlagNSW;
}
}
// FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
// RHS is NSW and LHS >= 0.
//
// The difficulty here is that the NSW flag may have been proven
// relative to a loop that is to be found in a recurrence in LHS and
// not in RHS. Applying NSW to (-1)*M may then let the NSW have a
// larger scope than intended.
auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
}
const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
unsigned Depth) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
return getTruncateExpr(V, Ty, Depth);
return getZeroExtendExpr(V, Ty, Depth);
}
const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty,
unsigned Depth) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
return getTruncateExpr(V, Ty, Depth);
return getSignExtendExpr(V, Ty, Depth);
}
const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot noop or zero extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrZeroExtend cannot truncate!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getZeroExtendExpr(V, Ty);
}
const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot noop or sign extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrSignExtend cannot truncate!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getSignExtendExpr(V, Ty);
}
const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot noop or any extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrAnyExtend cannot truncate!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getAnyExtendExpr(V, Ty);
}
const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or noop with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
"getTruncateOrNoop cannot extend!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getTruncateExpr(V, Ty);
}
const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
const SCEV *PromotedLHS = LHS;
const SCEV *PromotedRHS = RHS;
if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
else
PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
return getUMaxExpr(PromotedLHS, PromotedRHS);
}
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
return getUMinFromMismatchedTypes(Ops);
}
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(
SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "At least one operand must be!");
// Trivial case.
if (Ops.size() == 1)
return Ops[0];
// Find the max type first.
Type *MaxType = nullptr;
for (auto *S : Ops)
if (MaxType)
MaxType = getWiderType(MaxType, S->getType());
else
MaxType = S->getType();
assert(MaxType && "Failed to find maximum type!");
// Extend all ops to max type.
SmallVector<const SCEV *, 2> PromotedOps;
for (auto *S : Ops)
PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType));
// Generate umin.
return getUMinExpr(PromotedOps);
}
const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
// A pointer operand may evaluate to a nonpointer expression, such as null.
if (!V->getType()->isPointerTy())
return V;
while (true) {
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
V = AddRec->getStart();
} else if (auto *Add = dyn_cast<SCEVAddExpr>(V)) {
const SCEV *PtrOp = nullptr;
for (const SCEV *AddOp : Add->operands()) {
if (AddOp->getType()->isPointerTy()) {
// Cannot find the base of an expression with multiple pointer ops.
if (PtrOp)
return V;
PtrOp = AddOp;
}
}
if (!PtrOp) // All operands were non-pointer.
return V;
V = PtrOp;
} else // Not something we can look further into.
return V;
}
}
/// Push users of the given Instruction onto the given Worklist.
static void
PushDefUseChildren(Instruction *I,
SmallVectorImpl<Instruction *> &Worklist) {
// Push the def-use children onto the Worklist stack.
for (User *U : I->users())
Worklist.push_back(cast<Instruction>(U));
}
void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
SmallVector<Instruction *, 16> Worklist;
PushDefUseChildren(PN, Worklist);
SmallPtrSet<Instruction *, 8> Visited;
Visited.insert(PN);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I).second)
continue;
auto It = ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
const SCEV *Old = It->second;
// Short-circuit the def-use traversal if the symbolic name
// ceases to appear in expressions.
if (Old != SymName && !hasOperand(Old, SymName))
continue;
// SCEVUnknown for a PHI either means that it has an unrecognized
// structure, it's a PHI that's in the process of being computed
// by createNodeForPHI, or it's a single-value PHI. In the first case,
// additional loop trip count information isn't going to change anything.
// In the second case, createNodeForPHI will perform the necessary
// updates on its own when it gets to that point. In the third, we do
// want to forget the SCEVUnknown.
if (!isa<PHINode>(I) ||
!isa<SCEVUnknown>(Old) ||
(I != PN && Old == SymName)) {
eraseValueFromMap(It->first);
forgetMemoizedResults(Old);
}
}
PushDefUseChildren(I, Worklist);
}
}
namespace {
/// Takes SCEV S and Loop L. For each AddRec sub-expression whose loop is L,
/// use its start expression. If the sub-expression's loop is not L, use the
/// AddRec itself when IgnoreOtherLoops is true; otherwise the rewrite cannot
/// be done. If the SCEV contains a SCEVUnknown that is not invariant in L,
/// the rewrite cannot be done either.
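/// For example, rewriting {A,+,B}<L> for loop L yields A, while an AddRec
/// for a different loop is left untouched when IgnoreOtherLoops is true.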
class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
public:
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
bool IgnoreOtherLoops = true) {
SCEVInitRewriter Rewriter(L, SE);
const SCEV *Result = Rewriter.visit(S);
if (Rewriter.hasSeenLoopVariantSCEVUnknown())
return SE.getCouldNotCompute();
return Rewriter.hasSeenOtherLoops() && !IgnoreOtherLoops
? SE.getCouldNotCompute()
: Result;
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (!SE.isLoopInvariant(Expr, L))
SeenLoopVariantSCEVUnknown = true;
return Expr;
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
// Only re-write AddRecExprs for this loop.
if (Expr->getLoop() == L)
return Expr->getStart();
SeenOtherLoops = true;
return Expr;
}
bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }
bool hasSeenOtherLoops() { return SeenOtherLoops; }
private:
explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
: SCEVRewriteVisitor(SE), L(L) {}
const Loop *L;
bool SeenLoopVariantSCEVUnknown = false;
bool SeenOtherLoops = false;
};
/// Takes SCEV S and Loop L. For each AddRec sub-expression whose loop is L,
/// use its post-increment expression; otherwise use the AddRec itself.
/// If the SCEV contains a SCEVUnknown that is not invariant in L, the
/// rewrite cannot be done.
class SCEVPostIncRewriter : public SCEVRewriteVisitor<SCEVPostIncRewriter> {
public:
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) {
SCEVPostIncRewriter Rewriter(L, SE);
const SCEV *Result = Rewriter.visit(S);
return Rewriter.hasSeenLoopVariantSCEVUnknown()
? SE.getCouldNotCompute()
: Result;
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (!SE.isLoopInvariant(Expr, L))
SeenLoopVariantSCEVUnknown = true;
return Expr;
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
// Only re-write AddRecExprs for this loop.
if (Expr->getLoop() == L)
return Expr->getPostIncExpr(SE);
SeenOtherLoops = true;
return Expr;
}
bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }
bool hasSeenOtherLoops() { return SeenOtherLoops; }
private:
explicit SCEVPostIncRewriter(const Loop *L, ScalarEvolution &SE)
: SCEVRewriteVisitor(SE), L(L) {}
const Loop *L;
bool SeenLoopVariantSCEVUnknown = false;
bool SeenOtherLoops = false;
};
/// This class evaluates the compare condition by matching it against the
/// condition of loop latch. If there is a match we assume a true value
/// for the condition while building SCEV nodes.
class SCEVBackedgeConditionFolder
: public SCEVRewriteVisitor<SCEVBackedgeConditionFolder> {
public:
static const SCEV *rewrite(const SCEV *S, const Loop *L,
ScalarEvolution &SE) {
bool IsPosBECond = false;
Value *BECond = nullptr;
if (BasicBlock *Latch = L->getLoopLatch()) {
BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
if (BI && BI->isConditional()) {
assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
"Both outgoing branches should not target same header!");
BECond = BI->getCondition();
IsPosBECond = BI->getSuccessor(0) == L->getHeader();
} else {
return S;
}
}
SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE);
return Rewriter.visit(S);
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
const SCEV *Result = Expr;
bool InvariantF = SE.isLoopInvariant(Expr, L);
if (!InvariantF) {
Instruction *I = cast<Instruction>(Expr->getValue());
switch (I->getOpcode()) {
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
Optional<const SCEV *> Res =
compareWithBackedgeCondition(SI->getCondition());
if (Res.hasValue()) {
bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue());
}
break;
}
default: {
Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
if (Res.hasValue())
Result = Res.getValue();
break;
}
}
}
return Result;
}
private:
explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond,
bool IsPosBECond, ScalarEvolution &SE)
: SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond),
IsPositiveBECond(IsPosBECond) {}
Optional<const SCEV *> compareWithBackedgeCondition(Value *IC);
const Loop *L;
/// Loop back condition.
Value *BackedgeCond = nullptr;
/// Set to true if loop back is on positive branch condition.
bool IsPositiveBECond;
};
Optional<const SCEV *>
SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) {
// If value matches the backedge condition for loop latch,
// then return a constant evolution node based on loopback
// branch taken.
if (BackedgeCond == IC)
return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext()))
: SE.getZero(Type::getInt1Ty(SE.getContext()));
return None;
}
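/// Rewrites affine AddRecs of loop L by subtracting one step:
/// {A,+,B}<L> becomes {A-B,+,B}<L>. createAddRecFromPHI uses this to
/// recognize a PHI as the value of another expression shifted back by
/// one iteration.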
class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
public:
static const SCEV *rewrite(const SCEV *S, const Loop *L,
ScalarEvolution &SE) {
SCEVShiftRewriter Rewriter(L, SE);
const SCEV *Result = Rewriter.visit(S);
return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
// Only allow AddRecExprs for this loop.
if (!SE.isLoopInvariant(Expr, L))
Valid = false;
return Expr;
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
if (Expr->getLoop() == L && Expr->isAffine())
return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
Valid = false;
return Expr;
}
bool isValid() { return Valid; }
private:
explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
: SCEVRewriteVisitor(SE), L(L) {}
const Loop *L;
bool Valid = true;
};
} // end anonymous namespace
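// Try to prove NUW/NSW for an affine AddRec purely from the constant ranges
// of the recurrence and of its step, using
// ConstantRange::makeGuaranteedNoWrapRegion.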
SCEV::NoWrapFlags
ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
if (!AR->isAffine())
return SCEV::FlagAnyWrap;
using OBO = OverflowingBinaryOperator;
SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;
if (!AR->hasNoSignedWrap()) {
ConstantRange AddRecRange = getSignedRange(AR);
ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));
auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
Instruction::Add, IncRange, OBO::NoSignedWrap);
if (NSWRegion.contains(AddRecRange))
Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW);
}
if (!AR->hasNoUnsignedWrap()) {
ConstantRange AddRecRange = getUnsignedRange(AR);
ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this));
auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
Instruction::Add, IncRange, OBO::NoUnsignedWrap);
if (NUWRegion.contains(AddRecRange))
Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW);
}
return Result;
}
SCEV::NoWrapFlags
ScalarEvolution::proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR) {
SCEV::NoWrapFlags Result = AR->getNoWrapFlags();
if (AR->hasNoSignedWrap())
return Result;
if (!AR->isAffine())
return Result;
const SCEV *Step = AR->getStepRecurrence(*this);
const Loop *L = AR->getLoop();
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
// Normally, in the cases we can prove no-overflow via a
// backedge guarding condition, we can also compute a backedge
// taken count for the loop. The exceptions are assumptions and
// guards present in the loop -- SCEV is not great at exploiting
// these to compute max backedge taken counts, but can still use
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.
if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
AC.assumptions().empty())
return Result;
// If the backedge is guarded by a comparison with the pre-inc value the
// addrec is safe. Also, if the entry is guarded by a comparison with the
// start value and the backedge is guarded by a comparison with the post-inc
// value, the addrec is safe.
ICmpInst::Predicate Pred;
const SCEV *OverflowLimit =
getSignedOverflowLimitForStep(Step, &Pred, this);
if (OverflowLimit &&
(isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
Result = setFlags(Result, SCEV::FlagNSW);
}
return Result;
}
SCEV::NoWrapFlags
ScalarEvolution::proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR) {
SCEV::NoWrapFlags Result = AR->getNoWrapFlags();
if (AR->hasNoUnsignedWrap())
return Result;
if (!AR->isAffine())
return Result;
const SCEV *Step = AR->getStepRecurrence(*this);
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
// Normally, in the cases we can prove no-overflow via a
// backedge guarding condition, we can also compute a backedge
// taken count for the loop. The exceptions are assumptions and
// guards present in the loop -- SCEV is not great at exploiting
// these to compute max backedge taken counts, but can still use
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.
if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
AC.assumptions().empty())
return Result;
// If the backedge is guarded by a comparison with the pre-inc value the
// addrec is safe. Also, if the entry is guarded by a comparison with the
// start value and the backedge is guarded by a comparison with the post-inc
// value, the addrec is safe.
if (isKnownPositive(Step)) {
const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
getUnsignedRangeMax(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) {
Result = setFlags(Result, SCEV::FlagNUW);
}
}
return Result;
}
namespace {
/// Represents an abstract binary operation. This may exist as a
/// normal instruction or constant expression, or may have been
/// derived from an expression tree.
struct BinaryOp {
unsigned Opcode;
Value *LHS;
Value *RHS;
bool IsNSW = false;
bool IsNUW = false;
/// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
/// constant expression.
Operator *Op = nullptr;
explicit BinaryOp(Operator *Op)
: Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)),
Op(Op) {
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
IsNSW = OBO->hasNoSignedWrap();
IsNUW = OBO->hasNoUnsignedWrap();
}
}
explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
bool IsNUW = false)
: Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
};
} // end anonymous namespace
/// Try to map \p V into a BinaryOp, and return \c None on failure.
static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
auto *Op = dyn_cast<Operator>(V);
if (!Op)
return None;
// Implementation detail: all the cleverness here should happen without
// creating new SCEV expressions -- our caller knows tricks to avoid creating
// SCEV expressions when possible, and we should not break that.
switch (Op->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::UDiv:
case Instruction::URem:
case Instruction::And:
case Instruction::Or:
case Instruction::AShr:
case Instruction::Shl:
return BinaryOp(Op);
case Instruction::Xor:
if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
// If the RHS of the xor is a signmask, then this is just an add.
// Instcombine turns add of signmask into xor as a strength reduction step.
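// For example, for i8 values (xor %x, 0x80) computes the same result as
// (add %x, 0x80): only the sign bit is affected and its carry out is
// discarded.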
if (RHSC->getValue().isSignMask())
return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
return BinaryOp(Op);
case Instruction::LShr:
// Turn a logical shift right by a constant into an unsigned divide.
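// For example, (lshr i32 %x, 3) is treated as (udiv i32 %x, 8).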
if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
uint32_t BitWidth = cast<IntegerType>(Op->getType())->getBitWidth();
// If the shift count is not less than the bitwidth, the result of
// the shift is undefined. Don't try to analyze it, because the
// resolution chosen here may differ from the resolution chosen in
// other parts of the compiler.
if (SA->getValue().ult(BitWidth)) {
Constant *X =
ConstantInt::get(SA->getContext(),
APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
return BinaryOp(Instruction::UDiv, Op->getOperand(0), X);
}
}
return BinaryOp(Op);
case Instruction::ExtractValue: {
auto *EVI = cast<ExtractValueInst>(Op);
if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
break;
auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand());
if (!WO)
break;
Instruction::BinaryOps BinOp = WO->getBinaryOp();
bool Signed = WO->isSigned();
// TODO: Should add nuw/nsw flags for mul as well.
if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT))
return BinaryOp(BinOp, WO->getLHS(), WO->getRHS());
// Now that we know that all uses of the arithmetic-result component of
// CI are guarded by the overflow check, we can go ahead and pretend
// that the arithmetic is non-overflowing.
return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(),
/* IsNSW = */ Signed, /* IsNUW = */ !Signed);
}
default:
break;
}
// Recognise intrinsic loop.decrement.reg, and as this has exactly the same
// semantics as a Sub, return a binary sub expression.
if (auto *II = dyn_cast<IntrinsicInst>(V))
if (II->getIntrinsicID() == Intrinsic::loop_decrement_reg)
return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1));
return None;
}
/// Helper function to createAddRecFromPHIWithCasts. We have a phi
/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
/// follows one of the following patterns:
/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// If the SCEV expression of \p Op conforms with one of the expected patterns
/// we return the type of the truncation operation, and indicate whether the
/// truncated type should be treated as signed/unsigned by setting
/// \p Signed to true/false, respectively.
static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
bool &Signed, ScalarEvolution &SE) {
// The case where Op == SymbolicPHI (that is, with no type conversions on
// the way) is handled by the regular add recurrence creating logic and
// would have already been triggered in createAddRecForPHI. Reaching it here
// means that createAddRecFromPHI had failed for this PHI before (e.g.,
// because one of the other operands of the SCEVAddExpr updating this PHI is
// not invariant).
//
// Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
// this case predicates that allow us to prove that Op == SymbolicPHI will
// be added.
if (Op == SymbolicPHI)
return nullptr;
unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType());
unsigned NewBits = SE.getTypeSizeInBits(Op->getType());
if (SourceBits != NewBits)
return nullptr;
const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
if (!SExt && !ZExt)
return nullptr;
const SCEVTruncateExpr *Trunc =
SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
: dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
if (!Trunc)
return nullptr;
const SCEV *X = Trunc->getOperand();
if (X != SymbolicPHI)
return nullptr;
Signed = SExt != nullptr;
return Trunc->getType();
}
static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
if (!PN->getType()->isIntegerTy())
return nullptr;
const Loop *L = LI.getLoopFor(PN->getParent());
if (!L || L->getHeader() != PN->getParent())
return nullptr;
return L;
}
// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
// computation that updates the phi follows the following pattern:
// (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
// which corresponds to a phi->trunc->sext/zext->add->phi update chain.
// If so, try to see if it can be rewritten as an AddRecExpr under some
// Predicates. If successful, return them as a pair. Also cache the results
// of the analysis.
//
// Example usage scenario:
// Say the Rewriter is called for the following SCEV:
// 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
// where:
// %X = phi i64 (%Start, %BEValue)
// It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
// and call this function with %SymbolicPHI = %X.
//
// The analysis will find that the value coming around the backedge has
// the following SCEV:
// BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
// Upon concluding that this matches the desired pattern, the function
// will return the pair {NewAddRec, SmallPredsVec} where:
// NewAddRec = {%Start,+,%Step}
// SmallPredsVec = {P1, P2, P3} as follows:
// P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nsw> Flags: <nssw>
// P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64)
// P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64)
// The returned pair means that SymbolicPHI can be rewritten into NewAddRec
// under the predicates {P1,P2,P3}.
// This predicated rewrite will be cached in PredicatedSCEVRewrites:
// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3}}
//
// TODO's:
//
// 1) Extend the Induction descriptor to also support inductions that involve
// casts: When needed (namely, when we are called in the context of the
// vectorizer induction analysis), a Set of cast instructions will be
// populated by this method, and provided back to isInductionPHI. This is
// needed to allow the vectorizer to properly record them to be ignored by
// the cost model and to avoid vectorizing them (otherwise these casts,
// which are redundant under the runtime overflow checks, will be
// vectorized, which can be costly).
//
// 2) Support additional induction/PHISCEV patterns: We also want to support
// inductions where the sext-trunc / zext-trunc operations (partly) occur
// after the induction update operation (the induction increment):
//
// (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
// which corresponds to a phi->add->trunc->sext/zext->phi update chain.
//
// (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix)
// which corresponds to a phi->trunc->add->sext/zext->phi update chain.
//
// 3) Outline common code with createAddRecFromPHI to avoid duplication.
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) {
SmallVector<const SCEVPredicate *, 3> Predicates;
// *** Part1: Analyze if we have a phi-with-cast pattern for which we can
// return an AddRec expression under some predicate.
auto *PN = cast<PHINode>(SymbolicPHI->getValue());
const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
assert(L && "Expecting an integer loop header phi");
// The loop may have multiple entrances or multiple exits; we can analyze
// this phi as an addrec if it has a unique entry value and a unique
// backedge value.
Value *BEValueV = nullptr, *StartValueV = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
if (L->contains(PN->getIncomingBlock(i))) {
if (!BEValueV) {
BEValueV = V;
} else if (BEValueV != V) {
BEValueV = nullptr;
break;
}
} else if (!StartValueV) {
StartValueV = V;
} else if (StartValueV != V) {
StartValueV = nullptr;
break;
}
}
if (!BEValueV || !StartValueV)
return None;
const SCEV *BEValue = getSCEV(BEValueV);
// If the value coming around the backedge is an add with the symbolic
// value we just inserted, possibly with casts that we can ignore under
// an appropriate runtime guard, then we found a simple induction variable!
const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
if (!Add)
return None;
// If there is a single occurrence of the symbolic value, possibly
// casted, replace it with a recurrence.
unsigned FoundIndex = Add->getNumOperands();
Type *TruncTy = nullptr;
bool Signed;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if ((TruncTy =
isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this)))
if (FoundIndex == e) {
FoundIndex = i;
break;
}
if (FoundIndex == Add->getNumOperands())
return None;
// Create an add with everything but the specified operand.
SmallVector<const SCEV *, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (i != FoundIndex)
Ops.push_back(Add->getOperand(i));
const SCEV *Accum = getAddExpr(Ops);
// The runtime checks will not be valid if the step amount is
// varying inside the loop.
if (!isLoopInvariant(Accum, L))
return None;
// *** Part2: Create the predicates
// Analysis was successful: we have a phi-with-cast pattern for which we
// can return an AddRec expression under the following predicates:
//
// P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
// fits within the truncated type (does not overflow) for i = 0 to n-1.
// P2: An Equal predicate that guarantees that
// Start = (Ext ix (Trunc iy (Start) to ix) to iy)
// P3: An Equal predicate that guarantees that
// Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
//
// As we next prove, the above predicates guarantee that:
// Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
//
//
// More formally, we want to prove that:
// Expr(i+1) = Start + (i+1) * Accum
// = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
//
// Given that:
// 1) Expr(0) = Start
// 2) Expr(1) = Start + Accum
// = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
// 3) Induction hypothesis (step i):
// Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
//
// Proof:
// Expr(i+1) =
// = Start + (i+1)*Accum
// = (Start + i*Accum) + Accum
// = Expr(i) + Accum
// = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
// :: from step i
//
// = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
//
// = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
// + (Ext ix (Trunc iy (Accum) to ix) to iy)
// + Accum :: from P3
//
// = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
// + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
//
// = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
// = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
//
// By induction, the same applies to all iterations 1<=i<n:
//
// Create a truncated addrec for which we will add a no overflow check (P1).
const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV =
getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);
// PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr.
// ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV
// will be constant.
//
// If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't
// add P1.
if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
Signed ? SCEVWrapPredicate::IncrementNSSW
: SCEVWrapPredicate::IncrementNUSW;
const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
Predicates.push_back(AddRecPred);
}
// Create the Equal Predicates P2,P3:
// It is possible that the predicates P2 and/or P3 are computable at
// compile time due to StartVal and/or Accum being constants.
// If either one is, then we can check that now and escape if either P2
// or P3 is false.
// Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
// for each of StartVal and Accum
auto getExtendedExpr = [&](const SCEV *Expr,
bool CreateSignExtend) -> const SCEV * {
assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
const SCEV *ExtendedExpr =
CreateSignExtend ? getSignExtendExpr(TruncatedExpr, Expr->getType())
: getZeroExtendExpr(TruncatedExpr, Expr->getType());
return ExtendedExpr;
};
// Given:
// ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy)
// = getExtendedExpr(Expr)
// Determine whether the predicate P: Expr == ExtendedExpr
// is known to be false at compile time
auto PredIsKnownFalse = [&](const SCEV *Expr,
const SCEV *ExtendedExpr) -> bool {
return Expr != ExtendedExpr &&
isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr);
};
const SCEV *StartExtended = getExtendedExpr(StartVal, Signed);
if (PredIsKnownFalse(StartVal, StartExtended)) {
LLVM_DEBUG(dbgs() << "P2 is compile-time false\n";);
return None;
}
// The Step is always Signed (because the overflow checks are either
// NSSW or NUSW)
const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true);
if (PredIsKnownFalse(Accum, AccumExtended)) {
LLVM_DEBUG(dbgs() << "P3 is compile-time false\n";);
return None;
}
auto AppendPredicate = [&](const SCEV *Expr,
const SCEV *ExtendedExpr) -> void {
if (Expr != ExtendedExpr &&
!isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
LLVM_DEBUG(dbgs() << "Added Predicate: " << *Pred);
Predicates.push_back(Pred);
}
};
AppendPredicate(StartVal, StartExtended);
AppendPredicate(Accum, AccumExtended);
// *** Part3: Predicates are ready. Now go ahead and create the new addrec in
// which the casts had been folded away. The caller can rewrite SymbolicPHI
// into NewAR if it will also add the runtime overflow checks specified in
// Predicates.
auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);
std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
std::make_pair(NewAR, Predicates);
// Remember the result of the analysis for this SCEV at this location.
PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite;
return PredRewrite;
}
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
auto *PN = cast<PHINode>(SymbolicPHI->getValue());
const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
if (!L)
return None;
// Check to see if we already analyzed this PHI.
auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L});
if (I != PredicatedSCEVRewrites.end()) {
std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
I->second;
// Analysis was done before and failed to create an AddRec:
if (Rewrite.first == SymbolicPHI)
return None;
// Analysis was done before and succeeded in creating an AddRec under
// a predicate:
assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
assert(!(Rewrite.second).empty() && "Expected to find Predicates");
return Rewrite;
}
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI);
// Record in the cache that the analysis failed
if (!Rewrite) {
SmallVector<const SCEVPredicate *, 3> Predicates;
PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates};
return None;
}
return Rewrite;
}
// FIXME: This utility is currently required because the Rewriter currently
// does not rewrite this expression:
// {0, +, (sext ix (trunc iy %step to ix) to iy)}
// into {0, +, %step},
// even when the following Equal predicate exists:
// "%step == (sext ix (trunc iy to ix) to iy)".
bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const {
if (AR1 == AR2)
return true;
auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool {
if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) &&
!Preds.implies(SE.getEqualPredicate(Expr2, Expr1)))
return false;
return true;
};
if (!areExprsEqual(AR1->getStart(), AR2->getStart()) ||
!areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE)))
return false;
return true;
}
/// A helper function for createAddRecFromPHI to handle simple cases.
///
/// This function tries to find an AddRec expression for the simplest (yet most
/// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)).
/// If it fails, createAddRecFromPHI will use a more general, but slow,
/// technique for finding the AddRec expression.
const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
Value *BEValueV,
Value *StartValueV) {
const Loop *L = LI.getLoopFor(PN->getParent());
assert(L && L->getHeader() == PN->getParent());
assert(BEValueV && StartValueV);
auto BO = MatchBinaryOp(BEValueV, DT);
if (!BO)
return nullptr;
if (BO->Opcode != Instruction::Add)
return nullptr;
const SCEV *Accum = nullptr;
if (BO->LHS == PN && L->isLoopInvariant(BO->RHS))
Accum = getSCEV(BO->RHS);
else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS))
Accum = getSCEV(BO->LHS);
if (!Accum)
return nullptr;
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
if (BO->IsNUW)
Flags = setFlags(Flags, SCEV::FlagNUW);
if (BO->IsNSW)
Flags = setFlags(Flags, SCEV::FlagNSW);
const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
// overflow.
if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
(void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
return PHISCEV;
}
const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
const Loop *L = LI.getLoopFor(PN->getParent());
if (!L || L->getHeader() != PN->getParent())
return nullptr;
// The loop may have multiple entrances or multiple exits; we can analyze
// this phi as an addrec if it has a unique entry value and a unique
// backedge value.
Value *BEValueV = nullptr, *StartValueV = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
if (L->contains(PN->getIncomingBlock(i))) {
if (!BEValueV) {
BEValueV = V;
} else if (BEValueV != V) {
BEValueV = nullptr;
break;
}
} else if (!StartValueV) {
StartValueV = V;
} else if (StartValueV != V) {
StartValueV = nullptr;
break;
}
}
if (!BEValueV || !StartValueV)
return nullptr;
assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
"PHI node already processed?");
// First, try to find an AddRec expression without creating a fictitious symbolic
// value for PN.
if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV))
return S;
// Handle PHI node value symbolically.
const SCEV *SymbolicName = getUnknown(PN);
ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
// Using this symbolic name for the PHI, analyze the value coming around
// the back-edge.
const SCEV *BEValue = getSCEV(BEValueV);
// NOTE: If BEValue is loop invariant, we know that the PHI node just
// has a special value for the first iteration of the loop.
// If the value coming around the backedge is an add with the symbolic
// value we just inserted, then we found a simple induction variable!
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
// If there is a single occurrence of the symbolic value, replace it
// with a recurrence.
unsigned FoundIndex = Add->getNumOperands();
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (Add->getOperand(i) == SymbolicName)
if (FoundIndex == e) {
FoundIndex = i;
break;
}
if (FoundIndex != Add->getNumOperands()) {
// Create an add with everything but the specified operand.
SmallVector<const SCEV *, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (i != FoundIndex)
Ops.push_back(SCEVBackedgeConditionFolder::rewrite(Add->getOperand(i),
L, *this));
const SCEV *Accum = getAddExpr(Ops);
// This is not a valid addrec if the step amount is varying each
// loop iteration, but is not itself an addrec in this loop.
if (isLoopInvariant(Accum, L) ||
(isa<SCEVAddRecExpr>(Accum) &&
cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
if (auto BO = MatchBinaryOp(BEValueV, DT)) {
if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
if (BO->IsNUW)
Flags = setFlags(Flags, SCEV::FlagNUW);
if (BO->IsNSW)
Flags = setFlags(Flags, SCEV::FlagNSW);
}
} else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
// about signed or unsigned overflow because pointers are
// unsigned but we may have a negative index from the base
// pointer. We can guarantee that no unsigned wrap occurs if the
// indices form a positive value.
if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
Flags = setFlags(Flags, SCEV::FlagNW);
const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
Flags = setFlags(Flags, SCEV::FlagNUW);
}
// We cannot transfer nuw and nsw flags from subtraction
// operations -- sub nuw X, Y is not the same as add nuw X, -Y
// for instance.
}
const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
forgetSymbolicName(PN, SymbolicName);
ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
// overflow.
if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
(void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
return PHISCEV;
}
}
} else {
// Otherwise, this could be a loop like this:
// i = 0; for (j = 1; ..; ++j) { .... i = j; }
// In this case, j = {1,+,1} and BEValue is j.
// Because the other in-value of i (0) fits the evolution of BEValue,
// i really is an addrec evolution.
//
// We can generalize this saying that i is the shifted value of BEValue
// by one iteration:
// PHI(f(0), f({1,+,1})) --> f({0,+,1})
const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false);
if (Shifted != getCouldNotCompute() &&
Start != getCouldNotCompute()) {
const SCEV *StartVal = getSCEV(StartValueV);
if (Start == StartVal) {
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
forgetSymbolicName(PN, SymbolicName);
ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
return Shifted;
}
}
}
// Remove the temporary PHI node SCEV that has been inserted while intending
// to create an AddRecExpr for this PHI node. We cannot keep this temporary,
// as it would prevent later (possibly simpler) SCEV expressions from being
// added to the ValueExprMap.
eraseValueFromMap(PN);
return nullptr;
}
// Checks if the SCEV S is available at BB. S is considered available at BB
// if S can be materialized at BB without introducing a fault.
static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
BasicBlock *BB) {
struct CheckAvailable {
bool TraversalDone = false;
bool Available = true;
const Loop *L = nullptr; // The loop BB is in (can be nullptr)
BasicBlock *BB = nullptr;
DominatorTree &DT;
CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
: L(L), BB(BB), DT(DT) {}
bool setUnavailable() {
TraversalDone = true;
Available = false;
return false;
}
bool follow(const SCEV *S) {
switch (S->getSCEVType()) {
case scConstant:
case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr:
// These expressions are available if their operand(s) is/are.
return true;
case scAddRecExpr: {
// We allow add recurrences that are on the loop that BB is in, or some
// outer loop. This guarantees availability because the value of the
// add recurrence at BB is simply the "current" value of the induction
// variable. We can relax this in the future; for instance an add
// recurrence on a sibling dominating loop is also available at BB.
const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
if (L && (ARLoop == L || ARLoop->contains(L)))
return true;
return setUnavailable();
}
case scUnknown: {
// For SCEVUnknown, we check for simple dominance.
const auto *SU = cast<SCEVUnknown>(S);
Value *V = SU->getValue();
if (isa<Argument>(V))
return false;
if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
return false;
return setUnavailable();
}
case scUDivExpr:
case scCouldNotCompute:
// We do not try to be smart about these at all.
return setUnavailable();
}
llvm_unreachable("Unknown SCEV kind!");
}
bool isDone() { return TraversalDone; }
};
CheckAvailable CA(L, BB, DT);
SCEVTraversal<CheckAvailable> ST(CA);
ST.visitAll(S);
return CA.Available;
}
// Try to match a control flow sequence that branches out at BI and merges back
// at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful
// match.
static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
Value *&C, Value *&LHS, Value *&RHS) {
C = BI->getCondition();
BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));
if (!LeftEdge.isSingleEdge())
return false;
assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");
Use &LeftUse = Merge->getOperandUse(0);
Use &RightUse = Merge->getOperandUse(1);
if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
LHS = LeftUse;
RHS = RightUse;
return true;
}
if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
LHS = RightUse;
RHS = LeftUse;
return true;
}
return false;
}
const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
auto IsReachable =
[&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
const Loop *L = LI.getLoopFor(PN->getParent());
// We don't want to break LCSSA, even in a SCEV expression tree.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
return nullptr;
// Try to match
//
// br %cond, label %left, label %right
// left:
// br label %merge
// right:
// br label %merge
// merge:
// V = phi [ %x, %left ], [ %y, %right ]
//
// as "select %cond, %x, %y"
BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
assert(IDom && "At least the entry block should dominate PN");
auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;
if (BI && BI->isConditional() &&
BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
}
return nullptr;
}
const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (const SCEV *S = createAddRecFromPHI(PN))
return S;
if (const SCEV *S = createNodeFromSelectLikePHI(PN))
return S;
// If the PHI has a single incoming value, follow that value, unless the
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
if (LI.replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
}
const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
Value *Cond,
Value *TrueVal,
Value *FalseVal) {
// Handle "constant" branch or select. This can occur for instance when a
// loop pass transforms an inner loop and moves on to process the outer loop.
if (auto *CI = dyn_cast<ConstantInt>(Cond))
return getSCEV(CI->isOne() ? TrueVal : FalseVal);
// Try to match some simple smax or umax patterns.
auto *ICI = dyn_cast<ICmpInst>(Cond);
if (!ICI)
return getUnknown(I);
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
switch (ICI->getPredicate()) {
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
// a > b ? a+x : b+x -> max(a, b)+x
// a > b ? b+x : a+x -> min(a, b)+x
if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
bool Signed = ICI->isSigned();
const SCEV *LA = getSCEV(TrueVal);
const SCEV *RA = getSCEV(FalseVal);
const SCEV *LS = getSCEV(LHS);
const SCEV *RS = getSCEV(RHS);
if (LA->getType()->isPointerTy()) {
// FIXME: Handle cases where LS/RS are pointers not equal to LA/RA.
// Need to make sure we can't produce weird expressions involving
// negated pointers.
if (LA == LS && RA == RS)
return Signed ? getSMaxExpr(LS, RS) : getUMaxExpr(LS, RS);
if (LA == RS && RA == LS)
return Signed ? getSMinExpr(LS, RS) : getUMinExpr(LS, RS);
}
auto CoerceOperand = [&](const SCEV *Op) -> const SCEV * {
if (Op->getType()->isPointerTy()) {
Op = getLosslessPtrToIntExpr(Op);
if (isa<SCEVCouldNotCompute>(Op))
return Op;
}
if (Signed)
Op = getNoopOrSignExtend(Op, I->getType());
else
Op = getNoopOrZeroExtend(Op, I->getType());
return Op;
};
LS = CoerceOperand(LS);
RS = CoerceOperand(RS);
if (isa<SCEVCouldNotCompute>(LS) || isa<SCEVCouldNotCompute>(RS))
break;
const SCEV *LDiff = getMinusSCEV(LA, LS);
const SCEV *RDiff = getMinusSCEV(RA, RS);
if (LDiff == RDiff)
return getAddExpr(Signed ? getSMaxExpr(LS, RS) : getUMaxExpr(LS, RS),
LDiff);
LDiff = getMinusSCEV(LA, RS);
RDiff = getMinusSCEV(RA, LS);
if (LDiff == RDiff)
return getAddExpr(Signed ? getSMinExpr(LS, RS) : getUMinExpr(LS, RS),
LDiff);
}
break;
case ICmpInst::ICMP_NE:
// n != 0 ? n+x : 1+x -> umax(n, 1)+x
if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
const SCEV *One = getOne(I->getType());
const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
const SCEV *LA = getSCEV(TrueVal);
const SCEV *RA = getSCEV(FalseVal);
const SCEV *LDiff = getMinusSCEV(LA, LS);
const SCEV *RDiff = getMinusSCEV(RA, One);
if (LDiff == RDiff)
return getAddExpr(getUMaxExpr(One, LS), LDiff);
}
break;
case ICmpInst::ICMP_EQ:
// n == 0 ? 1+x : n+x -> umax(n, 1)+x
if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
const SCEV *One = getOne(I->getType());
const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
const SCEV *LA = getSCEV(TrueVal);
const SCEV *RA = getSCEV(FalseVal);
const SCEV *LDiff = getMinusSCEV(LA, One);
const SCEV *RDiff = getMinusSCEV(RA, LS);
if (LDiff == RDiff)
return getAddExpr(getUMaxExpr(One, LS), LDiff);
}
break;
default:
break;
}
return getUnknown(I);
}
/// Expand GEP instructions into add and multiply operations. This allows them
/// to be analyzed by regular SCEV code.
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
// Don't attempt to analyze GEPs over unsized objects.
if (!GEP->getSourceElementType()->isSized())
return getUnknown(GEP);
SmallVector<const SCEV *, 4> IndexExprs;
for (Value *Index : GEP->indices())
IndexExprs.push_back(getSCEV(Index));
return getGEPExpr(GEP, IndexExprs);
}
uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return C->getAPInt().countTrailingZeros();
if (const SCEVPtrToIntExpr *I = dyn_cast<SCEVPtrToIntExpr>(S))
return GetMinTrailingZeros(I->getOperand());
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
return std::min(GetMinTrailingZeros(T->getOperand()),
(uint32_t)getTypeSizeInBits(T->getType()));
if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
return OpRes == getTypeSizeInBits(E->getOperand()->getType())
? getTypeSizeInBits(E->getType())
: OpRes;
}
if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
return OpRes == getTypeSizeInBits(E->getOperand()->getType())
? getTypeSizeInBits(E->getType())
: OpRes;
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
// The result is the min of all the operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
return MinOpRes;
}
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
// The result is the sum of all operands' results.
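// For example (sketch): if %a has at least one trailing zero bit, a multiply
// by 4 contributes two more, so 4 * %a has at least three trailing zeros.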
uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
uint32_t BitWidth = getTypeSizeInBits(M->getType());
for (unsigned i = 1, e = M->getNumOperands();
SumOpRes != BitWidth && i != e; ++i)
SumOpRes =
std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth);
return SumOpRes;
}
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
// The result is the min of all operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
return MinOpRes;
}
if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
// The result is the min of all operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
return MinOpRes;
}
if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
// The result is the min of all operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
return MinOpRes;
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT);
return Known.countMinTrailingZeros();
}
// SCEVUDivExpr
return 0;
}
uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
auto I = MinTrailingZerosCache.find(S);
if (I != MinTrailingZerosCache.end())
return I->second;
uint32_t Result = GetMinTrailingZerosImpl(S);
auto InsertPair = MinTrailingZerosCache.insert({S, Result});
assert(InsertPair.second && "Should insert a new key");
return InsertPair.first->second;
}
/// Helper method to assign a range to V from metadata present in the IR.
static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (MDNode *MD = I->getMetadata(LLVMContext::MD_range))
return getConstantRangeFromMetadata(*MD);
return None;
}
void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec,
SCEV::NoWrapFlags Flags) {
if (AddRec->getNoWrapFlags(Flags) != Flags) {
AddRec->setNoWrapFlags(Flags);
UnsignedRanges.erase(AddRec);
SignedRanges.erase(AddRec);
}
}
ConstantRange ScalarEvolution::
getRangeForUnknownRecurrence(const SCEVUnknown *U) {
const DataLayout &DL = getDataLayout();
unsigned BitWidth = getTypeSizeInBits(U->getType());
const ConstantRange FullSet(BitWidth, /*isFullSet=*/true);
// Match a simple recurrence of the form: <start, ShiftOp, Step>, and then
// use information about the trip count to improve our available range. Note
// that the trip count independent cases are already handled by known bits.
// WARNING: The definition of recurrence used here is subtly different than
// the one used by AddRec (and thus most of this file). Step is allowed to
// be arbitrarily loop varying here, where AddRec allows only loop invariant
// and other addrecs in the same loop (for non-affine addrecs). The code
// below intentionally handles the case where step is not loop invariant.
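// For example (sketch), a loop-header phi like this is a recurrence in the
// sense used here even though the shift amount varies per iteration:
//   loop:
//     %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
//     %iv.next = lshr i32 %iv, %amt   ; %amt need not be loop invariant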
auto *P = dyn_cast<PHINode>(U->getValue());
if (!P)
return FullSet;
// Make sure that no Phi input comes from an unreachable block. Otherwise,
// even the values that are not available in these blocks may come from them,
// and this leads to a false-positive recurrence test.
for (auto *Pred : predecessors(P->getParent()))
if (!DT.isReachableFromEntry(Pred))
return FullSet;
BinaryOperator *BO;
Value *Start, *Step;
if (!matchSimpleRecurrence(P, BO, Start, Step))
return FullSet;
// If we found a recurrence in reachable code, we must be in a loop. Note
// that BO might be in some subloop of L, and that's completely okay.
auto *L = LI.getLoopFor(P->getParent());
assert(L && L->getHeader() == P->getParent());
if (!L->contains(BO->getParent()))
// NOTE: This bailout should be an assert instead. However, asserting
// the condition here exposes a case where LoopFusion is querying SCEV
// with malformed loop information in the midst of the transform.
// There doesn't appear to be an obvious fix, so for the moment bailout
// until the caller issue can be fixed. PR49566 tracks the bug.
return FullSet;
// TODO: Extend to other opcodes such as mul, and div
switch (BO->getOpcode()) {
default:
return FullSet;
case Instruction::AShr:
case Instruction::LShr:
case Instruction::Shl:
break;
};
if (BO->getOperand(0) != P)
// TODO: Handle the power function forms some day.
return FullSet;
unsigned TC = getSmallConstantMaxTripCount(L);
if (!TC || TC >= BitWidth)
return FullSet;
auto KnownStart = computeKnownBits(Start, DL, 0, &AC, nullptr, &DT);
auto KnownStep = computeKnownBits(Step, DL, 0, &AC, nullptr, &DT);
assert(KnownStart.getBitWidth() == BitWidth &&
KnownStep.getBitWidth() == BitWidth);
// Compute total shift amount, being careful of overflow and bitwidths.
auto MaxShiftAmt = KnownStep.getMaxValue();
APInt TCAP(BitWidth, TC-1);
bool Overflow = false;
auto TotalShift = MaxShiftAmt.umul_ov(TCAP, Overflow);
if (Overflow)
return FullSet;
switch (BO->getOpcode()) {
default:
llvm_unreachable("filtered out above");
case Instruction::AShr: {
// For each ashr, three cases:
// shift = 0 => unchanged value
// saturation => 0 or -1
// other => a value closer to zero (of the same sign)
// Thus, the end value is closer to zero than the start.
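// For example, in i8: -100 ashr 2 == -25 and 100 ashr 2 == 25, both strictly
// closer to zero; a saturating shift ends at -1 or 0 respectively.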
auto KnownEnd = KnownBits::ashr(KnownStart,
KnownBits::makeConstant(TotalShift));
if (KnownStart.isNonNegative())
// Analogous to lshr (simply not yet canonicalized)
return ConstantRange::getNonEmpty(KnownEnd.getMinValue(),
KnownStart.getMaxValue() + 1);
if (KnownStart.isNegative())
// End >=u Start && End <=s Start
return ConstantRange::getNonEmpty(KnownStart.getMinValue(),
KnownEnd.getMaxValue() + 1);
break;
}
case Instruction::LShr: {
// For each lshr, three cases:
// shift = 0 => unchanged value
// saturation => 0
// other => a smaller positive number
// Thus, the low end of the unsigned range is the last value produced.
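// For example, in i8: 100 lshr 2 == 25; repeated lshr never increases the
// value and eventually saturates at 0.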
auto KnownEnd = KnownBits::lshr(KnownStart,
KnownBits::makeConstant(TotalShift));
return ConstantRange::getNonEmpty(KnownEnd.getMinValue(),
KnownStart.getMaxValue() + 1);
}
case Instruction::Shl: {
// Iff no bits are shifted out, value increases on every shift.
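// For example, in i8: 3 shl 2 == 12 (no bits lost, the value grew), but
// 0xC8 shl 1 == 0x90, which is smaller because the top bit was shifted out.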
auto KnownEnd = KnownBits::shl(KnownStart,
KnownBits::makeConstant(TotalShift));
if (TotalShift.ult(KnownStart.countMinLeadingZeros()))
return ConstantRange(KnownStart.getMinValue(),
KnownEnd.getMaxValue() + 1);
break;
}
};
return FullSet;
}
/// Determine the range for a particular SCEV. If SignHint is
/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
/// with a "cleaner" unsigned (resp. signed) representation.
const ConstantRange &
ScalarEvolution::getRangeRef(const SCEV *S,
ScalarEvolution::RangeSignHint SignHint) {
DenseMap<const SCEV *, ConstantRange> &Cache =
SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
: SignedRanges;
ConstantRange::PreferredRangeType RangeType =
SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED
? ConstantRange::Unsigned : ConstantRange::Signed;
// See if we've computed this range already.
DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
if (I != Cache.end())
return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return setRange(C, SignHint, ConstantRange(C->getAPInt()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
using OBO = OverflowingBinaryOperator;
// If the value has known zeros, the maximum value will have those known zeros
// as well.
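// For example, with BitWidth == 8 and TZ == 2, the unsigned case below yields
// [0, 253) (maximum 252 == 0b11111100) and the signed case yields [-128, 125).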
uint32_t TZ = GetMinTrailingZeros(S);
if (TZ != 0) {
if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
ConservativeResult =
ConstantRange(APInt::getMinValue(BitWidth),
APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
else
ConservativeResult = ConstantRange(
APInt::getSignedMinValue(BitWidth),
APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
unsigned WrapType = OBO::AnyWrap;
if (Add->hasNoSignedWrap())
WrapType |= OBO::NoSignedWrap;
if (Add->hasNoUnsignedWrap())
WrapType |= OBO::NoUnsignedWrap;
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
X = X.addWithNoWrap(getRangeRef(Add->getOperand(i), SignHint),
WrapType, RangeType);
return setRange(Add, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint);
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint));
return setRange(Mul, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint);
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
X = X.smax(getRangeRef(SMax->getOperand(i), SignHint));
return setRange(SMax, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint);
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
X = X.umax(getRangeRef(UMax->getOperand(i), SignHint));
return setRange(UMax, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVSMinExpr *SMin = dyn_cast<SCEVSMinExpr>(S)) {
ConstantRange X = getRangeRef(SMin->getOperand(0), SignHint);
for (unsigned i = 1, e = SMin->getNumOperands(); i != e; ++i)
X = X.smin(getRangeRef(SMin->getOperand(i), SignHint));
return setRange(SMin, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVUMinExpr *UMin = dyn_cast<SCEVUMinExpr>(S)) {
ConstantRange X = getRangeRef(UMin->getOperand(0), SignHint);
for (unsigned i = 1, e = UMin->getNumOperands(); i != e; ++i)
X = X.umin(getRangeRef(UMin->getOperand(i), SignHint));
return setRange(UMin, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
return setRange(UDiv, SignHint,
ConservativeResult.intersectWith(X.udiv(Y), RangeType));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint);
return setRange(ZExt, SignHint,
ConservativeResult.intersectWith(X.zeroExtend(BitWidth),
RangeType));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
ConstantRange X = getRangeRef(SExt->getOperand(), SignHint);
return setRange(SExt, SignHint,
ConservativeResult.intersectWith(X.signExtend(BitWidth),
RangeType));
}
if (const SCEVPtrToIntExpr *PtrToInt = dyn_cast<SCEVPtrToIntExpr>(S)) {
ConstantRange X = getRangeRef(PtrToInt->getOperand(), SignHint);
return setRange(PtrToInt, SignHint, X);
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
return setRange(Trunc, SignHint,
ConservativeResult.intersectWith(X.truncate(BitWidth),
RangeType));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
// If there's no unsigned wrap, the value will never be less than its
// initial value.
if (AddRec->hasNoUnsignedWrap()) {
APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart());
if (!UnsignedMinValue.isNullValue())
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType);
}
// If there's no signed wrap, and all the operands except the initial value
// have the same sign or are zero, the value won't ever be:
// 1: smaller than the initial value if the operands are non-negative,
// 2: bigger than the initial value if the operands are non-positive.
// In both cases, the value cannot cross the signed min/max boundary.
if (AddRec->hasNoSignedWrap()) {
bool AllNonNeg = true;
bool AllNonPos = true;
for (unsigned i = 1, e = AddRec->getNumOperands(); i != e; ++i) {
if (!isKnownNonNegative(AddRec->getOperand(i)))
AllNonNeg = false;
if (!isKnownNonPositive(AddRec->getOperand(i)))
AllNonPos = false;
}
if (AllNonNeg)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange::getNonEmpty(getSignedRangeMin(AddRec->getStart()),
APInt::getSignedMinValue(BitWidth)),
RangeType);
else if (AllNonPos)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange::getNonEmpty(
APInt::getSignedMinValue(BitWidth),
getSignedRangeMax(AddRec->getStart()) + 1),
RangeType);
}
// TODO: non-affine addrec
if (AddRec->isAffine()) {
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
auto RangeFromAffine = getRangeForAffineAR(
AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
BitWidth);
ConservativeResult =
ConservativeResult.intersectWith(RangeFromAffine, RangeType);
auto RangeFromFactoring = getRangeViaFactoring(
AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
BitWidth);
ConservativeResult =
ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
}
// Now try symbolic BE count and more powerful methods.
if (UseExpensiveRangeSharpening) {
const SCEV *SymbolicMaxBECount =
getSymbolicMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(SymbolicMaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
AddRec->hasNoSelfWrap()) {
auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
AddRec, SymbolicMaxBECount, BitWidth, SignHint);
ConservativeResult =
ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
}
}
}
return setRange(AddRec, SignHint, std::move(ConservativeResult));
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// Check if the IR explicitly contains !range metadata.
Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
if (MDRange.hasValue())
ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(),
RangeType);
// Use facts about recurrences in the underlying IR. Note that add
// recurrences are AddRecExprs and thus don't hit this path. This
// primarily handles shift recurrences.
auto CR = getRangeForUnknownRecurrence(U);
ConservativeResult = ConservativeResult.intersectWith(CR);
// See if ValueTracking can give us a useful range.
const DataLayout &DL = getDataLayout();
KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
if (Known.getBitWidth() != BitWidth)
Known = Known.zextOrTrunc(BitWidth);
// ValueTracking may be able to compute a tighter result for the number of
// sign bits than for the value of those sign bits.
unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
if (U->getType()->isPointerTy()) {
// If the pointer size is larger than the index size type, this can cause
// NS to be larger than BitWidth. So compensate for this.
unsigned ptrSize = DL.getPointerTypeSizeInBits(U->getType());
int ptrIdxDiff = ptrSize - BitWidth;
if (ptrIdxDiff > 0 && ptrSize > BitWidth && NS > (unsigned)ptrIdxDiff)
NS -= ptrIdxDiff;
}
if (NS > 1) {
// If we know any of the sign bits, we know all of the sign bits.
if (!Known.Zero.getHiBits(NS).isNullValue())
Known.Zero.setHighBits(NS);
if (!Known.One.getHiBits(NS).isNullValue())
Known.One.setHighBits(NS);
}
if (Known.getMinValue() != Known.getMaxValue() + 1)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1),
RangeType);
if (NS > 1)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1),
RangeType);
// The range of a Phi is a subset of the union of the ranges of its inputs.
if (const PHINode *Phi = dyn_cast<PHINode>(U->getValue())) {
// Make sure that we do not run over cycled Phis.
if (PendingPhiRanges.insert(Phi).second) {
ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false);
for (auto &Op : Phi->operands()) {
auto OpRange = getRangeRef(getSCEV(Op), SignHint);
RangeFromOps = RangeFromOps.unionWith(OpRange);
// No point in continuing if we already have a full set.
if (RangeFromOps.isFullSet())
break;
}
ConservativeResult =
ConservativeResult.intersectWith(RangeFromOps, RangeType);
bool Erased = PendingPhiRanges.erase(Phi);
assert(Erased && "Failed to erase Phi properly?");
(void) Erased;
}
}
return setRange(U, SignHint, std::move(ConservativeResult));
}
return setRange(S, SignHint, std::move(ConservativeResult));
}
// Given a StartRange, Step and MaxBECount for an expression compute a range of
// values that the expression can take. Initially, the expression has a value
// from StartRange and then is changed by Step up to MaxBECount times. Signed
// argument defines if we treat Step as signed or unsigned.
static ConstantRange getRangeForAffineARHelper(APInt Step,
const ConstantRange &StartRange,
const APInt &MaxBECount,
unsigned BitWidth, bool Signed) {
// If either Step or MaxBECount is 0, then the expression won't change, and we
// just need to return the initial range.
if (Step == 0 || MaxBECount == 0)
return StartRange;
// If we don't know anything about the initial value (i.e. StartRange is
// FullRange), then we don't know anything about the final range either.
// Return FullRange.
if (StartRange.isFullSet())
return ConstantRange::getFull(BitWidth);
// If Step is signed and negative, then we use its absolute value, but we also
// note that we're moving in the opposite direction.
bool Descending = Signed && Step.isNegative();
if (Signed)
// This is correct even for INT_SMIN. Let's look at i8 to illustrate this:
// abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128.
// These equations hold true due to the well-defined wrap-around behavior of
// APInt.
Step = Step.abs();
// Check if Offset is more than the full span of BitWidth. If it is, the
// expression is guaranteed to overflow.
if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
return ConstantRange::getFull(BitWidth);
// Offset is by how much the expression can change. Checks above guarantee no
// overflow here.
APInt Offset = Step * MaxBECount;
// Minimum value of the final range will match the minimal value of StartRange
// if the expression is increasing and will be decreased by Offset otherwise.
// Maximum value of the final range will match the maximal value of StartRange
// if the expression is decreasing and will be increased by Offset otherwise.
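// For example, StartRange == [10, 21), Step == 3 and MaxBECount == 5 give
// Offset == 15, so the ascending case below produces the range [10, 36).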
APInt StartLower = StartRange.getLower();
APInt StartUpper = StartRange.getUpper() - 1;
APInt MovedBoundary = Descending ? (StartLower - std::move(Offset))
: (StartUpper + std::move(Offset));
// It's possible that the new minimum/maximum value will fall into the initial
// range (due to wrap around). This means that the expression can take any
// value in this bitwidth, and we have to return full range.
if (StartRange.contains(MovedBoundary))
return ConstantRange::getFull(BitWidth);
APInt NewLower =
Descending ? std::move(MovedBoundary) : std::move(StartLower);
APInt NewUpper =
Descending ? std::move(StartUpper) : std::move(MovedBoundary);
NewUpper += 1;
// No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper));
}
ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
const SCEV *Step,
const SCEV *MaxBECount,
unsigned BitWidth) {
assert(!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
"Precondition!");
MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount);
// First, consider step signed.
ConstantRange StartSRange = getSignedRange(Start);
ConstantRange StepSRange = getSignedRange(Step);
// If Step can be both positive and negative, we need to find ranges for the
// maximum absolute step values in both directions and union them.
ConstantRange SR =
getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange,
MaxBECountValue, BitWidth, /* Signed = */ true);
SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(),
StartSRange, MaxBECountValue,
BitWidth, /* Signed = */ true));
// Next, consider step unsigned.
ConstantRange UR = getRangeForAffineARHelper(
getUnsignedRangeMax(Step), getUnsignedRange(Start),
MaxBECountValue, BitWidth, /* Signed = */ false);
// Finally, intersect signed and unsigned ranges.
return SR.intersectWith(UR, ConstantRange::Smallest);
}
ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth,
ScalarEvolution::RangeSignHint SignHint) {
assert(AddRec->isAffine() && "Non-affine AddRecs are not supported!\n");
assert(AddRec->hasNoSelfWrap() &&
"This only works for non-self-wrapping AddRecs!");
const bool IsSigned = SignHint == HINT_RANGE_SIGNED;
const SCEV *Step = AddRec->getStepRecurrence(*this);
// Only deal with constant step to save compile time.
if (!isa<SCEVConstant>(Step))
return ConstantRange::getFull(BitWidth);
// Let's make sure that we can prove that we do not self-wrap during
// MaxBECount iterations. We need this because MaxBECount is a maximum
// iteration count estimate, and we might infer nw from some exit for which we
// do not know max exit count (or any other side reasoning).
// TODO: Turn into assert at some point.
if (getTypeSizeInBits(MaxBECount->getType()) >
getTypeSizeInBits(AddRec->getType()))
return ConstantRange::getFull(BitWidth);
MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
const SCEV *RangeWidth = getMinusOne(AddRec->getType());
const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));
const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs);
if (!isKnownPredicateViaConstantRanges(ICmpInst::ICMP_ULE, MaxBECount,
MaxItersWithoutWrap))
return ConstantRange::getFull(BitWidth);
ICmpInst::Predicate LEPred =
IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
ICmpInst::Predicate GEPred =
IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
// We know that there is no self-wrap. Let's take Start and End values and
// look at all intermediate values V1, V2, ..., Vn that IndVar takes during
// the iteration. They either lie inside the range [Min(Start, End),
// Max(Start, End)] or outside it:
//
// Case 1: RangeMin ... Start V1 ... VN End ... RangeMax;
// Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax;
//
// No self wrap flag guarantees that the intermediate values cannot be BOTH
// outside and inside the range [Min(Start, End), Max(Start, End)]. Using that
// knowledge, let's try to prove that we are dealing with Case 1. It is so if
// Start <= End and step is positive, or Start >= End and step is negative.
const SCEV *Start = AddRec->getStart();
ConstantRange StartRange = getRangeRef(Start, SignHint);
ConstantRange EndRange = getRangeRef(End, SignHint);
ConstantRange RangeBetween = StartRange.unionWith(EndRange);
// If they already cover full iteration space, we will know nothing useful
// even if we prove what we want to prove.
if (RangeBetween.isFullSet())
return RangeBetween;
// Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax).
bool IsWrappedSet = IsSigned ? RangeBetween.isSignWrappedSet()
: RangeBetween.isWrappedSet();
if (IsWrappedSet)
return ConstantRange::getFull(BitWidth);
if (isKnownPositive(Step) &&
isKnownPredicateViaConstantRanges(LEPred, Start, End))
return RangeBetween;
else if (isKnownNegative(Step) &&
isKnownPredicateViaConstantRanges(GEPred, Start, End))
return RangeBetween;
return ConstantRange::getFull(BitWidth);
}
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
const SCEV *Step,
const SCEV *MaxBECount,
unsigned BitWidth) {
// RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q})
// == RangeOf({A,+,P}) union RangeOf({B,+,Q})
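// For example, RangeOf({C ? 0 : 10,+,C ? 1 : 2}) is computed as
// RangeOf({0,+,1}) union RangeOf({10,+,2}).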
struct SelectPattern {
Value *Condition = nullptr;
APInt TrueValue;
APInt FalseValue;
explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth,
const SCEV *S) {
Optional<unsigned> CastOp;
APInt Offset(BitWidth, 0);
assert(SE.getTypeSizeInBits(S->getType()) == BitWidth &&
"Should be!");
// Peel off a constant offset:
if (auto *SA = dyn_cast<SCEVAddExpr>(S)) {
// In the future we could consider being smarter here and handle
// {Start+Step,+,Step} too.
if (SA->getNumOperands() != 2 || !isa<SCEVConstant>(SA->getOperand(0)))
return;
Offset = cast<SCEVConstant>(SA->getOperand(0))->getAPInt();
S = SA->getOperand(1);
}
// Peel off a cast operation
if (auto *SCast = dyn_cast<SCEVIntegralCastExpr>(S)) {
CastOp = SCast->getSCEVType();
S = SCast->getOperand();
}
using namespace llvm::PatternMatch;
auto *SU = dyn_cast<SCEVUnknown>(S);
const APInt *TrueVal, *FalseVal;
if (!SU ||
!match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal),
m_APInt(FalseVal)))) {
Condition = nullptr;
return;
}
TrueValue = *TrueVal;
FalseValue = *FalseVal;
// Re-apply the cast we peeled off earlier
if (CastOp.hasValue())
switch (*CastOp) {
default:
llvm_unreachable("Unknown SCEV cast type!");
case scTruncate:
TrueValue = TrueValue.trunc(BitWidth);
FalseValue = FalseValue.trunc(BitWidth);
break;
case scZeroExtend:
TrueValue = TrueValue.zext(BitWidth);
FalseValue = FalseValue.zext(BitWidth);
break;
case scSignExtend:
TrueValue = TrueValue.sext(BitWidth);
FalseValue = FalseValue.sext(BitWidth);
break;
}
// Re-apply the constant offset we peeled off earlier
TrueValue += Offset;
FalseValue += Offset;
}
bool isRecognized() { return Condition != nullptr; }
};
SelectPattern StartPattern(*this, BitWidth, Start);
if (!StartPattern.isRecognized())
return ConstantRange::getFull(BitWidth);
SelectPattern StepPattern(*this, BitWidth, Step);
if (!StepPattern.isRecognized())
return ConstantRange::getFull(BitWidth);
if (StartPattern.Condition != StepPattern.Condition) {
// We don't handle this case today, but we could, by considering four
// possibilities below instead of two. I'm not sure if there are cases where
// that will help over what getRange already does, though.
return ConstantRange::getFull(BitWidth);
}
// NB! Calling ScalarEvolution::getConstant is fine, but we should not try to
// construct arbitrary general SCEV expressions here. This function is called
// from deep in the call stack, and calling getSCEV (on a sext instruction,
// say) can end up caching a suboptimal value.
// FIXME: without the explicit `this` receiver below, MSVC errors out with
// C2352 and C2512 (otherwise it isn't needed).
const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue);
const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue);
const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue);
const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue);
ConstantRange TrueRange =
this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount, BitWidth);
ConstantRange FalseRange =
this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount, BitWidth);
return TrueRange.unionWith(FalseRange);
}
SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
const BinaryOperator *BinOp = cast<BinaryOperator>(V);
// Return early if there are no flags to propagate to the SCEV.
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
if (BinOp->hasNoUnsignedWrap())
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
if (BinOp->hasNoSignedWrap())
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
if (Flags == SCEV::FlagAnyWrap)
return SCEV::FlagAnyWrap;
return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap;
}
bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
// Here we check that I is in the header of the innermost loop containing I,
// since we only deal with instructions in the loop header. The actual loop we
// need to check later will come from an add recurrence, but getting that
// requires computing the SCEV of the operands, which can be expensive. This
// check we can do cheaply to rule out some cases early.
Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent());
if (InnermostContainingLoop == nullptr ||
InnermostContainingLoop->getHeader() != I->getParent())
return false;
// Only proceed if we can prove that I does not yield poison.
if (!programUndefinedIfPoison(I))
return false;
// At this point we know that if I is executed, then it does not wrap
// according to at least one of NSW or NUW. If I is not executed, then we do
// not know if the calculation that I represents would wrap. Multiple
// instructions can map to the same SCEV. If we apply NSW or NUW from I to
// the SCEV, we must guarantee no wrapping for that SCEV also when it is
// derived from other instructions that map to the same SCEV. We cannot make
// that guarantee for cases where I is not executed. So we need to find the
// loop that I is considered in relation to and prove that I is executed for
// every iteration of that loop. That implies that the value that I
// calculates does not wrap anywhere in the loop, so then we can apply the
// flags to the SCEV.
//
// We check isLoopInvariant to disambiguate in case we are adding recurrences
// from different loops, so that we know which loop to prove that I is
// executed in.
for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) {
// I could be an extractvalue from a call to an overflow intrinsic.
// TODO: We can do better here in some cases.
if (!isSCEVable(I->getOperand(OpIndex)->getType()))
return false;
const SCEV *Op = getSCEV(I->getOperand(OpIndex));
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
bool AllOtherOpsLoopInvariant = true;
for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands();
++OtherOpIndex) {
if (OtherOpIndex != OpIndex) {
const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex));
if (!isLoopInvariant(OtherOp, AddRec->getLoop())) {
AllOtherOpsLoopInvariant = false;
break;
}
}
}
if (AllOtherOpsLoopInvariant &&
isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop()))
return true;
}
}
return false;
}
bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
// If we know that \c I can never be poison period, then that's enough.
if (isSCEVExprNeverPoison(I))
return true;
// For an add recurrence specifically, we assume that infinite loops without
// side effects are undefined behavior, and then reason as follows:
//
// If the add recurrence is poison in any iteration, it is poison on all
// future iterations (since incrementing poison yields poison). If the result
// of the add recurrence is fed into the loop latch condition and the loop
// does not contain any throws or exiting blocks other than the latch, we now
// have the ability to "choose" whether the backedge is taken or not (by
// choosing a sufficiently evil value for the poison feeding into the branch)
// for every iteration including and after the one in which \p I first became
// poison. There are two possibilities (let's call the iteration in which \p
// I first became poison as K):
//
// 1. In the set of iterations including and after K, the loop body executes
// no side effects. In this case executing the backedge an infinite number
// of times will yield undefined behavior.
//
// 2. In the set of iterations including and after K, the loop body executes
// at least one side effect. In this case, that specific instance of side
// effect is control dependent on poison, which also yields undefined
// behavior.
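// As a sketch in IR terms: once %iv below is poison, the latch condition is
// poison too, so whether the backedge is taken is "chosen" by poison:
//   loop:
//     %iv = add nsw i32 %iv.prev, 1
//     %cmp = icmp slt i32 %iv, %n
//     br i1 %cmp, label %loop, label %exit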
auto *ExitingBB = L->getExitingBlock();
auto *LatchBB = L->getLoopLatch();
if (!ExitingBB || !LatchBB || ExitingBB != LatchBB)
return false;
SmallPtrSet<const Instruction *, 16> Pushed;
SmallVector<const Instruction *, 8> PoisonStack;
// We start by assuming \c I, the post-inc add recurrence, is poison. Only
// things that are known to be poison under that assumption go on the
// PoisonStack.
Pushed.insert(I);
PoisonStack.push_back(I);
bool LatchControlDependentOnPoison = false;
while (!PoisonStack.empty() && !LatchControlDependentOnPoison) {
const Instruction *Poison = PoisonStack.pop_back_val();
for (auto *PoisonUser : Poison->users()) {
if (propagatesPoison(cast<Operator>(PoisonUser))) {
if (Pushed.insert(cast<Instruction>(PoisonUser)).second)
PoisonStack.push_back(cast<Instruction>(PoisonUser));
} else if (auto *BI = dyn_cast<BranchInst>(PoisonUser)) {
assert(BI->isConditional() && "Only possibility!");
if (BI->getParent() == LatchBB) {
LatchControlDependentOnPoison = true;
break;
}
}
}
}
return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
}
ScalarEvolution::LoopProperties
ScalarEvolution::getLoopProperties(const Loop *L) {
using LoopProperties = ScalarEvolution::LoopProperties;
auto Itr = LoopPropertiesCache.find(L);
if (Itr == LoopPropertiesCache.end()) {
auto HasSideEffects = [](Instruction *I) {
if (auto *SI = dyn_cast<StoreInst>(I))
return !SI->isSimple();
return I->mayThrow() || I->mayWriteToMemory();
};
LoopProperties LP = {/* HasNoAbnormalExits */ true,
/*HasNoSideEffects*/ true};
for (auto *BB : L->getBlocks())
for (auto &I : *BB) {
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
LP.HasNoAbnormalExits = false;
if (HasSideEffects(&I))
LP.HasNoSideEffects = false;
if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects)
break; // We're already as pessimistic as we can get.
}
auto InsertPair = LoopPropertiesCache.insert({L, LP});
assert(InsertPair.second && "We just checked!");
Itr = InsertPair.first;
}
return Itr->second;
}
bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) {
// A mustprogress loop without side effects must be finite.
// TODO: The check used here is very conservative. It's only *specific*
// side effects which are well defined in infinite loops.
return isMustProgress(L) && loopHasNoSideEffects(L);
}
const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (!isSCEVable(V->getType()))
return getUnknown(V);
if (Instruction *I = dyn_cast<Instruction>(V)) {
// Don't attempt to analyze instructions in blocks that aren't
// reachable. Such instructions don't matter, and they aren't required
// to obey basic rules for definitions dominating uses which this
// analysis depends on.
if (!DT.isReachableFromEntry(I->getParent()))
return getUnknown(UndefValue::get(V->getType()));
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee());
else if (!isa<ConstantExpr>(V))
return getUnknown(V);
Operator *U = cast<Operator>(V);
if (auto BO = MatchBinaryOp(U, DT)) {
switch (BO->Opcode) {
case Instruction::Add: {
// The simple thing to do would be to just call getSCEV on both operands
// and call getAddExpr with the result. However if we're looking at a
// bunch of things all added together, this can be quite inefficient,
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
SmallVector<const SCEV *, 4> AddOps;
do {
if (BO->Op) {
if (auto *OpSCEV = getExistingSCEV(BO->Op)) {
AddOps.push_back(OpSCEV);
break;
}
// If a NUW or NSW flag can be applied to the SCEV for this
// addition, then compute the SCEV for this addition by itself
// with a separate call to getAddExpr. We need to do that
// instead of pushing the operands of the addition onto AddOps,
// since the flags are only known to apply to this particular
// addition - they may not apply to other additions that can be
// formed with operands from AddOps.
const SCEV *RHS = getSCEV(BO->RHS);
SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op);
if (Flags != SCEV::FlagAnyWrap) {
const SCEV *LHS = getSCEV(BO->LHS);
if (BO->Opcode == Instruction::Sub)
AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
else
AddOps.push_back(getAddExpr(LHS, RHS, Flags));
break;
}
}
if (BO->Opcode == Instruction::Sub)
AddOps.push_back(getNegativeSCEV(getSCEV(BO->RHS)));
else
AddOps.push_back(getSCEV(BO->RHS));
auto NewBO = MatchBinaryOp(BO->LHS, DT);
if (!NewBO || (NewBO->Opcode != Instruction::Add &&
NewBO->Opcode != Instruction::Sub)) {
AddOps.push_back(getSCEV(BO->LHS));
break;
}
BO = NewBO;
} while (true);
return getAddExpr(AddOps);
}
case Instruction::Mul: {
SmallVector<const SCEV *, 4> MulOps;
do {
if (BO->Op) {
if (auto *OpSCEV = getExistingSCEV(BO->Op)) {
MulOps.push_back(OpSCEV);
break;
}
SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op);
if (Flags != SCEV::FlagAnyWrap) {
MulOps.push_back(
getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags));
break;
}
}
MulOps.push_back(getSCEV(BO->RHS));
auto NewBO = MatchBinaryOp(BO->LHS, DT);
if (!NewBO || NewBO->Opcode != Instruction::Mul) {
MulOps.push_back(getSCEV(BO->LHS));
break;
}
BO = NewBO;
} while (true);
return getMulExpr(MulOps);
}
case Instruction::UDiv:
return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
case Instruction::URem:
return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
case Instruction::Sub: {
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
if (BO->Op)
Flags = getNoWrapFlagsFromUB(BO->Op);
return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags);
}
case Instruction::And:
// For an expression like x&255 that merely masks off the high bits,
// use zext(trunc(x)) as the SCEV expression.
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
if (CI->isZero())
return getSCEV(BO->RHS);
if (CI->isMinusOne())
return getSCEV(BO->LHS);
const APInt &A = CI->getValue();
// Instcombine's ShrinkDemandedConstant may strip bits out of
// constants, obscuring what would otherwise be a low-bits mask.
// Use computeKnownBits to compute what ShrinkDemandedConstant
// knew about to reconstruct a low-bits mask value.
unsigned LZ = A.countLeadingZeros();
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
KnownBits Known(BitWidth);
computeKnownBits(BO->LHS, Known, getDataLayout(),
0, &AC, nullptr, &DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) {
const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ));
const SCEV *LHS = getSCEV(BO->LHS);
const SCEV *ShiftedLHS = nullptr;
if (auto *LHSMul = dyn_cast<SCEVMulExpr>(LHS)) {
if (auto *OpC = dyn_cast<SCEVConstant>(LHSMul->getOperand(0))) {
// For an expression like (x * 8) & 8, simplify the multiply.
unsigned MulZeros = OpC->getAPInt().countTrailingZeros();
unsigned GCD = std::min(MulZeros, TZ);
APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD);
SmallVector<const SCEV*, 4> MulOps;
MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD)));
MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end());
auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags());
ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt));
}
}
if (!ShiftedLHS)
ShiftedLHS = getUDivExpr(LHS, MulCount);
return getMulExpr(
getZeroExtendExpr(
getTruncateExpr(ShiftedLHS,
IntegerType::get(getContext(), BitWidth - LZ - TZ)),
BO->LHS->getType()),
MulCount);
}
}
break;
case Instruction::Or:
// If the RHS of the Or is a constant, we may have something like:
// X*4+1 which got turned into X*4|1. Handle this as an Add so loop
// optimizations will transparently handle this case.
//
// In order for this transformation to be safe, the LHS must be of the
// form X*(2^n) and the Or constant must be less than 2^n.
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
const SCEV *LHS = getSCEV(BO->LHS);
const APInt &CIVal = CI->getValue();
if (GetMinTrailingZeros(LHS) >=
(CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
// Build a plain add SCEV.
return getAddExpr(LHS, getSCEV(CI),
(SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW));
}
}
break;
case Instruction::Xor:
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
// If the RHS of xor is -1, then this is a not operation.
if (CI->isMinusOne())
return getNotSCEV(getSCEV(BO->LHS));
// Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
// This is a variant of the check for xor with -1, and it handles
// the case where instcombine has trimmed non-demanded bits out
// of an xor with -1.
if (auto *LBO = dyn_cast<BinaryOperator>(BO->LHS))
if (ConstantInt *LCI = dyn_cast<ConstantInt>(LBO->getOperand(1)))
if (LBO->getOpcode() == Instruction::And &&
LCI->getValue() == CI->getValue())
if (const SCEVZeroExtendExpr *Z =
dyn_cast<SCEVZeroExtendExpr>(getSCEV(BO->LHS))) {
Type *UTy = BO->LHS->getType();
const SCEV *Z0 = Z->getOperand();
Type *Z0Ty = Z0->getType();
unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
// If C is a low-bits mask, the zero extend is serving to
// mask off the high bits. Complement the operand and
// re-apply the zext.
if (CI->getValue().isMask(Z0TySize))
return getZeroExtendExpr(getNotSCEV(Z0), UTy);
// If C is a single bit, it may be in the sign-bit position
// before the zero-extend. In this case, represent the xor
// using an add, which is equivalent, and re-apply the zext.
APInt Trunc = CI->getValue().trunc(Z0TySize);
if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
Trunc.isSignMask())
return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
UTy);
}
}
break;
case Instruction::Shl:
// Turn shift left of a constant amount into a multiply.
if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) {
uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth();
// If the shift count is not less than the bitwidth, the result of
// the shift is undefined. Don't try to analyze it, because the
// resolution chosen here may differ from the resolution chosen in
// other parts of the compiler.
if (SA->getValue().uge(BitWidth))
break;
// We can safely preserve the nuw flag in all cases. It's also safe to
// turn a nuw nsw shl into a nuw nsw mul. However, nsw in isolation
// requires special handling. It can be preserved as long as we're not
// left shifting by bitwidth - 1.
auto Flags = SCEV::FlagAnyWrap;
if (BO->Op) {
auto MulFlags = getNoWrapFlagsFromUB(BO->Op);
if ((MulFlags & SCEV::FlagNSW) &&
((MulFlags & SCEV::FlagNUW) || SA->getValue().ult(BitWidth - 1)))
Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNSW);
if (MulFlags & SCEV::FlagNUW)
Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNUW);
}
Constant *X = ConstantInt::get(
getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags);
}
break;
case Instruction::AShr: {
// AShr X, C, where C is a constant.
ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS);
if (!CI)
break;
Type *OuterTy = BO->LHS->getType();
uint64_t BitWidth = getTypeSizeInBits(OuterTy);
// If the shift count is not less than the bitwidth, the result of
// the shift is undefined. Don't try to analyze it, because the
// resolution chosen here may differ from the resolution chosen in
// other parts of the compiler.
if (CI->getValue().uge(BitWidth))
break;
if (CI->isZero())
return getSCEV(BO->LHS); // shift by zero --> noop
uint64_t AShrAmt = CI->getZExtValue();
Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt);
Operator *L = dyn_cast<Operator>(BO->LHS);
if (L && L->getOpcode() == Instruction::Shl) {
// X = Shl A, n
// Y = AShr X, m
// Both n and m are constant.
const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0));
if (L->getOperand(1) == BO->RHS)
// For a two-shift sext-inreg, i.e. n = m,
// use sext(trunc(x)) as the SCEV expression.
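// For example, (shl i32 %a, 24) ashr 24 is modeled as
// sext(trunc %a to i8) to i32.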
return getSignExtendExpr(
getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy);
ConstantInt *ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1));
if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) {
uint64_t ShlAmt = ShlAmtCI->getZExtValue();
if (ShlAmt > AShrAmt) {
// When n > m, use sext(mul(trunc(x), 2^(n-m))) as the SCEV
// expression. We already checked that ShlAmt < BitWidth, so
// the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy because
// ShlAmt - AShrAmt < BitWidth - AShrAmt.
APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt,
ShlAmt - AShrAmt);
return getSignExtendExpr(
getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy),
getConstant(Mul)), OuterTy);
}
}
}
break;
}
}
}
switch (U->getOpcode()) {
case Instruction::Trunc:
return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::ZExt:
return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::SExt:
if (auto BO = MatchBinaryOp(U->getOperand(0), DT)) {
// The NSW flag of a subtract does not always survive the conversion to
// A + (-1)*B. By pushing sign extension onto its operands we are much
// more likely to preserve NSW and allow later AddRec optimisations.
//
// NOTE: This is effectively duplicating this logic from getSignExtend:
// sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
// but by that point the NSW information has potentially been lost.
if (BO->Opcode == Instruction::Sub && BO->IsNSW) {
Type *Ty = U->getType();
auto *V1 = getSignExtendExpr(getSCEV(BO->LHS), Ty);
auto *V2 = getSignExtendExpr(getSCEV(BO->RHS), Ty);
return getMinusSCEV(V1, V2, SCEV::FlagNSW);
}
}
return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::BitCast:
// BitCasts are no-op casts so we just eliminate the cast.
if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
return getSCEV(U->getOperand(0));
break;
case Instruction::PtrToInt: {
// Pointer to integer cast is straightforward, so do model it.
const SCEV *Op = getSCEV(U->getOperand(0));
Type *DstIntTy = U->getType();
// But only if effective SCEV (integer) type is wide enough to represent
// all possible pointer values.
const SCEV *IntOp = getPtrToIntExpr(Op, DstIntTy);
if (isa<SCEVCouldNotCompute>(IntOp))
return getUnknown(V);
return IntOp;
}
case Instruction::IntToPtr:
// Just don't deal with inttoptr casts.
return getUnknown(V);
case Instruction::SDiv:
// If both operands are non-negative, this is just an udiv.
if (isKnownNonNegative(getSCEV(U->getOperand(0))) &&
isKnownNonNegative(getSCEV(U->getOperand(1))))
return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)));
break;
case Instruction::SRem:
// If both operands are non-negative, this is just an urem.
if (isKnownNonNegative(getSCEV(U->getOperand(0))) &&
isKnownNonNegative(getSCEV(U->getOperand(1))))
return getURemExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)));
break;
case Instruction::GetElementPtr:
return createNodeForGEP(cast<GEPOperator>(U));
case Instruction::PHI:
return createNodeForPHI(cast<PHINode>(U));
case Instruction::Select:
// U can also be a select constant expr, which we let fall through. Since
// createNodeForSelect only works for a condition that is an `ICmpInst`, and
// constant expressions cannot have instructions as operands, we'd have
// returned getUnknown for a select constant expression anyway.
if (isa<Instruction>(U))
return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0),
U->getOperand(1), U->getOperand(2));
break;
case Instruction::Call:
case Instruction::Invoke:
if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand())
return getSCEV(RV);
if (auto *II = dyn_cast<IntrinsicInst>(U)) {
switch (II->getIntrinsicID()) {
case Intrinsic::abs:
return getAbsExpr(
getSCEV(II->getArgOperand(0)),
/*IsNSW=*/cast<ConstantInt>(II->getArgOperand(1))->isOne());
case Intrinsic::umax:
return getUMaxExpr(getSCEV(II->getArgOperand(0)),
getSCEV(II->getArgOperand(1)));
case Intrinsic::umin:
return getUMinExpr(getSCEV(II->getArgOperand(0)),
getSCEV(II->getArgOperand(1)));
case Intrinsic::smax:
return getSMaxExpr(getSCEV(II->getArgOperand(0)),
getSCEV(II->getArgOperand(1)));
case Intrinsic::smin:
return getSMinExpr(getSCEV(II->getArgOperand(0)),
getSCEV(II->getArgOperand(1)));
case Intrinsic::usub_sat: {
const SCEV *X = getSCEV(II->getArgOperand(0));
const SCEV *Y = getSCEV(II->getArgOperand(1));
const SCEV *ClampedY = getUMinExpr(X, Y);
return getMinusSCEV(X, ClampedY, SCEV::FlagNUW);
}
case Intrinsic::uadd_sat: {
const SCEV *X = getSCEV(II->getArgOperand(0));
const SCEV *Y = getSCEV(II->getArgOperand(1));
const SCEV *ClampedX = getUMinExpr(X, getNotSCEV(Y));
return getAddExpr(ClampedX, Y, SCEV::FlagNUW);
}
case Intrinsic::start_loop_iterations:
// A start_loop_iterations is just equivalent to the first operand for
// SCEV purposes.
return getSCEV(II->getArgOperand(0));
default:
break;
}
}
break;
}
return getUnknown(V);
}
//===----------------------------------------------------------------------===//
// Iteration Count Computation Code
//
const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount) {
// Get the trip count from the BE count by adding 1. Overflow results
// in zero, which means "unknown".
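// For example, a backedge-taken count of 7 gives a trip count of 8, while a
// backedge-taken count of all-ones wraps around to 0 ("unknown").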
return getAddExpr(ExitCount, getOne(ExitCount->getType()));
}
static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
if (!ExitCount)
return 0;
ConstantInt *ExitConst = ExitCount->getValue();
// Guard against huge trip counts.
if (ExitConst->getValue().getActiveBits() > 32)
return 0;
// In case of integer overflow, this returns 0, which is correct.
return ((unsigned)ExitConst->getZExtValue()) + 1;
}
unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
auto *ExitCount = dyn_cast<SCEVConstant>(getBackedgeTakenCount(L, Exact));
return getConstantTripCount(ExitCount);
}
unsigned
ScalarEvolution::getSmallConstantTripCount(const Loop *L,
const BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
assert(L->isLoopExiting(ExitingBlock) &&
"Exiting block must actually branch out of the loop!");
const SCEVConstant *ExitCount =
dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
return getConstantTripCount(ExitCount);
}
unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
const auto *MaxExitCount =
dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
return getConstantTripCount(MaxExitCount);
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
Optional<unsigned> Res = None;
for (auto *ExitingBB : ExitingBlocks) {
unsigned Multiple = getSmallConstantTripMultiple(L, ExitingBB);
if (!Res)
Res = Multiple;
Res = (unsigned)GreatestCommonDivisor64(*Res, Multiple);
}
return Res.getValueOr(1);
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
const SCEV *ExitCount) {
if (ExitCount == getCouldNotCompute())
return 1;
// Get the trip count
const SCEV *TCExpr = getTripCountFromExitCount(ExitCount);
const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr);
if (!TC)
// Attempt to factor more general cases. Returns the greatest power of
// two divisor. If overflow happens, the trip count expression is still
// divisible by the greatest power of 2 divisor returned.
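// For example, if the trip count expression is 12 * %n, it has at least two
// known trailing zero bits, so this returns 4 (assuming nothing more is
// known about %n).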
return 1U << std::min((uint32_t)31,
GetMinTrailingZeros(applyLoopGuards(TCExpr, L)));
ConstantInt *Result = TC->getValue();
// Guard against huge trip counts (this requires checking
// for zero to handle the case where the trip count == -1 and the
// addition wraps).
if (!Result || Result->getValue().getActiveBits() > 32 ||
Result->getValue().getActiveBits() == 0)
return 1;
return (unsigned)Result->getZExtValue();
}
/// Returns the largest constant divisor of the trip count of this loop as a
/// normal unsigned value, if possible. This means that the actual trip count is
/// always a multiple of the returned value (don't forget the trip count could
/// very well be zero as well!).
///
/// Returns 1 if the trip count is unknown or not guaranteed to be a
/// multiple of a constant (which is also the case if the trip count is simply
/// constant; use getSmallConstantTripCount for that case). It will also return
/// 1 if the trip count is very large (>= 2^32).
///
/// As explained in the comments for getSmallConstantTripCount, this assumes
/// that control exits the loop via ExitingBlock.
unsigned
ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
const BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
assert(L->isLoopExiting(ExitingBlock) &&
"Exiting block must actually branch out of the loop!");
const SCEV *ExitCount = getExitCount(L, ExitingBlock);
return getSmallConstantTripMultiple(L, ExitCount);
}
const SCEV *ScalarEvolution::getExitCount(const Loop *L,
const BasicBlock *ExitingBlock,
ExitCountKind Kind) {
switch (Kind) {
case Exact:
case SymbolicMaximum:
return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
case ConstantMaximum:
return getBackedgeTakenInfo(L).getConstantMax(ExitingBlock, this);
};
llvm_unreachable("Invalid ExitCountKind!");
}
const SCEV *
ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
SCEVUnionPredicate &Preds) {
return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds);
}
const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L,
ExitCountKind Kind) {
switch (Kind) {
case Exact:
return getBackedgeTakenInfo(L).getExact(L, this);
case ConstantMaximum:
return getBackedgeTakenInfo(L).getConstantMax(this);
case SymbolicMaximum:
return getBackedgeTakenInfo(L).getSymbolicMax(L, this);
};
llvm_unreachable("Invalid ExitCountKind!");
}
bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
return getBackedgeTakenInfo(L).isConstantMaxOrZero(this);
}
/// Push PHI nodes in the header of the given loop onto the given Worklist.
static void
PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
BasicBlock *Header = L->getHeader();
// Push all Loop-header PHIs onto the Worklist stack.
for (PHINode &PN : Header->phis())
Worklist.push_back(&PN);
}
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
auto &BTI = getBackedgeTakenInfo(L);
if (BTI.hasFullInfo())
return BTI;
auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
if (!Pair.second)
return Pair.first->second;
BackedgeTakenInfo Result =
computeBackedgeTakenCount(L, /*AllowPredicates=*/true);
return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result);
}
ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Initially insert an invalid entry for this loop. If the insertion
// succeeds, proceed to actually compute a backedge-taken count and
// update the value. The temporary CouldNotCompute value tells SCEV
// code elsewhere that it shouldn't attempt to request a new
// backedge-taken count, which could result in infinite recursion.
std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
BackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
if (!Pair.second)
return Pair.first->second;
// computeBackedgeTakenCount may allocate memory for its result. Inserting it
// into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
// must be cleared in this scope.
BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
// In a production build, these statistics are unused.
(void)NumTripCountsComputed;
(void)NumTripCountsNotComputed;
#if LLVM_ENABLE_STATS || !defined(NDEBUG)
const SCEV *BEExact = Result.getExact(L, this);
if (BEExact != getCouldNotCompute()) {
assert(isLoopInvariant(BEExact, L) &&
isLoopInvariant(Result.getConstantMax(this), L) &&
"Computed backedge-taken count isn't loop invariant for loop!");
++NumTripCountsComputed;
} else if (Result.getConstantMax(this) == getCouldNotCompute() &&
isa<PHINode>(L->getHeader()->begin())) {
// Only count loops that have phi nodes as not being computable.
++NumTripCountsNotComputed;
}
#endif // LLVM_ENABLE_STATS || !defined(NDEBUG)
// Now that we know more about the trip count for this loop, forget any
// existing SCEV values for PHI nodes in this loop since they are only
// conservative estimates made without the benefit of trip count
// information. This is similar to the code in forgetLoop, except that
// it handles SCEVUnknown PHI nodes specially.
if (Result.hasAnyInfo()) {
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
SmallPtrSet<Instruction *, 8> Discovered;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
const SCEV *Old = It->second;
// SCEVUnknown for a PHI either means that it has an unrecognized
// structure, or it's a PHI that's in the process of being computed
// by createNodeForPHI. In the former case, additional loop trip
// count information isn't going to change anything. In the latter
// case, createNodeForPHI will perform the necessary updates on its
// own when it gets to that point.
if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
eraseValueFromMap(It->first);
forgetMemoizedResults(Old);
}
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
// Since we don't need to invalidate anything for correctness and we're
// only invalidating to make SCEV's results more precise, we get to stop
// early to avoid invalidating too much. This is especially important in
// cases like:
//
// %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node
// loop0:
// %pn0 = phi
// ...
// loop1:
// %pn1 = phi
// ...
//
// where both loop0 and loop1's backedge taken count uses the SCEV
// expression for %v. If we don't have the early stop below then in cases
// like the above, getBackedgeTakenInfo(loop1) will clear out the trip
// count for loop0 and getBackedgeTakenInfo(loop0) will clear out the trip
// count for loop1, effectively nullifying SCEV's trip count cache.
for (auto *U : I->users())
if (auto *I = dyn_cast<Instruction>(U)) {
auto *LoopForUser = LI.getLoopFor(I->getParent());
if (LoopForUser && L->contains(LoopForUser) &&
Discovered.insert(I).second)
Worklist.push_back(I);
}
}
}
// Re-lookup the insert position, since the call to
// computeBackedgeTakenCount above could result in a
// recursive call to getBackedgeTakenInfo (on a different
// loop), which would invalidate the iterator computed
// earlier.
return BackedgeTakenCounts.find(L)->second = std::move(Result);
}
void ScalarEvolution::forgetAllLoops() {
// This method is intended to forget all info about loops. It should
// invalidate caches as if the following happened:
// - The trip counts of all loops have changed arbitrarily
// - Every llvm::Value has been updated in place to produce a different
// result.
BackedgeTakenCounts.clear();
PredicatedBackedgeTakenCounts.clear();
LoopPropertiesCache.clear();
ConstantEvolutionLoopExitValue.clear();
ValueExprMap.clear();
ValuesAtScopes.clear();
LoopDispositions.clear();
BlockDispositions.clear();
UnsignedRanges.clear();
SignedRanges.clear();
ExprValueMap.clear();
HasRecMap.clear();
MinTrailingZerosCache.clear();
PredicatedSCEVRewrites.clear();
}
void ScalarEvolution::forgetLoop(const Loop *L) {
SmallVector<const Loop *, 16> LoopWorklist(1, L);
SmallVector<Instruction *, 32> Worklist;
SmallPtrSet<Instruction *, 16> Visited;
// Iterate over all the loops and sub-loops to drop SCEV information.
while (!LoopWorklist.empty()) {
auto *CurrL = LoopWorklist.pop_back_val();
// Drop any stored trip count value.
BackedgeTakenCounts.erase(CurrL);
PredicatedBackedgeTakenCounts.erase(CurrL);
// Drop information about predicated SCEV rewrites for this loop.
for (auto I = PredicatedSCEVRewrites.begin();
I != PredicatedSCEVRewrites.end();) {
std::pair<const SCEV *, const Loop *> Entry = I->first;
if (Entry.second == CurrL)
PredicatedSCEVRewrites.erase(I++);
else
++I;
}
auto LoopUsersItr = LoopUsers.find(CurrL);
if (LoopUsersItr != LoopUsers.end()) {
for (auto *S : LoopUsersItr->second)
forgetMemoizedResults(S);
LoopUsers.erase(LoopUsersItr);
}
// Drop information about expressions based on loop-header PHIs.
PushLoopPHIs(CurrL, Worklist);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I).second)
continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
eraseValueFromMap(It->first);
forgetMemoizedResults(It->second);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
PushDefUseChildren(I, Worklist);
}
LoopPropertiesCache.erase(CurrL);
// Forget all contained loops too, to avoid dangling entries in the
// ValuesAtScopes map.
LoopWorklist.append(CurrL->begin(), CurrL->end());
}
}
void ScalarEvolution::forgetTopmostLoop(const Loop *L) {
while (Loop *Parent = L->getParentLoop())
L = Parent;
forgetLoop(L);
}
void ScalarEvolution::forgetValue(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return;
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
Worklist.push_back(I);
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
I = Worklist.pop_back_val();
if (!Visited.insert(I).second)
continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
eraseValueFromMap(It->first);
forgetMemoizedResults(It->second);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
PushDefUseChildren(I, Worklist);
}
}
void ScalarEvolution::forgetLoopDispositions(const Loop *L) {
LoopDispositions.clear();
}
/// Get the exact loop backedge taken count considering all loop exits. A
/// computable result can only be returned for loops with all exiting blocks
/// dominating the latch. howFarToZero assumes that the limit of each loop test
/// is never skipped. This is a valid assumption as long as the loop exits via
/// that test. For precise results, it is the caller's responsibility to specify
/// the relevant loop exiting block using getExact(ExitingBlock, SE).
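///
/// For illustration: if a loop has two exiting blocks, both dominating the
/// latch, whose exits are taken after 5 and 7 backedges respectively, the
/// exact backedge-taken count computed below is umin(5, 7) = 5.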
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
SCEVUnionPredicate *Preds) const {
// If any exits were not computable, the loop is not computable.
if (!isComplete() || ExitNotTaken.empty())
return SE->getCouldNotCompute();
const BasicBlock *Latch = L->getLoopLatch();
// All exiting blocks we have collected must dominate the only backedge.
if (!Latch)
return SE->getCouldNotCompute();
// All exiting blocks we have gathered dominate the loop's latch, so the exact
// trip count is simply the minimum of all these calculated exit counts.
SmallVector<const SCEV *, 2> Ops;
for (auto &ENT : ExitNotTaken) {
const SCEV *BECount = ENT.ExactNotTaken;
assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!");
assert(SE->DT.dominates(ENT.ExitingBlock, Latch) &&
"We should only have known counts for exiting blocks that dominate "
"latch!");
Ops.push_back(BECount);
if (Preds && !ENT.hasAlwaysTruePredicate())
Preds->add(ENT.Predicate.get());
assert((Preds || ENT.hasAlwaysTruePredicate()) &&
"Predicate should be always true!");
}
return SE->getUMinFromMismatchedTypes(Ops);
}
/// Get the exact not taken count for this loop exit.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock,
ScalarEvolution *SE) const {
for (auto &ENT : ExitNotTaken)
if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
return ENT.ExactNotTaken;
return SE->getCouldNotCompute();
}
const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
const BasicBlock *ExitingBlock, ScalarEvolution *SE) const {
for (auto &ENT : ExitNotTaken)
if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
return ENT.MaxNotTaken;
return SE->getCouldNotCompute();
}
/// getConstantMax - Get the constant max backedge taken count for the loop.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const {
auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
return !ENT.hasAlwaysTruePredicate();
};
if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getConstantMax())
return SE->getCouldNotCompute();
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
isa<SCEVConstant>(getConstantMax())) &&
"No point in having a non-constant max backedge taken count!");
return getConstantMax();
}
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(const Loop *L,
ScalarEvolution *SE) {
if (!SymbolicMax)
SymbolicMax = SE->computeSymbolicMaxBackedgeTakenCount(L);
return SymbolicMax;
}
bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero(
ScalarEvolution *SE) const {
auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
return !ENT.hasAlwaysTruePredicate();
};
return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
}
bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S) const {
return Operands.contains(S);
}
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E)
: ExitLimit(E, E, false, None) {
}
ScalarEvolution::ExitLimit::ExitLimit(
const SCEV *E, const SCEV *M, bool MaxOrZero,
ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
: ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
!isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
"Exact is not allowed to be less precise than Max");
assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
isa<SCEVConstant>(MaxNotTaken)) &&
"No point in having a non-constant max backedge taken count!");
for (auto *PredSet : PredSetList)
for (auto *P : *PredSet)
addPredicate(P);
assert((isa<SCEVCouldNotCompute>(E) || !E->getType()->isPointerTy()) &&
"Backedge count should be int");
assert((isa<SCEVCouldNotCompute>(M) || !M->getType()->isPointerTy()) &&
"Max backedge count should be int");
}
ScalarEvolution::ExitLimit::ExitLimit(
const SCEV *E, const SCEV *M, bool MaxOrZero,
const SmallPtrSetImpl<const SCEVPredicate *> &PredSet)
: ExitLimit(E, M, MaxOrZero, {&PredSet}) {
}
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
bool MaxOrZero)
: ExitLimit(E, M, MaxOrZero, None) {
}
class SCEVRecordOperands {
SmallPtrSetImpl<const SCEV *> &Operands;
public:
SCEVRecordOperands(SmallPtrSetImpl<const SCEV *> &Operands)
: Operands(Operands) {}
bool follow(const SCEV *S) {
Operands.insert(S);
return true;
}
bool isDone() { return false; }
};
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> ExitCounts,
bool IsComplete, const SCEV *ConstantMax, bool MaxOrZero)
: ConstantMax(ConstantMax), IsComplete(IsComplete), MaxOrZero(MaxOrZero) {
using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
ExitNotTaken.reserve(ExitCounts.size());
std::transform(
ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken),
[&](const EdgeExitInfo &EEI) {
BasicBlock *ExitBB = EEI.first;
const ExitLimit &EL = EEI.second;
if (EL.Predicates.empty())
return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
nullptr);
std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
for (auto *Pred : EL.Predicates)
Predicate->add(Pred);
return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
std::move(Predicate));
});
assert((isa<SCEVCouldNotCompute>(ConstantMax) ||
isa<SCEVConstant>(ConstantMax)) &&
"No point in having a non-constant max backedge taken count!");
SCEVRecordOperands RecordOperands(Operands);
SCEVTraversal<SCEVRecordOperands> ST(RecordOperands);
if (!isa<SCEVCouldNotCompute>(ConstantMax))
ST.visitAll(ConstantMax);
for (auto &ENT : ExitNotTaken)
if (!isa<SCEVCouldNotCompute>(ENT.ExactNotTaken))
ST.visitAll(ENT.ExactNotTaken);
}
/// Compute the number of times the backedge of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
bool AllowPredicates) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
SmallVector<EdgeExitInfo, 4> ExitCounts;
bool CouldComputeBECount = true;
BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
const SCEV *MustExitMaxBECount = nullptr;
const SCEV *MayExitMaxBECount = nullptr;
bool MustExitMaxOrZero = false;
// Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
// and compute maxBECount.
// Do a union of all the predicates here.
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitBB = ExitingBlocks[i];
// We canonicalize untaken exits to br (constant); ignore them so that
// proving an exit untaken doesn't negatively impact our ability to reason
// about the loop as a whole.
if (auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator()))
if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
if ((ExitIfTrue && CI->isZero()) || (!ExitIfTrue && CI->isOne()))
continue;
}
ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);
assert((AllowPredicates || EL.Predicates.empty()) &&
"Predicated exit limit when predicates are not allowed!");
// 1. For each exit that can be computed, add an entry to ExitCounts.
// CouldComputeBECount is true only if all exits can be computed.
if (EL.ExactNotTaken == getCouldNotCompute())
// We couldn't compute an exact value for this exit, so
// we won't be able to compute an exact value for the loop.
CouldComputeBECount = false;
else
ExitCounts.emplace_back(ExitBB, EL);
// 2. Derive the loop's MaxBECount from each exit's max number of
// non-exiting iterations. Partition the loop exits into two kinds:
// LoopMustExits and LoopMayExits.
//
// If the exit dominates the loop latch, it is a LoopMustExit; otherwise it
// is a LoopMayExit. If any computable LoopMustExit is found, then
// MaxBECount is the minimum EL.MaxNotTaken of computable
// LoopMustExits. Otherwise, MaxBECount is conservatively the maximum
// EL.MaxNotTaken, where CouldNotCompute is considered greater than any
// computable EL.MaxNotTaken.
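//
// For illustration: with two computable exits whose EL.MaxNotTaken values
// are 10 and 20, MaxBECount is umin(10, 20) = 10 if both exits dominate the
// latch, and conservatively umax(10, 20) = 20 if neither does.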
if (EL.MaxNotTaken != getCouldNotCompute() && Latch &&
DT.dominates(ExitBB, Latch)) {
if (!MustExitMaxBECount) {
MustExitMaxBECount = EL.MaxNotTaken;
MustExitMaxOrZero = EL.MaxOrZero;
} else {
MustExitMaxBECount =
getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken);
}
} else if (MayExitMaxBECount != getCouldNotCompute()) {
if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute())
MayExitMaxBECount = EL.MaxNotTaken;
else {
MayExitMaxBECount =
getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken);
}
}
}
const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
(MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
// The loop backedge will be taken the maximum or zero times if there's
// a single exit that must be taken the maximum or zero times.
bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
MaxBECount, MaxOrZero);
}
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
bool AllowPredicates) {
assert(L->contains(ExitingBlock) && "Exit count for non-loop block?");
// If our exiting block does not dominate the latch, then its connection with
// the loop's exit limit may be far from trivial.
const BasicBlock *Latch = L->getLoopLatch();
if (!Latch || !DT.dominates(ExitingBlock, Latch))
return getCouldNotCompute();
bool IsOnlyExit = (L->getExitingBlock() != nullptr);
Instruction *Term = ExitingBlock->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
assert(ExitIfTrue == L->contains(BI->getSuccessor(1)) &&
"It should have one successor in loop and one exit block!");
// Proceed to the next level to examine the exit condition expression.
return computeExitLimitFromCond(
L, BI->getCondition(), ExitIfTrue,
/*ControlsExit=*/IsOnlyExit, AllowPredicates);
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
// For switch, make sure that there is a single exit from the loop.
BasicBlock *Exit = nullptr;
for (auto *SBB : successors(ExitingBlock))
if (!L->contains(SBB)) {
if (Exit) // Multiple exit successors.
return getCouldNotCompute();
Exit = SBB;
}
assert(Exit && "Exiting block must have at least one exit");
return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
/*ControlsExit=*/IsOnlyExit);
}
return getCouldNotCompute();
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond(
const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsExit, bool AllowPredicates) {
ScalarEvolution::ExitLimitCacheTy Cache(L, ExitIfTrue, AllowPredicates);
return computeExitLimitFromCondCached(Cache, L, ExitCond, ExitIfTrue,
ControlsExit, AllowPredicates);
}
Optional<ScalarEvolution::ExitLimit>
ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
bool ExitIfTrue, bool ControlsExit,
bool AllowPredicates) {
(void)this->L;
(void)this->ExitIfTrue;
(void)this->AllowPredicates;
assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
this->AllowPredicates == AllowPredicates &&
"Variance in assumed invariant key components!");
auto Itr = TripCountMap.find({ExitCond, ControlsExit});
if (Itr == TripCountMap.end())
return None;
return Itr->second;
}
void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond,
bool ExitIfTrue,
bool ControlsExit,
bool AllowPredicates,
const ExitLimit &EL) {
assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
this->AllowPredicates == AllowPredicates &&
"Variance in assumed invariant key components!");
auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL});
assert(InsertResult.second && "Expected successful insertion!");
(void)InsertResult;
(void)ExitIfTrue;
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsExit, bool AllowPredicates) {
if (auto MaybeEL =
Cache.find(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
return *MaybeEL;
ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, ExitIfTrue,
ControlsExit, AllowPredicates);
Cache.insert(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates, EL);
return EL;
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsExit, bool AllowPredicates) {
// Handle BinOp conditions (And, Or).
if (auto LimitFromBinOp = computeExitLimitFromCondFromBinOp(
Cache, L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
return *LimitFromBinOp;
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) {
ExitLimit EL =
computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit);
if (EL.hasFullInfo() || !AllowPredicates)
return EL;
// Try again, but use SCEV predicates this time.
return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit,
/*AllowPredicates=*/true);
}
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
// preserve the CFG and is temporarily leaving constant conditions
// in place.
if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
if (ExitIfTrue == !CI->getZExtValue())
// The backedge is always taken.
return getCouldNotCompute();
else
// The backedge is never taken.
return getZero(CI->getType());
}
// If it's not an integer or pointer comparison then compute it the hard way.
return computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
}
Optional<ScalarEvolution::ExitLimit>
ScalarEvolution::computeExitLimitFromCondFromBinOp(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsExit, bool AllowPredicates) {
// Check if the controlling expression for this loop is an And or Or.
Value *Op0, *Op1;
bool IsAnd = false;
if (match(ExitCond, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
IsAnd = true;
else if (match(ExitCond, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
IsAnd = false;
else
return None;
// EitherMayExit is true in these two cases:
// br (and Op0 Op1), loop, exit
// br (or Op0 Op1), exit, loop
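// For instance, 'br (and Op0 Op1), loop, exit' has IsAnd == true and
// ExitIfTrue == false, so EitherMayExit is true: the loop exits as soon as
// either operand becomes false.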
bool EitherMayExit = IsAnd ^ ExitIfTrue;
ExitLimit EL0 = computeExitLimitFromCondCached(Cache, L, Op0, ExitIfTrue,
ControlsExit && !EitherMayExit,
AllowPredicates);
ExitLimit EL1 = computeExitLimitFromCondCached(Cache, L, Op1, ExitIfTrue,
ControlsExit && !EitherMayExit,
AllowPredicates);
// Be robust against unsimplified IR for the form "op i1 X, NeutralElement"
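// (e.g. for 'and i1 %X, true' the limit is simply that of %X, while for
// 'and i1 %X, false' the limit is that of the constant operand).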
const Constant *NeutralElement = ConstantInt::get(ExitCond->getType(), IsAnd);
if (isa<ConstantInt>(Op1))
return Op1 == NeutralElement ? EL0 : EL1;
if (isa<ConstantInt>(Op0))
return Op0 == NeutralElement ? EL1 : EL0;
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
// The loop keeps executing only while neither condition triggers an exit,
// so choose the less conservative count.
// If ExitCond is a short-circuit form (select), using
// umin(EL0.ExactNotTaken, EL1.ExactNotTaken) is unsafe in general.
// To see the detailed examples, please see
// test/Analysis/ScalarEvolution/exit-count-select.ll
bool PoisonSafe = isa<BinaryOperator>(ExitCond);
if (!PoisonSafe)
// Even if ExitCond is select, we can safely derive BECount using both
// EL0 and EL1 in these cases:
// (1) EL0.ExactNotTaken is non-zero
// (2) EL1.ExactNotTaken is non-poison
// (3) EL0.ExactNotTaken is zero (BECount should be simply zero and
// it cannot be umin(0, ..))
// The PoisonSafe assignment below is simplified and the assertion after
// BECount calculation fully guarantees the condition (3).
PoisonSafe = isa<SCEVConstant>(EL0.ExactNotTaken) ||
isa<SCEVConstant>(EL1.ExactNotTaken);
if (EL0.ExactNotTaken != getCouldNotCompute() &&
EL1.ExactNotTaken != getCouldNotCompute() && PoisonSafe) {
BECount =
getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
// If EL0.ExactNotTaken was zero and ExitCond was a short-circuit form,
// it should have been simplified to zero (see the condition (3) above)
assert(!isa<BinaryOperator>(ExitCond) || !EL0.ExactNotTaken->isZero() ||
BECount->isZero());
}
if (EL0.MaxNotTaken == getCouldNotCompute())
MaxBECount = EL1.MaxNotTaken;
else if (EL1.MaxNotTaken == getCouldNotCompute())
MaxBECount = EL0.MaxNotTaken;
else
MaxBECount = getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
} else {
// Both conditions must trigger the exit at the same time for the loop to
// exit. For now, be conservative.
if (EL0.ExactNotTaken == EL1.ExactNotTaken)
BECount = EL0.ExactNotTaken;
}
// There are cases (e.g. PR26207) where computeExitLimitFromCond is able
// to be more aggressive when computing BECount than when computing
// MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
// EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
// to not.
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
MaxBECount = getConstant(getUnsignedRangeMax(BECount));
return ExitLimit(BECount, MaxBECount, false,
{ &EL0.Predicates, &EL1.Predicates });
}
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
bool ExitIfTrue,
bool ControlsExit,
bool AllowPredicates) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Pred;
if (!ExitIfTrue)
Pred = ExitCond->getPredicate();
else
Pred = ExitCond->getInversePredicate();
const ICmpInst::Predicate OriginalPred = Pred;
// Handle common loops like: for (X = "string"; *X; ++X)
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
ExitLimit ItCnt =
computeLoadConstantCompareExitLimit(LI, RHS, L, Pred);
if (ItCnt.hasAnyInfo())
return ItCnt;
}
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
// Try to evaluate any dependencies out of the loop.
LHS = getSCEVAtScope(LHS, L);
RHS = getSCEVAtScope(RHS, L);
// At this point, we would like to compute how many iterations of the
// loop the predicate will return true for these inputs.
if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
// If there is a loop-invariant, force it into the RHS.
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
// Simplify the operands before analyzing them.
(void)SimplifyICmpOperands(Pred, LHS, RHS);
// If we have a comparison of a chrec against a constant, try to use value
// ranges to answer this query.
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
if (AddRec->getLoop() == L) {
// Form the constant range.
ConstantRange CompRange =
ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt());
const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
}
switch (Pred) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
if (LHS->getType()->isPointerTy()) {
LHS = getLosslessPtrToIntExpr(LHS);
if (isa<SCEVCouldNotCompute>(LHS))
return LHS;
}
if (RHS->getType()->isPointerTy()) {
RHS = getLosslessPtrToIntExpr(RHS);
if (isa<SCEVCouldNotCompute>(RHS))
return RHS;
}
ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
AllowPredicates);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_EQ: { // while (X == Y)
// Convert to: while (X-Y == 0)
if (LHS->getType()->isPointerTy()) {
LHS = getLosslessPtrToIntExpr(LHS);
if (isa<SCEVCouldNotCompute>(LHS))
return LHS;
}
if (RHS->getType()->isPointerTy()) {
RHS = getLosslessPtrToIntExpr(RHS);
if (isa<SCEVCouldNotCompute>(RHS))
return RHS;
}
ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT: { // while (X < Y)
bool IsSigned = Pred == ICmpInst::ICMP_SLT;
ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit,
AllowPredicates);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_UGT: { // while (X > Y)
bool IsSigned = Pred == ICmpInst::ICMP_SGT;
ExitLimit EL =
howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit,
AllowPredicates);
if (EL.hasAnyInfo()) return EL;
break;
}
default:
break;
}
auto *ExhaustiveCount =
computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
if (!isa<SCEVCouldNotCompute>(ExhaustiveCount))
return ExhaustiveCount;
return computeShiftCompareExitLimit(ExitCond->getOperand(0),
ExitCond->getOperand(1), L, OriginalPred);
}
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
SwitchInst *Switch,
BasicBlock *ExitingBlock,
bool ControlsExit) {
assert(!L->contains(ExitingBlock) && "Not an exiting block!");
// Give up if the exit is the default dest of a switch.
if (Switch->getDefaultDest() == ExitingBlock)
return getCouldNotCompute();
assert(L->contains(Switch->getDefaultDest()) &&
"Default case must not exit the loop!");
const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
// while (X != Y) --> while (X-Y != 0)
ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
if (EL.hasAnyInfo())
return EL;
return getCouldNotCompute();
}
static ConstantInt *
EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
ScalarEvolution &SE) {
const SCEV *InVal = SE.getConstant(C);
const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
assert(isa<SCEVConstant>(Val) &&
"Evaluation of SCEV at constant didn't fold correctly?");
return cast<SCEVConstant>(Val)->getValue();
}
/// Given an exit condition of 'icmp op load X, cst', try to see if we can
/// compute the backedge execution count.
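///
/// For illustration (hypothetical names): a loop such as
///   for (i = 0; table[i] != 0; ++i)
/// where 'table' is a constant global array with a definitive initializer is
/// handled by folding each iteration's load through the GEP indices until the
/// terminating comparison is found, or MaxBruteForceIterations is exceeded.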
ScalarEvolution::ExitLimit
ScalarEvolution::computeLoadConstantCompareExitLimit(
LoadInst *LI,
Constant *RHS,
const Loop *L,
ICmpInst::Predicate predicate) {
if (LI->isVolatile()) return getCouldNotCompute();
// Check to see if the loaded pointer is a getelementptr of a global.
// TODO: Use SCEV instead of manually grubbing with GEPs.
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
if (!GEP) return getCouldNotCompute();
// Make sure that it is really a constant global we are gepping, with an
// initializer, and make sure the first IDX is really 0.
GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
!cast<Constant>(GEP->getOperand(1))->isNullValue())
return getCouldNotCompute();
// Okay, we allow one non-constant index into the GEP instruction.
Value *VarIdx = nullptr;
std::vector<Constant*> Indexes;
unsigned VarIdxNum = 0;
for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
Indexes.push_back(CI);
} else if (!isa<ConstantInt>(GEP->getOperand(i))) {
if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
VarIdx = GEP->getOperand(i);
VarIdxNum = i-2;
Indexes.push_back(nullptr);
}
// Loop-invariant loads may be a byproduct of loop optimization. Skip them.
if (!VarIdx)
return getCouldNotCompute();
// Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
// Check to see if X is a loop variant variable value now.
const SCEV *Idx = getSCEV(VarIdx);
Idx = getSCEVAtScope(Idx, L);
// We can only recognize very limited forms of loop index expressions, in
// particular, only affine AddRec's like {C1,+,C2}<L>.
const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
if (!IdxExpr || IdxExpr->getLoop() != L || !IdxExpr->isAffine() ||
isLoopInvariant(IdxExpr, L) ||
!isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
!isa<SCEVConstant>(IdxExpr->getOperand(1)))
return getCouldNotCompute();
unsigned MaxSteps = MaxBruteForceIterations;
for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
ConstantInt *ItCst = ConstantInt::get(
cast<IntegerType>(IdxExpr->getType()), IterationNum);
ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
// Form the GEP offset.
Indexes[VarIdxNum] = Val;
Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
Indexes);
if (!Result) break; // Cannot compute!
// Evaluate the condition for this iteration.
Result = ConstantExpr::getICmp(predicate, Result, RHS);
if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
++NumArrayLenItCounts;
return getConstant(ItCst); // Found terminating iteration!
}
}
return getCouldNotCompute();
}
ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
if (!RHS)
return getCouldNotCompute();
const BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return getCouldNotCompute();
const BasicBlock *Predecessor = L->getLoopPredecessor();
if (!Predecessor)
return getCouldNotCompute();
// Return true if V is of the form "LHS `shift_op` <positive constant>".
// Return LHS in OutLHS and shift_op in OutOpCode.
auto MatchPositiveShift =
[](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) {
using namespace PatternMatch;
ConstantInt *ShiftAmt;
if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
OutOpCode = Instruction::LShr;
else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
OutOpCode = Instruction::AShr;
else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
OutOpCode = Instruction::Shl;
else
return false;
return ShiftAmt->getValue().isStrictlyPositive();
};
// Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in
//
// loop:
// %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
// %iv.shifted = lshr i32 %iv, <positive constant>
//
// Return true on a successful match. Return the corresponding PHI node (%iv
// above) in PNOut and the opcode of the shift operation in OpCodeOut.
auto MatchShiftRecurrence =
[&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
Optional<Instruction::BinaryOps> PostShiftOpCode;
{
Instruction::BinaryOps OpC;
Value *V;
// If we encounter a shift instruction, "peel off" the shift operation,
// and remember that we did so. Later when we inspect %iv's backedge
// value, we will make sure that the backedge value uses the same
// operation.
//
// Note: the peeled shift operation does not have to be the same
// instruction as the one feeding into the PHI's backedge value. We only
// really care about it being the same *kind* of shift instruction --
// that's all that is required for our later inferences to hold.
if (MatchPositiveShift(LHS, V, OpC)) {
PostShiftOpCode = OpC;
LHS = V;
}
}
PNOut = dyn_cast<PHINode>(LHS);
if (!PNOut || PNOut->getParent() != L->getHeader())
return false;
Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
Value *OpLHS;
return
// The backedge value for the PHI node must be a shift by a positive
// amount
MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&
// of the PHI node itself
OpLHS == PNOut &&
// and the kind of shift should match the kind of shift we peeled
// off, if any.
(!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
};
PHINode *PN;
Instruction::BinaryOps OpCode;
if (!MatchShiftRecurrence(LHS, PN, OpCode))
return getCouldNotCompute();
const DataLayout &DL = getDataLayout();
// The key rationale for this optimization is that for some kinds of shift
// recurrences, the value of the recurrence "stabilizes" to either 0 or -1
// within a finite number of iterations. If the condition guarding the
// backedge (in the sense that the backedge is taken if the condition is true)
// is false for the value the shift recurrence stabilizes to, then we know
// that the backedge is taken only a finite number of times.
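// For example, {-8,ashr,1} evolves as -8, -4, -2, -1, -1, ... and stabilizes
// to -1, while {8,lshr,1} evolves as 8, 4, 2, 1, 0, 0, ... and stabilizes to
// 0, in each case within bitwidth-many iterations.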
ConstantInt *StableValue = nullptr;
switch (OpCode) {
default:
llvm_unreachable("Impossible case!");
case Instruction::AShr: {
// {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
// bitwidth(K) iterations.
Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
KnownBits Known = computeKnownBits(FirstValue, DL, 0, &AC,
Predecessor->getTerminator(), &DT);
auto *Ty = cast<IntegerType>(RHS->getType());
if (Known.isNonNegative())
StableValue = ConstantInt::get(Ty, 0);
else if (Known.isNegative())
StableValue = ConstantInt::get(Ty, -1, true);
else
return getCouldNotCompute();
break;
}
case Instruction::LShr:
case Instruction::Shl:
// Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
// stabilize to 0 in at most bitwidth(K) iterations.
StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
break;
}
auto *Result =
ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
assert(Result->getType()->isIntegerTy(1) &&
"Otherwise cannot be an operand to a branch instruction");
if (Result->isZeroValue()) {
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *UpperBound =
getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
return ExitLimit(getCouldNotCompute(), UpperBound, false);
}
return getCouldNotCompute();
}
/// Return true if we can constant fold an instruction of the specified type,
/// assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
isa<LoadInst>(I) || isa<ExtractValueInst>(I))
return true;
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (const Function *F = CI->getCalledFunction())
return canConstantFoldCallTo(CI, F);
return false;
}
/// Determine whether this instruction can constant evolve within this loop
/// assuming its operands can all constant evolve.
static bool canConstantEvolve(Instruction *I, const Loop *L) {
// An instruction outside of the loop can't be derived from a loop PHI.
if (!L->contains(I)) return false;
if (isa<PHINode>(I)) {
// We don't currently keep track of the control flow needed to evaluate
// PHIs, so we cannot handle PHIs inside of loops.
return L->getHeader() == I->getParent();
}
// If we won't be able to constant fold this expression even if the operands
// are constants, bail early.
return CanConstantFold(I);
}
/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
/// recursing through each instruction operand until reaching a loop header phi.
static PHINode *
getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
DenseMap<Instruction *, PHINode *> &PHIMap,
unsigned Depth) {
if (Depth > MaxConstantEvolvingDepth)
return nullptr;
// Otherwise, we can evaluate this instruction if all of its operands are
// constant or derived from a PHI node themselves.
PHINode *PHI = nullptr;
for (Value *Op : UseInst->operands()) {
if (isa<Constant>(Op)) continue;
Instruction *OpInst = dyn_cast<Instruction>(Op);
if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
PHINode *P = dyn_cast<PHINode>(OpInst);
if (!P)
// If this operand is already visited, reuse the prior result.
// We may have P != PHI if this is the deepest point at which the
// inconsistent paths meet.
P = PHIMap.lookup(OpInst);
if (!P) {
// Recurse and memoize the results, whether a phi is found or not.
// This recursive call invalidates pointers into PHIMap.
P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1);
PHIMap[OpInst] = P;
}
if (!P)
return nullptr; // Not evolving from PHI
if (PHI && PHI != P)
return nullptr; // Evolving from multiple different PHIs.
PHI = P;
}
// This is an expression evolving from a constant PHI!
return PHI;
}
/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
/// in the loop that V is derived from. We allow arbitrary operations along the
/// way, but the operands of an operation must either be constants or a value
/// derived from a constant PHI. If this expression does not fit with these
/// constraints, return null.
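///
/// For illustration, given IR of the form
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
///   %iv.next = add i32 %iv, 1
///   %cmp = icmp eq i32 %iv.next, 10
/// this returns %iv for %cmp, since %cmp is derived only from %iv and
/// constants.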
static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !canConstantEvolve(I, L)) return nullptr;
if (PHINode *PN = dyn_cast<PHINode>(I))
return PN;
// Record non-constant instructions contained by the loop.
DenseMap<Instruction *, PHINode *> PHIMap;
return getConstantEvolvingPHIOperands(I, L, PHIMap, 0);
}
/// EvaluateExpression - Given an expression that passes the
/// getConstantEvolvingPHI predicate, evaluate its value assuming the loop-header
/// PHI nodes have the constant values recorded in Vals. If we can't fold this
/// expression for some reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
DenseMap<Instruction *, Constant *> &Vals,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return nullptr;
if (Constant *C = Vals.lookup(I)) return C;
// An instruction inside the loop depends on a value outside the loop that we
// weren't given a mapping for, or a value such as a call inside the loop.
if (!canConstantEvolve(I, L)) return nullptr;
// An unmapped PHI can be due to a branch or another loop inside this loop,
// or due to this not being the initial iteration through a loop where we
// couldn't compute the evolution of this particular PHI last time.
if (isa<PHINode>(I)) return nullptr;
std::vector<Constant*> Operands(I->getNumOperands());
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
if (!Operand) {
Operands[i] = dyn_cast<Constant>(I->getOperand(i));
if (!Operands[i]) return nullptr;
continue;
}
Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
Vals[Operand] = C;
if (!C) return nullptr;
Operands[i] = C;
}
if (CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
Operands[1], DL, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
}
return ConstantFoldInstOperands(I, Operands, DL, TLI);
}
// If every incoming value to PN except the one for BB is a specific Constant,
// return that, else return nullptr.
static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
Constant *IncomingVal = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingBlock(i) == BB)
continue;
auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
if (!CurrentVal)
return nullptr;
if (IncomingVal != CurrentVal) {
if (IncomingVal)
return nullptr;
IncomingVal = CurrentVal;
}
}
return IncomingVal;
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
/// involving constants, fold it.
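///
/// For illustration: for %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
/// with %iv.next = add i32 %iv, 2 and a backedge-taken count of 5, the
/// symbolic execution below yields an exit value of 10 for %iv.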
Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
const APInt &BEs,
const Loop *L) {
auto I = ConstantEvolutionLoopExitValue.find(PN);
if (I != ConstantEvolutionLoopExitValue.end())
return I->second;
if (BEs.ugt(MaxBruteForceIterations))
return ConstantEvolutionLoopExitValue[PN] = nullptr; // Not going to evaluate it.
Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
DenseMap<Instruction *, Constant *> CurrentIterVals;
BasicBlock *Header = L->getHeader();
assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return nullptr;
for (PHINode &PHI : Header->phis()) {
if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
CurrentIterVals[&PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return RetVal = nullptr;
Value *BEValue = PN->getIncomingValueForBlock(Latch);
// Execute the loop symbolically to determine the exit value.
assert(BEs.getActiveBits() < CHAR_BIT * sizeof(unsigned) &&
"BEs is <= MaxBruteForceIterations which is an 'unsigned'!");
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
const DataLayout &DL = getDataLayout();
for (; ; ++IterationNum) {
if (IterationNum == NumIterations)
return RetVal = CurrentIterVals[PN]; // Got exit value!
// Compute the value of the PHIs for the next iteration.
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
DenseMap<Instruction *, Constant *> NextIterVals;
Constant *NextPHI =
EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
if (!NextPHI)
return nullptr; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
// Also evaluate the other PHI nodes. However, we don't get to stop if we
// cease to be able to evaluate one of them or if they stop evolving,
// because that doesn't necessarily prevent us from computing PN.
SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
for (const auto &I : CurrentIterVals) {
PHINode *PHI = dyn_cast<PHINode>(I.first);
if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
PHIsToCompute.emplace_back(PHI, I.second);
}
// We use two distinct loops because EvaluateExpression may invalidate any
// iterators into CurrentIterVals.
for (const auto &I : PHIsToCompute) {
PHINode *PHI = I.first;
Constant *&NextPHI = NextIterVals[PHI];
if (!NextPHI) { // Not already computed.
Value *BEValue = PHI->getIncomingValueForBlock(Latch);
NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
}
if (NextPHI != I.second)
StoppedEvolving = false;
}
// If all entries in CurrentIterVals == NextIterVals then we can stop
// iterating, the loop can't continue to change.
if (StoppedEvolving)
return RetVal = CurrentIterVals[PN];
CurrentIterVals.swap(NextIterVals);
}
}
const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
Value *Cond,
bool ExitWhen) {
PHINode *PN = getConstantEvolvingPHI(Cond, L);
if (!PN) return getCouldNotCompute();
// If the loop is canonicalized, the PHI will have exactly two entries.
// That's the only form we support here.
if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
DenseMap<Instruction *, Constant *> CurrentIterVals;
BasicBlock *Header = L->getHeader();
assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Should follow from NumIncomingValues == 2!");
for (PHINode &PHI : Header->phis()) {
if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
CurrentIterVals[&PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return getCouldNotCompute();
// Okay, we found a PHI node that defines the trip count of this loop. Execute
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
const DataLayout &DL = getDataLayout();
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
auto *CondVal = dyn_cast_or_null<ConstantInt>(
EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
if (CondVal->getValue() == uint64_t(ExitWhen)) {
++NumBruteForceTripCountsComputed;
return getConstant(Type::getInt32Ty(getContext()), IterationNum);
}
// Update all the PHI nodes for the next iteration.
DenseMap<Instruction *, Constant *> NextIterVals;
// Create a list of which PHIs we need to compute. We want to do this before
// calling EvaluateExpression on them because that may invalidate iterators
// into CurrentIterVals.
SmallVector<PHINode *, 8> PHIsToCompute;
for (const auto &I : CurrentIterVals) {
PHINode *PHI = dyn_cast<PHINode>(I.first);
if (!PHI || PHI->getParent() != Header) continue;
PHIsToCompute.push_back(PHI);
}
for (PHINode *PHI : PHIsToCompute) {
Constant *&NextPHI = NextIterVals[PHI];
if (NextPHI) continue; // Already computed!
Value *BEValue = PHI->getIncomingValueForBlock(Latch);
NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
}
CurrentIterVals.swap(NextIterVals);
}
// Too many iterations were needed to evaluate.
return getCouldNotCompute();
}
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
ValuesAtScopes[V];
// Check to see if we've folded this expression at this loop before.
for (auto &LS : Values)
if (LS.first == L)
return LS.second ? LS.second : V;
Values.emplace_back(L, nullptr);
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
for (auto &LS : reverse(ValuesAtScopes[V]))
if (LS.first == L) {
LS.second = C;
break;
}
return C;
}
/// This builds up a Constant using the ConstantExpr interface. That way, we
/// will return Constants for objects which aren't represented by a
/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
/// Returns NULL if the SCEV isn't representable as a Constant.
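///
/// For illustration: an add expression of a global pointer and the constant
/// 16 is rebuilt below as a getelementptr over i8 with a byte offset of 16,
/// whereas any expression containing an addrec yields null.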
static Constant *BuildConstantFromSCEV(const SCEV *V) {
switch (V->getSCEVType()) {
case scCouldNotCompute:
case scAddRecExpr:
return nullptr;
case scConstant:
return cast<SCEVConstant>(V)->getValue();
case scUnknown:
return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
case scSignExtend: {
const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
return ConstantExpr::getSExt(CastOp, SS->getType());
return nullptr;
}
case scZeroExtend: {
const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
return ConstantExpr::getZExt(CastOp, SZ->getType());
return nullptr;
}
case scPtrToInt: {
const SCEVPtrToIntExpr *P2I = cast<SCEVPtrToIntExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(P2I->getOperand()))
return ConstantExpr::getPtrToInt(CastOp, P2I->getType());
return nullptr;
}
case scTruncate: {
const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
return ConstantExpr::getTrunc(CastOp, ST->getType());
return nullptr;
}
case scAddExpr: {
const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
unsigned AS = PTy->getAddressSpace();
Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
C = ConstantExpr::getBitCast(C, DestPtrTy);
}
for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
if (!C2)
return nullptr;
// First pointer!
if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
unsigned AS = C2->getType()->getPointerAddressSpace();
std::swap(C, C2);
Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
// The offsets have been converted to bytes. We can add bytes to an
// i8* by GEP with the byte count in the first index.
C = ConstantExpr::getBitCast(C, DestPtrTy);
}
// Don't bother trying to sum two pointers. We probably can't
// statically compute a load that results from it anyway.
if (C2->getType()->isPointerTy())
return nullptr;
if (C->getType()->isPointerTy()) {
C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()),
C, C2);
} else {
C = ConstantExpr::getAdd(C, C2);
}
}
return C;
}
return nullptr;
}
case scMulExpr: {
const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
// Don't bother with pointers at all.
if (C->getType()->isPointerTy())
return nullptr;
for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
if (!C2 || C2->getType()->isPointerTy())
return nullptr;
C = ConstantExpr::getMul(C, C2);
}
return C;
}
return nullptr;
}
case scUDivExpr: {
const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
if (LHS->getType() == RHS->getType())
return ConstantExpr::getUDiv(LHS, RHS);
return nullptr;
}
case scSMaxExpr:
case scUMaxExpr:
case scSMinExpr:
case scUMinExpr:
return nullptr; // TODO: smax, umax, smin, umin.
}
llvm_unreachable("Unknown SCEV kind!");
}
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (isa<SCEVConstant>(V)) return V;
// If this instruction is evolved from a constant-evolving PHI, compute the
// exit value from the loop without using SCEVs.
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
const Loop *CurrLoop = this->LI[I->getParent()];
// Looking for loop exit value.
if (CurrLoop && CurrLoop->getParentLoop() == L &&
PN->getParent() == CurrLoop->getHeader()) {
// Okay, there is no closed form solution for the PHI node. Check
// to see if the loop that contains it has a known backedge-taken
// count. If so, we may be able to force computation of the exit
// value.
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(CurrLoop);
// This trivial case can show up in some degenerate cases where
// the incoming IR has not yet been fully simplified.
if (BackedgeTakenCount->isZero()) {
Value *InitValue = nullptr;
bool MultipleInitValues = false;
for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
if (!CurrLoop->contains(PN->getIncomingBlock(i))) {
if (!InitValue)
InitValue = PN->getIncomingValue(i);
else if (InitValue != PN->getIncomingValue(i)) {
MultipleInitValues = true;
break;
}
}
}
if (!MultipleInitValues && InitValue)
return getSCEV(InitValue);
}
// Do we have a loop invariant value flowing around the backedge
// for a loop which must execute the backedge?
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
isKnownPositive(BackedgeTakenCount) &&
PN->getNumIncomingValues() == 2) {
unsigned InLoopPred =
CurrLoop->contains(PN->getIncomingBlock(0)) ? 0 : 1;
Value *BackedgeVal = PN->getIncomingValue(InLoopPred);
if (CurrLoop->isLoopInvariant(BackedgeVal))
return getSCEV(BackedgeVal);
}
if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
// Okay, we know how many times the containing loop executes. If
// this is a constant evolving PHI node, get the final value at
// the specified iteration number.
Constant *RV = getConstantEvolutionLoopExitValue(
PN, BTCC->getAPInt(), CurrLoop);
if (RV) return getSCEV(RV);
}
}
// If there is a single-input Phi, evaluate it at our scope. If we can
// prove that this replacement does not break LCSSA form, use new value.
if (PN->getNumOperands() == 1) {
const SCEV *Input = getSCEV(PN->getOperand(0));
const SCEV *InputAtScope = getSCEVAtScope(Input, L);
// TODO: We can generalize it using LI.replacementPreservesLCSSAForm,
// for the simplest case just support constants.
if (isa<SCEVConstant>(InputAtScope)) return InputAtScope;
}
}
// Okay, this is an expression that we cannot symbolically evaluate
// into a SCEV. Check to see if it's possible to symbolically evaluate
// the arguments into constants, and if so, try to constant propagate the
// result. This is particularly useful for computing loop exit values.
if (CanConstantFold(I)) {
SmallVector<Constant *, 4> Operands;
bool MadeImprovement = false;
for (Value *Op : I->operands()) {
if (Constant *C = dyn_cast<Constant>(Op)) {
Operands.push_back(C);
continue;
}
// If any operand is non-constant and of a non-integer, non-pointer type,
// don't even try to analyze it with SCEV techniques.
if (!isSCEVable(Op->getType()))
return V;
const SCEV *OrigV = getSCEV(Op);
const SCEV *OpV = getSCEVAtScope(OrigV, L);
MadeImprovement |= OrigV != OpV;
Constant *C = BuildConstantFromSCEV(OpV);
if (!C) return V;
if (C->getType() != Op->getType())
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
Op->getType(),
false),
C, Op->getType());
Operands.push_back(C);
}
// Check to see if getSCEVAtScope actually made an improvement.
if (MadeImprovement) {
Constant *C = nullptr;
const DataLayout &DL = getDataLayout();
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
Operands[1], DL, &TLI);
else if (const LoadInst *Load = dyn_cast<LoadInst>(I)) {
if (!Load->isVolatile())
C = ConstantFoldLoadFromConstPtr(Operands[0], Load->getType(),
DL);
} else
C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
if (!C) return V;
return getSCEV(C);
}
}
}
// This is some other type of SCEVUnknown, just return it.
return V;
}
if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
// Avoid performing the look-up in the common case where the specified
// expression has no loop-variant portions.
for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
if (OpAtScope != Comm->getOperand(i)) {
// Okay, at least one of these operands is loop variant but might be
// foldable. Build a new instance of the folded commutative expression.
SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
Comm->op_begin()+i);
NewOps.push_back(OpAtScope);
for (++i; i != e; ++i) {
OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
NewOps.push_back(OpAtScope);
}
if (isa<SCEVAddExpr>(Comm))
return getAddExpr(NewOps, Comm->getNoWrapFlags());
if (isa<SCEVMulExpr>(Comm))
return getMulExpr(NewOps, Comm->getNoWrapFlags());
if (isa<SCEVMinMaxExpr>(Comm))
return getMinMaxExpr(Comm->getSCEVType(), NewOps);
llvm_unreachable("Unknown commutative SCEV type!");
}
}
// If we got here, all operands are loop invariant.
return Comm;
}
if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
if (LHS == Div->getLHS() && RHS == Div->getRHS())
return Div; // must be loop invariant
return getUDivExpr(LHS, RHS);
}
// If this is a loop recurrence for a loop that does not contain L, then we
// are dealing with the final value computed by the loop.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
// First, attempt to evaluate each operand.
// Avoid performing the look-up in the common case where the specified
// expression has no loop-variant portions.
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
if (OpAtScope == AddRec->getOperand(i))
continue;
// Okay, at least one of these operands is loop variant but might be
// foldable. Build a new instance of the folded commutative expression.
SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
AddRec->op_begin()+i);
NewOps.push_back(OpAtScope);
for (++i; i != e; ++i)
NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
const SCEV *FoldedRec =
getAddRecExpr(NewOps, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));
AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
// The addrec may be folded to a nonrecurrence, for example, if the
// induction variable is multiplied by zero after constant folding. Go
// ahead and return the folded value.
if (!AddRec)
return FoldedRec;
break;
}
// If the scope is outside the addrec's loop, evaluate it by using the
// loop exit value of the addrec.
if (!AddRec->getLoop()->contains(L)) {
// To evaluate this recurrence, we need to know how many times the AddRec
// loop iterates. Compute this now.
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
// Then, evaluate the AddRec.
return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
}
return AddRec;
}
if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getZeroExtendExpr(Op, Cast->getType());
}
if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getSignExtendExpr(Op, Cast->getType());
}
if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getTruncateExpr(Op, Cast->getType());
}
if (const SCEVPtrToIntExpr *Cast = dyn_cast<SCEVPtrToIntExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getPtrToIntExpr(Op, Cast->getType());
}
llvm_unreachable("Unknown SCEV type!");
}
const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
return getSCEVAtScope(getSCEV(V), L);
}
const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const {
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S))
return stripInjectiveFunctions(ZExt->getOperand());
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S))
return stripInjectiveFunctions(SExt->getOperand());
return S;
}
/// Finds the minimum unsigned root of the following equation:
///
/// A * X = B (mod N)
///
/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
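///
/// As a concrete illustration of the steps below: with BW = 8 (so N = 256),
/// A = 6 and B = 10, we have D = gcd(6, 256) = 2 and B is divisible by D;
/// the multiplicative inverse of A/D = 3 modulo N/D = 128 is 43 (since
/// 3 * 43 = 129 = 1 (mod 128)); and the minimum unsigned root is
/// (43 * 10 mod 256) / 2 = 174 / 2 = 87. Indeed, 6 * 87 = 522 = 10 (mod 256).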
static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
ScalarEvolution &SE) {
uint32_t BW = A.getBitWidth();
assert(BW == SE.getTypeSizeInBits(B->getType()));
assert(A != 0 && "A must be non-zero.");
// 1. D = gcd(A, N)
//
// The gcd of A and N may have only one prime factor: 2. The number of
// trailing zeros in A is its multiplicity
uint32_t Mult2 = A.countTrailingZeros();
// D = 2^Mult2
// 2. Check if B is divisible by D.
//
// B is divisible by D if and only if the multiplicity of prime factor 2 for B
// is not less than multiplicity of this prime factor for D.
if (SE.GetMinTrailingZeros(B) < Mult2)
return SE.getCouldNotCompute();
// 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
// modulo (N / D).
//
// If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent
// (N / D) in general. The inverse itself always fits into BW bits, though,
// so we immediately truncate it.
APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
APInt Mod(BW + 1, 0);
Mod.setBit(BW - Mult2); // Mod = N / D
APInt I = AD.multiplicativeInverse(Mod).trunc(BW);
// 4. Compute the minimum unsigned root of the equation:
// I * (B / D) mod (N / D)
// To simplify the computation, we factor out the divide by D:
// (I * B mod N) / D
const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2));
return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}
/// For a given quadratic addrec, generate coefficients of the corresponding
/// quadratic equation, multiplied by a common value to ensure that they are
/// integers.
/// The returned value is a tuple { A, B, C, M, BitWidth }, where
/// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C
/// were multiplied by, and BitWidth is the bit width of the original addrec
/// coefficients.
/// This function returns None if the addrec coefficients are not compile-
/// time constants.
static Optional<std::tuple<APInt, APInt, APInt, APInt, unsigned>>
GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: "
<< *AddRec << '\n');
// We currently can only solve this if the coefficients are constants.
if (!LC || !MC || !NC) {
LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n");
return None;
}
APInt L = LC->getAPInt();
APInt M = MC->getAPInt();
APInt N = NC->getAPInt();
assert(!N.isNullValue() && "This is not a quadratic addrec");
unsigned BitWidth = LC->getAPInt().getBitWidth();
unsigned NewWidth = BitWidth + 1;
LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: "
<< BitWidth << '\n');
// The sign-extension (as opposed to a zero-extension) here matches the
// extension used in SolveQuadraticEquationWrap (with the same motivation).
N = N.sext(NewWidth);
M = M.sext(NewWidth);
L = L.sext(NewWidth);
// The increments are M, M+N, M+2N, ..., so the accumulated values are
// L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is,
// L+M, L+2M+N, L+3M+3N, ...
// After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N.
//
// The equation Acc = 0 is then
// L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0.
// In a quadratic form it becomes:
// N n^2 + (2M-N) n + 2L = 0.
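//
// As a concrete check of this derivation: for the addrec {6,+,-4,+,2}
// (L = 6, M = -4, N = 2), the accumulated value is
// Acc(n) = 6 - 4n + n(n-1) = n^2 - 5n + 6, and the quadratic form above is
// 2n^2 - 10n + 12, i.e. the same polynomial multiplied by T = 2.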
APInt A = N;
APInt B = 2 * M - A;
APInt C = 2 * L;
APInt T = APInt(NewWidth, 2);
LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B
<< "x + " << C << ", coeff bw: " << NewWidth
<< ", multiplied by " << T << '\n');
return std::make_tuple(A, B, C, T, BitWidth);
}
/// Helper function to compare optional APInts:
/// (a) if X and Y both exist, return min(X, Y),
/// (b) if neither X nor Y exist, return None,
/// (c) if exactly one of X and Y exists, return that value.
static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
if (X.hasValue() && Y.hasValue()) {
unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
APInt XW = X->sextOrSelf(W);
APInt YW = Y->sextOrSelf(W);
return XW.slt(YW) ? *X : *Y;
}
if (!X.hasValue() && !Y.hasValue())
return None;
return X.hasValue() ? *X : *Y;
}
/// Helper function to truncate an optional APInt to a given BitWidth.
/// When solving addrec-related equations, it is preferable to return a value
/// that has the same bit width as the original addrec's coefficients. If the
/// solution fits in the original bit width, truncate it (except for i1).
/// Returning a value of a different bit width may inhibit some optimizations.
///
/// In general, a solution to a quadratic equation generated from an addrec
/// may require BW+1 bits, where BW is the bit width of the addrec's
/// coefficients. The reason is that the coefficients of the quadratic
/// equation are BW+1 bits wide (to avoid truncation when converting from
/// the addrec to the equation).
static Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) {
if (!X.hasValue())
return None;
unsigned W = X->getBitWidth();
if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth))
return X->trunc(BitWidth);
return X;
}
/// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n
/// iterations. The values L, M, N are assumed to be signed, and they
/// should all have the same bit widths.
/// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW,
/// where BW is the bit width of the addrec's coefficients.
/// If the calculated value is a BW-bit integer (for BW > 1), it will be
/// returned as such, otherwise the bit width of the returned value may
/// be greater than BW.
///
/// This function returns None if
/// (a) the addrec coefficients are not constant, or
/// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases
/// like x^2 = 5, no integer solutions exist, in other cases an integer
/// solution may exist, but SolveQuadraticEquationWrap may fail to find it.
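///
/// For example, for the chrec {6,+,-4,+,2} we have
/// c(n) = 6 - 4n + n(n-1) = (n - 2)(n - 3), so the least n >= 0 with
/// c(n) = 0 is n = 2; the candidate root returned by the solver is verified
/// against the chrec below before it is accepted.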
static Optional<APInt>
SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
APInt A, B, C, M;
unsigned BitWidth;
auto T = GetQuadraticEquation(AddRec);
if (!T.hasValue())
return None;
std::tie(A, B, C, M, BitWidth) = *T;
LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n");
Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1);
if (!X.hasValue())
return None;
ConstantInt *CX = ConstantInt::get(SE.getContext(), *X);
ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE);
if (!V->isZero())
return None;
return TruncIfPossible(X, BitWidth);
}
/// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n
/// iterations. The values M, N are assumed to be signed, and they
/// should all have the same bit widths.
/// Find the least n such that c(n) does not belong to the given range,
/// while c(n-1) does.
///
/// This function returns None if
/// (a) the addrec coefficients are not constant, or
/// (b) SolveQuadraticEquationWrap was unable to find a solution for the
/// bounds of the range.
static Optional<APInt>
SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
const ConstantRange &Range, ScalarEvolution &SE) {
assert(AddRec->getOperand(0)->isZero() &&
"Starting value of addrec should be 0");
LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range "
<< Range << ", addrec " << *AddRec << '\n');
// This case is handled in getNumIterationsInRange. Here we can assume that
// we start in the range.
assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) &&
"Addrec's initial value should be in range");
APInt A, B, C, M;
unsigned BitWidth;
auto T = GetQuadraticEquation(AddRec);
if (!T.hasValue())
return None;
// Be careful about the return value: there can be two reasons for not
// returning an actual number. First, if no solutions to the equations
// were found, and second, if the solutions don't leave the given range.
// The first case means that the actual solution is "unknown", the second
// means that it's known, but not valid. If the solution is unknown, we
// cannot make any conclusions.
// Return a pair: the optional solution and a flag indicating if the
// solution was found.
auto SolveForBoundary = [&](APInt Bound) -> std::pair<Optional<APInt>,bool> {
// Solve for signed overflow and unsigned overflow, pick the lower
// solution.
LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary "
<< Bound << " (before multiplying by " << M << ")\n");
Bound *= M; // The quadratic equation multiplier.
Optional<APInt> SO = None;
if (BitWidth > 1) {
LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
"signed overflow\n");
SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth);
}
LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
"unsigned overflow\n");
Optional<APInt> UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound,
BitWidth+1);
auto LeavesRange = [&] (const APInt &X) {
ConstantInt *C0 = ConstantInt::get(SE.getContext(), X);
ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE);
if (Range.contains(V0->getValue()))
return false;
// X should be at least 1, so X-1 is non-negative.
ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1);
ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE);
if (Range.contains(V1->getValue()))
return true;
return false;
};
// If SolveQuadraticEquationWrap returns None, it means that there can
// be a solution, but the function failed to find it. We cannot treat it
// as "no solution".
if (!SO.hasValue() || !UO.hasValue())
return { None, false };
// Check the smaller value first to see if it leaves the range.
// At this point, both SO and UO must have values.
Optional<APInt> Min = MinOptional(SO, UO);
if (LeavesRange(*Min))
return { Min, true };
Optional<APInt> Max = Min == SO ? UO : SO;
if (LeavesRange(*Max))
return { Max, true };
// Solutions were found, but were eliminated, hence the "true".
return { None, true };
};
std::tie(A, B, C, M, BitWidth) = *T;
// Lower bound is inclusive, subtract 1 to represent the exiting value.
APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
auto SL = SolveForBoundary(Lower);
auto SU = SolveForBoundary(Upper);
// If any of the solutions was unknown, no meaningful conclusions can
// be made.
if (!SL.second || !SU.second)
return None;
// Claim: The correct solution is not some value between Min and Max.
//
// Justification: Assuming that Min and Max are different values, one of
// them is when the first signed overflow happens, the other is when the
// first unsigned overflow happens. Crossing the range boundary is only
// possible via an overflow (treating 0 as a special case of it, modeling
// an overflow as crossing k*2^W for some k).
//
// The interesting case here is when Min was eliminated as an invalid
// solution, but Max was not. The argument is that if there was another
// overflow between Min and Max, it would also have been eliminated if
// it was considered.
//
// For a given boundary, it is possible to have two overflows of the same
// type (signed/unsigned) without having the other type in between: this
// can happen when the vertex of the parabola is between the iterations
// corresponding to the overflows. This is only possible when the two
// overflows cross k*2^W for the same k. In such case, if the second one
// left the range (and was the first one to do so), the first overflow
// would have to enter the range, which would mean that either we had left
// the range before or that we started outside of it. Both of these cases
// are contradictions.
//
// Claim: In the case where SolveForBoundary returns None, the correct
// solution is not some value between the Max for this boundary and the
// Min of the other boundary.
//
// Justification: Assume that we had such Max_A and Min_B corresponding
// to range boundaries A and B and such that Max_A < Min_B. If there was
// a solution between Max_A and Min_B, it would have to be caused by an
// overflow corresponding to either A or B. It cannot correspond to B,
// since Min_B is the first occurrence of such an overflow. If it
// corresponded to A, it would have to be either a signed or an unsigned
// overflow that is larger than both eliminated overflows for A. But
// between the eliminated overflows and this overflow, the values would
// cover the entire value space, thus crossing the other boundary, which
// is a contradiction.
return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth);
}
ScalarEvolution::ExitLimit
ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
bool AllowPredicates) {
// This is only used for loops with an "x != y" exit test. The exit condition
// is now expressed as a single expression, V = x-y. So the exit test is
// effectively V != 0. We know, and take advantage of the fact, that this
// expression is only ever used in a comparison-against-zero context.
SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
if (C->getValue()->isZero()) return C;
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
const SCEVAddRecExpr *AddRec =
dyn_cast<SCEVAddRecExpr>(stripInjectiveFunctions(V));
if (!AddRec && AllowPredicates)
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates);
if (!AddRec || AddRec->getLoop() != L)
return getCouldNotCompute();
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
// the quadratic equation to solve it.
if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
// We can only use this value if the chrec ends up with an exact zero
// value at this index. When solving for "X*X != 5", for example, we
// should not accept a root of 2.
if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
const auto *R = cast<SCEVConstant>(getConstant(S.getValue()));
return ExitLimit(R, R, false, Predicates);
}
return getCouldNotCompute();
}
// Otherwise we can only handle this if it is affine.
if (!AddRec->isAffine())
return getCouldNotCompute();
// If this is an affine expression, the execution count of this branch is
// the minimum unsigned root of the following equation:
//
// Start + Step*N = 0 (mod 2^BW)
//
// equivalent to:
//
// Step*N = -Start (mod 2^BW)
//
// where BW is the common bit width of Start and Step.
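//
// For instance, in 8-bit arithmetic with Start = 8 and Step = -2, the
// equation -2*N = -8 (mod 256) has minimum unsigned solution N = 4, which is
// exactly the number of backedges taken before the value reaches zero.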
// Get the initial value for the loop.
const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
// For now we handle only constant steps.
//
// TODO: Handle a nonconstant Step given AddRec<NUW>. If the
// AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
// to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
// We have not yet seen any such cases.
const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
if (!StepC || StepC->getValue()->isZero())
return getCouldNotCompute();
// For positive steps (counting up until unsigned overflow):
// N = -Start/Step (as unsigned)
// For negative steps (counting down to zero):
// N = Start/-Step
// First compute the unsigned distance from zero in the direction of Step.
bool CountDown = StepC->getAPInt().isNegative();
const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
// Handle unitary steps, which cannot wrap around.
// 1*N = -Start; -1*N = Start (mod 2^BW), so:
// N = Distance (as unsigned)
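// E.g. in 8-bit arithmetic, Step = 1 and Start = 251 gives
// Distance = -251 = 5 (mod 256); the value visits 251, 252, ..., 255 and
// then hits 0, so exactly 5 backedges are taken.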
if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L));
APInt MaxBECountBase = getUnsignedRangeMax(Distance);
if (MaxBECountBase.ult(MaxBECount))
MaxBECount = MaxBECountBase;
// When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated,
// we end up with a loop whose backedge-taken count is n - 1. Detect this
// case, and see if we can improve the bound.
//
// Explicitly handling this here is necessary because getUnsignedRange
// isn't context-sensitive; it doesn't know that we only care about the
// range inside the loop.
const SCEV *Zero = getZero(Distance->getType());
const SCEV *One = getOne(Distance->getType());
const SCEV *DistancePlusOne = getAddExpr(Distance, One);
if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) {
// If Distance + 1 doesn't overflow, we can compute the maximum distance
// as "unsigned_max(Distance + 1) - 1".
ConstantRange CR = getUnsignedRange(DistancePlusOne);
MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1);
}
return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates);
}
// If the condition controls loop exit (the loop exits only if the expression
// is true) and the addition is no-wrap we can use unsigned divide to
// compute the backedge count. In this case, the step may not divide the
// distance, but we don't care because if the condition is "missed" the loop
// will have undefined behavior due to wrapping.
if (ControlsExit && AddRec->hasNoSelfWrap() &&
loopHasNoAbnormalExits(AddRec->getLoop())) {
const SCEV *Exact =
getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
const SCEV *Max = getCouldNotCompute();
if (Exact != getCouldNotCompute()) {
APInt MaxInt = getUnsignedRangeMax(applyLoopGuards(Exact, L));
APInt BaseMaxInt = getUnsignedRangeMax(Exact);
if (BaseMaxInt.ult(MaxInt))
Max = getConstant(BaseMaxInt);
else
Max = getConstant(MaxInt);
}
return ExitLimit(Exact, Max, false, Predicates);
}
// Solve the general equation.
const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
getNegativeSCEV(Start), *this);
const SCEV *M = E == getCouldNotCompute()
? E
: getConstant(getUnsignedRangeMax(E));
return ExitLimit(E, M, false, Predicates);
}
ScalarEvolution::ExitLimit
ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) {
// Loops that look like: while (X == 0) are very strange indeed. We don't
// handle them yet except for the trivial case. This could be expanded in the
// future as needed.
// If the value is a constant, check to see if it is known to be non-zero
// already. If so, the backedge will execute zero times.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
if (!C->getValue()->isZero())
return getZero(C->getType());
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
// We could implement others, but I really doubt anyone writes loops like
// this, and if they did, they would already be constant folded.
return getCouldNotCompute();
}
std::pair<const BasicBlock *, const BasicBlock *>
ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB)
const {
// If the block has a unique predecessor, then there is no path from the
// predecessor to the block that does not go through the direct edge
// from the predecessor to the block.
if (const BasicBlock *Pred = BB->getSinglePredecessor())
return {Pred, BB};
// A loop's header is defined to be a block that dominates the loop.
// If the header has a unique predecessor outside the loop, it must be
// a block that has exactly one successor that can reach the loop.
if (const Loop *L = LI.getLoopFor(BB))
return {L->getLoopPredecessor(), L->getHeader()};
return {nullptr, nullptr};
}
/// SCEV structural equivalence is usually sufficient for testing whether two
/// expressions are equal; however, for the purposes of looking for a condition
/// guarding a loop, it can be useful to be a little more general, since a
/// front-end may have replicated the controlling expression.
static bool HasSameValue(const SCEV *A, const SCEV *B) {
// Quick check to see if they are the same SCEV.
if (A == B) return true;
auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) {
// Not all instructions that are "identical" compute the same value. For
// instance, two distinct alloca instructions allocating the same type are
// identical and do not read memory, yet they compute distinct values.
return A->isIdenticalTo(B) && (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A));
};
// Otherwise, if they're both SCEVUnknown, it's possible that they hold
// two different instructions with the same value. Check for this case.
if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
if (ComputesEqualValues(AI, BI))
return true;
// Otherwise assume they may have a different value.
return false;
}
bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS, const SCEV *&RHS,
unsigned Depth) {
bool Changed = false;
// Simplifies ICMP to trivial true or false by turning it into '0 == 0' or
// '0 != 0'.
auto TrivialCase = [&](bool TriviallyTrue) {
LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
Pred = TriviallyTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
return true;
};
// If we hit the max recursion limit, bail out.
if (Depth >= 3)
return false;
// Canonicalize a constant to the right side.
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
// Check for both operands constant.
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
if (ConstantExpr::getICmp(Pred,
LHSC->getValue(),
RHSC->getValue())->isNullValue())
return TrivialCase(false);
else
return TrivialCase(true);
}
// Otherwise swap the operands to put the constant on the right.
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
Changed = true;
}
// If we're comparing an addrec with a value which is loop-invariant in the
// addrec's loop, put the addrec on the left. Also make a dominance check,
// as both operands could be addrecs loop-invariant in each other's loop.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
const Loop *L = AR->getLoop();
if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
Changed = true;
}
}
// If there's a constant operand, canonicalize comparisons with boundary
// cases, and canonicalize *-or-equal comparisons to regular comparisons.
if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
const APInt &RA = RC->getAPInt();
bool SimplifiedByConstantRange = false;
if (!ICmpInst::isEquality(Pred)) {
ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
if (ExactCR.isFullSet())
return TrivialCase(true);
else if (ExactCR.isEmptySet())
return TrivialCase(false);
APInt NewRHS;
CmpInst::Predicate NewPred;
if (ExactCR.getEquivalentICmp(NewPred, NewRHS) &&
ICmpInst::isEquality(NewPred)) {
// We were able to convert an inequality to an equality.
Pred = NewPred;
RHS = getConstant(NewRHS);
Changed = SimplifiedByConstantRange = true;
}
}
if (!SimplifiedByConstantRange) {
switch (Pred) {
default:
break;
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_NE:
// Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
if (!RA)
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
if (const SCEVMulExpr *ME =
dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
ME->getOperand(0)->isAllOnesValue()) {
RHS = AE->getOperand(1);
LHS = ME->getOperand(1);
Changed = true;
}
break;
// The "Should have been caught earlier!" messages refer to the fact
// that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
// should have fired on the corresponding cases, and canonicalized the
// check to a trivial case.
case ICmpInst::ICMP_UGE:
assert(!RA.isMinValue() && "Should have been caught earlier!");
Pred = ICmpInst::ICMP_UGT;
RHS = getConstant(RA - 1);
Changed = true;
break;
case ICmpInst::ICMP_ULE:
assert(!RA.isMaxValue() && "Should have been caught earlier!");
Pred = ICmpInst::ICMP_ULT;
RHS = getConstant(RA + 1);
Changed = true;
break;
case ICmpInst::ICMP_SGE:
assert(!RA.isMinSignedValue() && "Should have been caught earlier!");
Pred = ICmpInst::ICMP_SGT;
RHS = getConstant(RA - 1);
Changed = true;
break;
case ICmpInst::ICMP_SLE:
assert(!RA.isMaxSignedValue() && "Should have been caught earlier!");
Pred = ICmpInst::ICMP_SLT;
RHS = getConstant(RA + 1);
Changed = true;
break;
}
}
}
// Check for obvious equality.
if (HasSameValue(LHS, RHS)) {
if (ICmpInst::isTrueWhenEqual(Pred))
return TrivialCase(true);
if (ICmpInst::isFalseWhenEqual(Pred))
return TrivialCase(false);
}
// If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
// adding or subtracting 1 from one of the operands.
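// For example, "X s<= Y" becomes "X s< Y+1" whenever the signed range of Y is
// known not to include SINT_MAX (so Y+1 cannot wrap); the remaining cases
// below guard against the analogous boundary values.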
switch (Pred) {
case ICmpInst::ICMP_SLE:
if (!getSignedRangeMax(RHS).isMaxSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
Changed = true;
} else if (!getSignedRangeMin(LHS).isMinSignedValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
Changed = true;
}
break;
case ICmpInst::ICMP_SGE:
if (!getSignedRangeMin(RHS).isMinSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
Changed = true;
} else if (!getSignedRangeMax(LHS).isMaxSignedValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
Changed = true;
}
break;
case ICmpInst::ICMP_ULE:
if (!getUnsignedRangeMax(RHS).isMaxValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
} else if (!getUnsignedRangeMin(LHS).isMinValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
}
break;
case ICmpInst::ICMP_UGE:
if (!getUnsignedRangeMin(RHS).isMinValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
} else if (!getUnsignedRangeMax(LHS).isMaxValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
}
break;
default:
break;
}
// TODO: More simplifications are possible here.
// Recursively simplify until we either hit a recursion limit or nothing
// changes.
if (Changed)
return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
return Changed;
}
bool ScalarEvolution::isKnownNegative(const SCEV *S) {
return getSignedRangeMax(S).isNegative();
}
bool ScalarEvolution::isKnownPositive(const SCEV *S) {
return getSignedRangeMin(S).isStrictlyPositive();
}
bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
return !getSignedRangeMin(S).isNegative();
}
bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
return !getSignedRangeMax(S).isStrictlyPositive();
}
bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
return getUnsignedRangeMin(S) != 0;
}
std::pair<const SCEV *, const SCEV *>
ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) {
// Compute SCEV on entry of loop L.
const SCEV *Start = SCEVInitRewriter::rewrite(S, L, *this);
if (Start == getCouldNotCompute())
return { Start, Start };
// Compute post increment SCEV for loop L.
const SCEV *PostInc = SCEVPostIncRewriter::rewrite(S, L, *this);
assert(PostInc != getCouldNotCompute() && "Unexpected could not compute");
return { Start, PostInc };
}
bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// First collect all loops.
SmallPtrSet<const Loop *, 8> LoopsUsed;
getUsedLoops(LHS, LoopsUsed);
getUsedLoops(RHS, LoopsUsed);
if (LoopsUsed.empty())
return false;
// Domination relationship must be a linear order on collected loops.
#ifndef NDEBUG
for (auto *L1 : LoopsUsed)
for (auto *L2 : LoopsUsed)
assert((DT.dominates(L1->getHeader(), L2->getHeader()) ||
DT.dominates(L2->getHeader(), L1->getHeader())) &&
"Domination relationship is not a linear order");
#endif
const Loop *MDL =
*std::max_element(LoopsUsed.begin(), LoopsUsed.end(),
[&](const Loop *L1, const Loop *L2) {
return DT.properlyDominates(L1->getHeader(), L2->getHeader());
});
// Get init and post increment value for LHS.
auto SplitLHS = SplitIntoInitAndPostInc(MDL, LHS);
// If LHS contains an unknown non-invariant SCEV, bail out.
if (SplitLHS.first == getCouldNotCompute())
return false;
assert (SplitLHS.second != getCouldNotCompute() && "Unexpected CNC");
// Get init and post increment value for RHS.
auto SplitRHS = SplitIntoInitAndPostInc(MDL, RHS);
// If RHS contains an unknown non-invariant SCEV, bail out.
if (SplitRHS.first == getCouldNotCompute())
return false;
assert (SplitRHS.second != getCouldNotCompute() && "Unexpected CNC");
// It is possible that the init SCEV contains an invariant load that does
// not dominate MDL and is not available at the MDL loop entry, so we must
// check for that here.
if (!isAvailableAtLoopEntry(SplitLHS.first, MDL) ||
!isAvailableAtLoopEntry(SplitRHS.first, MDL))
return false;
// The backedge guard check appears to be faster than the entry check, so
// doing it first can short-circuit and speed up the whole estimation in
// some cases.
return isLoopBackedgeGuardedByCond(MDL, Pred, SplitLHS.second,
SplitRHS.second) &&
isLoopEntryGuardedByCond(MDL, Pred, SplitLHS.first, SplitRHS.first);
}
bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// Canonicalize the inputs first.
(void)SimplifyICmpOperands(Pred, LHS, RHS);
if (isKnownViaInduction(Pred, LHS, RHS))
return true;
if (isKnownPredicateViaSplitting(Pred, LHS, RHS))
return true;
// Otherwise see what can be done with some simple reasoning.
return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS);
}
Optional<bool> ScalarEvolution::evaluatePredicate(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
if (isKnownPredicate(Pred, LHS, RHS))
return true;
else if (isKnownPredicate(ICmpInst::getInversePredicate(Pred), LHS, RHS))
return false;
return None;
}
bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const Instruction *Context) {
// TODO: Analyze guards and assumes from Context's block.
return isKnownPredicate(Pred, LHS, RHS) ||
isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS);
}
Optional<bool>
ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
const Instruction *Context) {
Optional<bool> KnownWithoutContext = evaluatePredicate(Pred, LHS, RHS);
if (KnownWithoutContext)
return KnownWithoutContext;
if (isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS))
return true;
else if (isBasicBlockEntryGuardedByCond(Context->getParent(),
ICmpInst::getInversePredicate(Pred),
LHS, RHS))
return false;
return None;
}
bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred,
const SCEVAddRecExpr *LHS,
const SCEV *RHS) {
const Loop *L = LHS->getLoop();
return isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS) &&
isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS);
}
Optional<ScalarEvolution::MonotonicPredicateType>
ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS,
ICmpInst::Predicate Pred) {
auto Result = getMonotonicPredicateTypeImpl(LHS, Pred);
#ifndef NDEBUG
// Verify an invariant: inverting the predicate should turn a monotonically
// increasing change to a monotonically decreasing one, and vice versa.
if (Result) {
auto ResultSwapped =
getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred));
assert(ResultSwapped.hasValue() && "should be able to analyze both!");
assert(ResultSwapped.getValue() != Result.getValue() &&
"monotonicity should flip as we flip the predicate");
}
#endif
return Result;
}
Optional<ScalarEvolution::MonotonicPredicateType>
ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS,
ICmpInst::Predicate Pred) {
// A zero step value for LHS means the induction variable is essentially a
// loop invariant value. We don't really depend on the predicate actually
// flipping from false to true (for increasing predicates, and the other way
// around for decreasing predicates); all we care about is that *if* the
// predicate changes, then it only changes from false to true.
//
// A zero step value in itself is not very useful, but there may be places
// where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
// as general as possible.
// Only handle LE/LT/GE/GT predicates.
if (!ICmpInst::isRelational(Pred))
return None;
bool IsGreater = ICmpInst::isGE(Pred) || ICmpInst::isGT(Pred);
assert((IsGreater || ICmpInst::isLE(Pred) || ICmpInst::isLT(Pred)) &&
"Should be greater or less!");
// Check that AR does not wrap.
if (ICmpInst::isUnsigned(Pred)) {
if (!LHS->hasNoUnsignedWrap())
return None;
return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
} else {
assert(ICmpInst::isSigned(Pred) &&
"Relational predicate is either signed or unsigned!");
if (!LHS->hasNoSignedWrap())
return None;
const SCEV *Step = LHS->getStepRecurrence(*this);
if (isKnownNonNegative(Step))
return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
if (isKnownNonPositive(Step))
return !IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
return None;
}
}
Optional<ScalarEvolution::LoopInvariantPredicate>
ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const Loop *L) {
// If there is a loop-invariant, force it into the RHS, otherwise bail out.
if (!isLoopInvariant(RHS, L)) {
if (!isLoopInvariant(LHS, L))
return None;
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
if (!ArLHS || ArLHS->getLoop() != L)
return None;
auto MonotonicType = getMonotonicPredicateType(ArLHS, Pred);
if (!MonotonicType)
return None;
// If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
// true as the loop iterates, and the backedge is control dependent on
// "ArLHS `Pred` RHS" == true then we can reason as follows:
//
// * if the predicate was false in the first iteration then the predicate
// is never evaluated again, since the loop exits without taking the
// backedge.
// * if the predicate was true in the first iteration then it will
// continue to be true for all future iterations since it is
// monotonically increasing.
//
// For both the above possibilities, we can replace the loop varying
// predicate with its value on the first iteration of the loop (which is
// loop invariant).
//
// A similar reasoning applies for a monotonically decreasing predicate, by
// replacing true with false and false with true in the above two bullets.
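//
// As a concrete instance: for ArLHS = {1,+,1}<nsw> and the predicate
// "s> 0", the predicate is monotonically increasing (it can only go from
// false to true as the IV grows), so if every backedge-taking iteration
// satisfies "{1,+,1} s> 0", the loop-variant check may be replaced by its
// value at the start, "1 s> 0", which is loop invariant.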
bool Increasing = *MonotonicType == ScalarEvolution::MonotonicallyIncreasing;
auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);
if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
return None;
return ScalarEvolution::LoopInvariantPredicate(Pred, ArLHS->getStart(), RHS);
}
Optional<ScalarEvolution::LoopInvariantPredicate>
ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
const Instruction *Context, const SCEV *MaxIter) {
// Try to prove the following set of facts:
// - The predicate is monotonic in the iteration space.
// - If the check does not fail on the 1st iteration:
// - No overflow will happen during first MaxIter iterations;
// - It will not fail on the MaxIter'th iteration.
// If the check does fail on the 1st iteration, we leave the loop and no
// other checks matter.
// If there is a loop-invariant, force it into the RHS, otherwise bail out.
if (!isLoopInvariant(RHS, L)) {
if (!isLoopInvariant(LHS, L))
return None;
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
auto *AR = dyn_cast<SCEVAddRecExpr>(LHS);
if (!AR || AR->getLoop() != L)
return None;
// The predicate must be relational (i.e. <, <=, >=, >).
if (!ICmpInst::isRelational(Pred))
return None;
// TODO: Support steps other than +/- 1.
const SCEV *Step = AR->getStepRecurrence(*this);
auto *One = getOne(Step->getType());
auto *MinusOne = getNegativeSCEV(One);
if (Step != One && Step != MinusOne)
return None;
// A type mismatch here means that MaxIter is potentially larger than the max
// unsigned value of the start type, which means we cannot prove no-wrap for
// the indvar.
if (AR->getType() != MaxIter->getType())
return None;
// Value of IV on suggested last iteration.
const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this);
// Does it still meet the requirement?
if (!isLoopBackedgeGuardedByCond(L, Pred, Last, RHS))
return None;
// Because the step is +/- 1 and MaxIter has the same type as Start (i.e. it
// does not exceed the max unsigned value of this type), this effectively proves
// that there is no wrap during the iteration. To prove that there is no
// signed/unsigned wrap, we need to check that
// Start <= Last for step = 1 or Start >= Last for step = -1.
ICmpInst::Predicate NoOverflowPred =
CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
if (Step == MinusOne)
NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred);
const SCEV *Start = AR->getStart();
if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context))
return None;
// Everything is fine.
return ScalarEvolution::LoopInvariantPredicate(Pred, Start, RHS);
}
bool ScalarEvolution::isKnownPredicateViaConstantRanges(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
if (HasSameValue(LHS, RHS))
return ICmpInst::isTrueWhenEqual(Pred);
// This code is split out from isKnownPredicate because it is called from
// within isLoopEntryGuardedByCond.
auto CheckRanges = [&](const ConstantRange &RangeLHS,
const ConstantRange &RangeRHS) {
return RangeLHS.icmp(Pred, RangeRHS);
};
// The check at the top of the function catches the case where the values are
// known to be equal.
if (Pred == CmpInst::ICMP_EQ)
return false;
if (Pred == CmpInst::ICMP_NE) {
if (CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) ||
CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)))
return true;
auto *Diff = getMinusSCEV(LHS, RHS);
return !isa<SCEVCouldNotCompute>(Diff) && isKnownNonZero(Diff);
}
if (CmpInst::isSigned(Pred))
return CheckRanges(getSignedRange(LHS), getSignedRange(RHS));
return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS));
}
bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
// Match X to (A + C1)<ExpectedFlags> and Y to (A + C2)<ExpectedFlags>, where
// C1 and C2 are constant integers. If either X or Y is not an add expression,
// it is treated as X + 0 or Y + 0 respectively. C1 and C2 are returned via
// OutC1 and OutC2.
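//
// For example, X = (%a + 3)<nsw> and Y = (%a + 7)<nsw> match with C1 = 3 and
// C2 = 7; the ICMP_SLE case below then concludes X s<= Y because 3 s<= 7 and
// neither add can wrap in the signed sense.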
auto MatchBinaryAddToConst = [this](const SCEV *X, const SCEV *Y,
APInt &OutC1, APInt &OutC2,
SCEV::NoWrapFlags ExpectedFlags) {
const SCEV *XNonConstOp, *XConstOp;
const SCEV *YNonConstOp, *YConstOp;
SCEV::NoWrapFlags XFlagsPresent;
SCEV::NoWrapFlags YFlagsPresent;
if (!splitBinaryAdd(X, XConstOp, XNonConstOp, XFlagsPresent)) {
XConstOp = getZero(X->getType());
XNonConstOp = X;
XFlagsPresent = ExpectedFlags;
}
if (!isa<SCEVConstant>(XConstOp) ||
(XFlagsPresent & ExpectedFlags) != ExpectedFlags)
return false;
if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent)) {
YConstOp = getZero(Y->getType());
YNonConstOp = Y;
YFlagsPresent = ExpectedFlags;
}
if (!isa<SCEVConstant>(YConstOp) ||
(YFlagsPresent & ExpectedFlags) != ExpectedFlags)
return false;
if (YNonConstOp != XNonConstOp)
return false;
OutC1 = cast<SCEVConstant>(XConstOp)->getAPInt();
OutC2 = cast<SCEVConstant>(YConstOp)->getAPInt();
return true;
};
APInt C1;
APInt C2;
switch (Pred) {
default:
break;
case ICmpInst::ICMP_SGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE:
// (X + C1)<nsw> s<= (X + C2)<nsw> if C1 s<= C2.
if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNSW) && C1.sle(C2))
return true;
break;
case ICmpInst::ICMP_SGT:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLT:
// (X + C1)<nsw> s< (X + C2)<nsw> if C1 s< C2.
if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNSW) && C1.slt(C2))
return true;
break;
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE:
// (X + C1)<nuw> u<= (X + C2)<nuw> for C1 u<= C2.
if (MatchBinaryAddToConst(RHS, LHS, C2, C1, SCEV::FlagNUW) && C1.ule(C2))
return true;
break;
case ICmpInst::ICMP_UGT:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULT:
// (X + C1)<nuw> u< (X + C2)<nuw> if C1 u< C2.
if (MatchBinaryAddToConst(RHS, LHS, C2, C1, SCEV::FlagNUW) && C1.ult(C2))
return true;
break;
}
return false;
}
bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate)
return false;
// Allowing an arbitrary number of activations of isKnownPredicateViaSplitting
// on the stack can result in exponential time complexity.
SaveAndRestore<bool> Restore(ProvingSplitPredicate, true);
// If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L
//
// To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use
// isKnownPredicate. isKnownPredicate is more powerful, but also more
// expensive; and using isKnownNonNegative(RHS) is sufficient for most of the
// interesting cases seen in practice. We can consider "upgrading" L >= 0 to
// use isKnownPredicate later if needed.
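//
// The equivalence holds because a non-negative L is u< 2^(BW-1), so any I
// with its sign bit set is u>= 2^(BW-1) u> L and thus never u< L; hence
// "I u< L" forces I >= 0, and on non-negative values u< and s< coincide.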
return isKnownNonNegative(RHS) &&
isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) &&
isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
}
bool ScalarEvolution::isImpliedViaGuard(const BasicBlock *BB,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// No need to even try if we know the module has no guards.
if (!HasGuards)
return false;
return any_of(*BB, [&](const Instruction &I) {
using namespace llvm::PatternMatch;
Value *Condition;
return match(&I, m_Intrinsic<Intrinsic::experimental_guard>(
m_Value(Condition))) &&
isImpliedCond(Pred, LHS, RHS, Condition, false);
});
}
/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS. This is used to
/// eliminate casts.
bool
ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// Interpret a null as meaning no loop, where there is obviously no guard
// (interprocedural conditions notwithstanding).
if (!L) return true;
if (VerifyIR)
assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
"This cannot be done on broken IR!");
if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
return true;
BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return false;
BranchInst *LoopContinuePredicate =
dyn_cast<BranchInst>(Latch->getTerminator());
if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
isImpliedCond(Pred, LHS, RHS,
LoopContinuePredicate->getCondition(),
LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
return true;
// We don't want more than one activation of the following loops on the stack
// -- that can lead to O(n!) time complexity.
if (WalkingBEDominatingConds)
return false;
SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);
// See if we can exploit a trip count to prove the predicate.
const auto &BETakenInfo = getBackedgeTakenInfo(L);
const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
if (LatchBECount != getCouldNotCompute()) {
// We know that Latch branches back to the loop header exactly
// LatchBECount times. This means the backedge condition at Latch is
// equivalent to "{0,+,1} u< LatchBECount".
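// For example, if LatchBECount is 10, then on every iteration that takes the
// backedge the canonical counter {0,+,1} lies in [0, 10), so proving
// "LHS Pred RHS" from "{0,+,1} u< 10" (which is what the isImpliedCond call
// below attempts) establishes the predicate on every backedge.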
Type *Ty = LatchBECount->getType();
auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
const SCEV *LoopCounter =
getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
LatchBECount))
return true;
}
// Check conditions due to any @llvm.assume intrinsics.
for (auto &AssumeVH : AC.assumptions()) {
if (!AssumeVH)
continue;
auto *CI = cast<CallInst>(AssumeVH);
if (!DT.dominates(CI, Latch->getTerminator()))
continue;
if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
return true;
}
// If the loop is not reachable from the entry block, we risk running into an
// infinite loop as we walk up into the dom tree. These loops do not matter
// anyway, so we just return a conservative answer when we see them.
if (!DT.isReachableFromEntry(L->getHeader()))
return false;
if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
return true;
for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
DTN != HeaderDTN; DTN = DTN->getIDom()) {
assert(DTN && "should reach the loop header before reaching the root!");
BasicBlock *BB = DTN->getBlock();
if (isImpliedViaGuard(BB, Pred, LHS, RHS))
return true;
BasicBlock *PBB = BB->getSinglePredecessor();
if (!PBB)
continue;
BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
if (!ContinuePredicate || !ContinuePredicate->isConditional())
continue;
Value *Condition = ContinuePredicate->getCondition();
// If we have an edge `E` within the loop body that dominates the only
// latch, the condition guarding `E` also guards the backedge. This
// reasoning works only for loops with a single latch.
BasicBlockEdge DominatingEdge(PBB, BB);
if (DominatingEdge.isSingleEdge()) {
// We're constructively (and conservatively) enumerating edges within the
// loop body that dominate the latch. The dominator tree better agree
// with us on this:
assert(DT.dominates(DominatingEdge, Latch) && "should be!");
if (isImpliedCond(Pred, LHS, RHS, Condition,
BB != ContinuePredicate->getSuccessor(0)))
return true;
}
}
return false;
}
bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
if (VerifyIR)
assert(!verifyFunction(*BB->getParent(), &dbgs()) &&
"This cannot be done on broken IR!");
// If we cannot prove strict comparison (e.g. a > b), maybe we can prove
// the facts (a >= b && a != b) separately. A typical situation is when the
// non-strict comparison is known from ranges and non-equality is known from
// dominating predicates. If we are proving strict comparison, we always try
// to prove non-equality and non-strict comparison separately.
auto NonStrictPredicate = ICmpInst::getNonStrictPredicate(Pred);
const bool ProvingStrictComparison = (Pred != NonStrictPredicate);
bool ProvedNonStrictComparison = false;
bool ProvedNonEquality = false;
auto SplitAndProve =
[&](std::function<bool(ICmpInst::Predicate)> Fn) -> bool {
if (!ProvedNonStrictComparison)
ProvedNonStrictComparison = Fn(NonStrictPredicate);
if (!ProvedNonEquality)
ProvedNonEquality = Fn(ICmpInst::ICMP_NE);
if (ProvedNonStrictComparison && ProvedNonEquality)
return true;
return false;
};
if (ProvingStrictComparison) {
auto ProofFn = [&](ICmpInst::Predicate P) {
return isKnownViaNonRecursiveReasoning(P, LHS, RHS);
};
if (SplitAndProve(ProofFn))
return true;
}
// Try to prove (Pred, LHS, RHS) using isImpliedViaGuard.
auto ProveViaGuard = [&](const BasicBlock *Block) {
if (isImpliedViaGuard(Block, Pred, LHS, RHS))
return true;
if (ProvingStrictComparison) {
auto ProofFn = [&](ICmpInst::Predicate P) {
return isImpliedViaGuard(Block, P, LHS, RHS);
};
if (SplitAndProve(ProofFn))
return true;
}
return false;
};
// Try to prove (Pred, LHS, RHS) using isImpliedCond.
auto ProveViaCond = [&](const Value *Condition, bool Inverse) {
const Instruction *Context = &BB->front();
if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context))
return true;
if (ProvingStrictComparison) {
auto ProofFn = [&](ICmpInst::Predicate P) {
return isImpliedCond(P, LHS, RHS, Condition, Inverse, Context);
};
if (SplitAndProve(ProofFn))
return true;
}
return false;
};
// Starting at the block's predecessor, climb up the predecessor chain as long
// as we can find predecessors that have unique successors leading to the
// original block.
const Loop *ContainingLoop = LI.getLoopFor(BB);
const BasicBlock *PredBB;
if (ContainingLoop && ContainingLoop->getHeader() == BB)
PredBB = ContainingLoop->getLoopPredecessor();
else
PredBB = BB->getSinglePredecessor();
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(PredBB, BB);
Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
if (ProveViaGuard(Pair.first))
return true;
const BranchInst *LoopEntryPredicate =
dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!LoopEntryPredicate ||
LoopEntryPredicate->isUnconditional())
continue;
if (ProveViaCond(LoopEntryPredicate->getCondition(),
LoopEntryPredicate->getSuccessor(0) != Pair.second))
return true;
}
// Check conditions due to any @llvm.assume intrinsics.
for (auto &AssumeVH : AC.assumptions()) {
if (!AssumeVH)
continue;
auto *CI = cast<CallInst>(AssumeVH);
if (!DT.dominates(CI, BB))
continue;
if (ProveViaCond(CI->getArgOperand(0), false))
return true;
}
return false;
}
bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
// Interpret a null as meaning no loop, where there is obviously no guard
// (interprocedural conditions notwithstanding).
if (!L)
return false;
// Both LHS and RHS must be available at loop entry.
assert(isAvailableAtLoopEntry(LHS, L) &&
"LHS is not available at Loop Entry");
assert(isAvailableAtLoopEntry(RHS, L) &&
"RHS is not available at Loop Entry");
if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
return true;
return isBasicBlockEntryGuardedByCond(L->getHeader(), Pred, LHS, RHS);
}
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
const Value *FoundCondValue, bool Inverse,
const Instruction *Context) {
// A false condition implies anything, so do not bother analyzing it further.
if (FoundCondValue ==
ConstantInt::getBool(FoundCondValue->getContext(), Inverse))
return true;
if (!PendingLoopPredicates.insert(FoundCondValue).second)
return false;
auto ClearOnExit =
make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });
// Recursively handle And and Or conditions.
const Value *Op0, *Op1;
if (match(FoundCondValue, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
if (!Inverse)
return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, Context) ||
isImpliedCond(Pred, LHS, RHS, Op1, Inverse, Context);
} else if (match(FoundCondValue, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
if (Inverse)
return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, Context) ||
isImpliedCond(Pred, LHS, RHS, Op1, Inverse, Context);
}
const ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
if (!ICI) return false;
// We have found a conditional branch that dominates the loop or controls
// the loop latch. Check whether it is the comparison we are looking for.
ICmpInst::Predicate FoundPred;
if (Inverse)
FoundPred = ICI->getInversePredicate();
else
FoundPred = ICI->getPredicate();
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context);
}
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
ICmpInst::Predicate FoundPred,
const SCEV *FoundLHS, const SCEV *FoundRHS,
const Instruction *Context) {
// Balance the types.
if (getTypeSizeInBits(LHS->getType()) <
getTypeSizeInBits(FoundLHS->getType())) {
// For unsigned and equality predicates, try to prove that both found
// operands fit into narrow unsigned range. If so, try to prove facts in
// narrow types.
if (!CmpInst::isSigned(FoundPred) && !FoundLHS->getType()->isPointerTy()) {
auto *NarrowType = LHS->getType();
auto *WideType = FoundLHS->getType();
auto BitWidth = getTypeSizeInBits(NarrowType);
const SCEV *MaxValue = getZeroExtendExpr(
getConstant(APInt::getMaxValue(BitWidth)), WideType);
if (isKnownPredicate(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) &&
isKnownPredicate(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) {
const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType);
const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType);
if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS,
TruncFoundRHS, Context))
return true;
}
}
if (LHS->getType()->isPointerTy())
return false;
if (CmpInst::isSigned(Pred)) {
LHS = getSignExtendExpr(LHS, FoundLHS->getType());
RHS = getSignExtendExpr(RHS, FoundLHS->getType());
} else {
LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
}
} else if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(FoundLHS->getType())) {
if (FoundLHS->getType()->isPointerTy())
return false;
if (CmpInst::isSigned(FoundPred)) {
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
} else {
FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
}
}
return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS,
FoundRHS, Context);
}
bool ScalarEvolution::isImpliedCondBalancedTypes(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS,
const Instruction *Context) {
assert(getTypeSizeInBits(LHS->getType()) ==
getTypeSizeInBits(FoundLHS->getType()) &&
"Types should be balanced!");
// Canonicalize the query to match the way instcombine will have
// canonicalized the comparison.
if (SimplifyICmpOperands(Pred, LHS, RHS))
if (LHS == RHS)
return CmpInst::isTrueWhenEqual(Pred);
if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
if (FoundLHS == FoundRHS)
return CmpInst::isFalseWhenEqual(FoundPred);
// Check to see if we can make the LHS or RHS match.
if (LHS == FoundRHS || RHS == FoundLHS) {
if (isa<SCEVConstant>(RHS)) {
std::swap(FoundLHS, FoundRHS);
FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
} else {
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
}
// Check whether the found predicate is the same as the desired predicate.
if (FoundPred == Pred)
return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context);
// Check whether swapping the found predicate makes it the same as the
// desired predicate.
if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
// We can write the implication
// 0. LHS Pred RHS <- FoundLHS SwapPred FoundRHS
// using one of the following ways:
// 1. LHS Pred RHS <- FoundRHS Pred FoundLHS
// 2. RHS SwapPred LHS <- FoundLHS SwapPred FoundRHS
// 3. LHS Pred RHS <- ~FoundLHS Pred ~FoundRHS
// 4. ~LHS SwapPred ~RHS <- FoundLHS SwapPred FoundRHS
// Forms 1. and 2. require swapping the operands of one condition. Don't
// do this if it would break canonical constant/addrec ordering.
if (!isa<SCEVConstant>(RHS) && !isa<SCEVAddRecExpr>(LHS))
return isImpliedCondOperands(FoundPred, RHS, LHS, FoundLHS, FoundRHS,
Context);
if (!isa<SCEVConstant>(FoundRHS) && !isa<SCEVAddRecExpr>(FoundLHS))
return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context);
// Don't try to getNotSCEV pointers.
if (LHS->getType()->isPointerTy() || FoundLHS->getType()->isPointerTy())
return false;
// There's no clear preference between forms 3. and 4.; try both.
return isImpliedCondOperands(FoundPred, getNotSCEV(LHS), getNotSCEV(RHS),
FoundLHS, FoundRHS, Context) ||
isImpliedCondOperands(Pred, LHS, RHS, getNotSCEV(FoundLHS),
getNotSCEV(FoundRHS), Context);
}
// Unsigned comparison is the same as signed comparison when both the operands
// are non-negative.
if (CmpInst::isUnsigned(FoundPred) &&
CmpInst::getSignedPredicate(FoundPred) == Pred &&
isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context);
// Check if we can make progress by sharpening ranges.
if (FoundPred == ICmpInst::ICMP_NE &&
(isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
const SCEVConstant *C = nullptr;
const SCEV *V = nullptr;
if (isa<SCEVConstant>(FoundLHS)) {
C = cast<SCEVConstant>(FoundLHS);
V = FoundRHS;
} else {
C = cast<SCEVConstant>(FoundRHS);
V = FoundLHS;
}
// The guarding predicate tells us that C != V. If the known range
// of V is [C, t), we can sharpen the range to [C + 1, t). The
// range we consider has to correspond to the same signedness as the
// predicate we're interested in folding.
APInt Min = ICmpInst::isSigned(Pred) ?
getSignedRangeMin(V) : getUnsignedRangeMin(V);
if (Min == C->getAPInt()) {
// Given (V >= Min && V != Min) we conclude V >= (Min + 1).
// This is true even if (Min + 1) wraps around -- in case of
// wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
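// For instance (purely illustrative numbers): if Pred is ICMP_UGE, the known
// unsigned range of V starts at Min = 7, and the guard gives V != 7, then
// V u>= 8 (= SharperMin), and it suffices to show V u>= 8 implies
// LHS u>= RHS.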
APInt SharperMin = Min + 1;
switch (Pred) {
case ICmpInst::ICMP_SGE:
case ICmpInst::ICMP_UGE:
// We know V `Pred` SharperMin. If this implies LHS `Pred`
// RHS, we're done.
if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin),
Context))
return true;
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_UGT:
// We know from the range information that (V `Pred` Min ||
// V == Min). We know from the guarding condition that !(V
// == Min). This gives us
//
// V `Pred` Min || V == Min && !(V == Min)
// => V `Pred` Min
//
// If V `Pred` Min implies LHS `Pred` RHS, we're done.
if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min),
Context))
return true;
break;
// `LHS < RHS` and `LHS <= RHS` are handled in the same way as `RHS > LHS`
// and `RHS >= LHS` respectively.
case ICmpInst::ICMP_SLE:
case ICmpInst::ICMP_ULE:
if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
LHS, V, getConstant(SharperMin), Context))
return true;
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT:
if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
LHS, V, getConstant(Min), Context))
return true;
break;
default:
// No change
break;
}
}
}
// Check whether the actual condition is beyond sufficient.
if (FoundPred == ICmpInst::ICMP_EQ)
if (ICmpInst::isTrueWhenEqual(Pred))
if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context))
return true;
if (Pred == ICmpInst::ICMP_NE)
if (!ICmpInst::isTrueWhenEqual(FoundPred))
if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS,
Context))
return true;
// Otherwise assume the worst.
return false;
}
bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
const SCEV *&L, const SCEV *&R,
SCEV::NoWrapFlags &Flags) {
const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
if (!AE || AE->getNumOperands() != 2)
return false;
L = AE->getOperand(0);
R = AE->getOperand(1);
Flags = AE->getNoWrapFlags();
return true;
}
Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
const SCEV *Less) {
// We avoid subtracting expressions here because this function is usually
// fairly deep in the call stack (i.e. is called many times).
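// For illustration (hypothetical operands): given More = (10 + %x) and
// Less = (3 + %x), the checks below recognize the shared operand %x and
// return the constant difference 10 - 3 = 7 without forming a subtraction.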
// X - X = 0.
if (More == Less)
return APInt(getTypeSizeInBits(More->getType()), 0);
if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
const auto *LAR = cast<SCEVAddRecExpr>(Less);
const auto *MAR = cast<SCEVAddRecExpr>(More);
if (LAR->getLoop() != MAR->getLoop())
return None;
// We look at affine expressions only; not for correctness but to keep
// getStepRecurrence cheap.
if (!LAR->isAffine() || !MAR->isAffine())
return None;
if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
return None;
Less = LAR->getStart();
More = MAR->getStart();
// fall through
}
if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
const auto &M = cast<SCEVConstant>(More)->getAPInt();
const auto &L = cast<SCEVConstant>(Less)->getAPInt();
return M - L;
}
SCEV::NoWrapFlags Flags;
const SCEV *LLess = nullptr, *RLess = nullptr;
const SCEV *LMore = nullptr, *RMore = nullptr;
const SCEVConstant *C1 = nullptr, *C2 = nullptr;
// Compare (X + C1) vs X.
if (splitBinaryAdd(Less, LLess, RLess, Flags))
if ((C1 = dyn_cast<SCEVConstant>(LLess)))
if (RLess == More)
return -(C1->getAPInt());
// Compare X vs (X + C2).
if (splitBinaryAdd(More, LMore, RMore, Flags))
if ((C2 = dyn_cast<SCEVConstant>(LMore)))
if (RMore == Less)
return C2->getAPInt();
// Compare (X + C1) vs (X + C2).
if (C1 && C2 && RLess == RMore)
return C2->getAPInt() - C1->getAPInt();
return None;
}
bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) {
// Try to recognize the following pattern:
//
// FoundRHS = ...
// ...
// loop:
// FoundLHS = {Start,+,W}
// context_bb: // Basic block from the same loop
// known(Pred, FoundLHS, FoundRHS)
//
// If some predicate is known in the context of a loop, it is also known on
// each iteration of this loop, including the first iteration. Therefore, in
// this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to
// prove the original pred using this fact.
if (!Context)
return false;
const BasicBlock *ContextBB = Context->getParent();
// Make sure AR varies in the context block.
if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundLHS)) {
const Loop *L = AR->getLoop();
// Make sure that the context belongs to the loop and executes on the 1st
// iteration (if it ever executes at all).
if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch()))
return false;
if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop()))
return false;
return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS);
}
if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundRHS)) {
const Loop *L = AR->getLoop();
// Make sure that the context belongs to the loop and executes on the 1st
// iteration (if it ever executes at all).
if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch()))
return false;
if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop()))
return false;
return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart());
}
return false;
}
bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS, const SCEV *FoundRHS) {
if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
return false;
const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
if (!AddRecLHS)
return false;
const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
if (!AddRecFoundLHS)
return false;
// We'd like to let SCEV reason about control dependencies, so we constrain
// both the inequalities to be about add recurrences on the same loop. This
// way we can use isLoopEntryGuardedByCond later.
const Loop *L = AddRecFoundLHS->getLoop();
if (L != AddRecLHS->getLoop())
return false;
// FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1)
//
// FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
// ... (2)
//
// Informal proof for (2), assuming (1) [*]:
//
// We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
//
// Then
//
// FoundLHS s< FoundRHS s< INT_MIN - C
// <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ]
// <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ]
// <=> (FoundLHS + INT_MIN + C + INT_MIN) s<
// (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ]
// <=> FoundLHS + C s< FoundRHS + C
//
// [*]: (1) can be proved by ruling out overflow.
//
// [**]: This can be proved by analyzing all the four possibilities:
// (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
// (A s>= 0, B s>= 0).
//
// Note:
// Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
// will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS
// = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS
// s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is
// neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
// C)".
Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS);
Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS);
if (!LDiff || !RDiff || *LDiff != *RDiff)
return false;
if (LDiff->isMinValue())
return true;
APInt FoundRHSLimit;
if (Pred == CmpInst::ICMP_ULT) {
FoundRHSLimit = -(*RDiff);
} else {
assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
FoundRHSLimit =
    APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff;
}
// Try to prove (1) or (2), as needed.
return isAvailableAtLoopEntry(FoundRHS, L) &&
isLoopEntryGuardedByCond(L, Pred, FoundRHS,
getConstant(FoundRHSLimit));
}
bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS, unsigned Depth) {
const PHINode *LPhi = nullptr, *RPhi = nullptr;
auto ClearOnExit = make_scope_exit([&]() {
if (LPhi) {
bool Erased = PendingMerges.erase(LPhi);
assert(Erased && "Failed to erase LPhi!");
(void)Erased;
}
if (RPhi) {
bool Erased = PendingMerges.erase(RPhi);
assert(Erased && "Failed to erase RPhi!");
(void)Erased;
}
});
// Find the respective Phis and check that they are not already pending.
if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS))
if (auto *Phi = dyn_cast<PHINode>(LU->getValue())) {
if (!PendingMerges.insert(Phi).second)
return false;
LPhi = Phi;
}
if (const SCEVUnknown *RU = dyn_cast<SCEVUnknown>(RHS))
if (auto *Phi = dyn_cast<PHINode>(RU->getValue())) {
// If we detect a loop of Phi nodes being processed by this method, for
// example:
//
// %a = phi i32 [ %some1, %preheader ], [ %b, %latch ]
// %b = phi i32 [ %some2, %preheader ], [ %a, %latch ]
//
// we don't want to deal with a case that complex, so we return the
// conservative answer false.
if (!PendingMerges.insert(Phi).second)
return false;
RPhi = Phi;
}
// If none of LHS, RHS is a Phi, nothing to do here.
if (!LPhi && !RPhi)
return false;
// If there is a SCEVUnknown Phi we are interested in, make it left.
if (!LPhi) {
std::swap(LHS, RHS);
std::swap(FoundLHS, FoundRHS);
std::swap(LPhi, RPhi);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
assert(LPhi && "LPhi should definitely be a SCEVUnknown Phi!");
const BasicBlock *LBB = LPhi->getParent();
const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) {
return isKnownViaNonRecursiveReasoning(Pred, S1, S2) ||
isImpliedCondOperandsViaRanges(Pred, S1, S2, FoundLHS, FoundRHS) ||
isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth);
};
if (RPhi && RPhi->getParent() == LBB) {
// Case one: RHS is also a SCEVUnknown Phi from the same basic block.
// If we compare two Phis from the same block, and for each predecessor block
// the predicate is true for the incoming values from that block, then the
// predicate is also true for the Phis.
for (const BasicBlock *IncBB : predecessors(LBB)) {
const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB));
if (!ProvedEasily(L, R))
return false;
}
} else if (RAR && RAR->getLoop()->getHeader() == LBB) {
// Case two: RHS is an AddRec whose loop header is the same basic block. This
// means that there is a loop which has both AddRec and Unknown PHIs; for such
// a loop we can compare the incoming values of the AddRec from above the loop
// and from the latch with the respective incoming values of LPhi.
// TODO: Generalize to handle loops with many inputs in a header.
if (LPhi->getNumIncomingValues() != 2) return false;
auto *RLoop = RAR->getLoop();
auto *Predecessor = RLoop->getLoopPredecessor();
assert(Predecessor && "Loop with AddRec with no predecessor?");
const SCEV *L1 = getSCEV(LPhi->getIncomingValueForBlock(Predecessor));
if (!ProvedEasily(L1, RAR->getStart()))
return false;
auto *Latch = RLoop->getLoopLatch();
assert(Latch && "Loop with AddRec with no latch?");
const SCEV *L2 = getSCEV(LPhi->getIncomingValueForBlock(Latch));
if (!ProvedEasily(L2, RAR->getPostIncExpr(*this)))
return false;
} else {
// In all other cases, go over the inputs of LHS and compare each of them to
// RHS; the predicate is true for (LHS, RHS) if it is true for all such pairs.
// At this point RHS is either a non-Phi, or it is a Phi from some block
// different from LBB.
for (const BasicBlock *IncBB : predecessors(LBB)) {
// Check that RHS is available in this block.
if (!dominates(RHS, IncBB))
return false;
const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
// Make sure L does not refer to a value from a potentially previous
// iteration of a loop.
if (!properlyDominates(L, IncBB))
return false;
if (!ProvedEasily(L, RHS))
return false;
}
}
return true;
}
bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS,
const Instruction *Context) {
if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS,
Context))
return true;
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
FoundLHS, FoundRHS);
}
/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values?
template <typename MinMaxExprType>
static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr,
const SCEV *Candidate) {
const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr);
if (!MinMaxExpr)
return false;
return is_contained(MinMaxExpr->operands(), Candidate);
}
static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// If both sides are affine addrecs for the same loop, with equal
// steps, and we know the recurrences don't wrap, then we only
// need to check the predicate on the starting values.
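// For example (illustrative recurrences): for two nsw affine addrecs
// {1,+,2} and {5,+,2} over the same loop, 1 s< 5 at the start implies
// {1,+,2} s< {5,+,2} on every iteration, since both advance by the same step.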
if (!ICmpInst::isRelational(Pred))
return false;
const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
if (!LAR)
return false;
const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
if (!RAR)
return false;
if (LAR->getLoop() != RAR->getLoop())
return false;
if (!LAR->isAffine() || !RAR->isAffine())
return false;
if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
return false;
SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
SCEV::FlagNSW : SCEV::FlagNUW;
if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
return false;
return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
}
/// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max
/// expression?
static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
switch (Pred) {
default:
return false;
case ICmpInst::ICMP_SGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE:
return
// min(A, ...) <= A
IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) ||
// A <= max(A, ...)
IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE:
return
// min(A, ...) <= A
IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) ||
// A <= max(A, ...)
IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
}
llvm_unreachable("covered switch fell through?!");
}
bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS,
unsigned Depth) {
assert(getTypeSizeInBits(LHS->getType()) ==
getTypeSizeInBits(RHS->getType()) &&
"LHS and RHS have different sizes?");
assert(getTypeSizeInBits(FoundLHS->getType()) ==
getTypeSizeInBits(FoundRHS->getType()) &&
"FoundLHS and FoundRHS have different sizes?");
// We want to avoid hurting the compile time with analysis of too big trees.
if (Depth > MaxSCEVOperationsImplicationDepth)
return false;
// We only want to work with GT comparison so far.
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) {
Pred = CmpInst::getSwappedPredicate(Pred);
std::swap(LHS, RHS);
std::swap(FoundLHS, FoundRHS);
}
// For unsigned, try to reduce it to corresponding signed comparison.
if (Pred == ICmpInst::ICMP_UGT)
// We can replace unsigned predicate with its signed counterpart if all
// involved values are non-negative.
// TODO: We could have better support for unsigned.
if (isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) {
// Knowing that both FoundLHS and FoundRHS are non-negative, and knowing
// FoundLHS >u FoundRHS, we also know that FoundLHS >s FoundRHS. Let us
// use this fact to prove that LHS and RHS are non-negative.
const SCEV *MinusOne = getMinusOne(LHS->getType());
if (isImpliedCondOperands(ICmpInst::ICMP_SGT, LHS, MinusOne, FoundLHS,
FoundRHS) &&
isImpliedCondOperands(ICmpInst::ICMP_SGT, RHS, MinusOne, FoundLHS,
FoundRHS))
Pred = ICmpInst::ICMP_SGT;
}
if (Pred != ICmpInst::ICMP_SGT)
return false;
auto GetOpFromSExt = [&](const SCEV *S) {
if (auto *Ext = dyn_cast<SCEVSignExtendExpr>(S))
return Ext->getOperand();
// TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off
// the constant in some cases.
return S;
};
// Acquire values from extensions.
auto *OrigLHS = LHS;
auto *OrigFoundLHS = FoundLHS;
LHS = GetOpFromSExt(LHS);
FoundLHS = GetOpFromSExt(FoundLHS);
// Checks whether the SGT predicate can be proved trivially or using the
// found context.
auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) {
return isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGT, S1, S2) ||
isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS,
FoundRHS, Depth + 1);
};
if (auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) {
// We want to avoid creation of any new non-constant SCEV. Since we are
// going to compare the operands to RHS, we should be certain that we don't
// need any size extensions for this. So let's decline all cases when the
// sizes of types of LHS and RHS do not match.
// TODO: Maybe try to get RHS from sext to catch more cases?
if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType()))
return false;
// Should not overflow.
if (!LHSAddExpr->hasNoSignedWrap())
return false;
auto *LL = LHSAddExpr->getOperand(0);
auto *LR = LHSAddExpr->getOperand(1);
auto *MinusOne = getMinusOne(RHS->getType());
// Checks that S1 >= 0 && S2 > RHS, trivially or using the found context.
auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) {
return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS);
};
// Try to prove the following rule:
// (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS).
// (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS).
if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL))
return true;
} else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) {
Value *LL, *LR;
// FIXME: Once we have SDiv implemented, we can get rid of this matching.
using namespace llvm::PatternMatch;
if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) {
// Rules for division.
// We are going to perform some comparisons with Denominator and its
// derivative expressions. In the general case, creating a SCEV for it may
// lead to a complex analysis of the entire graph, and in particular it can
// request a trip count recalculation for the same loop. That result would be
// cached as SCEVCouldNotCompute to avoid infinite recursion. To avoid this,
// we only want to create SCEVs that are constants in this section.
// So we bail if Denominator is not a constant.
if (!isa<ConstantInt>(LR))
return false;
auto *Denominator = cast<SCEVConstant>(getSCEV(LR));
// We want to make sure that LHS = FoundLHS / Denominator. If it is so,
// then a SCEV for the numerator already exists and matches with FoundLHS.
auto *Numerator = getExistingSCEV(LL);
if (!Numerator || Numerator->getType() != FoundLHS->getType())
return false;
// Make sure that the numerator matches with FoundLHS and the denominator
// is positive.
if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator))
return false;
auto *DTy = Denominator->getType();
auto *FRHSTy = FoundRHS->getType();
if (DTy->isPointerTy() != FRHSTy->isPointerTy())
// One of types is a pointer and another one is not. We cannot extend
// them properly to a wider type, so let us just reject this case.
// TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help
// to avoid this check.
return false;
// Given that:
// FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0.
auto *WTy = getWiderType(DTy, FRHSTy);
auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy);
auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy);
// Try to prove the following rule:
// (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS).
// For example, given that FoundLHS > 2. It means that FoundLHS is at
// least 3. If we divide it by Denominator < 4, we will have at least 1.
auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2));
if (isKnownNonPositive(RHS) &&
IsSGTViaContext(FoundRHSExt, DenomMinusTwo))
return true;
// Try to prove the following rule:
// (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS).
// For example, given that FoundLHS > -3. Then FoundLHS is at least -2.
// If we divide it by Denominator > 2, then:
// 1. If FoundLHS is negative, then the result is 0.
// 2. If FoundLHS is non-negative, then the result is non-negative.
// Anyways, the result is non-negative.
auto *MinusOne = getMinusOne(WTy);
auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt);
if (isKnownNegative(RHS) &&
IsSGTViaContext(FoundRHSExt, NegDenomMinusOne))
return true;
}
}
// If our expression contained SCEVUnknown Phis, and we split it down and now
// need to prove something for them, try to prove the predicate for all
// possible incoming values of those Phis.
if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS, Depth + 1))
return true;
return false;
}
static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// zext x u<= sext x, sext x s<= zext x
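// For instance, with x of type i8 extended to i16 (illustrative values):
// if x = -1, zext(x) = 255 while sext(x) = 0xFFFF, so 255 u<= 65535 and
// -1 s<= 255; for x >= 0 both extensions are equal.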
switch (Pred) {
case ICmpInst::ICMP_SGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE: {
// If operand >=s 0 then ZExt == SExt. If operand <s 0 then SExt <s ZExt.
const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(LHS);
const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(RHS);
if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand())
return true;
break;
}
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE: {
// If operand >=s 0 then ZExt == SExt. If operand <s 0 then ZExt <u SExt.
const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS);
const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(RHS);
if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand())
return true;
break;
}
default:
break;
};
return false;
}
bool
ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
return isKnownPredicateExtendIdiom(Pred, LHS, RHS) ||
isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
}
bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
switch (Pred) {
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_NE:
if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS))
return true;
break;
}
// Maybe it can be proved via operations?
if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
return false;
}
bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
// The restriction on `FoundRHS` could be lifted easily -- it exists only to
// reduce the compile time impact of this optimization.
return false;
Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS);
if (!Addend)
return false;
const APInt &ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
// `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
// antecedent "`FoundLHS` `Pred` `FoundRHS`".
ConstantRange FoundLHSRange =
ConstantRange::makeExactICmpRegion(Pred, ConstFoundRHS);
// Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`:
ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend));
// We can also compute the range of values for `LHS` that satisfy the
// consequent, "`LHS` `Pred` `RHS`":
const APInt &ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
// The antecedent implies the consequent if every value of `LHS` that
// satisfies the antecedent also satisfies the consequent.
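// A worked example (hypothetical constants) with Pred = ICMP_ULT: if
// FoundRHS = 10, then FoundLHSRange is [0, 10). With Addend = 5, LHSRange
// becomes [5, 15), and for RHS = 20 every value in [5, 15) is u< 20, so the
// implication holds.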
return LHSRange.icmp(Pred, ConstRHS);
}
bool ScalarEvolution::canIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned) {
assert(isKnownPositive(Stride) && "Positive stride expected!");
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
APInt MaxRHS = getSignedRangeMax(RHS);
APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));
// SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
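// E.g. with i8 signed ranges (illustrative values): MaxStrideMinusOne = 9
// gives 127 - 9 = 118; if MaxRHS = 120 then 118 s< 120 and the IV could step
// past RHS and wrap, whereas MaxRHS = 100 gives 118 s< 100 == false, ruling
// overflow out.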
return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS);
}
APInt MaxRHS = getUnsignedRangeMax(RHS);
APInt MaxValue = APInt::getMaxValue(BitWidth);
APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));
// UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS);
}
bool ScalarEvolution::canIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned) {
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
APInt MinRHS = getSignedRangeMin(RHS);
APInt MinValue = APInt::getSignedMinValue(BitWidth);
APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));
// SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS);
}
APInt MinRHS = getUnsignedRangeMin(RHS);
APInt MinValue = APInt::getMinValue(BitWidth);
APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));
// UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS);
}
const SCEV *ScalarEvolution::getUDivCeilSCEV(const SCEV *N, const SCEV *D) {
// umin(N, 1) + floor((N - umin(N, 1)) / D)
// This is equivalent to "1 + floor((N - 1) / D)" for N != 0. The umin
// expression fixes the case of N=0.
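// For example: N = 7, D = 3 gives umin(7, 1) = 1 plus floor(6 / 3) = 2,
// i.e. 3, which equals ceil(7 / 3); N = 0 gives 0 + floor(0 / D) = 0.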
const SCEV *MinNOne = getUMinExpr(N, getOne(N->getType()));
const SCEV *NMinusOne = getMinusSCEV(N, MinNOne);
return getAddExpr(MinNOne, getUDivExpr(NMinusOne, D));
}
const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
const SCEV *Stride,
const SCEV *End,
unsigned BitWidth,
bool IsSigned) {
// The logic in this function assumes we can represent a positive stride.
// If we can't, the backedge-taken count must be zero.
if (IsSigned && BitWidth == 1)
return getZero(Stride->getType());
// Calculate the maximum backedge count based on the range of values
// permitted by Start, End, and Stride.
APInt MinStart =
IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start);
APInt MinStride =
IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride);
// We assume either the stride is positive, or the backedge-taken count
// is zero. So force StrideForMaxBECount to be at least one.
APInt One(BitWidth, 1);
APInt StrideForMaxBECount = IsSigned ? APIntOps::smax(One, MinStride)
: APIntOps::umax(One, MinStride);
APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth)
: APInt::getMaxValue(BitWidth);
APInt Limit = MaxValue - (StrideForMaxBECount - 1);
// Although End can be a MAX expression we estimate MaxEnd considering only
// the case End = RHS of the loop termination condition. This is safe because
// in the other case (End - Start) is zero, leading to a zero maximum backedge
// taken count.
APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit)
: APIntOps::umin(getUnsignedRangeMax(End), Limit);
// MaxBECount = ceil((max(MaxEnd, MinStart) - MinStart) / Stride)
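// For instance (unsigned, illustrative range bounds): MinStart = 0,
// MaxEnd = 100 and StrideForMaxBECount = 4 give ceil(100 / 4) = 25.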
MaxEnd = IsSigned ? APIntOps::smax(MaxEnd, MinStart)
: APIntOps::umax(MaxEnd, MinStart);
return getUDivCeilSCEV(getConstant(MaxEnd - MinStart) /* Delta */,
getConstant(StrideForMaxBECount) /* Step */);
}
ScalarEvolution::ExitLimit
ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
SmallPtrSet<const SCEVPredicate *, 4> Predicates;
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
bool PredicatedIV = false;
if (!IV && AllowPredicates) {
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
PredicatedIV = true;
}
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
// A precondition of this method is that the condition being analyzed
// reaches an exiting branch which dominates the latch. Given that, we can
// assume that an increment which violates the nowrap specification and
// produces poison must cause undefined behavior when the resulting poison
// value is branched upon and thus we can conclude that the backedge is
// taken no more often than would be required to produce that poison value.
// Note that a well defined loop can exit on the iteration which violates
// the nowrap specification if there is another exit (either explicit or
// implicit/exceptional) which causes the loop to execute before the
// exiting instruction we're analyzing would trigger UB.
auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW;
bool NoWrap = ControlsExit && IV->getNoWrapFlags(WrapType);
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
const SCEV *Stride = IV->getStepRecurrence(*this);
bool PositiveStride = isKnownPositive(Stride);
// Avoid negative or zero stride values.
if (!PositiveStride) {
// We can compute the correct backedge taken count for loops with unknown
// strides if we can prove that the loop is not an infinite loop with side
// effects. Here's the loop structure we are trying to handle -
//
// i = start
// do {
// A[i] = i;
// i += s;
// } while (i < end);
//
// The backedge taken count for such loops is evaluated as -
// (max(end, start + stride) - start - 1) /u stride
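// For example (illustrative values): start = 0, stride = 3, end = 10 gives
// (max(10, 3) - 0 - 1) /u 3 = 9 /u 3 = 3; the backedge is taken after the
// iterations with i = 0, 3 and 6, and the iteration with i = 9 exits.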
//
// The additional preconditions that we need to check to prove correctness
// of the above formula is as follows -
//
// a) IV is either nuw or nsw depending upon signedness (indicated by the
// NoWrap flag).
// b) loop is single exit with no side effects.
//
//
// Precondition a) implies that if the stride is negative, this is a single
// trip loop. The backedge taken count formula reduces to zero in this case.
//
// Precondition b) implies that if rhs is invariant in L, then unknown
// stride being zero means the backedge can't be taken without UB.
//
// The positive stride case is the same as isKnownPositive(Stride) returning
// true (original behavior of the function).
//
// We want to make sure that the stride is truly unknown as there are edge
// cases where ScalarEvolution propagates no wrap flags to the
// post-increment/decrement IV even though the increment/decrement operation
// itself is wrapping. The computed backedge taken count may be wrong in
// such cases. This is prevented by checking that the stride is not known to
// be either positive or non-positive. For example, no wrap flags are
// propagated to the post-increment IV of this loop with a trip count of 2 -
//
// unsigned char i;
// for(i=127; i<128; i+=129)
// A[i] = i;
//
if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
!loopIsFiniteByAssumption(L))
return getCouldNotCompute();
if (!isKnownNonZero(Stride)) {
// If we have a step of zero, and RHS isn't invariant in L, we don't know
// if it might eventually be greater than start and if so, on which
// iteration. We can't even produce a useful upper bound.
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
// We allow a potentially zero stride, but we need to divide by stride
// below. Since the loop can't be infinite and this check must control
// the sole exit, we can infer the exit must be taken on the first
// iteration (e.g. backedge count = 0) if the stride is zero. Given that,
// we know the numerator in the divides below must be zero, so we can
// pick an arbitrary non-zero value for the denominator (e.g. stride)
// and produce the right result.
// FIXME: Handle the case where Stride is poison?
auto wouldZeroStrideBeUB = [&]() {
// Proof by contradiction. Suppose the stride were zero. If we can
// prove that the backedge *is* taken on the first iteration, then since
// we know this condition controls the sole exit, we must have an
// infinite loop. We can't have a (well defined) infinite loop per
// check just above.
// Note: The (Start - Stride) term is used to get the start' term from
// (start' + stride,+,stride). Remember that we only care about the
// result of this expression when stride == 0 at runtime.
auto *StartIfZero = getMinusSCEV(IV->getStart(), Stride);
return isLoopEntryGuardedByCond(L, Cond, StartIfZero, RHS);
};
if (!wouldZeroStrideBeUB()) {
Stride = getUMaxExpr(Stride, getOne(Stride->getType()));
}
}
} else if (!Stride->isOne() && !NoWrap) {
auto isUBOnWrap = [&]() {
// Can we prove this loop *must* be UB if overflow of IV occurs?
// Reasoning goes as follows:
// * Suppose the IV did self wrap.
// * If Stride evenly divides the iteration space, then once wrap
// occurs, the loop must revisit the same values.
// * We know that RHS is invariant, and that none of those values
// caused this exit to be taken previously. Thus, this exit is
// dynamically dead.
// * If this is the sole exit, then a dead exit implies the loop
// must be infinite if there are no abnormal exits.
// * If the loop were infinite, then it must either not be mustprogress
// or have side effects. Otherwise, it must be UB.
// * It can't (by assumption), be UB so we have contradicted our
// premise and can conclude the IV did not in fact self-wrap.
// From no-self-wrap, we need to then prove no-(un)signed-wrap. This
// follows trivially from the fact that every (un)signed-wrapped, but not
// self-wrapped, value must be less than the last value before the
// (un)signed wrap. Since we know that last value didn't cause an exit,
// neither will any smaller one.
if (!isLoopInvariant(RHS, L))
return false;
auto *StrideC = dyn_cast<SCEVConstant>(Stride);
if (!StrideC || !StrideC->getAPInt().isPowerOf2())
return false;
if (!ControlsExit || !loopHasNoAbnormalExits(L))
return false;
return loopIsFiniteByAssumption(L);
};
// Avoid proven overflow cases: this will ensure that the backedge taken
// count will not generate any unsigned overflow. Relaxed no-overflow
// conditions exploit NoWrapFlags, allowing to optimize in presence of
// undefined behaviors like the case of C language.
if (canIVOverflowOnLT(RHS, Stride, IsSigned) && !isUBOnWrap())
return getCouldNotCompute();
}
// On all paths just preceding, we established the following invariant:
// IV can be assumed not to overflow up to and including the exiting
// iteration. We proved this in one of two ways:
// 1) We can show overflow doesn't occur before the exiting iteration
// 1a) canIVOverflowOnLT, and b) step of one
// 2) We can show that if overflow occurs, the loop must execute UB
// before any possible exit.
// Note that we have not yet proved RHS invariant (in general).
const SCEV *Start = IV->getStart();
// Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond.
// Use integer-typed versions for actual computation.
const SCEV *OrigStart = Start;
const SCEV *OrigRHS = RHS;
if (Start->getType()->isPointerTy()) {
Start = getLosslessPtrToIntExpr(Start);
if (isa<SCEVCouldNotCompute>(Start))
return Start;
}
if (RHS->getType()->isPointerTy()) {
RHS = getLosslessPtrToIntExpr(RHS);
if (isa<SCEVCouldNotCompute>(RHS))
return RHS;
}
// When the RHS is not invariant, we do not know the end bound of the loop and
// cannot calculate the ExactBECount needed by ExitLimit. However, we can
// calculate the MaxBECount, given the start, stride and max value for the end
// bound of the loop (RHS), and the fact that IV does not overflow (which is
// checked above).
if (!isLoopInvariant(RHS, L)) {
const SCEV *MaxBECount = computeMaxBECountForLT(
Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount,
false /*MaxOrZero*/, Predicates);
}
// We use the expression (max(End,Start)-Start)/Stride to describe the
// backedge count, as if the backedge is taken at least once max(End,Start)
// is End and so the result is as above, and if not max(End,Start) is Start
// so we get a backedge count of zero.
const SCEV *BECount = nullptr;
auto *StartMinusStride = getMinusSCEV(OrigStart, Stride);
// Can we prove max(RHS,Start) > Start - Stride?
if (isLoopEntryGuardedByCond(L, Cond, StartMinusStride, Start) &&
isLoopEntryGuardedByCond(L, Cond, StartMinusStride, RHS)) {
// In this case, we can use a refined formula for computing backedge taken
// count. The general formula remains:
// "End-Start /uceiling Stride" where "End = max(RHS,Start)"
// We want to use the alternate formula:
// "((End - 1) - (Start - Stride)) /u Stride"
// Let's do a quick case analysis to show these are equivalent under
// our precondition that max(RHS,Start) > Start - Stride.
// * For RHS <= Start, the backedge-taken count must be zero.
// "((End - 1) - (Start - Stride)) /u Stride" reduces to
// "((Start - 1) - (Start - Stride)) /u Stride" which simplies to
// "Stride - 1 /u Stride" which is indeed zero for all non-zero values
// of Stride. For 0 stride, we've use umin(1,Stride) above, reducing
// this to the stride of 1 case.
// * For RHS >= Start, the backedge count must be "RHS-Start /uceil Stride".
// "((End - 1) - (Start - Stride)) /u Stride" reduces to
// "((RHS - 1) - (Start - Stride)) /u Stride" reassociates to
// "((RHS - (Start - Stride) - 1) /u Stride".
// Our preconditions trivially imply no overflow in that form.
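// As a concrete check (illustrative values): Start = 0, Stride = 3 and
// RHS = End = 10 (unsigned) give ((10 - 1) - (0 - 3)) /u 3 = 12 /u 3 = 4,
// the same as the general form ceil((10 - 0) / 3) = 4.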
const SCEV *MinusOne = getMinusOne(Stride->getType());
const SCEV *Numerator =
getMinusSCEV(getAddExpr(RHS, MinusOne), StartMinusStride);
if (!isa<SCEVCouldNotCompute>(Numerator)) {
BECount = getUDivExpr(Numerator, Stride);
}
}
const SCEV *BECountIfBackedgeTaken = nullptr;
if (!BECount) {
auto canProveRHSGreaterThanEqualStart = [&]() {
auto CondGE = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart))
return true;
// (RHS > Start - 1) implies RHS >= Start.
// * "RHS >= Start" is trivially equivalent to "RHS > Start - 1" if
// "Start - 1" doesn't overflow.
// * For signed comparison, if Start - 1 does overflow, it's equal
// to INT_MAX, and "RHS >s INT_MAX" is trivially false.
// * For unsigned comparison, if Start - 1 does overflow, it's equal
// to UINT_MAX, and "RHS >u UINT_MAX" is trivially false.
//
// FIXME: Should isLoopEntryGuardedByCond do this for us?
auto CondGT = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
auto *StartMinusOne = getAddExpr(OrigStart,
getMinusOne(OrigStart->getType()));
return isLoopEntryGuardedByCond(L, CondGT, OrigRHS, StartMinusOne);
};
// If we know that RHS >= Start in the context of loop, then we know that
// max(RHS, Start) = RHS at this point.
const SCEV *End;
if (canProveRHSGreaterThanEqualStart()) {
End = RHS;
} else {
// If RHS < Start, the backedge will be taken zero times. So in
// general, we can write the backedge-taken count as:
//
// RHS >= Start ? ceil(RHS - Start) / Stride : 0
//
// We convert it to the following to make it more convenient for SCEV:
//
// ceil(max(RHS, Start) - Start) / Stride
End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
// See what would happen if we assume the backedge is taken. This is
// used to compute MaxBECount.
BECountIfBackedgeTaken = getUDivCeilSCEV(getMinusSCEV(RHS, Start), Stride);
}
// At this point, we know:
//
// 1. If IsSigned, Start <=s End; otherwise, Start <=u End
// 2. The index variable doesn't overflow.
//
// Therefore, we know N exists such that
// (Start + Stride * N) >= End, and computing "(Start + Stride * N)"
// doesn't overflow.
//
// Using this information, try to prove whether the addition in
// "(Start - End) + (Stride - 1)" has unsigned overflow.
const SCEV *One = getOne(Stride->getType());
bool MayAddOverflow = [&] {
if (auto *StrideC = dyn_cast<SCEVConstant>(Stride)) {
if (StrideC->getAPInt().isPowerOf2()) {
// Suppose Stride is a power of two, and Start/End are unsigned
// integers. Let UMAX be the largest representable unsigned
// integer.
//
// By the preconditions of this function, we know
// "(Start + Stride * N) >= End", and this doesn't overflow.
// As a formula:
//
// End <= (Start + Stride * N) <= UMAX
//
// Subtracting Start from all the terms:
//
// End - Start <= Stride * N <= UMAX - Start
//
// Since Start is unsigned, UMAX - Start <= UMAX. Therefore:
//
// End - Start <= Stride * N <= UMAX
//
// Stride * N is a multiple of Stride. Therefore,
//
// End - Start <= Stride * N <= UMAX - (UMAX mod Stride)
//
// Since Stride is a power of two, UMAX + 1 is divisible by Stride.
// Therefore, UMAX mod Stride == Stride - 1. So we can write:
//
// End - Start <= Stride * N <= UMAX - Stride + 1
//
// Dropping the middle term:
//
// End - Start <= UMAX - Stride + 1
//
// Adding Stride - 1 to both sides:
//
// (End - Start) + (Stride - 1) <= UMAX
//
// In other words, the addition doesn't have unsigned overflow.
//
// A similar proof works if we treat Start/End as signed values.
// Just rewrite steps before "End - Start <= Stride * N <= UMAX" to
// use signed max instead of unsigned max. Note that we're trying
// to prove a lack of unsigned overflow in either case.
return false;
}
}
if (Start == Stride || Start == getMinusSCEV(Stride, One)) {
// If Start is equal to Stride, (End - Start) + (Stride - 1) == End - 1.
// If !IsSigned, 0 <u Stride == Start <=u End; so 0 <u End - 1 <u End.
// If IsSigned, 0 <s Stride == Start <=s End; so 0 <s End - 1 <s End.
//
// If Start is equal to Stride - 1, (End - Start) + Stride - 1 == End.
return false;
}
return true;
}();
const SCEV *Delta = getMinusSCEV(End, Start);
if (!MayAddOverflow) {
// floor((D + (S - 1)) / S)
// We prefer this formulation if it's legal because it's fewer operations.
BECount =
getUDivExpr(getAddExpr(Delta, getMinusSCEV(Stride, One)), Stride);
} else {
BECount = getUDivCeilSCEV(Delta, Stride);
}
}
const SCEV *MaxBECount;
bool MaxOrZero = false;
if (isa<SCEVConstant>(BECount)) {
MaxBECount = BECount;
} else if (BECountIfBackedgeTaken &&
isa<SCEVConstant>(BECountIfBackedgeTaken)) {
// If we know exactly how many times the backedge will be taken if it's
// taken at least once, then the backedge count will either be that or
// zero.
MaxBECount = BECountIfBackedgeTaken;
MaxOrZero = true;
} else {
MaxBECount = computeMaxBECountForLT(
Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
}
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
MaxBECount = getConstant(getUnsignedRangeMax(BECount));
return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
}
ScalarEvolution::ExitLimit
ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// We handle only IV > Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
if (!IV && AllowPredicates)
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW;
bool NoWrap = ControlsExit && IV->getNoWrapFlags(WrapType);
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
// Avoid negative or zero stride values
if (!isKnownPositive(Stride))
return getCouldNotCompute();
// Avoid proven overflow cases: this will ensure that the backedge taken count
// will not generate any unsigned overflow. Relaxed no-overflow conditions
// exploit NoWrapFlags, allowing to optimize in presence of undefined
// behaviors like the case of C language.
if (!Stride->isOne() && !NoWrap)
if (canIVOverflowOnGT(RHS, Stride, IsSigned))
return getCouldNotCompute();
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) {
// If we know that Start >= RHS in the context of loop, then we know that
// min(RHS, Start) = RHS at this point.
if (isLoopEntryGuardedByCond(
L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, Start, RHS))
End = RHS;
else
End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start);
}
if (Start->getType()->isPointerTy()) {
Start = getLosslessPtrToIntExpr(Start);
if (isa<SCEVCouldNotCompute>(Start))
return Start;
}
if (End->getType()->isPointerTy()) {
End = getLosslessPtrToIntExpr(End);
if (isa<SCEVCouldNotCompute>(End))
return End;
}
// Compute ((Start - End) + (Stride - 1)) / Stride.
// FIXME: This can overflow. Holding off on fixing this for now;
// howManyGreaterThans will hopefully be gone soon.
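// For illustration (hypothetical values): Start = 10, End = 0, Stride = 3
// gives ((10 - 0) + (3 - 1)) /u 3 = 4, matching an IV that takes the values
// 10, 7, 4 and 1 before dropping to -2 and exiting.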
const SCEV *One = getOne(Stride->getType());
const SCEV *BECount = getUDivExpr(
getAddExpr(getMinusSCEV(Start, End), getMinusSCEV(Stride, One)), Stride);
APInt MaxStart = IsSigned ? getSignedRangeMax(Start)
: getUnsignedRangeMax(Start);
APInt MinStride = IsSigned ? getSignedRangeMin(Stride)
: getUnsignedRangeMin(Stride);
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
: APInt::getMinValue(BitWidth) + (MinStride - 1);
// Although End can be a MIN expression we estimate MinEnd considering only
// the case End = RHS. This is safe because in the other case (Start - End)
// is zero, leading to a zero maximum backedge taken count.
APInt MinEnd =
IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit)
: APIntOps::umax(getUnsignedRangeMin(RHS), Limit);
const SCEV *MaxBECount = isa<SCEVConstant>(BECount)
? BECount
: getUDivCeilSCEV(getConstant(MaxStart - MinEnd),
getConstant(MinStride));
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
return ExitLimit(BECount, MaxBECount, false, Predicates);
}
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
ScalarEvolution &SE) const {
if (Range.isFullSet()) // Infinite loop.
return SE.getCouldNotCompute();
// If the start is a non-zero constant, shift the range to simplify things.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
SmallVector<const SCEV *, 4> Operands(operands());
Operands[0] = SE.getZero(SC->getType());
const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
getNoWrapFlags(FlagNW));
if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
return ShiftedAddRec->getNumIterationsInRange(
Range.subtract(SC->getAPInt()), SE);
// This is strange and shouldn't happen.
return SE.getCouldNotCompute();
}
// The only time we can solve this is when we have all constant indices.
// Otherwise, we cannot determine the overflow conditions.
if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
return SE.getCouldNotCompute();
// Okay at this point we know that all elements of the chrec are constants and
// that the start element is zero.
// First check to see if the range contains zero. If not, the first
// iteration exits.
unsigned BitWidth = SE.getTypeSizeInBits(getType());
if (!Range.contains(APInt(BitWidth, 0)))
return SE.getZero(getType());
if (isAffine()) {
// If this is an affine expression then we have this situation:
// Solve {0,+,A} in Range === Ax in Range
// We know that zero is in the range. If A is positive then we know that
// the upper value of the range must be the first possible exit value.
// If A is negative then the lower of the range is the last possible loop
// value. Also note that we already checked for a full range.
APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
APInt End = A.sge(1) ? (Range.getUpper() - 1) : Range.getLower();
// The exit value should be (End+A)/A.
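// For instance (illustrative values): Range = [0, 10) and A = 3 give End = 9
// and ExitVal = (9 + 3) /u 3 = 4; {0,+,3} evaluates to 12 at iteration 4
// (outside the range) and to 9 at iteration 3 (still inside), so 4 is
// returned.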
APInt ExitVal = (End + A).udiv(A);
ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
// Evaluate at the exit value. If we really did fall out of the valid
// range, then we computed our trip count, otherwise wrap around or other
// things must have happened.
ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
if (Range.contains(Val->getValue()))
return SE.getCouldNotCompute(); // Something strange happened
// Ensure that the previous value is in the range. This is a sanity check.
assert(Range.contains(
EvaluateConstantChrecAtConstant(this,
ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
"Linear scev computation is off in a bad way!");
return SE.getConstant(ExitValue);
}
if (isQuadratic()) {
if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
return SE.getConstant(S.getValue());
}
return SE.getCouldNotCompute();
}
const SCEVAddRecExpr *
SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const {
assert(getNumOperands() > 1 && "AddRec with zero step?");
// There is a temptation to just call getAddExpr(this, getStepRecurrence(SE)),
// but in this case we cannot guarantee that the value returned will be an
// AddRec because SCEV does not have a fixed point where it stops
// simplification: it is legal to return ({rec1} + {rec2}). For example, it
// may happen if we reach arithmetic depth limit while simplifying. So we
// construct the returned value explicitly.
SmallVector<const SCEV *, 3> Ops;
// If this is {A,+,B,+,C,...,+,N}, then its step is {B,+,C,+,...,+,N}, and
// (this + Step) is {A+B,+,B+C,+...,+,N}.
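// For example (illustrative recurrence): for {1,+,2,+,3}, the step is
// {2,+,3}, and the loop below builds {1+2,+,2+3,+,3} = {3,+,5,+,3}.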
for (unsigned i = 0, e = getNumOperands() - 1; i < e; ++i)
Ops.push_back(SE.getAddExpr(getOperand(i), getOperand(i + 1)));
// We know that the last operand is not a constant zero (otherwise it would
// have been popped out earlier). This guarantees us that if the result has
// the same last operand, then it will also not be popped out, meaning that
// the returned value will be an AddRec.
const SCEV *Last = getOperand(getNumOperands() - 1);
assert(!Last->isZero() && "Recurrency with zero step?");
Ops.push_back(Last);
return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, getLoop(),
SCEV::FlagAnyWrap));
}
// Return true when S contains at least one undef value.
static inline bool containsUndefs(const SCEV *S) {
return SCEVExprContains(S, [](const SCEV *S) {
if (const auto *SU = dyn_cast<SCEVUnknown>(S))
return isa<UndefValue>(SU->getValue());
return false;
});
}
namespace {
// Collect all steps of SCEV expressions.
struct SCEVCollectStrides {
ScalarEvolution &SE;
SmallVectorImpl<const SCEV *> &Strides;
SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
: SE(SE), Strides(S) {}
bool follow(const SCEV *S) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
Strides.push_back(AR->getStepRecurrence(SE));
return true;
}
bool isDone() const { return false; }
};
// Collect all SCEVUnknown and SCEVMulExpr expressions.
struct SCEVCollectTerms {
SmallVectorImpl<const SCEV *> &Terms;
SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
bool follow(const SCEV *S) {
if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
isa<SCEVSignExtendExpr>(S)) {
if (!containsUndefs(S))
Terms.push_back(S);
// Stop recursion: once we collected a term, do not walk its operands.
return false;
}
// Keep looking.
return true;
}
bool isDone() const { return false; }
};
// Check if a SCEV contains an AddRecExpr.
struct SCEVHasAddRec {
bool &ContainsAddRec;
SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
ContainsAddRec = false;
}
bool follow(const SCEV *S) {
if (isa<SCEVAddRecExpr>(S)) {
ContainsAddRec = true;
// Stop recursion: once we collected a term, do not walk its operands.
return false;
}
// Keep looking.
return true;
}
bool isDone() const { return false; }
};
// Find factors that are multiplied with an expression that (possibly as a
// subexpression) contains an AddRecExpr. In the expression:
//
// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
//
// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
// parameters as they form a product with an induction variable.
//
// This collector expects all array size parameters to be in the same MulExpr.
// It might be necessary to later add support for collecting parameters that are
// spread over different nested MulExpr.
struct SCEVCollectAddRecMultiplies {
SmallVectorImpl<const SCEV *> &Terms;
ScalarEvolution &SE;
SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE)
: Terms(T), SE(SE) {}
bool follow(const SCEV *S) {
if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
bool HasAddRec = false;
SmallVector<const SCEV *, 0> Operands;
for (auto Op : Mul->operands()) {
const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
if (Unknown && !isa<CallInst>(Unknown->getValue())) {
Operands.push_back(Op);
} else if (Unknown) {
HasAddRec = true;
} else {
bool ContainsAddRec = false;
SCEVHasAddRec ContainsAddRecVisitor(ContainsAddRec);
visitAll(Op, ContainsAddRecVisitor);
HasAddRec |= ContainsAddRec;
}
}
if (Operands.size() == 0)
return true;
if (!HasAddRec)
return false;
Terms.push_back(SE.getMulExpr(Operands));
// Stop recursion: once we collected a term, do not walk its operands.
return false;
}
// Keep looking.
return true;
}
bool isDone() const { return false; }
};
} // end anonymous namespace
/// Find parametric terms in this SCEVAddRecExpr. We first look for parameters
/// in two places:
/// 1) The strides of AddRec expressions.
/// 2) Unknowns that are multiplied with AddRec expressions.
void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
SmallVectorImpl<const SCEV *> &Terms) {
SmallVector<const SCEV *, 4> Strides;
SCEVCollectStrides StrideCollector(*this, Strides);
visitAll(Expr, StrideCollector);
LLVM_DEBUG({
dbgs() << "Strides:\n";
for (const SCEV *S : Strides)
dbgs() << *S << "\n";
});
for (const SCEV *S : Strides) {
SCEVCollectTerms TermCollector(Terms);
visitAll(S, TermCollector);
}
LLVM_DEBUG({
dbgs() << "Terms:\n";
for (const SCEV *T : Terms)
dbgs() << *T << "\n";
});
SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
visitAll(Expr, MulCollector);
}
static bool findArrayDimensionsRec(ScalarEvolution &SE,
SmallVectorImpl<const SCEV *> &Terms,
SmallVectorImpl<const SCEV *> &Sizes) {
int Last = Terms.size() - 1;
const SCEV *Step = Terms[Last];
// End of recursion.
if (Last == 0) {
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
SmallVector<const SCEV *, 2> Qs;
for (const SCEV *Op : M->operands())
if (!isa<SCEVConstant>(Op))
Qs.push_back(Op);
Step = SE.getMulExpr(Qs);
}
Sizes.push_back(Step);
return true;
}
for (const SCEV *&Term : Terms) {
// Normalize the terms before the next call to findArrayDimensionsRec.
const SCEV *Q, *R;
SCEVDivision::divide(SE, Term, Step, &Q, &R);
// Bail out when GCD does not evenly divide one of the terms.
if (!R->isZero())
return false;
Term = Q;
}
// Remove all SCEVConstants.
erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); });
if (Terms.size() > 0)
if (!findArrayDimensionsRec(SE, Terms, Sizes))
return false;
Sizes.push_back(Step);
return true;
}
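// Worked example (a sketch): starting from Terms = {%m * %o, %o},
// findArrayDimensionsRec takes the last term %o as Step, divides the other
// terms by it with zero remainder leaving Terms = {%m}, bottoms out by
// pushing %m, and pushes %o while unwinding, so Sizes = {%m, %o}. The caller
// appends the element size afterwards.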
// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
for (const SCEV *T : Terms)
if (SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); }))
return true;
return false;
}
// Return the number of product terms in S.
static inline int numberOfTerms(const SCEV *S) {
if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
return Expr->getNumOperands();
return 1;
}
static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
if (isa<SCEVConstant>(T))
return nullptr;
if (isa<SCEVUnknown>(T))
return T;
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
SmallVector<const SCEV *, 2> Factors;
for (const SCEV *Op : M->operands())
if (!isa<SCEVConstant>(Op))
Factors.push_back(Op);
return SE.getMulExpr(Factors);
}
return T;
}
/// Return the size of an element read or written by Inst.
const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
Type *Ty;
if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
Ty = Store->getValueOperand()->getType();
else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
Ty = Load->getType();
else
return nullptr;
Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
return getSizeOfExpr(ETy, Ty);
}
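// For example, for a hypothetical "store double %v, double* %p",
// getElementSize returns the SCEV for sizeof(double), i.e. a constant 8 in
// the pointer-width integer type.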
void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
SmallVectorImpl<const SCEV *> &Sizes,
const SCEV *ElementSize) {
if (Terms.size() < 1 || !ElementSize)
return;
// Early return when Terms do not contain parameters: we do not delinearize
// non-parametric SCEVs.
if (!containsParameters(Terms))
return;
LLVM_DEBUG({
dbgs() << "Terms:\n";
for (const SCEV *T : Terms)
dbgs() << *T << "\n";
});
// Remove duplicates.
array_pod_sort(Terms.begin(), Terms.end());
Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
// Put larger terms first.
llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
return numberOfTerms(LHS) > numberOfTerms(RHS);
});
// Try to divide all terms by the element size. If term is not divisible by
// element size, proceed with the original term.
for (const SCEV *&Term : Terms) {
const SCEV *Q, *R;
SCEVDivision::divide(*this, Term, ElementSize, &Q, &R);
if (!Q->isZero())
Term = Q;
}
SmallVector<const SCEV *, 4> NewTerms;
// Remove constant factors.
for (const SCEV *T : Terms)
if (const SCEV *NewT = removeConstantFactors(*this, T))
NewTerms.push_back(NewT);
LLVM_DEBUG({
dbgs() << "Terms after sorting:\n";
for (const SCEV *T : NewTerms)
dbgs() << *T << "\n";
});
if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) {
Sizes.clear();
return;
}
// The last element to be pushed into Sizes is the size of an element.
Sizes.push_back(ElementSize);
LLVM_DEBUG({
dbgs() << "Sizes:\n";
for (const SCEV *S : Sizes)
dbgs() << *S << "\n";
});
}
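// Putting the helpers together (a sketch, reusing the example documented
// before delinearize() below): with Terms = {(8 * %m * %o), (8 * %o)} and
// ElementSize = 8, dividing out the element size and dropping constant
// factors leaves {%m * %o, %o}; findArrayDimensionsRec turns that into
// {%m, %o}, and the element size is appended, giving Sizes = {%m, %o, 8},
// i.e. ArrayDecl[UnknownSize][%m][%o] with 8-byte elements.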
void ScalarEvolution::computeAccessFunctions(
const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<const SCEV *> &Sizes) {
// Early exit in case this SCEV is not an affine multivariate function.
if (Sizes.empty())
return;
if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
if (!AR->isAffine())
return;
const SCEV *Res = Expr;
int Last = Sizes.size() - 1;
for (int i = Last; i >= 0; i--) {
const SCEV *Q, *R;
SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);
LLVM_DEBUG({
dbgs() << "Res: " << *Res << "\n";
dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
dbgs() << "Res divided by Sizes[i]:\n";
dbgs() << "Quotient: " << *Q << "\n";
dbgs() << "Remainder: " << *R << "\n";
});
Res = Q;
// Do not record the last subscript corresponding to the size of elements in
// the array.
if (i == Last) {
// Bail out if the remainder is too complex.
if (isa<SCEVAddRecExpr>(R)) {
Subscripts.clear();
Sizes.clear();
return;
}
continue;
}
// Record the access function for the current subscript.
Subscripts.push_back(R);
}
// Also push in last position the remainder of the last division: it will be
// the access function of the innermost dimension.
Subscripts.push_back(Res);
std::reverse(Subscripts.begin(), Subscripts.end());
LLVM_DEBUG({
dbgs() << "Subscripts:\n";
for (const SCEV *S : Subscripts)
dbgs() << *S << "\n";
});
}
/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
/// sizes of an array access. Returns the remainder of the delinearization that
/// is the offset start of the array. The SCEV->delinearize algorithm computes
/// the multiples of SCEV coefficients: that is a pattern matching of sub
/// expressions in the stride and base of a SCEV corresponding to the
/// computation of a GCD (greatest common divisor) of base and stride. When
/// SCEV->delinearize fails, it returns the SCEV unchanged.
///
/// For example: when analyzing the memory access A[i][j][k] in this loop nest
///
/// void foo(long n, long m, long o, double A[n][m][o]) {
///
/// for (long i = 0; i < n; i++)
/// for (long j = 0; j < m; j++)
/// for (long k = 0; k < o; k++)
/// A[i][j][k] = 1.0;
/// }
///
/// the delinearization input is the following AddRec SCEV:
///
/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
///
/// From this SCEV, we are able to say that the base offset of the access is %A
/// because it appears as an offset that does not divide any of the strides in
/// the loops:
///
/// CHECK: Base offset: %A
///
/// and then SCEV->delinearize determines the size of some of the dimensions of
/// the array as these are the multiples by which the strides are happening:
///
/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
///
/// Note that the outermost dimension remains of UnknownSize because there are
/// no strides that would help identifying the size of the last dimension: when
/// the array has been statically allocated, one could compute the size of that
/// dimension by dividing the overall size of the array by the size of the known
/// dimensions: %m * %o * 8.
///
/// Finally delinearize provides the access functions for the array reference
/// that does correspond to A[i][j][k] of the above C testcase:
///
/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
///
/// The testcases are checking the output of a function pass:
/// DelinearizationPass that walks through all loads and stores of a function
/// asking for the SCEV of the memory access with respect to all enclosing
/// loops, calling SCEV->delinearize on that and printing the results.
void ScalarEvolution::delinearize(const SCEV *Expr,
SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<const SCEV *> &Sizes,
const SCEV *ElementSize) {
// First step: collect parametric terms.
SmallVector<const SCEV *, 4> Terms;
collectParametricTerms(Expr, Terms);
if (Terms.empty())
return;
// Second step: find subscript sizes.
findArrayDimensions(Terms, Sizes, ElementSize);
if (Sizes.empty())
return;
// Third step: compute the access functions for each subscript.
computeAccessFunctions(Expr, Subscripts, Sizes);
if (Subscripts.empty())
return;
LLVM_DEBUG({
dbgs() << "succeeded to delinearize " << *Expr << "\n";
dbgs() << "ArrayDecl[UnknownSize]";
for (const SCEV *S : Sizes)
dbgs() << "[" << *S << "]";
dbgs() << "\nArrayRef";
for (const SCEV *S : Subscripts)
dbgs() << "[" << *S << "]";
dbgs() << "\n";
});
}
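// A minimal usage sketch (assuming Inst is a load or store inside the loop
// nest rooted at L and Ptr is its pointer operand), roughly what the
// DelinearizationPass mentioned above does:
//
//   const SCEV *AccessFn = SE.getSCEVAtScope(SE.getSCEV(Ptr), L);
//   const SCEV *BasePointer = SE.getPointerBase(AccessFn);
//   // Delinearize the offset relative to the base pointer.
//   AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
//   SmallVector<const SCEV *, 3> Subscripts, Sizes;
//   SE.delinearize(AccessFn, Subscripts, Sizes, SE.getElementSize(Inst));
//   // On success both vectors are non-empty: Sizes holds the inner
//   // dimension sizes plus the element size, and Subscripts holds one
//   // access function per dimension.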
bool ScalarEvolution::getIndexExpressionsFromGEP(
const GetElementPtrInst *GEP, SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<int> &Sizes) {
assert(Subscripts.empty() && Sizes.empty() &&
"Expected output lists to be empty on entry to this function.");
assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
Type *Ty = nullptr;
bool DroppedFirstDim = false;
for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
const SCEV *Expr = getSCEV(GEP->getOperand(i));
if (i == 1) {
Ty = GEP->getSourceElementType();
if (auto *Const = dyn_cast<SCEVConstant>(Expr))
if (Const->getValue()->isZero()) {
DroppedFirstDim = true;
continue;
}
Subscripts.push_back(Expr);
continue;
}
auto *ArrayTy = dyn_cast<ArrayType>(Ty);
if (!ArrayTy) {
Subscripts.clear();
Sizes.clear();
return false;
}
Subscripts.push_back(Expr);
if (!(DroppedFirstDim && i == 2))
Sizes.push_back(ArrayTy->getNumElements());
Ty = ArrayTy->getElementType();
}
return !Subscripts.empty();
}
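// Illustrative example (a sketch): for a hypothetical
//   %gep = getelementptr [20 x [10 x double]], [20 x [10 x double]]* %A,
//                        i64 0, i64 %i, i64 %j
// the leading zero index is dropped, so Subscripts = {%i, %j} and
// Sizes = {10}; the outermost dimension (20) is deliberately not recorded.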
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//
void ScalarEvolution::SCEVCallbackVH::deleted() {
assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->eraseValueFromMap(getValPtr());
// this now dangles!
}
void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
// Forget all the expressions associated with users of the old value,
// so that future queries will recompute the expressions using the new
// value.
Value *Old = getValPtr();
SmallVector<User *, 16> Worklist(Old->users());
SmallPtrSet<User *, 8> Visited;
while (!Worklist.empty()) {
User *U = Worklist.pop_back_val();
// Deleting the Old value will cause this to dangle. Postpone
// that until everything else is done.
if (U == Old)
continue;
if (!Visited.insert(U).second)
continue;
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->eraseValueFromMap(U);
llvm::append_range(Worklist, U->users());
}
// Delete the Old value.
if (PHINode *PN = dyn_cast<PHINode>(Old))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->eraseValueFromMap(Old);
// this now dangles!
}
ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
: CallbackVH(V), SE(se) {}
//===----------------------------------------------------------------------===//
// ScalarEvolution Class Implementation
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
AssumptionCache &AC, DominatorTree &DT,
LoopInfo &LI)
: F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64),
LoopDispositions(64), BlockDispositions(64) {
// To use guards for proving predicates, we need to scan every instruction in
// relevant basic blocks, and not just terminators. Doing this is a waste of
// time if the IR does not actually contain any calls to
// @llvm.experimental.guard, so do a quick check and remember this beforehand.
//
// This pessimizes the case where a pass that preserves ScalarEvolution wants
// to _add_ guards to the module when there weren't any before, and wants
// ScalarEvolution to optimize based on those guards. For now we prefer to be
// efficient in lieu of being smart in that rather obscure case.
auto *GuardDecl = F.getParent()->getFunction(
Intrinsic::getName(Intrinsic::experimental_guard));
HasGuards = GuardDecl && !GuardDecl->use_empty();
}
ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
: F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
ValueExprMap(std::move(Arg.ValueExprMap)),
PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
PendingPhiRanges(std::move(Arg.PendingPhiRanges)),
PendingMerges(std::move(Arg.PendingMerges)),
MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
std::move(Arg.PredicatedBackedgeTakenCounts)),
ConstantEvolutionLoopExitValue(
std::move(Arg.ConstantEvolutionLoopExitValue)),
ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
LoopDispositions(std::move(Arg.LoopDispositions)),
LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
BlockDispositions(std::move(Arg.BlockDispositions)),
UnsignedRanges(std::move(Arg.UnsignedRanges)),
SignedRanges(std::move(Arg.SignedRanges)),
UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
UniquePreds(std::move(Arg.UniquePreds)),
SCEVAllocator(std::move(Arg.SCEVAllocator)),
LoopUsers(std::move(Arg.LoopUsers)),
PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)),
FirstUnknown(Arg.FirstUnknown) {
Arg.FirstUnknown = nullptr;
}
ScalarEvolution::~ScalarEvolution() {
// Iterate through all the SCEVUnknown instances and call their
// destructors, so that they release their references to their values.
for (SCEVUnknown *U = FirstUnknown; U;) {
SCEVUnknown *Tmp = U;
U = U->Next;
Tmp->~SCEVUnknown();
}
FirstUnknown = nullptr;
ExprValueMap.clear();
ValueExprMap.clear();
HasRecMap.clear();
BackedgeTakenCounts.clear();
PredicatedBackedgeTakenCounts.clear();
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
assert(PendingPhiRanges.empty() && "getRangeRef garbage");
assert(PendingMerges.empty() && "isImpliedViaMerge garbage");
assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
}
static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
const Loop *L) {
// Print all inner loops first
for (Loop *I : *L)
PrintLoopInfo(OS, SE, I);
OS << "Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
if (ExitingBlocks.size() != 1)
OS << "<multiple exits> ";
if (SE->hasLoopInvariantBackedgeTakenCount(L))
OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n";
else
OS << "Unpredictable backedge-taken count.\n";
if (ExitingBlocks.size() > 1)
for (BasicBlock *ExitingBlock : ExitingBlocks) {
OS << " exit count for " << ExitingBlock->getName() << ": "
<< *SE->getExitCount(L, ExitingBlock) << "\n";
}
OS << "Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
if (!isa<SCEVCouldNotCompute>(SE->getConstantMaxBackedgeTakenCount(L))) {
OS << "max backedge-taken count is " << *SE->getConstantMaxBackedgeTakenCount(L);
if (SE->isBackedgeTakenCountMaxOrZero(L))
OS << ", actual taken count either this or zero.";
} else {
OS << "Unpredictable max backedge-taken count. ";
}
OS << "\n"
"Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
SCEVUnionPredicate Pred;
auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred);
if (!isa<SCEVCouldNotCompute>(PBT)) {
OS << "Predicated backedge-taken count is " << *PBT << "\n";
OS << " Predicates:\n";
Pred.print(OS, 4);
} else {
OS << "Unpredictable predicated backedge-taken count. ";
}
OS << "\n";
if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
OS << "Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n";
}
}
static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) {
switch (LD) {
case ScalarEvolution::LoopVariant:
return "Variant";
case ScalarEvolution::LoopInvariant:
return "Invariant";
case ScalarEvolution::LoopComputable:
return "Computable";
}
llvm_unreachable("Unknown ScalarEvolution::LoopDisposition kind!");
}
void ScalarEvolution::print(raw_ostream &OS) const {
// ScalarEvolution's implementation of the print method is to print
// out SCEV values of all instructions that are interesting. Doing
// this potentially causes it to create new SCEV objects though,
// which technically conflicts with the const qualifier. This isn't
// observable from outside the class though, so casting away the
// const isn't dangerous.
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
if (ClassifyExpressions) {
OS << "Classifying expressions for: ";
F.printAsOperand(OS, /*PrintType=*/false);
OS << "\n";
for (Instruction &I : instructions(F))
if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
OS << I << '\n';
OS << " --> ";
const SCEV *SV = SE.getSCEV(&I);
SV->print(OS);
if (!isa<SCEVCouldNotCompute>(SV)) {
OS << " U: ";
SE.getUnsignedRange(SV).print(OS);
OS << " S: ";
SE.getSignedRange(SV).print(OS);
}
const Loop *L = LI.getLoopFor(I.getParent());
const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
if (AtUse != SV) {
OS << " --> ";
AtUse->print(OS);
if (!isa<SCEVCouldNotCompute>(AtUse)) {
OS << " U: ";
SE.getUnsignedRange(AtUse).print(OS);
OS << " S: ";
SE.getSignedRange(AtUse).print(OS);
}
}
if (L) {
OS << "\t\t" "Exits: ";
const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
if (!SE.isLoopInvariant(ExitValue, L)) {
OS << "<<Unknown>>";
} else {
OS << *ExitValue;
}
bool First = true;
for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
if (First) {
OS << "\t\t" "LoopDispositions: { ";
First = false;
} else {
OS << ", ";
}
Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter));
}
for (auto *InnerL : depth_first(L)) {
if (InnerL == L)
continue;
if (First) {
OS << "\t\t" "LoopDispositions: { ";
First = false;
} else {
OS << ", ";
}
InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL));
}
OS << " }";
}
OS << "\n";
}
}
OS << "Determining loop execution counts for: ";
F.printAsOperand(OS, /*PrintType=*/false);
OS << "\n";
for (Loop *I : LI)
PrintLoopInfo(OS, &SE, I);
}
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
auto &Values = LoopDispositions[S];
for (auto &V : Values) {
if (V.getPointer() == L)
return V.getInt();
}
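// Seed the cache with a conservative placeholder before computing: the
// computation below may recursively insert further entries into
// LoopDispositions and invalidate the reference obtained above, which is
// also why the entry is looked up again once the result is known.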
Values.emplace_back(L, LoopVariant);
LoopDisposition D = computeLoopDisposition(S, L);
auto &Values2 = LoopDispositions[S];
for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
if (V.getPointer() == L) {
V.setInt(D);
break;
}
}
return D;
}
ScalarEvolution::LoopDisposition
ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
switch (S->getSCEVType()) {
case scConstant:
return LoopInvariant;
case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
case scAddRecExpr: {
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
// If L is the addrec's loop, it's computable.
if (AR->getLoop() == L)
return LoopComputable;
// Add recurrences are never invariant in the function-body (null loop).
if (!L)
return LoopVariant;
// Everything that is not defined at loop entry is variant.
if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))
return LoopVariant;
assert(!L->contains(AR->getLoop()) && "Containing loop's header does not"
" dominate the contained loop's header?");
// This recurrence is invariant w.r.t. L if AR's loop contains L.
if (AR->getLoop()->contains(L))
return LoopInvariant;
// This recurrence is variant w.r.t. L if any of its operands
// are variant.
for (auto *Op : AR->operands())
if (!isLoopInvariant(Op, L))
return LoopVariant;
// Otherwise it's loop-invariant.
return LoopInvariant;
}
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr: {
bool HasVarying = false;
for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
LoopDisposition D = getLoopDisposition(Op, L);
if (D == LoopVariant)
return LoopVariant;
if (D == LoopComputable)
HasVarying = true;
}
return HasVarying ? LoopComputable : LoopInvariant;
}
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
if (LD == LoopVariant)
return LoopVariant;
LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
if (RD == LoopVariant)
return LoopVariant;
return (LD == LoopInvariant && RD == LoopInvariant) ?
LoopInvariant : LoopComputable;
}
case scUnknown:
// All non-instruction values are loop invariant. All instructions are loop
// invariant if they are not contained in the specified loop.
// Instructions are never considered invariant in the function body
// (null loop) because they are defined within the "loop".
if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
return LoopInvariant;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
}
llvm_unreachable("Unknown SCEV kind!");
}
bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
return getLoopDisposition(S, L) == LoopInvariant;
}
bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
return getLoopDisposition(S, L) == LoopComputable;
}
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
auto &Values = BlockDispositions[S];
for (auto &V : Values) {
if (V.getPointer() == BB)
return V.getInt();
}
Values.emplace_back(BB, DoesNotDominateBlock);
BlockDisposition D = computeBlockDisposition(S, BB);
auto &Values2 = BlockDispositions[S];
for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
if (V.getPointer() == BB) {
V.setInt(D);
break;
}
}
return D;
}
ScalarEvolution::BlockDisposition
ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
switch (S->getSCEVType()) {
case scConstant:
return ProperlyDominatesBlock;
case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
case scAddRecExpr: {
// This uses a "dominates" query instead of a "properly dominates" query
// to test for proper dominance too, because the instruction which
// produces the addrec's value is a PHI, and a PHI effectively properly
// dominates its entire containing block.
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
if (!DT.dominates(AR->getLoop()->getHeader(), BB))
return DoesNotDominateBlock;
// Fall through into SCEVNAryExpr handling.
LLVM_FALLTHROUGH;
}
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
bool Proper = true;
for (const SCEV *NAryOp : NAry->operands()) {
BlockDisposition D = getBlockDisposition(NAryOp, BB);
if (D == DoesNotDominateBlock)
return DoesNotDominateBlock;
if (D == DominatesBlock)
Proper = false;
}
return Proper ? ProperlyDominatesBlock : DominatesBlock;
}
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
BlockDisposition LD = getBlockDisposition(LHS, BB);
if (LD == DoesNotDominateBlock)
return DoesNotDominateBlock;
BlockDisposition RD = getBlockDisposition(RHS, BB);
if (RD == DoesNotDominateBlock)
return DoesNotDominateBlock;
return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
ProperlyDominatesBlock : DominatesBlock;
}
case scUnknown:
if (Instruction *I =
dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
if (I->getParent() == BB)
return DominatesBlock;
if (DT.properlyDominates(I->getParent(), BB))
return ProperlyDominatesBlock;
return DoesNotDominateBlock;
}
return ProperlyDominatesBlock;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
}
llvm_unreachable("Unknown SCEV kind!");
}
bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
return getBlockDisposition(S, BB) >= DominatesBlock;
}
bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
}
bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}
void
ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
ValuesAtScopes.erase(S);
LoopDispositions.erase(S);
BlockDispositions.erase(S);
UnsignedRanges.erase(S);
SignedRanges.erase(S);
ExprValueMap.erase(S);
HasRecMap.erase(S);
MinTrailingZerosCache.erase(S);
for (auto I = PredicatedSCEVRewrites.begin();
I != PredicatedSCEVRewrites.end();) {
std::pair<const SCEV *, const Loop *> Entry = I->first;
if (Entry.first == S)
PredicatedSCEVRewrites.erase(I++);
else
++I;
}
auto RemoveSCEVFromBackedgeMap =
[S](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
for (auto I = Map.begin(), E = Map.end(); I != E;) {
BackedgeTakenInfo &BEInfo = I->second;
if (BEInfo.hasOperand(S))
Map.erase(I++);
else
++I;
}
};
RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
void
ScalarEvolution::getUsedLoops(const SCEV *S,
SmallPtrSetImpl<const Loop *> &LoopsUsed) {
struct FindUsedLoops {
FindUsedLoops(SmallPtrSetImpl<const Loop *> &LoopsUsed)
: LoopsUsed(LoopsUsed) {}
SmallPtrSetImpl<const Loop *> &LoopsUsed;
bool follow(const SCEV *S) {
if (auto *AR = dyn_cast<SCEVAddRecExpr>(S))
LoopsUsed.insert(AR->getLoop());
return true;
}
bool isDone() const { return false; }
};
FindUsedLoops F(LoopsUsed);
SCEVTraversal<FindUsedLoops>(F).visitAll(S);
}
void ScalarEvolution::addToLoopUseLists(const SCEV *S) {
SmallPtrSet<const Loop *, 8> LoopsUsed;
getUsedLoops(S, LoopsUsed);
for (auto *L : LoopsUsed)
LoopUsers[L].push_back(S);
}
void ScalarEvolution::verify() const {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
ScalarEvolution SE2(F, TLI, AC, DT, LI);
SmallVector<Loop *, 8> LoopStack(LI.begin(), LI.end());
// Maps SCEV expressions from one ScalarEvolution "universe" to another.
struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> {
SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {}
const SCEV *visitConstant(const SCEVConstant *Constant) {
return SE.getConstant(Constant->getAPInt());
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
return SE.getUnknown(Expr->getValue());
}
const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
return SE.getCouldNotCompute();
}
};
SCEVMapper SCM(SE2);
while (!LoopStack.empty()) {
auto *L = LoopStack.pop_back_val();
llvm::append_range(LoopStack, *L);
auto *CurBECount = SCM.visit(
const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
auto *NewBECount = SE2.getBackedgeTakenCount(L);
if (CurBECount == SE2.getCouldNotCompute() ||
NewBECount == SE2.getCouldNotCompute()) {
// NB! This situation is legal, but is very suspicious -- whatever pass
// changed the loop to make a trip count go from could not compute to
// computable or vice-versa *should have* invalidated SCEV. However, we
// choose not to assert here (for now) since we don't want false
// positives.
continue;
}
if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) {
// SCEV treats "undef" as an unknown but consistent value (i.e. it does
// not propagate undef aggressively). This means we can (and do) fail
// verification in cases where a transform makes the trip count of a loop
// go from "undef" to "undef+1" (say). The transform is fine, since in
// both cases the loop iterates "undef" times, but SCEV thinks we
// increased the trip count of the loop by 1 incorrectly.
continue;
}
if (SE.getTypeSizeInBits(CurBECount->getType()) >
SE.getTypeSizeInBits(NewBECount->getType()))
NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType());
else if (SE.getTypeSizeInBits(CurBECount->getType()) <
SE.getTypeSizeInBits(NewBECount->getType()))
CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());
const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount);
// Unless VerifySCEVStrict is set, we only compare constant deltas.
if ((VerifySCEVStrict || isa<SCEVConstant>(Delta)) && !Delta->isZero()) {
dbgs() << "Trip Count for " << *L << " Changed!\n";
dbgs() << "Old: " << *CurBECount << "\n";
dbgs() << "New: " << *NewBECount << "\n";
dbgs() << "Delta: " << *Delta << "\n";
std::abort();
}
}
// Collect all valid loops currently in LoopInfo.
SmallPtrSet<Loop *, 32> ValidLoops;
SmallVector<Loop *, 32> Worklist(LI.begin(), LI.end());
while (!Worklist.empty()) {
Loop *L = Worklist.pop_back_val();
if (ValidLoops.contains(L))
continue;
ValidLoops.insert(L);
Worklist.append(L->begin(), L->end());
}
// Check for SCEV expressions referencing invalid/deleted loops.
for (auto &KV : ValueExprMap) {
auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second);
if (!AR)
continue;
assert(ValidLoops.contains(AR->getLoop()) &&
"AddRec references invalid loop");
}
}
bool ScalarEvolution::invalidate(
Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
// Invalidate the ScalarEvolution object whenever it isn't preserved or one
// of its dependencies is invalidated.
auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
Inv.invalidate<AssumptionAnalysis>(F, PA) ||
Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
Inv.invalidate<LoopAnalysis>(F, PA);
}
AnalysisKey ScalarEvolutionAnalysis::Key;
ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
AM.getResult<LoopAnalysis>(F));
}
PreservedAnalyses
ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<ScalarEvolutionAnalysis>(F).verify();
return PreservedAnalyses::all();
}
PreservedAnalyses
ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
// For compatibility with opt's -analyze feature under legacy pass manager
// which was not ported to NPM. This keeps tests using
// update_analyze_test_checks.py working.
OS << "Printing analysis 'Scalar Evolution Analysis' for function '"
<< F.getName() << "':\n";
AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolutionWrapperPass::ID = 0;
ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
}
bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
SE.reset(new ScalarEvolution(
F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
return false;
}
void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
SE->print(OS);
}
void ScalarEvolutionWrapperPass::verifyAnalysis() const {
if (!VerifySCEV)
return;
SE->verify();
}
void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<AssumptionCacheTracker>();
AU.addRequiredTransitive<LoopInfoWrapperPass>();
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
const SCEV *RHS) {
FoldingSetNodeID ID;
assert(LHS->getType() == RHS->getType() &&
"Type mismatch between LHS and RHS");
// Unique this node based on the arguments
ID.AddInteger(SCEVPredicate::P_Equal);
ID.AddPointer(LHS);
ID.AddPointer(RHS);
void *IP = nullptr;
if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
return S;
SCEVEqualPredicate *Eq = new (SCEVAllocator)
SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
UniquePreds.InsertNode(Eq, IP);
return Eq;
}
const SCEVPredicate *ScalarEvolution::getWrapPredicate(
const SCEVAddRecExpr *AR,
SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
FoldingSetNodeID ID;
// Unique this node based on the arguments
ID.AddInteger(SCEVPredicate::P_Wrap);
ID.AddPointer(AR);
ID.AddInteger(AddedFlags);
void *IP = nullptr;
if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
return S;
auto *OF = new (SCEVAllocator)
SCEVWrapPredicate(ID.Intern(SCEVAllocator), AR, AddedFlags);
UniquePreds.InsertNode(OF, IP);
return OF;
}
namespace {
class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
/// Rewrites \p S in the context of a loop L and the SCEV predication
/// infrastructure.
///
/// If \p Pred is non-null, the SCEV expression is rewritten to respect the
/// equivalences present in \p Pred.
///
/// If \p NewPreds is non-null, rewrite is free to add further predicates to
/// \p NewPreds such that the result will be an AddRecExpr.
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
SCEVUnionPredicate *Pred) {
SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
return Rewriter.visit(S);
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (Pred) {
auto ExprPreds = Pred->getPredicatesForExpr(Expr);
for (auto *Pred : ExprPreds)
if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
if (IPred->getLHS() == Expr)
return IPred->getRHS();
}
return convertToAddRecWithPreds(Expr);
}
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
const SCEV *Operand = visit(Expr->getOperand());
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
if (AR && AR->getLoop() == L && AR->isAffine()) {
// This couldn't be folded because the operand didn't have the nuw
// flag. Add the nusw flag as an assumption that we could make.
const SCEV *Step = AR->getStepRecurrence(SE);
Type *Ty = Expr->getType();
if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW))
return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty),
SE.getSignExtendExpr(Step, Ty), L,
AR->getNoWrapFlags());
}
return SE.getZeroExtendExpr(Operand, Expr->getType());
}
const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
const SCEV *Operand = visit(Expr->getOperand());
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
if (AR && AR->getLoop() == L && AR->isAffine()) {
// This couldn't be folded because the operand didn't have the nsw
// flag. Add the nssw flag as an assumption that we could make.
const SCEV *Step = AR->getStepRecurrence(SE);
Type *Ty = Expr->getType();
if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW))
return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty),
SE.getSignExtendExpr(Step, Ty), L,
AR->getNoWrapFlags());
}
return SE.getSignExtendExpr(Operand, Expr->getType());
}
private:
explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
SCEVUnionPredicate *Pred)
: SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}
bool addOverflowAssumption(const SCEVPredicate *P) {
if (!NewPreds) {
// Check if we've already made this assumption.
return Pred && Pred->implies(P);
}
NewPreds->insert(P);
return true;
}
bool addOverflowAssumption(const SCEVAddRecExpr *AR,
SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
auto *A = SE.getWrapPredicate(AR, AddedFlags);
return addOverflowAssumption(A);
}
// If \p Expr represents a PHINode, we try to see if it can be represented
// as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
// to add this predicate as a runtime overflow check, we return the AddRec.
// If \p Expr does not meet these conditions (is not a PHI node, or we
// couldn't create an AddRec for it, or couldn't add the predicate), we just
// return \p Expr.
const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
if (!isa<PHINode>(Expr->getValue()))
return Expr;
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
if (!PredicatedRewrite)
return Expr;
for (auto *P : PredicatedRewrite->second){
// Wrap predicates from outer loops are not supported.
if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) {
auto *AR = cast<const SCEVAddRecExpr>(WP->getExpr());
if (L != AR->getLoop())
return Expr;
}
if (!addOverflowAssumption(P))
return Expr;
}
return PredicatedRewrite->first;
}
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
SCEVUnionPredicate *Pred;
const Loop *L;
};
} // end anonymous namespace
const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
SCEVUnionPredicate &Preds) {
return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
}
const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
const SCEV *S, const Loop *L,
SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
SmallPtrSet<const SCEVPredicate *, 4> TransformPreds;
S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr);
auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
if (!AddRec)
return nullptr;
// Since the transformation was successful, we can now transfer the SCEV
// predicates.
for (auto *P : TransformPreds)
Preds.insert(P);
return AddRec;
}
/// SCEV predicates
SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
SCEVPredicateKind Kind)
: FastID(ID), Kind(Kind) {}
SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
const SCEV *LHS, const SCEV *RHS)
: SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {
assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
assert(LHS != RHS && "LHS and RHS are the same SCEV");
}
bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
if (!Op)
return false;
return Op->LHS == LHS && Op->RHS == RHS;
}
bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }
const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }
void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
}
SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
const SCEVAddRecExpr *AR,
IncrementWrapFlags Flags)
: SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {}
const SCEV *SCEVWrapPredicate::getExpr() const { return AR; }
bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const {
const auto *Op = dyn_cast<SCEVWrapPredicate>(N);
return Op && Op->AR == AR && setFlags(Flags, Op->Flags) == Flags;
}
bool SCEVWrapPredicate::isAlwaysTrue() const {
SCEV::NoWrapFlags ScevFlags = AR->getNoWrapFlags();
IncrementWrapFlags IFlags = Flags;
if (ScalarEvolution::setFlags(ScevFlags, SCEV::FlagNSW) == ScevFlags)
IFlags = clearFlags(IFlags, IncrementNSSW);
return IFlags == IncrementAnyWrap;
}
void SCEVWrapPredicate::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << *getExpr() << " Added Flags: ";
if (SCEVWrapPredicate::IncrementNUSW & getFlags())
OS << "<nusw>";
if (SCEVWrapPredicate::IncrementNSSW & getFlags())
OS << "<nssw>";
OS << "\n";
}
SCEVWrapPredicate::IncrementWrapFlags
SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR,
ScalarEvolution &SE) {
IncrementWrapFlags ImpliedFlags = IncrementAnyWrap;
SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags();
// We can safely transfer the NSW flag as NSSW.
if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags)
ImpliedFlags = IncrementNSSW;
if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) {
// If the increment is positive, the SCEV NUW flag will also imply the
// WrapPredicate NUSW flag.
if (const auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE)))
if (Step->getValue()->getValue().isNonNegative())
ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW);
}
return ImpliedFlags;
}
/// Union predicates don't get cached, so create a dummy set ID for them.
SCEVUnionPredicate::SCEVUnionPredicate()
: SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
bool SCEVUnionPredicate::isAlwaysTrue() const {
return all_of(Preds,
[](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
}
ArrayRef<const SCEVPredicate *>
SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
auto I = SCEVToPreds.find(Expr);
if (I == SCEVToPreds.end())
return ArrayRef<const SCEVPredicate *>();
return I->second;
}
bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N))
return all_of(Set->Preds,
[this](const SCEVPredicate *I) { return this->implies(I); });
auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
if (ScevPredsIt == SCEVToPreds.end())
return false;
auto &SCEVPreds = ScevPredsIt->second;
return any_of(SCEVPreds,
[N](const SCEVPredicate *I) { return I->implies(N); });
}
const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }
void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
for (auto Pred : Preds)
Pred->print(OS, Depth);
}
void SCEVUnionPredicate::add(const SCEVPredicate *N) {
if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
for (auto Pred : Set->Preds)
add(Pred);
return;
}
if (implies(N))
return;
const SCEV *Key = N->getExpr();
assert(Key && "Only SCEVUnionPredicate doesn't have an"
" associated expression!");
SCEVToPreds[Key].push_back(N);
Preds.push_back(N);
}
PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
Loop &L)
: SE(SE), L(L) {}
const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
const SCEV *Expr = SE.getSCEV(V);
RewriteEntry &Entry = RewriteMap[Expr];
// If we already have an entry and the version matches, return it.
if (Entry.second && Generation == Entry.first)
return Entry.second;
// We found an entry but it's stale. Rewrite the stale entry
// according to the current predicate.
if (Entry.second)
Expr = Entry.second;
const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds);
Entry = {Generation, NewSCEV};
return NewSCEV;
}
const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
if (!BackedgeCount) {
SCEVUnionPredicate BackedgePred;
BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred);
addPredicate(BackedgePred);
}
return BackedgeCount;
}
void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
if (Preds.implies(&Pred))
return;
Preds.add(&Pred);
updateGeneration();
}
const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
return Preds;
}
void PredicatedScalarEvolution::updateGeneration() {
// If the generation number wrapped, recompute everything.
if (++Generation == 0) {
for (auto &II : RewriteMap) {
const SCEV *Rewritten = II.second.second;
II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)};
}
}
}
void PredicatedScalarEvolution::setNoOverflow(
Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) {
const SCEV *Expr = getSCEV(V);
const auto *AR = cast<SCEVAddRecExpr>(Expr);
auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE);
// Clear the statically implied flags.
Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags);
addPredicate(*SE.getWrapPredicate(AR, Flags));
auto II = FlagsMap.insert({V, Flags});
if (!II.second)
II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second);
}
bool PredicatedScalarEvolution::hasNoOverflow(
Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) {
const SCEV *Expr = getSCEV(V);
const auto *AR = cast<SCEVAddRecExpr>(Expr);
Flags = SCEVWrapPredicate::clearFlags(
Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE));
auto II = FlagsMap.find(V);
if (II != FlagsMap.end())
Flags = SCEVWrapPredicate::clearFlags(Flags, II->second);
return Flags == SCEVWrapPredicate::IncrementAnyWrap;
}
const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
const SCEV *Expr = this->getSCEV(V);
SmallPtrSet<const SCEVPredicate *, 4> NewPreds;
auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds);
if (!New)
return nullptr;
for (auto *P : NewPreds)
Preds.add(P);
updateGeneration();
RewriteMap[SE.getSCEV(V)] = {Generation, New};
return New;
}
PredicatedScalarEvolution::PredicatedScalarEvolution(
const PredicatedScalarEvolution &Init)
: RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
for (auto I : Init.FlagsMap)
FlagsMap.insert(I);
}
void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
// For each block.
for (auto *BB : L.getBlocks())
for (auto &I : *BB) {
if (!SE.isSCEVable(I.getType()))
continue;
auto *Expr = SE.getSCEV(&I);
auto II = RewriteMap.find(Expr);
if (II == RewriteMap.end())
continue;
// Don't print things that are not interesting.
if (II->second.second == Expr)
continue;
OS.indent(Depth) << "[PSE]" << I << ":\n";
OS.indent(Depth + 2) << *Expr << "\n";
OS.indent(Depth + 2) << "--> " << *II->second.second << "\n";
}
}
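// A minimal usage sketch (assuming a loop L and a value V whose evolution is
// an add-recurrence only under extra run-time no-wrap assumptions), similar
// to how loop transformations drive this interface:
//
//   PredicatedScalarEvolution PSE(SE, *L);
//   if (const SCEVAddRecExpr *AR = PSE.getAsAddRec(V)) {
//     // AR is valid only under the accumulated assumptions; emit them as
//     // run-time checks (e.g. with SCEVExpander) before relying on AR.
//     const SCEVUnionPredicate &Preds = PSE.getUnionPredicate();
//   }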
// Match the mathematical pattern A - (A / B) * B, where A and B can be
// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used
// for URem with constant power-of-2 second operands.
// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is
// 4, A / B becomes X / 8).
bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
const SCEV *&RHS) {
// Try to match 'zext (trunc A to iB) to iY', which is used
// for URem with constant power-of-2 second operands. Make sure the size of
// the operand A matches the size of the whole expression.
if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr))
if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) {
LHS = Trunc->getOperand();
// Bail out if the type of the LHS is larger than the type of the
// expression for now.
if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(Expr->getType()))
return false;
if (LHS->getType() != Expr->getType())
LHS = getZeroExtendExpr(LHS, Expr->getType());
RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1)
<< getTypeSizeInBits(Trunc->getType()));
return true;
}
const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
if (Add == nullptr || Add->getNumOperands() != 2)
return false;
const SCEV *A = Add->getOperand(1);
const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0));
if (Mul == nullptr)
return false;
const auto MatchURemWithDivisor = [&](const SCEV *B) {
// (SomeExpr + (-(SomeExpr / B) * B)).
if (Expr == getURemExpr(A, B)) {
LHS = A;
RHS = B;
return true;
}
return false;
};
// (SomeExpr + (-1 * (SomeExpr / B) * B)).
if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0)))
return MatchURemWithDivisor(Mul->getOperand(1)) ||
MatchURemWithDivisor(Mul->getOperand(2));
// (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)).
if (Mul->getNumOperands() == 2)
return MatchURemWithDivisor(Mul->getOperand(1)) ||
MatchURemWithDivisor(Mul->getOperand(0)) ||
MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) ||
MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0)));
return false;
}
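// For instance, %a urem %b is typically expressed by SCEV as
//   (%a + (-1 * (%a /u %b) * %b))
// and matches with LHS = %a, RHS = %b, while the power-of-two form
//   zext (trunc %a to i3) to i32
// (i.e. %a urem 8 for an i32 %a) matches with LHS = %a and RHS = 8. The
// names %a and %b are for illustration only.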
const SCEV *
ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
// Form an expression for the maximum exit count possible for this loop. We
// merge the max and exact information to approximate a version of
// getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
SmallVector<const SCEV*, 4> ExitCounts;
for (BasicBlock *ExitingBB : ExitingBlocks) {
const SCEV *ExitCount = getExitCount(L, ExitingBB);
if (isa<SCEVCouldNotCompute>(ExitCount))
ExitCount = getExitCount(L, ExitingBB,
ScalarEvolution::ConstantMaximum);
if (!isa<SCEVCouldNotCompute>(ExitCount)) {
assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
"We should only have known counts for exiting blocks that "
"dominate latch!");
ExitCounts.push_back(ExitCount);
}
}
if (ExitCounts.empty())
return getCouldNotCompute();
return getUMinFromMismatchedTypes(ExitCounts);
}
/// This rewriter is similar to SCEVParameterRewriter (it replaces SCEVUnknown
/// components following the Map (Value -> SCEV)), but skips AddRecExpr because
/// we cannot guarantee that the replacement is loop invariant in the loop of
/// the AddRec.
class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
ValueToSCEVMapTy &Map;
public:
SCEVLoopGuardRewriter(ScalarEvolution &SE, ValueToSCEVMapTy &M)
: SCEVRewriteVisitor(SE), Map(M) {}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; }
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
auto I = Map.find(Expr->getValue());
if (I == Map.end())
return Expr;
return I->second;
}
};
const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
const SCEV *RHS, ValueToSCEVMapTy &RewriteMap) {
// If we have LHS == 0, check if LHS is computing a property of some unknown
// SCEV %v that lets us rewrite %v to express that property explicitly.
const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (Predicate == CmpInst::ICMP_EQ && RHSC &&
RHSC->getValue()->isNullValue()) {
// If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
// explicitly express that.
const SCEV *URemLHS = nullptr;
const SCEV *URemRHS = nullptr;
if (matchURem(LHS, URemLHS, URemRHS)) {
if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
Value *V = LHSUnknown->getValue();
auto Multiple =
getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS,
(SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW));
RewriteMap[V] = Multiple;
return;
}
}
}
if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) {
std::swap(LHS, RHS);
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
// Check for a condition of the form (-C1 + X < C2). InstCombine will
// create this form when combining two checks of the form (X u< C2 + C1) and
// (X >=u C1).
auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap]() {
auto *AddExpr = dyn_cast<SCEVAddExpr>(LHS);
if (!AddExpr || AddExpr->getNumOperands() != 2)
return false;
auto *C1 = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
auto *LHSUnknown = dyn_cast<SCEVUnknown>(AddExpr->getOperand(1));
auto *C2 = dyn_cast<SCEVConstant>(RHS);
if (!C1 || !C2 || !LHSUnknown)
return false;
auto ExactRegion =
ConstantRange::makeExactICmpRegion(Predicate, C2->getAPInt())
.sub(C1->getAPInt());
// Bail out, unless we have a non-wrapping, monotonic range.
if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet())
return false;
auto I = RewriteMap.find(LHSUnknown->getValue());
- const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
+ const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown;
RewriteMap[LHSUnknown->getValue()] = getUMaxExpr(
getConstant(ExactRegion.getUnsignedMin()),
getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax())));
return true;
};
if (MatchRangeCheckIdiom())
return;
// For now, limit to conditions that provide information about unknown
// expressions. RHS also cannot contain add recurrences.
auto *LHSUnknown = dyn_cast<SCEVUnknown>(LHS);
if (!LHSUnknown || containsAddRecurrence(RHS))
return;
// Check whether LHS has already been rewritten. In that case we want to
// chain further rewrites onto the already rewritten value.
auto I = RewriteMap.find(LHSUnknown->getValue());
const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
const SCEV *RewrittenRHS = nullptr;
switch (Predicate) {
case CmpInst::ICMP_ULT:
RewrittenRHS =
getUMinExpr(RewrittenLHS, getMinusSCEV(RHS, getOne(RHS->getType())));
break;
case CmpInst::ICMP_SLT:
RewrittenRHS =
getSMinExpr(RewrittenLHS, getMinusSCEV(RHS, getOne(RHS->getType())));
break;
case CmpInst::ICMP_ULE:
RewrittenRHS = getUMinExpr(RewrittenLHS, RHS);
break;
case CmpInst::ICMP_SLE:
RewrittenRHS = getSMinExpr(RewrittenLHS, RHS);
break;
case CmpInst::ICMP_UGT:
RewrittenRHS =
getUMaxExpr(RewrittenLHS, getAddExpr(RHS, getOne(RHS->getType())));
break;
case CmpInst::ICMP_SGT:
RewrittenRHS =
getSMaxExpr(RewrittenLHS, getAddExpr(RHS, getOne(RHS->getType())));
break;
case CmpInst::ICMP_UGE:
RewrittenRHS = getUMaxExpr(RewrittenLHS, RHS);
break;
case CmpInst::ICMP_SGE:
RewrittenRHS = getSMaxExpr(RewrittenLHS, RHS);
break;
case CmpInst::ICMP_EQ:
if (isa<SCEVConstant>(RHS))
RewrittenRHS = RHS;
break;
case CmpInst::ICMP_NE:
if (isa<SCEVConstant>(RHS) &&
cast<SCEVConstant>(RHS)->getValue()->isNullValue())
RewrittenRHS = getUMaxExpr(RewrittenLHS, getOne(RHS->getType()));
break;
default:
break;
}
if (RewrittenRHS)
RewriteMap[LHSUnknown->getValue()] = RewrittenRHS;
};
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
// TODO: share this logic with isLoopEntryGuardedByCond.
ValueToSCEVMapTy RewriteMap;
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
L->getLoopPredecessor(), L->getHeader());
Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
const BranchInst *LoopEntryPredicate =
dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional())
continue;
bool EnterIfTrue = LoopEntryPredicate->getSuccessor(0) == Pair.second;
SmallVector<Value *, 8> Worklist;
SmallPtrSet<Value *, 8> Visited;
Worklist.push_back(LoopEntryPredicate->getCondition());
while (!Worklist.empty()) {
Value *Cond = Worklist.pop_back_val();
if (!Visited.insert(Cond).second)
continue;
if (auto *Cmp = dyn_cast<ICmpInst>(Cond)) {
auto Predicate =
EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate();
CollectCondition(Predicate, getSCEV(Cmp->getOperand(0)),
getSCEV(Cmp->getOperand(1)), RewriteMap);
continue;
}
Value *L, *R;
if (EnterIfTrue ? match(Cond, m_LogicalAnd(m_Value(L), m_Value(R)))
: match(Cond, m_LogicalOr(m_Value(L), m_Value(R)))) {
Worklist.push_back(L);
Worklist.push_back(R);
}
}
}
// Also collect information from assumptions dominating the loop.
for (auto &AssumeVH : AC.assumptions()) {
if (!AssumeVH)
continue;
auto *AssumeI = cast<CallInst>(AssumeVH);
auto *Cmp = dyn_cast<ICmpInst>(AssumeI->getOperand(0));
if (!Cmp || !DT.dominates(AssumeI, L->getHeader()))
continue;
CollectCondition(Cmp->getPredicate(), getSCEV(Cmp->getOperand(0)),
getSCEV(Cmp->getOperand(1)), RewriteMap);
}
if (RewriteMap.empty())
return Expr;
SCEVLoopGuardRewriter Rewriter(*this, RewriteMap);
return Rewriter.visit(Expr);
}
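// Illustrative example (a sketch, %n is for illustration only): if the loop
// is only entered when the guard "%n u< 8" holds, the rewrite map records
// %n -> umin(%n, 7), so applying the guards to an expression such as
// (%n + 1) yields (umin(%n, 7) + 1), which later range and trip-count
// queries can exploit.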
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
index 65828898d392..9053acce60c4 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1,1164 +1,1163 @@
//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "tti"
static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
cl::Hidden,
cl::desc("Recognize reduction patterns."));
namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
explicit NoTTIImpl(const DataLayout &DL)
: TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
} // namespace
bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
// If the loop has irreducible control flow, it cannot be converted to a
// hardware loop.
LoopBlocksRPO RPOT(L);
RPOT.perform(&LI);
if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
return false;
return true;
}
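// Build intrinsic cost attributes from an actual call site: record the
// call's arguments, the callee's declared parameter types, and any
// fast-math flags attached to the call.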
IntrinsicCostAttributes::IntrinsicCostAttributes(
Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost)
: II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
ScalarizationCost(ScalarizationCost) {
if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
FMF = FPMO->getFastMathFlags();
Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
FunctionType *FTy = CI.getCalledFunction()->getFunctionType();
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
}
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<Type *> Tys,
FastMathFlags Flags,
const IntrinsicInst *I,
InstructionCost ScalarCost)
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
ArrayRef<const Value *> Args)
: RetTy(Ty), IID(Id) {
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
ParamTys.reserve(Arguments.size());
for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
ParamTys.push_back(Arguments[Idx]->getType());
}
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys,
FastMathFlags Flags,
const IntrinsicInst *I,
InstructionCost ScalarCost)
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
}
bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
LoopInfo &LI, DominatorTree &DT,
bool ForceNestedLoop,
bool ForceHardwareLoopPHI) {
SmallVector<BasicBlock *, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (BasicBlock *BB : ExitingBlocks) {
// If we pass the updated counter back through a phi, we need to know
// which latch the updated value will be coming from.
if (!L->isLoopLatch(BB)) {
if (ForceHardwareLoopPHI || CounterInReg)
continue;
}
const SCEV *EC = SE.getExitCount(L, BB);
if (isa<SCEVCouldNotCompute>(EC))
continue;
if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
if (ConstEC->getValue()->isZero())
continue;
} else if (!SE.isLoopInvariant(EC, L))
continue;
if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
continue;
// If this exiting block is contained in a nested loop, it is not eligible
// for insertion of the branch-and-decrement since the inner loop would
// end up messing up the value in the CTR.
if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
continue;
// We now have a loop-invariant count of loop iterations (which is not the
// constant zero) for which we know that this loop will not exit via this
// exiting block.
// We need to make sure that this block will run on every loop iteration.
// For this to be true, we must dominate all blocks with backedges. Such
// blocks are in-loop predecessors to the header block.
bool NotAlways = false;
for (BasicBlock *Pred : predecessors(L->getHeader())) {
if (!L->contains(Pred))
continue;
if (!DT.dominates(BB, Pred)) {
NotAlways = true;
break;
}
}
if (NotAlways)
continue;
// Make sure this block ends with a conditional branch.
Instruction *TI = BB->getTerminator();
if (!TI)
continue;
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (!BI->isConditional())
continue;
ExitBranch = BI;
} else
continue;
// Note that this block may not be the loop latch block, even if the loop
// has a latch block.
ExitBlock = BB;
ExitCount = EC;
-
break;
}
if (!ExitBlock)
return false;
return true;
}
TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
: TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
TargetTransformInfo::~TargetTransformInfo() {}
TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
: TTIImpl(std::move(Arg.TTIImpl)) {}
TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
TTIImpl = std::move(RHS.TTIImpl);
return *this;
}
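// The methods below are thin wrappers that forward to the type-erased
// TTIImpl object; most wrappers returning an InstructionCost additionally
// assert that the target did not report a negative cost.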
unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
return TTIImpl->getInliningThresholdMultiplier();
}
unsigned
TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
return TTIImpl->adjustInliningThreshold(CB);
}
int TargetTransformInfo::getInlinerVectorBonusPercent() const {
return TTIImpl->getInlinerVectorBonusPercent();
}
InstructionCost
TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) const {
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
}
unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) const {
return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
InstructionCost
TargetTransformInfo::getUserCost(const User *U,
ArrayRef<const Value *> Operands,
enum TargetCostKind CostKind) const {
InstructionCost Cost = TTIImpl->getUserCost(U, Operands, CostKind);
assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) &&
"TTI should not produce negative costs!");
return Cost;
}
BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
return TTIImpl->getPredictableBranchThreshold();
}
bool TargetTransformInfo::hasBranchDivergence() const {
return TTIImpl->hasBranchDivergence();
}
bool TargetTransformInfo::useGPUDivergenceAnalysis() const {
return TTIImpl->useGPUDivergenceAnalysis();
}
bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
return TTIImpl->isSourceOfDivergence(V);
}
bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
return TTIImpl->isAlwaysUniform(V);
}
unsigned TargetTransformInfo::getFlatAddressSpace() const {
return TTIImpl->getFlatAddressSpace();
}
bool TargetTransformInfo::collectFlatAddressOperands(
SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
}
bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
unsigned ToAS) const {
return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
}
unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
return TTIImpl->getAssumedAddrSpace(V);
}
Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
IntrinsicInst *II, Value *OldV, Value *NewV) const {
return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
bool TargetTransformInfo::isHardwareLoopProfitable(
Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
bool TargetTransformInfo::preferPredicateOverEpilogue(
Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
TargetLibraryInfo *TLI, DominatorTree *DT,
const LoopAccessInfo *LAI) const {
return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
}
bool TargetTransformInfo::emitGetActiveLaneMask() const {
return TTIImpl->emitGetActiveLaneMask();
}
Optional<Instruction *>
TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
return TTIImpl->instCombineIntrinsic(IC, II);
}
Optional<Value *> TargetTransformInfo::simplifyDemandedUseBitsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) const {
return TTIImpl->simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
KnownBitsComputed);
}
Optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const {
return TTIImpl->simplifyDemandedVectorEltsIntrinsic(
IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
SimplifyAndSetOp);
}
void TargetTransformInfo::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
return TTIImpl->getUnrollingPreferences(L, SE, UP);
}
void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const {
return TTIImpl->getPeelingPreferences(L, SE, PP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return TTIImpl->isLegalAddImmediate(Imm);
}
bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
return TTIImpl->isLegalICmpImmediate(Imm);
}
bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
Instruction *I) const {
return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale, AddrSpace, I);
}
bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
return TTIImpl->isLSRCostLess(C1, C2);
}
bool TargetTransformInfo::isNumRegsMajorCostOfLSR() const {
return TTIImpl->isNumRegsMajorCostOfLSR();
}
bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const {
return TTIImpl->isProfitableLSRChainElement(I);
}
bool TargetTransformInfo::canMacroFuseCmp() const {
return TTIImpl->canMacroFuseCmp();
}
bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) const {
return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}
TTI::AddressingModeKind
TargetTransformInfo::getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const {
return TTIImpl->getPreferredAddressingMode(L, SE);
}
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalMaskedStore(DataType, Alignment);
}
bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
}
bool TargetTransformInfo::isLegalNTStore(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalNTStore(DataType, Alignment);
}
bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
return TTIImpl->isLegalNTLoad(DataType, Alignment);
}
bool TargetTransformInfo::isLegalMaskedGather(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalMaskedGather(DataType, Alignment);
}
bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalMaskedScatter(DataType, Alignment);
}
bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
return TTIImpl->isLegalMaskedCompressStore(DataType);
}
bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
return TTIImpl->isLegalMaskedExpandLoad(DataType);
}
bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
return TTIImpl->hasDivRemOp(DataType, IsSigned);
}
bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
unsigned AddrSpace) const {
return TTIImpl->hasVolatileVariant(I, AddrSpace);
}
bool TargetTransformInfo::prefersVectorizedAddressing() const {
return TTIImpl->prefersVectorizedAddressing();
}
InstructionCost TargetTransformInfo::getScalingFactorCost(
Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace) const {
InstructionCost Cost = TTIImpl->getScalingFactorCost(
Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
bool TargetTransformInfo::LSRWithInstrQueries() const {
return TTIImpl->LSRWithInstrQueries();
}
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
return TTIImpl->isTruncateFree(Ty1, Ty2);
}
bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
return TTIImpl->isProfitableToHoist(I);
}
bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }
bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
return TTIImpl->isTypeLegal(Ty);
}
InstructionCost TargetTransformInfo::getRegUsageForType(Type *Ty) const {
return TTIImpl->getRegUsageForType(Ty);
}
bool TargetTransformInfo::shouldBuildLookupTables() const {
return TTIImpl->shouldBuildLookupTables();
}
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(
Constant *C) const {
return TTIImpl->shouldBuildLookupTablesForConstant(C);
}
bool TargetTransformInfo::shouldBuildRelLookupTables() const {
return TTIImpl->shouldBuildRelLookupTables();
}
bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
return TTIImpl->useColdCCForColdCall(F);
}
InstructionCost
TargetTransformInfo::getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract) const {
return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}
InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead(
ArrayRef<const Value *> Args, ArrayRef<Type *> Tys) const {
return TTIImpl->getOperandsScalarizationOverhead(Args, Tys);
}
bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
return TTIImpl->supportsEfficientVectorElementLoadStore();
}
bool TargetTransformInfo::enableAggressiveInterleaving(
bool LoopHasReductions) const {
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
TargetTransformInfo::MemCmpExpansionOptions
TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
return TTIImpl->enableInterleavedAccessVectorization();
}
bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
return TTIImpl->enableMaskedInterleavedAccessVectorization();
}
bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
unsigned AddressSpace,
Align Alignment,
bool *Fast) const {
return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth,
AddressSpace, Alignment, Fast);
}
TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}
bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
return TTIImpl->haveFastSqrt(Ty);
}
bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}
InstructionCost TargetTransformInfo::getFPOpCost(Type *Ty) const {
InstructionCost Cost = TTIImpl->getFPOpCost(Ty);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode,
unsigned Idx,
const APInt &Imm,
Type *Ty) const {
InstructionCost Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost
TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getIntImmCostInst(
unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind, Instruction *Inst) const {
InstructionCost Cost =
TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost
TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost =
TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
return TTIImpl->getNumberOfRegisters(ClassID);
}
unsigned TargetTransformInfo::getRegisterClassForType(bool Vector,
Type *Ty) const {
return TTIImpl->getRegisterClassForType(Vector, Ty);
}
const char *TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
return TTIImpl->getRegisterClassName(ClassID);
}
TypeSize TargetTransformInfo::getRegisterBitWidth(
TargetTransformInfo::RegisterKind K) const {
return TTIImpl->getRegisterBitWidth(K);
}
unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
return TTIImpl->getMinVectorRegisterBitWidth();
}
Optional<unsigned> TargetTransformInfo::getMaxVScale() const {
return TTIImpl->getMaxVScale();
}
bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const {
return TTIImpl->shouldMaximizeVectorBandwidth();
}
ElementCount TargetTransformInfo::getMinimumVF(unsigned ElemWidth,
bool IsScalable) const {
return TTIImpl->getMinimumVF(ElemWidth, IsScalable);
}
unsigned TargetTransformInfo::getMaximumVF(unsigned ElemWidth,
unsigned Opcode) const {
return TTIImpl->getMaximumVF(ElemWidth, Opcode);
}
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
return TTIImpl->shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
unsigned TargetTransformInfo::getCacheLineSize() const {
return TTIImpl->getCacheLineSize();
}
llvm::Optional<unsigned>
TargetTransformInfo::getCacheSize(CacheLevel Level) const {
return TTIImpl->getCacheSize(Level);
}
llvm::Optional<unsigned>
TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const {
return TTIImpl->getCacheAssociativity(Level);
}
unsigned TargetTransformInfo::getPrefetchDistance() const {
return TTIImpl->getPrefetchDistance();
}
unsigned TargetTransformInfo::getMinPrefetchStride(
unsigned NumMemAccesses, unsigned NumStridedMemAccesses,
unsigned NumPrefetches, bool HasCall) const {
return TTIImpl->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
NumPrefetches, HasCall);
}
unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
return TTIImpl->getMaxPrefetchIterationsAhead();
}
bool TargetTransformInfo::enableWritePrefetching() const {
return TTIImpl->enableWritePrefetching();
}
unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(const Value *V,
OperandValueProperties &OpProps) {
OperandValueKind OpInfo = OK_AnyValue;
OpProps = OP_None;
if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getValue().isPowerOf2())
OpProps = OP_PowerOf2;
return OK_UniformConstantValue;
}
// A broadcast shuffle creates a uniform value.
// TODO: Add support for non-zero index broadcasts.
// TODO: Add support for different source vector width.
if (const auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
if (ShuffleInst->isZeroEltSplat())
OpInfo = OK_UniformValue;
const Value *Splat = getSplatValue(V);
// Check for a splat of a constant or for a non-uniform vector of constants
// and check if the constant(s) are all powers of two.
if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
OpInfo = OK_NonUniformConstantValue;
if (Splat) {
OpInfo = OK_UniformConstantValue;
if (auto *CI = dyn_cast<ConstantInt>(Splat))
if (CI->getValue().isPowerOf2())
OpProps = OP_PowerOf2;
} else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
OpProps = OP_PowerOf2;
for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
if (CI->getValue().isPowerOf2())
continue;
OpProps = OP_None;
break;
}
}
}
// Check for a splat of a uniform value. This is not loop aware, so return
// true only for the obviously uniform cases (argument, globalvalue).
if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
OpInfo = OK_UniformValue;
return OpInfo;
}
InstructionCost TargetTransformInfo::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
OperandValueKind Opd1Info, OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args, const Instruction *CxtI) const {
InstructionCost Cost =
TTIImpl->getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getShuffleCost(ShuffleKind Kind,
VectorType *Ty,
ArrayRef<int> Mask,
int Index,
VectorType *SubTp) const {
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
TTI::CastContextHint
TargetTransformInfo::getCastContextHint(const Instruction *I) {
if (!I)
return CastContextHint::None;
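// Helper that classifies the memory operation feeding (or consuming) the
// cast as a plain load/store, a masked load/store intrinsic, or a
// gather/scatter intrinsic, each of which maps to a different context hint.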
auto getLoadStoreKind = [](const Value *V, unsigned LdStOp, unsigned MaskedOp,
unsigned GatScatOp) {
const Instruction *I = dyn_cast<Instruction>(V);
if (!I)
return CastContextHint::None;
if (I->getOpcode() == LdStOp)
return CastContextHint::Normal;
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
if (II->getIntrinsicID() == MaskedOp)
return TTI::CastContextHint::Masked;
if (II->getIntrinsicID() == GatScatOp)
return TTI::CastContextHint::GatherScatter;
}
return TTI::CastContextHint::None;
};
switch (I->getOpcode()) {
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPExt:
return getLoadStoreKind(I->getOperand(0), Instruction::Load,
Intrinsic::masked_load, Intrinsic::masked_gather);
case Instruction::Trunc:
case Instruction::FPTrunc:
if (I->hasOneUse())
return getLoadStoreKind(*I->user_begin(), Instruction::Store,
Intrinsic::masked_store,
Intrinsic::masked_scatter);
break;
default:
return CastContextHint::None;
}
return TTI::CastContextHint::None;
}
InstructionCost TargetTransformInfo::getCastInstrCost(
unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH,
TTI::TargetCostKind CostKind, const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
InstructionCost Cost =
TTIImpl->getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getExtractWithExtendCost(
unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const {
InstructionCost Cost =
TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getCFInstrCost(
unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
InstructionCost Cost = TTIImpl->getCFInstrCost(Opcode, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getCmpSelInstrCost(
unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind, const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
InstructionCost Cost =
TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode,
Type *Val,
unsigned Index) const {
InstructionCost Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
InstructionCost Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment,
AddressSpace, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getMaskedMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment,
AddressSpace, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
InstructionCost Cost = TTIImpl->getGatherScatterOpCost(
Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) const {
InstructionCost Cost = TTIImpl->getInterleavedMemoryOpCost(
Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost
TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost
TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
return TTIImpl->getNumberOfParts(Tp);
}
InstructionCost
TargetTransformInfo::getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
const SCEV *Ptr) const {
InstructionCost Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
InstructionCost Cost = TTIImpl->getMemcpyCost(I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getArithmeticReductionCost(
unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost =
TTIImpl->getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getMinMaxReductionCost(
VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost =
TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getExtendedAddReductionCost(
bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind) const {
return TTIImpl->getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
CostKind);
}
InstructionCost
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}
bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) const {
return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}
unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}
Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
IntrinsicInst *Inst, Type *ExpectedType) const {
return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
Type *TargetTransformInfo::getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const {
return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
DestAddrSpace, SrcAlign, DestAlign);
}
void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const {
TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
SrcAddrSpace, DestAddrSpace,
SrcAlign, DestAlign);
}
bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
return TTIImpl->areInlineCompatible(Caller, Callee);
}
bool TargetTransformInfo::areFunctionArgsABICompatible(
const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const {
return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
}
bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
Type *Ty) const {
return TTIImpl->isIndexedLoadLegal(Mode, Ty);
}
bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
Type *Ty) const {
return TTIImpl->isIndexedStoreLegal(Mode, Ty);
}
unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}
bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
return TTIImpl->isLegalToVectorizeLoad(LI);
}
bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
return TTIImpl->isLegalToVectorizeStore(SI);
}
bool TargetTransformInfo::isLegalToVectorizeLoadChain(
unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const {
return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
bool TargetTransformInfo::isLegalToVectorizeStoreChain(
unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const {
return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
bool TargetTransformInfo::isLegalToVectorizeReduction(
const RecurrenceDescriptor &RdxDesc, ElementCount VF) const {
return TTIImpl->isLegalToVectorizeReduction(RdxDesc, VF);
}
bool TargetTransformInfo::isElementTypeLegalForScalableVector(Type *Ty) const {
return TTIImpl->isElementTypeLegalForScalableVector(Ty);
}
unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {
return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}
unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {
return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
bool TargetTransformInfo::preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const {
return TTIImpl->preferInLoopReduction(Opcode, Ty, Flags);
}
bool TargetTransformInfo::preferPredicatedReductionSelect(
unsigned Opcode, Type *Ty, ReductionFlags Flags) const {
return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags);
}
TargetTransformInfo::VPLegalization
TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
return TTIImpl->getVPLegalizationStrategy(VPI);
}
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II);
}
unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
return TTIImpl->getGISelRematGlobalCost();
}
bool TargetTransformInfo::supportsScalableVectors() const {
return TTIImpl->supportsScalableVectors();
}
bool TargetTransformInfo::hasActiveVectorLength() const {
return TTIImpl->hasActiveVectorLength();
}
InstructionCost
TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
InstructionCost
TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
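// For the opcodes below, throughput is computed via getUserCost with the
// reciprocal-throughput cost kind; any other opcode reports -1 (unknown).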
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
case Instruction::Ret:
case Instruction::PHI:
case Instruction::Br:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::FNeg:
case Instruction::Select:
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::Store:
case Instruction::Load:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
case Instruction::ExtractElement:
case Instruction::InsertElement:
case Instruction::ExtractValue:
case Instruction::ShuffleVector:
case Instruction::Call:
case Instruction::Switch:
return getUserCost(I, CostKind);
default:
// We don't have any information on this instruction.
return -1;
}
}
TargetTransformInfo::Concept::~Concept() {}
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
TargetIRAnalysis::TargetIRAnalysis(
std::function<Result(const Function &)> TTICallback)
: TTICallback(std::move(TTICallback)) {}
TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
FunctionAnalysisManager &) {
return TTICallback(F);
}
AnalysisKey TargetIRAnalysis::Key;
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
return Result(F.getParent()->getDataLayout());
}
// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
"Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;
void TargetTransformInfoWrapperPass::anchor() {}
TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
: ImmutablePass(ID) {
initializeTargetTransformInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
}
TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
TargetIRAnalysis TIRA)
: ImmutablePass(ID), TIRA(std::move(TIRA)) {
initializeTargetTransformInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
}
TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
FunctionAnalysisManager DummyFAM;
TTI = TIRA.run(F, DummyFAM);
return *TTI;
}
ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
return new TargetTransformInfoWrapperPass(std::move(TIRA));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index faa14dca1c3f..7edc44c48bbd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,1560 +1,1561 @@
//===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for constructing a dwarf compile unit.
//
//===----------------------------------------------------------------------===//
#include "DwarfCompileUnit.h"
#include "AddressPool.h"
#include "DwarfExpression.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <iterator>
#include <string>
#include <utility>
using namespace llvm;
static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) {
// According to DWARF Debugging Information Format Version 5,
// 3.1.2 Skeleton Compilation Unit Entries:
// "When generating a split DWARF object file (see Section 7.3.2
// on page 187), the compilation unit in the .debug_info section
// is a "skeleton" compilation unit with the tag DW_TAG_skeleton_unit"
if (DW->getDwarfVersion() >= 5 && Kind == UnitKind::Skeleton)
return dwarf::DW_TAG_skeleton_unit;
return dwarf::DW_TAG_compile_unit;
}
DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU, UnitKind Kind)
: DwarfUnit(GetCompileUnitType(Kind, DW), Node, A, DW, DWU), UniqueID(UID) {
insertDIE(Node, &getUnitDie());
MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
}
/// addLabelAddress - Add a dwarf label attribute data and value using
/// DW_FORM_addr or DW_FORM_GNU_addr_index.
void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label) {
// Don't use the address pool in non-fission or in the skeleton unit itself.
if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5)
return addLocalLabelAddress(Die, Attribute, Label);
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
bool UseAddrOffsetFormOrExpressions =
DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions();
const MCSymbol *Base = nullptr;
if (Label->isInSection() && UseAddrOffsetFormOrExpressions)
Base = DD->getSectionLabel(&Label->getSection());
if (!Base || Base == Label) {
unsigned idx = DD->getAddressPool().getIndex(Label);
addAttribute(Die, Attribute,
DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx
: dwarf::DW_FORM_GNU_addr_index,
DIEInteger(idx));
return;
}
// Could be extended to work with DWARFv4 Split DWARF if that's important for
// someone. In that case DW_FORM_data would be used.
assert(DD->getDwarfVersion() >= 5 &&
"Addr+offset expressions are only valuable when using debug_addr (to "
"reduce relocations) available in DWARFv5 or higher");
if (DD->useAddrOffsetExpressions()) {
auto *Loc = new (DIEValueAllocator) DIEBlock();
addPoolOpAddress(*Loc, Label);
addBlock(Die, Attribute, dwarf::DW_FORM_exprloc, Loc);
} else
addAttribute(Die, Attribute, dwarf::DW_FORM_LLVM_addrx_offset,
new (DIEValueAllocator) DIEAddrOffset(
DD->getAddressPool().getIndex(Base), Label, Base));
}
void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
dwarf::Attribute Attribute,
const MCSymbol *Label) {
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
if (Label)
addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIELabel(Label));
else
addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIEInteger(0));
}
unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
// If we print assembly, we can't separate .file entries according to
// compile units. Thus all files will belong to the default compile unit.
// FIXME: add a better feature test than hasRawTextSupport. Even better,
// extend .file to support this.
unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID();
if (!File)
return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None,
CUID);
return Asm->OutStreamer->emitDwarfFileDirective(
0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
File->getSource(), CUID);
}
DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
// Check for pre-existence.
if (DIE *Die = getDIE(GV))
return Die;
assert(GV);
auto *GVContext = GV->getScope();
const DIType *GTy = GV->getType();
// Construct the context before querying for the existence of the DIE in
// case such construction creates the DIE.
auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr;
DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs)
: getOrCreateContextDIE(GVContext);
// Add to map.
DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV);
DIScope *DeclContext;
if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) {
DeclContext = SDMDecl->getScope();
assert(SDMDecl->isStaticMember() && "Expected static member decl");
assert(GV->isDefinition());
// We need the declaration DIE that is in the static member's class.
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
// If the global variable's type is different from the one in the class
// member type, assume that it's more specific and also emit it.
if (GTy != SDMDecl->getBaseType())
addType(*VariableDIE, GTy);
} else {
DeclContext = GV->getScope();
// Add name and type.
addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
if (GTy)
addType(*VariableDIE, GTy);
// Add scoping info.
if (!GV->isLocalToUnit())
addFlag(*VariableDIE, dwarf::DW_AT_external);
// Add line number info.
addSourceLine(*VariableDIE, GV);
}
if (!GV->isDefinition())
addFlag(*VariableDIE, dwarf::DW_AT_declaration);
else
addGlobalName(GV->getName(), *VariableDIE, DeclContext);
if (uint32_t AlignInBytes = GV->getAlignInBytes())
addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
if (MDTuple *TP = GV->getTemplateParams())
addTemplateParams(*VariableDIE, DINodeArray(TP));
// Add location.
addLocationAttribute(VariableDIE, GV, GlobalExprs);
return VariableDIE;
}
void DwarfCompileUnit::addLocationAttribute(
DIE *VariableDIE, const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
Optional<unsigned> NVPTXAddressSpace;
std::unique_ptr<DIEDwarfExpression> DwarfExpr;
for (const auto &GE : GlobalExprs) {
const GlobalVariable *Global = GE.Var;
const DIExpression *Expr = GE.Expr;
// For compatibility with DWARF 3 and earlier,
// DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) or
// DW_AT_location(DW_OP_consts, X, DW_OP_stack_value) becomes
// DW_AT_const_value(X).
if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) {
addToAccelTable = true;
addConstantValue(
*VariableDIE,
DIExpression::SignedOrUnsignedConstant::UnsignedConstant ==
*Expr->isConstant(),
Expr->getElement(1));
break;
}
// We cannot describe the location of dllimport'd variables: the
// computation of their address requires loads from the IAT.
if (Global && Global->hasDLLImportStorageClass())
continue;
// Nothing to describe without address or constant.
if (!Global && (!Expr || !Expr->isConstant()))
continue;
if (Global && Global->isThreadLocal() &&
!Asm->getObjFileLowering().supportDebugThreadLocalLocation())
continue;
if (!Loc) {
addToAccelTable = true;
Loc = new (DIEValueAllocator) DIELoc;
DwarfExpr = std::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
}
if (Expr) {
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
// correctly interpret address space of the variable address.
// Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
// sequence for the NVPTX + gdb target.
unsigned LocalNVPTXAddressSpace;
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
const DIExpression *NewExpr =
DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
if (NewExpr != Expr) {
Expr = NewExpr;
NVPTXAddressSpace = LocalNVPTXAddressSpace;
}
}
DwarfExpr->addFragmentOffset(Expr);
}
if (Global) {
const MCSymbol *Sym = Asm->getSymbol(Global);
if (Global->isThreadLocal()) {
if (Asm->TM.useEmulatedTLS()) {
// TODO: add debug info for emulated thread local mode.
} else {
// FIXME: Make this work with -gsplit-dwarf.
unsigned PointerSize = Asm->getDataLayout().getPointerSize();
assert((PointerSize == 4 || PointerSize == 8) &&
"Add support for other sizes if necessary");
// Based on GCC's support for TLS:
if (!DD->useSplitDwarf()) {
// 1) Start with a const<N>u of the appropriate pointer size
addUInt(*Loc, dwarf::DW_FORM_data1,
PointerSize == 4 ? dwarf::DW_OP_const4u
: dwarf::DW_OP_const8u);
// 2) containing the (relocated) offset of the TLS variable
// within the module's TLS block.
addExpr(*Loc,
PointerSize == 4 ? dwarf::DW_FORM_data4
: dwarf::DW_FORM_data8,
Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
} else {
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
addUInt(*Loc, dwarf::DW_FORM_udata,
DD->getAddressPool().getIndex(Sym, /* TLS */ true));
}
// 3) followed by an OP to make the debugger do a TLS lookup.
addUInt(*Loc, dwarf::DW_FORM_data1,
DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
: dwarf::DW_OP_form_tls_address);
}
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
}
}
// Global variables attached to symbols are memory locations.
// It would be better if this were unconditional, but malformed input that
// mixes non-fragments and fragments for the same variable is too expensive
// to detect in the verifier.
if (DwarfExpr->isUnknownLocation())
DwarfExpr->setMemoryLocationKind();
DwarfExpr->addExpression(Expr);
}
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
// correctly interpret address space of the variable address.
const unsigned NVPTX_ADDR_global_space = 5;
addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space);
}
if (Loc)
addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
if (DD->useAllLinkageNames())
addLinkageName(*VariableDIE, GV->getLinkageName());
if (addToAccelTable) {
DD->addAccelName(*CUNode, GV->getName(), *VariableDIE);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() &&
DD->useAllLinkageNames())
DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE);
}
}
DIE *DwarfCompileUnit::getOrCreateCommonBlock(
const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
if (DIE *NDie = getDIE(CB))
return NDie;
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB);
StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName();
addString(NDie, dwarf::DW_AT_name, Name);
addGlobalName(Name, NDie, CB->getScope());
if (CB->getFile())
addSourceLine(NDie, CB->getLineNo(), CB->getFile());
if (DIGlobalVariable *V = CB->getDecl())
getCU().addLocationAttribute(&NDie, V, GlobalExprs);
return &NDie;
}
void DwarfCompileUnit::addRange(RangeSpan Range) {
DD->insertSectionLabel(Range.Begin);
bool SameAsPrevCU = this == DD->getPrevCU();
DD->setPrevCU(this);
// If we have no current ranges, just add the range and return. Otherwise,
// check the current section and CU against the previous section and CU we
// emitted into (and that the subprogram was contained within). If these are
// the same, extend our current range; otherwise add this as a new range.
if (CURanges.empty() || !SameAsPrevCU ||
(&CURanges.back().End->getSection() !=
&Range.End->getSection())) {
CURanges.push_back(Range);
return;
}
CURanges.back().End = Range.End;
}
void DwarfCompileUnit::initStmtList() {
if (CUNode->isDebugDirectivesOnly())
return;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (DD->useSectionsAsReferences()) {
LineTableStartSym = TLOF.getDwarfLineSection()->getBeginSymbol();
} else {
LineTableStartSym =
Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());
}
// DW_AT_stmt_list is an offset of line number information for this
// compile unit in debug_line section. For split dwarf this is
// left in the skeleton CU and so not included.
// The line table entries are not always emitted in assembly, so it
// is not okay to use line_table_start here.
addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym,
TLOF.getDwarfLineSection()->getBeginSymbol());
}
void DwarfCompileUnit::applyStmtList(DIE &D) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
addSectionLabel(D, dwarf::DW_AT_stmt_list, LineTableStartSym,
TLOF.getDwarfLineSection()->getBeginSymbol());
}
void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
const MCSymbol *End) {
assert(Begin && "Begin label should not be null!");
assert(End && "End label should not be null!");
assert(Begin->isDefined() && "Invalid starting label");
assert(End->isDefined() && "Invalid end label");
addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
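// DWARF v4 and later allow DW_AT_high_pc to be encoded as an offset from
// DW_AT_low_pc; earlier versions require an address.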
if (DD->getDwarfVersion() < 4)
addLabelAddress(D, dwarf::DW_AT_high_pc, End);
else
addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
}
// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
// and DW_AT_high_pc attributes. If there are global variables in this
// scope then create and insert DIEs for these variables.
DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
SmallVector<RangeSpan, 2> BB_List;
// If basic block sections are on, ranges for each basic block section have
// to be emitted separately.
for (const auto &R : Asm->MBBSectionRanges)
BB_List.push_back({R.second.BeginLabel, R.second.EndLabel});
attachRangesOrLowHighPC(*SPDie, BB_List);
if (DD->useAppleExtensionAttributes() &&
!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
*DD->getCurrentFunction()))
addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
// Only include DW_AT_frame_base in full debug info
if (!includeMinimalInlineScopes()) {
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
TargetFrameLowering::DwarfFrameBase FrameBase =
TFI->getDwarfFrameBase(*Asm->MF);
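// The frame base can be reported by the target as a physical register, as
// the call frame address (CFA), or as a WebAssembly frame base; each kind
// is encoded differently below.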
switch (FrameBase.Kind) {
case TargetFrameLowering::DwarfFrameBase::Register: {
if (Register::isPhysicalRegister(FrameBase.Location.Reg)) {
MachineLocation Location(FrameBase.Location.Reg);
addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
}
break;
}
case TargetFrameLowering::DwarfFrameBase::CFA: {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa);
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
break;
}
case TargetFrameLowering::DwarfFrameBase::WasmFrameBase: {
// FIXME: duplicated from Target/WebAssembly/WebAssembly.h
// don't want to depend on target specific headers in this code?
const unsigned TI_GLOBAL_RELOC = 3;
if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) {
// These need to be relocatable.
assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
auto SPSym = cast<MCSymbolWasm>(
Asm->GetExternalSymbolSymbol("__stack_pointer"));
// FIXME: this repeats what WebAssemblyMCInstLower::
// GetExternalSymbolSymbol does, since if there's no code that
// refers to this symbol, we have to set it here.
SPSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
SPSym->setGlobalType(wasm::WasmGlobalType{
uint8_t(Asm->getSubtargetInfo().getTargetTriple().getArch() ==
Triple::wasm64
? wasm::WASM_TYPE_I64
: wasm::WASM_TYPE_I32),
true});
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
if (!isDwoUnit()) {
addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
DD->addArangeLabel(SymbolCU(this, SPSym));
} else {
// FIXME: when writing dwo, we need to avoid relocations. Probably
// the "right" solution is to treat globals the way func and data
// symbols are (with entries in .debug_addr).
// For now, since we only ever use index 0, this should work as-is.
addUInt(*Loc, dwarf::DW_FORM_data4, FrameBase.Location.WasmLoc.Index);
}
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
} else {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
DIExpressionCursor Cursor({});
DwarfExpr.addWasmLocation(FrameBase.Location.WasmLoc.Kind,
FrameBase.Location.WasmLoc.Index);
DwarfExpr.addExpression(std::move(Cursor));
addBlock(*SPDie, dwarf::DW_AT_frame_base, DwarfExpr.finalize());
}
break;
}
}
}
// Add name to the name table; we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_subprogram nodes.
DD->addSubprogramNames(*CUNode, SP, *SPDie);
return *SPDie;
}
// Construct a DIE for this scope.
void DwarfCompileUnit::constructScopeDIE(
LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) {
if (!Scope || !Scope->getScopeNode())
return;
auto *DS = Scope->getScopeNode();
assert((Scope->getInlinedAt() || !isa<DISubprogram>(DS)) &&
"Only handle inlined subprograms here, use "
"constructSubprogramScopeDIE for non-inlined "
"subprograms");
SmallVector<DIE *, 8> Children;
// We try to create the scope DIE first, then the children DIEs. This
// avoids creating unused children and then removing them later when we find
// out the scope DIE is null.
DIE *ScopeDIE;
if (Scope->getParent() && isa<DISubprogram>(DS)) {
ScopeDIE = constructInlinedScopeDIE(Scope);
if (!ScopeDIE)
return;
// We create children when the scope DIE is not null.
createScopeChildrenDIE(Scope, Children);
} else {
// Early exit when we know the scope DIE is going to be null.
if (DD->isLexicalScopeDIENull(Scope))
return;
bool HasNonScopeChildren = false;
// We create children here when we know the scope DIE is not going to be
// null and the children will be added to the scope DIE.
createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren);
// If there are only other scopes as children, put them directly in the
// parent instead, as this scope would serve no purpose.
if (!HasNonScopeChildren) {
FinalChildren.insert(FinalChildren.end(),
std::make_move_iterator(Children.begin()),
std::make_move_iterator(Children.end()));
return;
}
ScopeDIE = constructLexicalScopeDIE(Scope);
assert(ScopeDIE && "Scope DIE should not be null.");
}
// Add children
for (auto &I : Children)
ScopeDIE->addChild(std::move(I));
FinalChildren.push_back(std::move(ScopeDIE));
}
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
HasRangeLists = true;
// Add the range list to the set of ranges to be emitted.
auto IndexAndList =
(DD->getDwarfVersion() < 5 && Skeleton ? Skeleton->DU : DU)
->addRange(*(Skeleton ? Skeleton : this), std::move(Range));
uint32_t Index = IndexAndList.first;
auto &List = *IndexAndList.second;
// Under fission, ranges are specified by constant offsets relative to the
// CU's DW_AT_GNU_ranges_base.
// FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under
// fission until we support the forms using the .debug_addr section
// (DW_RLE_startx_endx etc.).
if (DD->getDwarfVersion() >= 5)
addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index);
else {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
const MCSymbol *RangeSectionSym =
TLOF.getDwarfRangesSection()->getBeginSymbol();
if (isDwoUnit())
addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.Label,
RangeSectionSym);
else
addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.Label,
RangeSectionSym);
}
}
void DwarfCompileUnit::attachRangesOrLowHighPC(
DIE &Die, SmallVector<RangeSpan, 2> Ranges) {
assert(!Ranges.empty());
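// When the ranges section is not in use, or there is only a single range
// and ranges are not explicitly preferred, describe the extent with
// DW_AT_low_pc/DW_AT_high_pc; otherwise emit a range list.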
if (!DD->useRangesSection() ||
(Ranges.size() == 1 &&
(!DD->alwaysUseRanges() ||
DD->getSectionLabel(&Ranges.front().Begin->getSection()) ==
Ranges.front().Begin))) {
const RangeSpan &Front = Ranges.front();
const RangeSpan &Back = Ranges.back();
attachLowHighPC(Die, Front.Begin, Back.End);
} else
addScopeRangeList(Die, std::move(Ranges));
}
void DwarfCompileUnit::attachRangesOrLowHighPC(
DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) {
SmallVector<RangeSpan, 2> List;
List.reserve(Ranges.size());
for (const InsnRange &R : Ranges) {
auto *BeginLabel = DD->getLabelBeforeInsn(R.first);
auto *EndLabel = DD->getLabelAfterInsn(R.second);
const auto *BeginMBB = R.first->getParent();
const auto *EndMBB = R.second->getParent();
const auto *MBB = BeginMBB;
// Basic block sections allow basic block subsets to be placed in unique
// sections. For each section, the begin and end label must be added to the
// list. If there is more than one range, debug ranges must be used.
// Otherwise, low/high PC can be used.
// FIXME: Debug Info Emission depends on block order and this assumes that
// the order of blocks will be frozen beyond this point.
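// For example, a range that starts in section A and ends in section B yields
// {BeginLabel, A's end label}, one whole-section span for every section in
// between, and {B's begin label, EndLabel}.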
do {
if (MBB->sameSection(EndMBB) || MBB->isEndSection()) {
auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionIDNum()];
List.push_back(
{MBB->sameSection(BeginMBB) ? BeginLabel
: MBBSectionRange.BeginLabel,
MBB->sameSection(EndMBB) ? EndLabel : MBBSectionRange.EndLabel});
}
if (MBB->sameSection(EndMBB))
break;
MBB = MBB->getNextNode();
} while (true);
}
attachRangesOrLowHighPC(Die, std::move(List));
}
// This scope represents inlined body of a function. Construct DIE to
// represent this concrete inlined copy of the function.
DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
assert(Scope->getScopeNode());
auto *DS = Scope->getScopeNode();
auto *InlinedSP = getDISubprogram(DS);
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
DIE *OriginDIE = getAbstractSPDies()[InlinedSP];
assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
// Add the call site information to the DIE.
const DILocation *IA = Scope->getInlinedAt();
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
getOrCreateSourceID(IA->getFile()));
addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
if (IA->getColumn())
addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn());
if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
IA->getDiscriminator());
// Add the name to the name table; we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subroutine nodes.
DD->addSubprogramNames(*CUNode, InlinedSP, *ScopeDIE);
return ScopeDIE;
}
// Construct new DW_TAG_lexical_block for this scope and attach
// DW_AT_low_pc/DW_AT_high_pc labels.
DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
if (DD->isLexicalScopeDIENull(Scope))
return nullptr;
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
return ScopeDIE;
attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
return ScopeDIE;
}
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) {
auto D = constructVariableDIEImpl(DV, Abstract);
DV.setDIE(*D);
return D;
}
DIE *DwarfCompileUnit::constructLabelDIE(DbgLabel &DL,
const LexicalScope &Scope) {
auto LabelDie = DIE::get(DIEValueAllocator, DL.getTag());
insertDIE(DL.getLabel(), LabelDie);
DL.setDIE(*LabelDie);
if (Scope.isAbstractScope())
applyLabelAttributes(DL, *LabelDie);
return LabelDie;
}
DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
bool Abstract) {
// Define variable debug information entry.
auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag());
insertDIE(DV.getVariable(), VariableDie);
if (Abstract) {
applyVariableAttributes(DV, *VariableDie);
return VariableDie;
}
// Add variable address.
unsigned Index = DV.getDebugLocListIndex();
if (Index != ~0U) {
addLocationList(*VariableDie, dwarf::DW_AT_location, Index);
auto TagOffset = DV.getDebugLocListTagOffset();
if (TagOffset)
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*TagOffset);
return VariableDie;
}
// Check if variable has a single location description.
if (auto *DVal = DV.getValueLoc()) {
if (!DVal->isVariadic()) {
const DbgValueLocEntry *Entry = DVal->getLocEntries().begin();
if (Entry->isLocation()) {
addVariableAddress(DV, *VariableDie, Entry->getLoc());
} else if (Entry->isInt()) {
auto *Expr = DV.getSingleExpression();
if (Expr && Expr->getNumElements()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
// If there is an expression, emit raw unsigned bytes.
DwarfExpr.addFragmentOffset(Expr);
DwarfExpr.addUnsignedConstant(Entry->getInt());
DwarfExpr.addExpression(Expr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset,
dwarf::DW_FORM_data1, *DwarfExpr.TagOffset);
} else
addConstantValue(*VariableDie, Entry->getInt(), DV.getType());
} else if (Entry->isConstantFP()) {
addConstantFPValue(*VariableDie, Entry->getConstantFP());
} else if (Entry->isConstantInt()) {
addConstantValue(*VariableDie, Entry->getConstantInt(), DV.getType());
} else if (Entry->isTargetIndexLocation()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
const DIBasicType *BT = dyn_cast<DIBasicType>(
static_cast<const Metadata *>(DV.getVariable()->getType()));
DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
}
return VariableDie;
}
// If any of the location entries are registers with the value 0, then the
// location is undefined.
if (any_of(DVal->getLocEntries(), [](const DbgValueLocEntry &Entry) {
return Entry.isLocation() && !Entry.getLoc().getReg();
}))
return VariableDie;
const DIExpression *Expr = DV.getSingleExpression();
assert(Expr && "Variadic Debug Value must have an Expression.");
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
DwarfExpr.addFragmentOffset(Expr);
DIExpressionCursor Cursor(Expr);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
auto AddEntry = [&](const DbgValueLocEntry &Entry,
DIExpressionCursor &Cursor) {
if (Entry.isLocation()) {
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor,
Entry.getLoc().getReg()))
return false;
} else if (Entry.isInt()) {
// If there is an expression, emit raw unsigned bytes.
DwarfExpr.addUnsignedConstant(Entry.getInt());
} else if (Entry.isConstantFP()) {
APInt RawBytes = Entry.getConstantFP()->getValueAPF().bitcastToAPInt();
DwarfExpr.addUnsignedConstant(RawBytes);
} else if (Entry.isConstantInt()) {
APInt RawBytes = Entry.getConstantInt()->getValue();
DwarfExpr.addUnsignedConstant(RawBytes);
} else if (Entry.isTargetIndexLocation()) {
TargetIndexLocation Loc = Entry.getTargetIndexLocation();
// TODO: TargetIndexLocation is target-independent. Currently only the
// WebAssembly-specific encoding is supported.
assert(Asm->TM.getTargetTriple().isWasm());
DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
} else {
llvm_unreachable("Unsupported Entry type.");
}
return true;
};
DwarfExpr.addExpression(
std::move(Cursor),
[&](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
return AddEntry(DVal->getLocEntries()[Idx], Cursor);
});
// Now attach the location information to the DIE.
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*DwarfExpr.TagOffset);
return VariableDie;
}
// .. else use frame index.
if (!DV.hasFrameIndexExprs())
return VariableDie;
Optional<unsigned> NVPTXAddressSpace;
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
for (auto &Fragment : DV.getFrameIndexExprs()) {
Register FrameReg;
const DIExpression *Expr = Fragment.Expr;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
StackOffset Offset =
TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
DwarfExpr.addFragmentOffset(Expr);
auto *TRI = Asm->MF->getSubtarget().getRegisterInfo();
SmallVector<uint64_t, 8> Ops;
TRI->getOffsetOpcodes(Offset, Ops);
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
// correctly interpret the address space of the variable address.
// Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
// sequence for the NVPTX + gdb target.
unsigned LocalNVPTXAddressSpace;
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
const DIExpression *NewExpr =
DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
if (NewExpr != Expr) {
Expr = NewExpr;
NVPTXAddressSpace = LocalNVPTXAddressSpace;
}
}
if (Expr)
Ops.append(Expr->elements_begin(), Expr->elements_end());
DIExpressionCursor Cursor(Ops);
DwarfExpr.setMemoryLocationKind();
if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol())
addOpAddress(*Loc, FrameSymbol);
else
DwarfExpr.addMachineRegExpression(
*Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
DwarfExpr.addExpression(std::move(Cursor));
}
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
// correctly interpret the address space of the variable address.
const unsigned NVPTX_ADDR_local_space = 6;
addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_local_space);
}
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*DwarfExpr.TagOffset);
return VariableDie;
}
DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
const LexicalScope &Scope,
DIE *&ObjectPointer) {
auto Var = constructVariableDIE(DV, Scope.isAbstractScope());
if (DV.isObjectPointer())
ObjectPointer = Var;
return Var;
}
/// Return all DIVariables that appear in the count, bound, stride, or
/// data-location expressions of the given variable's array type.
static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
SmallVector<const DIVariable *, 2> Result;
auto *Array = dyn_cast<DICompositeType>(Var->getType());
if (!Array || Array->getTag() != dwarf::DW_TAG_array_type)
return Result;
if (auto *DLVar = Array->getDataLocation())
Result.push_back(DLVar);
if (auto *AsVar = Array->getAssociated())
Result.push_back(AsVar);
if (auto *AlVar = Array->getAllocated())
Result.push_back(AlVar);
for (auto *El : Array->getElements()) {
if (auto *Subrange = dyn_cast<DISubrange>(El)) {
if (auto Count = Subrange->getCount())
if (auto *Dependency = Count.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto LB = Subrange->getLowerBound())
if (auto *Dependency = LB.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto UB = Subrange->getUpperBound())
if (auto *Dependency = UB.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto ST = Subrange->getStride())
if (auto *Dependency = ST.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
} else if (auto *GenericSubrange = dyn_cast<DIGenericSubrange>(El)) {
if (auto Count = GenericSubrange->getCount())
if (auto *Dependency = Count.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto LB = GenericSubrange->getLowerBound())
if (auto *Dependency = LB.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto UB = GenericSubrange->getUpperBound())
if (auto *Dependency = UB.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto ST = GenericSubrange->getStride())
if (auto *Dependency = ST.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
}
}
return Result;
}
/// Sort local variables so that variables appearing inside of helper
/// expressions come first.
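/// For example, if a variable-length array's DISubrange count refers to a
/// local variable holding the element count, that variable is ordered before
/// the array so its DIE is available when the array type is constructed.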
static SmallVector<DbgVariable *, 8>
sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
SmallVector<DbgVariable *, 8> Result;
SmallVector<PointerIntPair<DbgVariable *, 1>, 8> WorkList;
// Map back from a DIVariable to its containing DbgVariable.
SmallDenseMap<const DILocalVariable *, DbgVariable *> DbgVar;
// Set of DbgVariables in Result.
SmallDenseSet<DbgVariable *, 8> Visited;
// For cycle detection.
SmallDenseSet<DbgVariable *, 8> Visiting;
// Initialize the worklist and the DIVariable lookup table.
for (auto Var : reverse(Input)) {
DbgVar.insert({Var->getVariable(), Var});
WorkList.push_back({Var, 0});
}
// Perform a stable topological sort by doing a DFS.
while (!WorkList.empty()) {
auto Item = WorkList.back();
DbgVariable *Var = Item.getPointer();
bool visitedAllDependencies = Item.getInt();
WorkList.pop_back();
// Dependency is in a different lexical scope or a global.
if (!Var)
continue;
// Already handled.
if (Visited.count(Var))
continue;
// Add to Result if all dependencies are visited.
if (visitedAllDependencies) {
Visited.insert(Var);
Result.push_back(Var);
continue;
}
// Detect cycles.
auto Res = Visiting.insert(Var);
if (!Res.second) {
assert(false && "dependency cycle in local variables");
return Result;
}
// Push dependencies and this node onto the worklist, so that this node is
// visited again after all of its dependencies are handled.
WorkList.push_back({Var, 1});
for (auto *Dependency : dependencies(Var)) {
auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency);
WorkList.push_back({DbgVar[Dep], 0});
}
}
return Result;
}
DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren) {
assert(Children.empty());
DIE *ObjectPointer = nullptr;
// Emit function arguments (order is significant).
auto Vars = DU->getScopeVariables().lookup(Scope);
for (auto &DV : Vars.Args)
Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer));
// Emit local variables.
auto Locals = sortLocalVars(Vars.Locals);
for (DbgVariable *DV : Locals)
Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
// Skip imported directives in gmlt-like data.
if (!includeMinimalInlineScopes()) {
// There is no need to emit an empty lexical block DIE.
for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
Children.push_back(
constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
}
if (HasNonScopeChildren)
*HasNonScopeChildren = !Children.empty();
for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
Children.push_back(constructLabelDIE(*DL, *Scope));
for (LexicalScope *LS : Scope->getChildren())
constructScopeDIE(LS, Children);
return ObjectPointer;
}
DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
if (Scope) {
assert(!Scope->getInlinedAt());
assert(!Scope->isAbstractScope());
// Collect lexical scope children first.
// ObjectPointer might be a local (non-argument) variable if it's a
// block's synthetic this pointer.
if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
// If this is a variadic function, add an unspecified parameter.
DITypeRefArray FnArgs = Sub->getType()->getTypeArray();
// If we have a single element of null, it is a function that returns void.
// If we have more than one element and the last one is null, it is a
// variadic function.
if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] &&
!includeMinimalInlineScopes())
ScopeDIE.addChild(
DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters));
return ScopeDIE;
}
DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
DIE &ScopeDIE) {
// We create children when the scope DIE is not null.
SmallVector<DIE *, 8> Children;
DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children);
// Add children
for (auto &I : Children)
ScopeDIE.addChild(std::move(I));
return ObjectPointer;
}
void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
LexicalScope *Scope) {
DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()];
if (AbsDef)
return;
auto *SP = cast<DISubprogram>(Scope->getScopeNode());
DIE *ContextDIE;
DwarfCompileUnit *ContextCU = this;
if (includeMinimalInlineScopes())
ContextDIE = &getUnitDie();
// Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
// the important distinction that the debug node is not associated with the
// DIE (since the debug node will be associated with the concrete DIE, if
// any). It could be refactored to some common utility function.
else if (auto *SPDecl = SP->getDeclaration()) {
ContextDIE = &getUnitDie();
getOrCreateSubprogramDIE(SPDecl);
} else {
ContextDIE = getOrCreateContextDIE(SP->getScope());
// The scope may be shared with a subprogram that has already been
// constructed in another CU, in which case we need to construct this
// subprogram in the same CU.
ContextCU = DD->lookupCU(ContextDIE->getUnitDie());
}
// Passing null as the associated node because the abstract definition
// shouldn't be found by lookup.
AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef);
if (!ContextCU->includeMinimalInlineScopes())
ContextCU->addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef))
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
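// When emitting DWARF 4 and not tuning for LLDB, the standardized DWARF 5
// call-site constructs used below are mapped onto the pre-standard GNU
// extensions (DW_TAG_GNU_call_site, DW_AT_GNU_call_site_value, etc.) that
// those consumers understand.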
bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const {
return DD->getDwarfVersion() == 4 && !DD->tuneForLLDB();
}
dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const {
if (!useGNUAnalogForDwarf5Feature())
return Tag;
switch (Tag) {
case dwarf::DW_TAG_call_site:
return dwarf::DW_TAG_GNU_call_site;
case dwarf::DW_TAG_call_site_parameter:
return dwarf::DW_TAG_GNU_call_site_parameter;
default:
llvm_unreachable("DWARF5 tag with no GNU analog");
}
}
dwarf::Attribute
DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const {
if (!useGNUAnalogForDwarf5Feature())
return Attr;
switch (Attr) {
case dwarf::DW_AT_call_all_calls:
return dwarf::DW_AT_GNU_all_call_sites;
case dwarf::DW_AT_call_target:
return dwarf::DW_AT_GNU_call_site_target;
case dwarf::DW_AT_call_origin:
return dwarf::DW_AT_abstract_origin;
case dwarf::DW_AT_call_return_pc:
return dwarf::DW_AT_low_pc;
case dwarf::DW_AT_call_value:
return dwarf::DW_AT_GNU_call_site_value;
case dwarf::DW_AT_call_tail_call:
return dwarf::DW_AT_GNU_tail_call;
default:
llvm_unreachable("DWARF5 attribute with no GNU analog");
}
}
dwarf::LocationAtom
DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
if (!useGNUAnalogForDwarf5Feature())
return Loc;
switch (Loc) {
case dwarf::DW_OP_entry_value:
return dwarf::DW_OP_GNU_entry_value;
default:
llvm_unreachable("DWARF5 location atom with no GNU analog");
}
}
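// Illustrative shape of the entry built below for a direct, non-tail call in
// DWARF 5 mode (forms and GNU analogs elided):
//   DW_TAG_call_site
//     DW_AT_call_origin    (reference to the callee's DW_TAG_subprogram)
//     DW_AT_call_return_pc (address of the instruction after the call)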
DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
- DIE *CalleeDIE,
+ const DISubprogram *CalleeSP,
bool IsTail,
const MCSymbol *PCAddr,
const MCSymbol *CallAddr,
unsigned CallReg) {
// Insert a call site entry DIE within ScopeDIE.
DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site),
ScopeDIE, nullptr);
if (CallReg) {
// Indirect call.
addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
MachineLocation(CallReg));
} else {
- assert(CalleeDIE && "No DIE for call site entry origin");
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
*CalleeDIE);
}
if (IsTail) {
// Attach DW_AT_call_tail_call to tail calls for standards compliance.
addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call));
// Attach the address of the branch instruction to allow the debugger to
// show where the tail call occurred. This attribute has no GNU analog.
//
// GDB works backwards from non-standard usage of DW_AT_low_pc (in DWARF4
// mode -- equivalently, in DWARF5 mode, DW_AT_call_return_pc) at tail-call
// site entries to figure out the PC of tail-calling branch instructions.
// This means it doesn't need the compiler to emit DW_AT_call_pc, so we
// don't emit it here.
//
// There's no need to tie non-GDB debuggers to this non-standardness, as it
// adds unnecessary complexity to the debugger. For non-GDB debuggers, emit
// the standard DW_AT_call_pc info.
if (!useGNUAnalogForDwarf5Feature())
addLabelAddress(CallSiteDIE, dwarf::DW_AT_call_pc, CallAddr);
}
// Attach the return PC to allow the debugger to disambiguate call paths
// from one function to another.
//
// The return PC is only really needed when the call /isn't/ a tail call, but
// GDB expects it in DWARF4 mode, even for tail calls (see the comment above
// the DW_AT_call_pc emission logic for an explanation).
if (!IsTail || useGNUAnalogForDwarf5Feature()) {
assert(PCAddr && "Missing return PC information for a call");
addLabelAddress(CallSiteDIE,
getDwarf5OrGNUAttr(dwarf::DW_AT_call_return_pc), PCAddr);
}
return CallSiteDIE;
}
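// Each parameter entry built below has, roughly, the shape:
//   DW_TAG_call_site_parameter
//     DW_AT_location   (the register the parameter is passed in)
//     DW_AT_call_value (a DWARF expression computing the value at the call)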
void DwarfCompileUnit::constructCallSiteParmEntryDIEs(
DIE &CallSiteDIE, SmallVector<DbgCallSiteParam, 4> &Params) {
for (const auto &Param : Params) {
unsigned Register = Param.getRegister();
auto CallSiteDieParam =
DIE::get(DIEValueAllocator,
getDwarf5OrGNUTag(dwarf::DW_TAG_call_site_parameter));
insertDIE(CallSiteDieParam);
addAddress(*CallSiteDieParam, dwarf::DW_AT_location,
MachineLocation(Register));
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
DwarfExpr.setCallSiteParamValueFlag();
DwarfDebug::emitDebugLocValue(*Asm, nullptr, Param.getValue(), DwarfExpr);
addBlock(*CallSiteDieParam, getDwarf5OrGNUAttr(dwarf::DW_AT_call_value),
DwarfExpr.finalize());
CallSiteDIE.addChild(CallSiteDieParam);
}
}
DIE *DwarfCompileUnit::constructImportedEntityDIE(
const DIImportedEntity *Module) {
DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
insertDIE(Module, IMDie);
DIE *EntityDie;
auto *Entity = Module->getEntity();
if (auto *NS = dyn_cast<DINamespace>(Entity))
EntityDie = getOrCreateNameSpace(NS);
else if (auto *M = dyn_cast<DIModule>(Entity))
EntityDie = getOrCreateModule(M);
else if (auto *SP = dyn_cast<DISubprogram>(Entity))
EntityDie = getOrCreateSubprogramDIE(SP);
else if (auto *T = dyn_cast<DIType>(Entity))
EntityDie = getOrCreateTypeDIE(T);
else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
EntityDie = getOrCreateGlobalVariableDIE(GV, {});
else
EntityDie = getDIE(Entity);
assert(EntityDie);
addSourceLine(*IMDie, Module->getLine(), Module->getFile());
addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
StringRef Name = Module->getName();
if (!Name.empty())
addString(*IMDie, dwarf::DW_AT_name, Name);
return IMDie;
}
void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
DIE *D = getDIE(SP);
if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) {
if (D)
// If this subprogram has an abstract definition, reference that
addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
} else {
assert(D || includeMinimalInlineScopes());
if (D)
// And attach the attributes
applySubprogramAttributesToDefinition(SP, *D);
}
}
void DwarfCompileUnit::finishEntityDefinition(const DbgEntity *Entity) {
DbgEntity *AbsEntity = getExistingAbstractEntity(Entity->getEntity());
auto *Die = Entity->getDIE();
/// Label may be used to generate DW_AT_low_pc, so put it outside
/// the if/else block.
const DbgLabel *Label = nullptr;
if (AbsEntity && AbsEntity->getDIE()) {
addDIEEntry(*Die, dwarf::DW_AT_abstract_origin, *AbsEntity->getDIE());
Label = dyn_cast<const DbgLabel>(Entity);
} else {
if (const DbgVariable *Var = dyn_cast<const DbgVariable>(Entity))
applyVariableAttributes(*Var, *Die);
else if ((Label = dyn_cast<const DbgLabel>(Entity)))
applyLabelAttributes(*Label, *Die);
else
llvm_unreachable("DbgEntity must be DbgVariable or DbgLabel.");
}
if (Label)
if (const auto *Sym = Label->getSymbol())
addLabelAddress(*Die, dwarf::DW_AT_low_pc, Sym);
}
DbgEntity *DwarfCompileUnit::getExistingAbstractEntity(const DINode *Node) {
auto &AbstractEntities = getAbstractEntities();
auto I = AbstractEntities.find(Node);
if (I != AbstractEntities.end())
return I->second.get();
return nullptr;
}
void DwarfCompileUnit::createAbstractEntity(const DINode *Node,
LexicalScope *Scope) {
assert(Scope && Scope->isAbstractScope());
auto &Entity = getAbstractEntities()[Node];
if (isa<const DILocalVariable>(Node)) {
Entity = std::make_unique<DbgVariable>(
cast<const DILocalVariable>(Node), nullptr /* IA */);
DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get()));
} else if (isa<const DILabel>(Node)) {
Entity = std::make_unique<DbgLabel>(
cast<const DILabel>(Node), nullptr /* IA */);
DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get()));
}
}
void DwarfCompileUnit::emitHeader(bool UseOffsets) {
// Don't bother labeling the .dwo unit, as its offset isn't used.
if (!Skeleton && !DD->useSectionsAsReferences()) {
LabelBegin = Asm->createTempSymbol("cu_begin");
Asm->OutStreamer->emitLabel(LabelBegin);
}
dwarf::UnitType UT = Skeleton ? dwarf::DW_UT_split_compile
: DD->useSplitDwarf() ? dwarf::DW_UT_skeleton
: dwarf::DW_UT_compile;
DwarfUnit::emitCommonHeader(UseOffsets, UT);
if (DD->getDwarfVersion() >= 5 && UT != dwarf::DW_UT_compile)
Asm->emitInt64(getDWOId());
}
bool DwarfCompileUnit::hasDwarfPubSections() const {
switch (CUNode->getNameTableKind()) {
case DICompileUnit::DebugNameTableKind::None:
return false;
// Opting in to GNU Pubnames/types overrides the default to ensure these are
// generated for things like Gold's gdb_index generation.
case DICompileUnit::DebugNameTableKind::GNU:
return true;
case DICompileUnit::DebugNameTableKind::Default:
return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
!CUNode->isDebugDirectivesOnly() &&
DD->getAccelTableKind() != AccelTableKind::Apple &&
DD->getDwarfVersion() < 5;
}
llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum");
}
/// addGlobalName - Add a new global name to the compile unit.
void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) {
if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Name.str();
GlobalNames[FullName] = &Die;
}
void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
const DIScope *Context) {
if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Name.str();
// Insert, allowing the entry to remain as-is if it's already present.
// This way the CU-level type DIE is preferred over the "can't describe this
// type as a unit offset because it's not really in the CU at all, it's only
// in a type unit" case.
GlobalNames.insert(std::make_pair(std::move(FullName), &getUnitDie()));
}
/// Add a new global type to the unit.
void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {
if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Ty->getName().str();
GlobalTypes[FullName] = &Die;
}
void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
const DIScope *Context) {
if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Ty->getName().str();
// Insert, allowing the entry to remain as-is if it's already present.
// This way the CU-level type DIE is preferred over the "can't describe this
// type as a unit offset because it's not really in the CU at all, it's only
// in a type unit" case.
GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie()));
}
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
MachineLocation Location) {
if (DV.hasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
else
addAddress(Die, dwarf::DW_AT_location, Location);
}
/// Add an address attribute to a die based on the location provided.
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
DIExpressionCursor Cursor({});
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
addBlock(Die, Attribute, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*DwarfExpr.TagOffset);
}
/// Start with the address based on the location provided, and generate the
/// DWARF information necessary to find the actual variable given the extra
/// address information encoded in the DbgVariable. Add the DWARF information
/// to the die.
void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
const DIExpression *DIExpr = DV.getSingleExpression();
DwarfExpr.addFragmentOffset(DIExpr);
DwarfExpr.setLocation(Location, DIExpr);
DIExpressionCursor Cursor(DIExpr);
if (DIExpr->isEntryValue())
DwarfExpr.beginEntryValueExpression(Cursor);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
addBlock(Die, Attribute, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*DwarfExpr.TagOffset);
}
/// Add a Dwarf loclistptr attribute data and value.
void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
unsigned Index) {
dwarf::Form Form = (DD->getDwarfVersion() >= 5)
? dwarf::DW_FORM_loclistx
: DD->getDwarfSectionOffsetForm();
addAttribute(Die, Attribute, Form, DIELocList(Index));
}
void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
DIE &VariableDie) {
StringRef Name = Var.getName();
if (!Name.empty())
addString(VariableDie, dwarf::DW_AT_name, Name);
const auto *DIVar = Var.getVariable();
if (DIVar)
if (uint32_t AlignInBytes = DIVar->getAlignInBytes())
addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
addSourceLine(VariableDie, DIVar);
addType(VariableDie, Var.getType());
if (Var.isArtificial())
addFlag(VariableDie, dwarf::DW_AT_artificial);
}
void DwarfCompileUnit::applyLabelAttributes(const DbgLabel &Label,
DIE &LabelDie) {
StringRef Name = Label.getName();
if (!Name.empty())
addString(LabelDie, dwarf::DW_AT_name, Name);
const auto *DILabel = Label.getLabel();
addSourceLine(LabelDie, DILabel);
}
/// Add a Dwarf expression attribute data and value.
void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
const MCExpr *Expr) {
addAttribute(Die, (dwarf::Attribute)0, Form, DIEExpr(Expr));
}
void DwarfCompileUnit::applySubprogramAttributesToDefinition(
const DISubprogram *SP, DIE &SPDie) {
auto *SPDecl = SP->getDeclaration();
auto *Context = SPDecl ? SPDecl->getScope() : SP->getScope();
applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
addGlobalName(SP->getName(), SPDie, Context);
}
bool DwarfCompileUnit::isDwoUnit() const {
return DD->useSplitDwarf() && Skeleton;
}
void DwarfCompileUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
constructTypeDIE(D, CTy);
}
bool DwarfCompileUnit::includeMinimalInlineScopes() const {
return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
void DwarfCompileUnit::addAddrTableBase() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
MCSymbol *Label = DD->getAddressPool().getLabel();
addSectionLabel(getUnitDie(),
DD->getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base
: dwarf::DW_AT_GNU_addr_base,
Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
}
void DwarfCompileUnit::addBaseTypeRef(DIEValueList &Die, int64_t Idx) {
addAttribute(Die, (dwarf::Attribute)0, dwarf::DW_FORM_udata,
new (DIEValueAllocator) DIEBaseTypeRef(this, Idx));
}
void DwarfCompileUnit::createBaseTypeDIEs() {
// Insert the base_type DIEs directly after the CU so that their offsets will
// fit in the fixed-size ULEB128 used inside the location expressions.
// Maintain order by iterating backwards and inserting to the front of the CU
// child list.
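// For example, a 32-bit signed base type added here is named
// "DW_ATE_signed_32".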
for (auto &Btr : reverse(ExprRefedBaseTypes)) {
DIE &Die = getUnitDie().addChildFront(
DIE::get(DIEValueAllocator, dwarf::DW_TAG_base_type));
SmallString<32> Str;
addString(Die, dwarf::DW_AT_name,
Twine(dwarf::AttributeEncodingString(Btr.Encoding) +
"_" + Twine(Btr.BitSize)).toStringRef(Str));
addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
addUInt(Die, dwarf::DW_AT_byte_size, None, Btr.BitSize / 8);
Btr.Die = &Die;
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 6d8186a5ee2b..6e9261087686 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -1,372 +1,370 @@
//===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing dwarf compile unit.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DwarfDebug.h"
#include "DwarfUnit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
namespace llvm {
class AsmPrinter;
class DIE;
class DIELoc;
class DIEValueList;
class DwarfFile;
class GlobalVariable;
class MCExpr;
class MCSymbol;
class MDNode;
enum class UnitKind { Skeleton, Full };
class DwarfCompileUnit final : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
bool HasRangeLists = false;
/// The start of the unit line section; this is also
/// reused in applyStmtList.
MCSymbol *LineTableStartSym;
/// Skeleton unit associated with this unit.
DwarfCompileUnit *Skeleton = nullptr;
/// The start of the unit within its section.
MCSymbol *LabelBegin = nullptr;
/// The start of the unit macro info within macro section.
MCSymbol *MacroLabelBegin;
using ImportedEntityList = SmallVector<const MDNode *, 8>;
using ImportedEntityMap = DenseMap<const MDNode *, ImportedEntityList>;
ImportedEntityMap ImportedEntities;
/// GlobalNames - A map of globally visible named entities for this unit.
StringMap<const DIE *> GlobalNames;
/// GlobalTypes - A map of globally visible types for this unit.
StringMap<const DIE *> GlobalTypes;
// List of ranges for a given compile unit.
SmallVector<RangeSpan, 2> CURanges;
// The base address of this unit, if any. Used for relative references in
// ranges/locs.
const MCSymbol *BaseAddress = nullptr;
DenseMap<const MDNode *, DIE *> AbstractSPDies;
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// DWO ID for correlating skeleton and split units.
uint64_t DWOId = 0;
/// Construct a DIE for the given DbgVariable without initializing the
/// DbgVariable's DIE reference.
DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);
bool isDwoUnit() const override;
DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return AbstractSPDies;
return DU->getAbstractSPDies();
}
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return AbstractEntities;
return DU->getAbstractEntities();
}
void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
UnitKind Kind = UnitKind::Full);
bool hasRangeLists() const { return HasRangeLists; }
unsigned getUniqueID() const { return UniqueID; }
DwarfCompileUnit *getSkeleton() const {
return Skeleton;
}
bool includeMinimalInlineScopes() const;
void initStmtList();
/// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
void applyStmtList(DIE &D);
/// Get line table start symbol for this unit.
MCSymbol *getLineTableStartSym() const { return LineTableStartSym; }
/// A pair of GlobalVariable and DIExpression.
struct GlobalExpr {
const GlobalVariable *Var;
const DIExpression *Expr;
};
struct BaseTypeRef {
BaseTypeRef(unsigned BitSize, dwarf::TypeKind Encoding) :
BitSize(BitSize), Encoding(Encoding) {}
unsigned BitSize;
dwarf::TypeKind Encoding;
DIE *Die = nullptr;
};
std::vector<BaseTypeRef> ExprRefedBaseTypes;
/// Get or create global variable DIE.
DIE *
getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV,
ArrayRef<GlobalExpr> GlobalExprs);
DIE *getOrCreateCommonBlock(const DICommonBlock *CB,
ArrayRef<GlobalExpr> GlobalExprs);
void addLocationAttribute(DIE *ToDIE, const DIGlobalVariable *GV,
ArrayRef<GlobalExpr> GlobalExprs);
/// addLabelAddress - Add a dwarf label attribute data and value using
/// either DW_FORM_addr or DW_FORM_GNU_addr_index.
void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
/// addLocalLabelAddress - Add a dwarf label attribute data and value using
/// DW_FORM_addr only.
void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
DwarfCompileUnit &getCU() override { return *this; }
unsigned getOrCreateSourceID(const DIFile *File) override;
void addImportedEntity(const DIImportedEntity* IE) {
DIScope *Scope = IE->getScope();
assert(Scope && "Invalid Scope encoding!");
if (!isa<DILocalScope>(Scope))
// No need to add imported entities that are not local declarations.
return;
auto *LocalScope = cast<DILocalScope>(Scope)->getNonLexicalBlockFileScope();
ImportedEntities[LocalScope].push_back(IE);
}
/// addRange - Add an address range to the list of ranges for this unit.
void addRange(RangeSpan Range);
void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
/// Find DIE for the given subprogram and attach appropriate
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
/// variables in this scope then create and insert DIEs for these
/// variables.
DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
void constructScopeDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &FinalChildren);
/// A helper function to construct a RangeSpanList for a given
/// lexical scope.
void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range);
void attachRangesOrLowHighPC(DIE &D, SmallVector<RangeSpan, 2> Ranges);
void attachRangesOrLowHighPC(DIE &D,
const SmallVectorImpl<InsnRange> &Ranges);
/// This scope represents inlined body of a function. Construct
/// DIE to represent this concrete inlined copy of the function.
DIE *constructInlinedScopeDIE(LexicalScope *Scope);
/// Construct new DW_TAG_lexical_block for this scope and
/// attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *constructLexicalScopeDIE(LexicalScope *Scope);
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *constructVariableDIE(DbgVariable &DV, bool Abstract = false);
DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope,
DIE *&ObjectPointer);
/// Construct a DIE for the given DbgLabel.
DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope);
/// A helper function to create children of a Scope DIE.
DIE *createScopeChildrenDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren = nullptr);
void createBaseTypeDIEs();
/// Construct a DIE for this subprogram scope.
DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope);
DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location
/// atom. Only applicable when emitting otherwise DWARF4-compliant debug info.
bool useGNUAnalogForDwarf5Feature() const;
/// This takes a DWARF 5 tag and returns it or a GNU analog.
dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const;
/// This takes a DWARF 5 attribute and returns it or a GNU analog.
dwarf::Attribute getDwarf5OrGNUAttr(dwarf::Attribute Attr) const;
/// This takes a DWARF 5 location atom and either returns it or a GNU analog.
dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const;
/// Construct a call site entry DIE describing a call within \p Scope to a
- /// callee described by \p CalleeDIE.
- /// \p CalleeDIE is a declaration or definition subprogram DIE for the callee.
- /// For indirect calls \p CalleeDIE is set to nullptr.
+ /// callee described by \p CalleeSP.
/// \p IsTail specifies whether the call is a tail call.
/// \p PCAddr points to the PC value after the call instruction.
/// \p CallAddr points to the PC value at the call instruction (or is null).
/// \p CallReg is a register location for an indirect call. For direct calls
/// the \p CallReg is set to 0.
- DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, DIE *CalleeDIE, bool IsTail,
- const MCSymbol *PCAddr,
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP,
+ bool IsTail, const MCSymbol *PCAddr,
const MCSymbol *CallAddr, unsigned CallReg);
/// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params
/// were collected by the \ref collectCallSiteParameters.
/// Note: The order of parameters does not matter, since debuggers recognize
/// call site parameters by the DW_AT_location attribute.
void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE,
SmallVector<DbgCallSiteParam, 4> &Params);
/// Construct import_module DIE.
DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
void finishSubprogramDefinition(const DISubprogram *SP);
void finishEntityDefinition(const DbgEntity *Entity);
/// Find abstract variable associated with Var.
using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
DbgEntity *getExistingAbstractEntity(const DINode *Node);
void createAbstractEntity(const DINode *Node, LexicalScope *Scope);
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
unsigned getHeaderSize() const override {
// DWARF v5 added the DWO ID to the header for split/skeleton units.
unsigned DWOIdSize =
DD->getDwarfVersion() >= 5 && DD->useSplitDwarf() ? sizeof(uint64_t)
: 0;
return DwarfUnit::getHeaderSize() + DWOIdSize;
}
unsigned getLength() {
return Asm->getUnitLengthFieldByteSize() + // Length field
getHeaderSize() + getUnitDie().getSize();
}
void emitHeader(bool UseOffsets) override;
/// Add the DW_AT_addr_base attribute to the unit DIE.
void addAddrTableBase();
MCSymbol *getLabelBegin() const {
assert(LabelBegin && "LabelBegin is not initialized");
return LabelBegin;
}
MCSymbol *getMacroLabelBegin() const {
return MacroLabelBegin;
}
/// Add a new global name to the compile unit.
void addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) override;
/// Add a new global name present in a type unit to this compile unit.
void addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context);
/// Add a new global type to the compile unit.
void addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) override;
/// Add a new global type present in a type unit to this compile unit.
void addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context);
const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
/// Add DW_AT_location attribute for a DbgVariable based on provided
/// MachineLocation.
void addVariableAddress(const DbgVariable &DV, DIE &Die,
MachineLocation Location);
/// Add an address attribute to a die based on the location provided.
void addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location);
/// Start with the address based on the location provided, and generate the
/// DWARF information necessary to find the actual variable (navigating the
/// extra location information encoded in the type) based on the starting
/// location. Add the DWARF information to the die.
void addComplexAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location);
/// Add a Dwarf loclistptr attribute data and value.
void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index);
void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
/// Add a Dwarf expression attribute data and value.
void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
void applySubprogramAttributesToDefinition(const DISubprogram *SP,
DIE &SPDie);
void applyLabelAttributes(const DbgLabel &Label, DIE &LabelDie);
/// getRanges - Get the list of ranges for this unit.
const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
SmallVector<RangeSpan, 2> takeRanges() { return std::move(CURanges); }
void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; }
const MCSymbol *getBaseAddress() const { return BaseAddress; }
uint64_t getDWOId() const { return DWOId; }
void setDWOId(uint64_t DwoId) { DWOId = DwoId; }
bool hasDwarfPubSections() const;
void addBaseTypeRef(DIEValueList &Die, int64_t Idx);
};
} // end namespace llvm
#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index ee14423ca3d0..52591a18791f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1,3552 +1,3537 @@
//===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing dwarf debug info into asm files.
//
//===----------------------------------------------------------------------===//
#include "DwarfDebug.h"
#include "ByteStreamer.h"
#include "DIEHash.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "DwarfUnit.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
STATISTIC(NumCSParams, "Number of dbg call site params created");
static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier(
"use-dwarf-ranges-base-address-specifier", cl::Hidden,
cl::desc("Use base address specifiers in debug_ranges"), cl::init(false));
static cl::opt<bool> GenerateARangeSection("generate-arange-section",
cl::Hidden,
cl::desc("Generate dwarf aranges"),
cl::init(false));
static cl::opt<bool>
GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
cl::desc("Generate DWARF4 type units."),
cl::init(false));
static cl::opt<bool> SplitDwarfCrossCuReferences(
"split-dwarf-cross-cu-references", cl::Hidden,
cl::desc("Enable cross-cu references in DWO files"), cl::init(false));
enum DefaultOnOff { Default, Enable, Disable };
static cl::opt<DefaultOnOff> UnknownLocations(
"use-unknown-locations", cl::Hidden,
cl::desc("Make an absence of debug location information explicit."),
cl::values(clEnumVal(Default, "At top of block or after label"),
clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")),
cl::init(Default));
static cl::opt<AccelTableKind> AccelTables(
"accel-tables", cl::Hidden, cl::desc("Output dwarf accelerator tables."),
cl::values(clEnumValN(AccelTableKind::Default, "Default",
"Default for platform"),
clEnumValN(AccelTableKind::None, "Disable", "Disabled."),
clEnumValN(AccelTableKind::Apple, "Apple", "Apple"),
clEnumValN(AccelTableKind::Dwarf, "Dwarf", "DWARF")),
cl::init(AccelTableKind::Default));
static cl::opt<DefaultOnOff>
DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden,
cl::desc("Use inlined strings rather than string section."),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
clEnumVal(Disable, "Disabled")),
cl::init(Default));
static cl::opt<bool>
NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden,
cl::desc("Disable emission .debug_ranges section."),
cl::init(false));
static cl::opt<DefaultOnOff> DwarfSectionsAsReferences(
"dwarf-sections-as-references", cl::Hidden,
cl::desc("Use sections+offset as references rather than labels."),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
cl::init(Default));
static cl::opt<bool>
UseGNUDebugMacro("use-gnu-debug-macro", cl::Hidden,
cl::desc("Emit the GNU .debug_macro format with DWARF <5"),
cl::init(false));
static cl::opt<DefaultOnOff> DwarfOpConvert(
"dwarf-op-convert", cl::Hidden,
cl::desc("Enable use of the DWARFv5 DW_OP_convert operator"),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
cl::init(Default));
enum LinkageNameOption {
DefaultLinkageNames,
AllLinkageNames,
AbstractLinkageNames
};
static cl::opt<LinkageNameOption>
DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
cl::desc("Which DWARF linkage-name attributes to emit."),
cl::values(clEnumValN(DefaultLinkageNames, "Default",
"Default for platform"),
clEnumValN(AllLinkageNames, "All", "All"),
clEnumValN(AbstractLinkageNames, "Abstract",
"Abstract subprograms")),
cl::init(DefaultLinkageNames));
static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option(
"minimize-addr-in-v5", cl::Hidden,
cl::desc("Always use DW_AT_ranges in DWARFv5 whenever it could allow more "
"address pool entry sharing to reduce relocations/object size"),
cl::values(clEnumValN(DwarfDebug::MinimizeAddrInV5::Default, "Default",
"Default address minimization strategy"),
clEnumValN(DwarfDebug::MinimizeAddrInV5::Ranges, "Ranges",
"Use rnglists for contiguous ranges if that allows "
"using a pre-existing base address"),
clEnumValN(DwarfDebug::MinimizeAddrInV5::Expressions,
"Expressions",
"Use exprloc addrx+offset expressions for any "
"address with a prior base address"),
clEnumValN(DwarfDebug::MinimizeAddrInV5::Form, "Form",
"Use addrx+offset extension form for any address "
"with a prior base address"),
clEnumValN(DwarfDebug::MinimizeAddrInV5::Disabled, "Disabled",
"Stuff")),
cl::init(DwarfDebug::MinimizeAddrInV5::Default));
static constexpr unsigned ULEB128PadSize = 4;
void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
getActiveStreamer().emitInt8(
Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
: dwarf::OperationEncodingString(Op));
}
void DebugLocDwarfExpression::emitSigned(int64_t Value) {
getActiveStreamer().emitSLEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
getActiveStreamer().emitULEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitData1(uint8_t Value) {
getActiveStreamer().emitInt8(Value, Twine(Value));
}
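// Base type references are emitted as ULEB128 values padded to ULEB128PadSize
// bytes, so an index must fit in ULEB128PadSize * 7 == 28 value bits.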
void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx won't fit");
getActiveStreamer().emitULEB128(Idx, Twine(Idx), ULEB128PadSize);
}
bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
llvm::Register MachineReg) {
// This information is not available while emitting .debug_loc entries.
return false;
}
void DebugLocDwarfExpression::enableTemporaryBuffer() {
assert(!IsBuffering && "Already buffering?");
if (!TmpBuf)
TmpBuf = std::make_unique<TempBuffer>(OutBS.GenerateComments);
IsBuffering = true;
}
void DebugLocDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
unsigned DebugLocDwarfExpression::getTemporaryBufferSize() {
return TmpBuf ? TmpBuf->Bytes.size() : 0;
}
void DebugLocDwarfExpression::commitTemporaryBuffer() {
if (!TmpBuf)
return;
for (auto Byte : enumerate(TmpBuf->Bytes)) {
const char *Comment = (Byte.index() < TmpBuf->Comments.size())
? TmpBuf->Comments[Byte.index()].c_str()
: "";
OutBS.emitInt8(Byte.value(), Comment);
}
TmpBuf->Bytes.clear();
TmpBuf->Comments.clear();
}
const DIType *DbgVariable::getType() const {
return getVariable()->getType();
}
/// Get .debug_loc entry for the instruction range starting at MI.
static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
const DIExpression *Expr = MI->getDebugExpression();
const bool IsVariadic = MI->isDebugValueList();
assert(MI->getNumOperands() >= 3);
SmallVector<DbgValueLocEntry, 4> DbgValueLocEntries;
for (const MachineOperand &Op : MI->debug_operands()) {
if (Op.isReg()) {
MachineLocation MLoc(Op.getReg(),
MI->isNonListDebugValue() && MI->isDebugOffsetImm());
DbgValueLocEntries.push_back(DbgValueLocEntry(MLoc));
} else if (Op.isTargetIndex()) {
DbgValueLocEntries.push_back(
DbgValueLocEntry(TargetIndexLocation(Op.getIndex(), Op.getOffset())));
} else if (Op.isImm())
DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getImm()));
else if (Op.isFPImm())
DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getFPImm()));
else if (Op.isCImm())
DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getCImm()));
else
llvm_unreachable("Unexpected debug operand in DBG_VALUE* instruction!");
}
return DbgValueLoc(Expr, DbgValueLocEntries, IsVariadic);
}
void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!ValueLoc.get() && "Already initialized?");
assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
"Wrong inlined-at");
ValueLoc = std::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
if (auto *E = DbgValue->getDebugExpression())
if (E->getNumElements())
FrameIndexExprs.push_back({0, E});
}
ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
if (FrameIndexExprs.size() == 1)
return FrameIndexExprs;
assert(llvm::all_of(FrameIndexExprs,
[](const FrameIndexExpr &A) {
return A.Expr->isFragment();
}) &&
"multiple FI expressions without DW_OP_LLVM_fragment");
llvm::sort(FrameIndexExprs,
[](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
return A.Expr->getFragmentInfo()->OffsetInBits <
B.Expr->getFragmentInfo()->OffsetInBits;
});
return FrameIndexExprs;
}
void DbgVariable::addMMIEntry(const DbgVariable &V) {
assert(DebugLocListIndex == ~0U && !ValueLoc.get() && "not an MMI entry");
assert(V.DebugLocListIndex == ~0U && !V.ValueLoc.get() && "not an MMI entry");
assert(V.getVariable() == getVariable() && "conflicting variable");
assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location");
assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
// FIXME: This logic should not be necessary anymore, as we now have proper
// deduplication. However, without it, we currently run into the assertion
// below, which means that we are likely dealing with broken input, i.e. two
// non-fragment entries for the same variable at different frame indices.
if (FrameIndexExprs.size()) {
auto *Expr = FrameIndexExprs.back().Expr;
if (!Expr || !Expr->isFragment())
return;
}
for (const auto &FIE : V.FrameIndexExprs)
// Ignore duplicate entries.
if (llvm::none_of(FrameIndexExprs, [&](const FrameIndexExpr &Other) {
return FIE.FI == Other.FI && FIE.Expr == Other.Expr;
}))
FrameIndexExprs.push_back(FIE);
assert((FrameIndexExprs.size() == 1 ||
llvm::all_of(FrameIndexExprs,
[](FrameIndexExpr &FIE) {
return FIE.Expr && FIE.Expr->isFragment();
})) &&
"conflicting locations for variable");
}
static AccelTableKind computeAccelTableKind(unsigned DwarfVersion,
bool GenerateTypeUnits,
DebuggerKind Tuning,
const Triple &TT) {
// Honor an explicit request.
if (AccelTables != AccelTableKind::Default)
return AccelTables;
// Accelerator tables with type units are currently not supported.
if (GenerateTypeUnits)
return AccelTableKind::None;
// Accelerator tables get emitted if targeting DWARF v5 or LLDB. DWARF v5
// always implies debug_names. For lower standard versions we use Apple
// accelerator tables on Apple platforms and debug_names elsewhere.
if (DwarfVersion >= 5)
return AccelTableKind::Dwarf;
if (Tuning == DebuggerKind::LLDB)
return TT.isOSBinFormatMachO() ? AccelTableKind::Apple
: AccelTableKind::Dwarf;
return AccelTableKind::None;
}
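// For reference, the selection above resolves as follows (assuming no
// explicit request and no type units):
//   DWARF v5 or newer                      -> AccelTableKind::Dwarf (.debug_names)
//   older DWARF, LLDB tuning, Mach-O       -> AccelTableKind::Apple
//   older DWARF, LLDB tuning, other format -> AccelTableKind::Dwarf
//   otherwise                              -> AccelTableKind::None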
DwarfDebug::DwarfDebug(AsmPrinter *A)
: DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
InfoHolder(A, "info_string", DIEValueAllocator),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
IsDarwin(A->TM.getTargetTriple().isOSDarwin()) {
const Triple &TT = Asm->TM.getTargetTriple();
// Make sure we know our "debugger tuning". The target option takes
// precedence; fall back to triple-based defaults.
if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
DebuggerTuning = Asm->TM.Options.DebuggerTuning;
else if (IsDarwin)
DebuggerTuning = DebuggerKind::LLDB;
else if (TT.isPS4CPU())
DebuggerTuning = DebuggerKind::SCE;
else if (TT.isOSAIX())
DebuggerTuning = DebuggerKind::DBX;
else
DebuggerTuning = DebuggerKind::GDB;
if (DwarfInlinedStrings == Default)
UseInlineStrings = TT.isNVPTX() || tuneForDBX();
else
UseInlineStrings = DwarfInlinedStrings == Enable;
UseLocSection = !TT.isNVPTX();
HasAppleExtensionAttributes = tuneForLLDB();
// Handle split DWARF.
HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty();
// SCE defaults to linkage names only for abstract subprograms.
if (DwarfLinkageNames == DefaultLinkageNames)
UseAllLinkageNames = !tuneForSCE();
else
UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames;
unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
// Use dwarf 4 by default if nothing is requested. For NVPTX, use dwarf 2.
DwarfVersion =
TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
bool Dwarf64 = DwarfVersion >= 3 && // DWARF64 was introduced in DWARFv3.
TT.isArch64Bit(); // DWARF64 requires 64-bit relocations.
// Support DWARF64
// 1: For ELF when requested.
// 2: For XCOFF64: the AIX assembler will fill in debug section lengths
// according to the DWARF64 format for 64-bit assembly, so we must use
// DWARF64 in the compiler too for 64-bit mode.
Dwarf64 &=
((Asm->TM.Options.MCOptions.Dwarf64 || MMI->getModule()->isDwarf64()) &&
TT.isOSBinFormatELF()) ||
TT.isOSBinFormatXCOFF();
if (!Dwarf64 && TT.isArch64Bit() && TT.isOSBinFormatXCOFF())
report_fatal_error("XCOFF requires DWARF64 for 64-bit mode!");
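// For example: a 64-bit ELF target built with DWARF v4 and an explicit
// DWARF64 request ends up using the 64-bit format, a 64-bit XCOFF target
// must use DWARF64 (enforced by the fatal error above), and 32-bit targets
// always stay on DWARF32.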
UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
// Use sections as references. Force for NVPTX.
if (DwarfSectionsAsReferences == Default)
UseSectionsAsReferences = TT.isNVPTX();
else
UseSectionsAsReferences = DwarfSectionsAsReferences == Enable;
// Don't generate type units for unsupported object file formats.
GenerateTypeUnits = (A->TM.getTargetTriple().isOSBinFormatELF() ||
A->TM.getTargetTriple().isOSBinFormatWasm()) &&
GenerateDwarfTypeUnits;
TheAccelTableKind = computeAccelTableKind(
DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple());
// Work around a GDB bug. GDB doesn't support the standard opcode;
// SCE doesn't support GNU's; LLDB prefers the standard opcode, which
// is defined as of DWARF 3.
// See GDB bug 11616 - DW_OP_form_tls_address is unimplemented
// https://sourceware.org/bugzilla/show_bug.cgi?id=11616
UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
// GDB does not fully support the DWARF 4 representation for bitfields.
UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB();
// The DWARF v5 string offsets table has - possibly shared - contributions
// from each compile and type unit, each preceded by a header. The string
// offsets table used by the pre-DWARF v5 split-DWARF implementation uses
// a monolithic string offsets table without any header.
UseSegmentedStringOffsetsTable = DwarfVersion >= 5;
// Emit call-site-param debug info for GDB and LLDB, if the target supports
// the debug entry values feature. It can also be enabled explicitly.
EmitDebugEntryValues = Asm->TM.Options.ShouldEmitDebugEntryValues();
// It is unclear if the GCC .debug_macro extension is well-specified
// for split DWARF. For now, do not allow LLVM to emit it.
UseDebugMacroSection =
DwarfVersion >= 5 || (UseGNUDebugMacro && !useSplitDwarf());
if (DwarfOpConvert == Default)
EnableOpConvert = !((tuneForGDB() && useSplitDwarf()) || (tuneForLLDB() && !TT.isOSBinFormatMachO()));
else
EnableOpConvert = (DwarfOpConvert == Enable);
// Split DWARF would benefit object size significantly by trading reductions
// in address pool usage for slightly increased range list encodings.
if (DwarfVersion >= 5) {
MinimizeAddr = MinimizeAddrInV5Option;
// FIXME: In the future, enable this by default for Split DWARF where the
// tradeoff is more pronounced due to being able to offload the range
// lists to the dwo file and shrink object files/reduce relocations there.
if (MinimizeAddr == MinimizeAddrInV5::Default)
MinimizeAddr = MinimizeAddrInV5::Disabled;
}
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64
: dwarf::DWARF32);
}
// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
DwarfDebug::~DwarfDebug() = default;
static bool isObjCClass(StringRef Name) {
return Name.startswith("+") || Name.startswith("-");
}
static bool hasObjCCategory(StringRef Name) {
if (!isObjCClass(Name))
return false;
return Name.find(") ") != StringRef::npos;
}
static void getObjCClassCategory(StringRef In, StringRef &Class,
StringRef &Category) {
if (!hasObjCCategory(In)) {
Class = In.slice(In.find('[') + 1, In.find(' '));
Category = "";
return;
}
Class = In.slice(In.find('[') + 1, In.find('('));
Category = In.slice(In.find('[') + 1, In.find(' '));
}
static StringRef getObjCMethodName(StringRef In) {
return In.slice(In.find(' ') + 1, In.find(']'));
}
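// As an illustration (using a made-up selector), given the subprogram name
// "-[NSString(MyCategory) myMethod:]" the helpers above produce:
//   isObjCClass()          -> true (leading '-')
//   hasObjCCategory()      -> true (contains ") ")
//   getObjCClassCategory() -> Class = "NSString", Category = "NSString(MyCategory)"
//   getObjCMethodName()    -> "myMethod:"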
// Add the various names to the Dwarf accelerator table names.
void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
const DISubprogram *SP, DIE &Die) {
if (getAccelTableKind() != AccelTableKind::Apple &&
CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
return;
if (!SP->isDefinition())
return;
if (SP->getName() != "")
addAccelName(CU, SP->getName(), Die);
// If the linkage name is different from the name, also emit it into the
// name table. Only do that if we are going to actually emit that name.
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() &&
(useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP)))
addAccelName(CU, SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
// too.
if (isObjCClass(SP->getName())) {
StringRef Class, Category;
getObjCClassCategory(SP->getName(), Class, Category);
addAccelObjC(CU, Class, Die);
if (Category != "")
addAccelObjC(CU, Category, Die);
// Also add the base method name to the name table.
addAccelName(CU, getObjCMethodName(SP->getName()), Die);
}
}
/// Check whether we should create a DIE for the given Scope; return true
/// if we don't create a DIE (the corresponding DIE is null).
bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
if (Scope->isAbstractScope())
return false;
// We don't create a DIE if there is no Range.
const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges();
if (Ranges.empty())
return true;
if (Ranges.size() > 1)
return false;
// We don't create a DIE if we have a single Range and the end label
// is null.
return !getLabelAfterInsn(Ranges.front().second);
}
template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) {
F(CU);
if (auto *SkelCU = CU.getSkeleton())
if (CU.getCUNode()->getSplitDebugInlining())
F(*SkelCU);
}
bool DwarfDebug::shareAcrossDWOCUs() const {
return SplitDwarfCrossCuReferences;
}
void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
LexicalScope *Scope) {
assert(Scope && Scope->getScopeNode());
assert(Scope->isAbstractScope());
assert(!Scope->getInlinedAt());
auto *SP = cast<DISubprogram>(Scope->getScopeNode());
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
if (useSplitDwarf() && !shareAcrossDWOCUs() && !SP->getUnit()->getSplitDebugInlining())
// Avoid building the original CU if it won't be used
SrcCU.constructAbstractSubprogramScopeDIE(Scope);
else {
auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
if (auto *SkelCU = CU.getSkeleton()) {
(shareAcrossDWOCUs() ? CU : SrcCU)
.constructAbstractSubprogramScopeDIE(Scope);
if (CU.getCUNode()->getSplitDebugInlining())
SkelCU->constructAbstractSubprogramScopeDIE(Scope);
} else
CU.constructAbstractSubprogramScopeDIE(Scope);
}
}
-DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) {
- DICompileUnit *Unit = SP->getUnit();
- assert(SP->isDefinition() && "Subprogram not a definition");
- assert(Unit && "Subprogram definition without parent unit");
- auto &CU = getOrCreateDwarfCompileUnit(Unit);
- return *CU.getOrCreateSubprogramDIE(SP);
-}
-
/// Represents a parameter whose call site value can be described by applying a
/// debug expression to a register in the forwarded register worklist.
struct FwdRegParamInfo {
/// The described parameter register.
unsigned ParamReg;
/// Debug expression that has been built up when walking through the
/// instruction chain that produces the parameter's value.
const DIExpression *Expr;
};
/// Register worklist for finding call site values.
using FwdRegWorklist = MapVector<unsigned, SmallVector<FwdRegParamInfo, 2>>;
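// In other words, the worklist maps a physical register that forwards a call
// argument to the set of parameter registers whose call site values are (so
// far) described in terms of it, together with the expressions accumulated
// while walking back through the instruction stream.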
/// Append the expression \p Addition to \p Original and return the result.
static const DIExpression *combineDIExpressions(const DIExpression *Original,
const DIExpression *Addition) {
std::vector<uint64_t> Elts = Addition->getElements().vec();
// Avoid multiple DW_OP_stack_values.
if (Original->isImplicit() && Addition->isImplicit())
erase_value(Elts, dwarf::DW_OP_stack_value);
const DIExpression *CombinedExpr =
(Elts.size() > 0) ? DIExpression::append(Original, Elts) : Original;
return CombinedExpr;
}
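// For example, combining a register-based expression such as
// [DW_OP_plus_uconst 8] with an addition of [DW_OP_deref] simply yields
// [DW_OP_plus_uconst 8, DW_OP_deref]; when both expressions are implicit,
// the duplicate DW_OP_stack_value contributed by the addition is dropped
// before appending.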
/// Emit call site parameter entries that are described by the given value and
/// debug expression.
template <typename ValT>
static void finishCallSiteParams(ValT Val, const DIExpression *Expr,
ArrayRef<FwdRegParamInfo> DescribedParams,
ParamSet &Params) {
for (auto Param : DescribedParams) {
bool ShouldCombineExpressions = Expr && Param.Expr->getNumElements() > 0;
// TODO: Entry value operations currently cannot be combined with any
// other expressions, so we can't emit call site entries in those cases.
if (ShouldCombineExpressions && Expr->isEntryValue())
continue;
// If a parameter's call site value is produced by a chain of
// instructions we may have already created an expression for the
// parameter when walking through the instructions. Append that to the
// base expression.
const DIExpression *CombinedExpr =
ShouldCombineExpressions ? combineDIExpressions(Expr, Param.Expr)
: Expr;
assert((!CombinedExpr || CombinedExpr->isValid()) &&
"Combined debug expression is invalid");
DbgValueLoc DbgLocVal(CombinedExpr, DbgValueLocEntry(Val));
DbgCallSiteParam CSParm(Param.ParamReg, DbgLocVal);
Params.push_back(CSParm);
++NumCSParams;
}
}
/// Add \p Reg to the worklist, if it's not already present, and mark that the
/// given parameter registers' values can (potentially) be described using
/// that register and a debug expression.
static void addToFwdRegWorklist(FwdRegWorklist &Worklist, unsigned Reg,
const DIExpression *Expr,
ArrayRef<FwdRegParamInfo> ParamsToAdd) {
auto I = Worklist.insert({Reg, {}});
auto &ParamsForFwdReg = I.first->second;
for (auto Param : ParamsToAdd) {
assert(none_of(ParamsForFwdReg,
[Param](const FwdRegParamInfo &D) {
return D.ParamReg == Param.ParamReg;
}) &&
"Same parameter described twice by forwarding reg");
// If a parameter's call site value is produced by a chain of
// instructions we may have already created an expression for the
// parameter when walking through the instructions. Append that to the
// new expression.
const DIExpression *CombinedExpr = combineDIExpressions(Expr, Param.Expr);
ParamsForFwdReg.push_back({Param.ParamReg, CombinedExpr});
}
}
/// Interpret values loaded into registers by \p CurMI.
static void interpretValues(const MachineInstr *CurMI,
FwdRegWorklist &ForwardedRegWorklist,
ParamSet &Params) {
const MachineFunction *MF = CurMI->getMF();
const DIExpression *EmptyExpr =
DIExpression::get(MF->getFunction().getContext(), {});
const auto &TRI = *MF->getSubtarget().getRegisterInfo();
const auto &TII = *MF->getSubtarget().getInstrInfo();
const auto &TLI = *MF->getSubtarget().getTargetLowering();
// If an instruction defines more than one item in the worklist, we may run
// into situations where a worklist register's value is (potentially)
// described by the previous value of another register that is also defined
// by that instruction.
//
// This can for example occur in cases like this:
//
// $r1 = mov 123
// $r0, $r1 = mvrr $r1, 456
// call @foo, $r0, $r1
//
// When describing $r1's value for the mvrr instruction, we need to make sure
// that we don't finalize an entry value for $r0, as that is dependent on the
// previous value of $r1 (123 rather than 456).
//
// In order to not have to distinguish between those cases when finalizing
// entry values, we simply postpone adding new parameter registers to the
// worklist, by first keeping them in this temporary container until the
// instruction has been handled.
FwdRegWorklist TmpWorklistItems;
// If the MI is an instruction defining one or more parameters' forwarding
// registers, add those defines.
auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI,
SmallSetVector<unsigned, 4> &Defs) {
if (MI.isDebugInstr())
return;
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() &&
Register::isPhysicalRegister(MO.getReg())) {
for (auto &FwdReg : ForwardedRegWorklist)
if (TRI.regsOverlap(FwdReg.first, MO.getReg()))
Defs.insert(FwdReg.first);
}
}
};
// Set of worklist registers that are defined by this instruction.
SmallSetVector<unsigned, 4> FwdRegDefs;
getForwardingRegsDefinedByMI(*CurMI, FwdRegDefs);
if (FwdRegDefs.empty())
return;
for (auto ParamFwdReg : FwdRegDefs) {
if (auto ParamValue = TII.describeLoadedValue(*CurMI, ParamFwdReg)) {
if (ParamValue->first.isImm()) {
int64_t Val = ParamValue->first.getImm();
finishCallSiteParams(Val, ParamValue->second,
ForwardedRegWorklist[ParamFwdReg], Params);
} else if (ParamValue->first.isReg()) {
Register RegLoc = ParamValue->first.getReg();
Register SP = TLI.getStackPointerRegisterToSaveRestore();
Register FP = TRI.getFrameRegister(*MF);
bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
if (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
MachineLocation MLoc(RegLoc, /*Indirect=*/IsSPorFP);
finishCallSiteParams(MLoc, ParamValue->second,
ForwardedRegWorklist[ParamFwdReg], Params);
} else {
// ParamFwdReg was described by the non-callee saved register
// RegLoc. Mark that the call site values for the parameters are
// dependent on that register instead of ParamFwdReg. Since RegLoc
// may be a register that will be handled in this iteration, we
// postpone adding the items to the worklist, and instead keep them
// in a temporary container.
addToFwdRegWorklist(TmpWorklistItems, RegLoc, ParamValue->second,
ForwardedRegWorklist[ParamFwdReg]);
}
}
}
}
// Remove all registers that this instruction defines from the worklist.
for (auto ParamFwdReg : FwdRegDefs)
ForwardedRegWorklist.erase(ParamFwdReg);
// Now that we are done handling this instruction, add items from the
// temporary worklist to the real one.
for (auto &New : TmpWorklistItems)
addToFwdRegWorklist(ForwardedRegWorklist, New.first, EmptyExpr, New.second);
TmpWorklistItems.clear();
}
static bool interpretNextInstr(const MachineInstr *CurMI,
FwdRegWorklist &ForwardedRegWorklist,
ParamSet &Params) {
// Skip bundle headers.
if (CurMI->isBundle())
return true;
// If the next instruction is a call we cannot interpret the parameters'
// forwarding registers, or we have finished interpreting all
// parameters.
if (CurMI->isCall())
return false;
if (ForwardedRegWorklist.empty())
return false;
// Avoid NOP description.
if (CurMI->getNumOperands() == 0)
return true;
interpretValues(CurMI, ForwardedRegWorklist, Params);
return true;
}
/// Try to interpret values loaded into registers that forward parameters
/// for \p CallMI. Store parameters with interpreted value into \p Params.
static void collectCallSiteParameters(const MachineInstr *CallMI,
ParamSet &Params) {
const MachineFunction *MF = CallMI->getMF();
const auto &CalleesMap = MF->getCallSitesInfo();
auto CallFwdRegsInfo = CalleesMap.find(CallMI);
// There is no information for the call instruction.
if (CallFwdRegsInfo == CalleesMap.end())
return;
const MachineBasicBlock *MBB = CallMI->getParent();
// Skip the call instruction.
auto I = std::next(CallMI->getReverseIterator());
FwdRegWorklist ForwardedRegWorklist;
const DIExpression *EmptyExpr =
DIExpression::get(MF->getFunction().getContext(), {});
// Add all the forwarding registers into the ForwardedRegWorklist.
for (const auto &ArgReg : CallFwdRegsInfo->second) {
bool InsertedReg =
ForwardedRegWorklist.insert({ArgReg.Reg, {{ArgReg.Reg, EmptyExpr}}})
.second;
assert(InsertedReg && "Single register used to forward two arguments?");
(void)InsertedReg;
}
// Do not emit CSInfo for undef forwarding registers.
for (auto &MO : CallMI->uses())
if (MO.isReg() && MO.isUndef())
ForwardedRegWorklist.erase(MO.getReg());
// We erase from the ForwardedRegWorklist those forwarding registers for
// which we successfully describe a loaded value (by using
// describeLoadedValue()). For the remaining arguments in the worklist,
// for which no loaded value could be described, we try to generate an
// entry value expression for their call site value description, if the
// call is within the entry MBB.
// TODO: Handle situations where a call site parameter value can be
// described as the entry value within basic blocks other than the first one.
bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin();
// Search for values loaded into the forwarding registers inside the call delay slot.
if (CallMI->hasDelaySlot()) {
auto Suc = std::next(CallMI->getIterator());
// Only one-instruction delay slot is supported.
auto BundleEnd = llvm::getBundleEnd(CallMI->getIterator());
(void)BundleEnd;
assert(std::next(Suc) == BundleEnd &&
"More than one instruction in call delay slot");
// Try to interpret value loaded by instruction.
if (!interpretNextInstr(&*Suc, ForwardedRegWorklist, Params))
return;
}
// Search for values loaded into the forwarding registers.
for (; I != MBB->rend(); ++I) {
// Try to interpret values loaded by instruction.
if (!interpretNextInstr(&*I, ForwardedRegWorklist, Params))
return;
}
// Emit the call site parameter's value as an entry value.
if (ShouldTryEmitEntryVals) {
// Create an expression where the register's entry value is used.
DIExpression *EntryExpr = DIExpression::get(
MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1});
for (auto &RegEntry : ForwardedRegWorklist) {
MachineLocation MLoc(RegEntry.first);
finishCallSiteParams(MLoc, EntryExpr, RegEntry.second, Params);
}
}
}
void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
DwarfCompileUnit &CU, DIE &ScopeDIE,
const MachineFunction &MF) {
// Add a call site-related attribute (DWARF5, Sec. 3.3.1.3). Do this only if
// the subprogram is required to have one.
if (!SP.areAllCallsDescribed() || !SP.isDefinition())
return;
// Use DW_AT_call_all_calls to express that call site entries are present
// for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls
// because one of its requirements is not met: call site entries for
// optimized-out calls are elided.
CU.addFlag(ScopeDIE, CU.getDwarf5OrGNUAttr(dwarf::DW_AT_call_all_calls));
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
assert(TII && "TargetInstrInfo not found: cannot label tail calls");
// Delay slot support check.
auto delaySlotSupported = [&](const MachineInstr &MI) {
if (!MI.isBundledWithSucc())
return false;
auto Suc = std::next(MI.getIterator());
auto CallInstrBundle = getBundleStart(MI.getIterator());
(void)CallInstrBundle;
auto DelaySlotBundle = getBundleStart(Suc);
(void)DelaySlotBundle;
// Ensure that the label after the call follows the delay slot instruction.
// Ex. CALL_INSTRUCTION {
// DELAY_SLOT_INSTRUCTION }
// LABEL_AFTER_CALL
assert(getLabelAfterInsn(&*CallInstrBundle) ==
getLabelAfterInsn(&*DelaySlotBundle) &&
"Call and its successor instruction don't have same label after.");
return true;
};
// Emit call site entries for each call or tail call in the function.
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB.instrs()) {
// Bundles containing a call will pass the isCall() test below but do not
// have callee operand information, so skip them here. The iterator will
// eventually reach the call MI.
if (MI.isBundle())
continue;
// Skip instructions which aren't calls. Both calls and tail-calling jump
// instructions (e.g. TAILJMPd64) are classified correctly here.
if (!MI.isCandidateForCallSiteEntry())
continue;
// Skip instructions marked as frame setup, as they are not interesting to
// the user.
if (MI.getFlag(MachineInstr::FrameSetup))
continue;
// Check if delay slot support is enabled.
if (MI.hasDelaySlot() && !delaySlotSupported(*&MI))
return;
// If this is a direct call, find the callee's subprogram.
// In the case of an indirect call find the register that holds
// the callee.
const MachineOperand &CalleeOp = TII->getCalleeOperand(MI);
if (!CalleeOp.isGlobal() &&
(!CalleeOp.isReg() ||
!Register::isPhysicalRegister(CalleeOp.getReg())))
continue;
unsigned CallReg = 0;
- DIE *CalleeDIE = nullptr;
+ const DISubprogram *CalleeSP = nullptr;
const Function *CalleeDecl = nullptr;
if (CalleeOp.isReg()) {
CallReg = CalleeOp.getReg();
if (!CallReg)
continue;
} else {
CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
if (!CalleeDecl || !CalleeDecl->getSubprogram())
continue;
- const DISubprogram *CalleeSP = CalleeDecl->getSubprogram();
-
- if (CalleeSP->isDefinition()) {
- // Ensure that a subprogram DIE for the callee is available in the
- // appropriate CU.
- CalleeDIE = &constructSubprogramDefinitionDIE(CalleeSP);
- } else {
- // Create the declaration DIE if it is missing. This is required to
- // support compilation of old bitcode with an incomplete list of
- // retained metadata.
- CalleeDIE = CU.getOrCreateSubprogramDIE(CalleeSP);
- }
- assert(CalleeDIE && "Must have a DIE for the callee");
+ CalleeSP = CalleeDecl->getSubprogram();
}
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
bool IsTail = TII->isTailCall(MI);
// If MI is in a bundle, the label was created after the bundle since
// EmitFunctionBody iterates over top-level MIs. Get that top-level MI
// to search for that label below.
const MachineInstr *TopLevelCallMI =
MI.isInsideBundle() ? &*getBundleStart(MI.getIterator()) : &MI;
// For non-tail calls, the return PC is needed to disambiguate paths in
// the call graph which could lead to some target function. For tail
// calls, no return PC information is needed, unless tuning for GDB in
// DWARF4 mode in which case we fake a return PC for compatibility.
const MCSymbol *PCAddr =
(!IsTail || CU.useGNUAnalogForDwarf5Feature())
? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI))
: nullptr;
// For tail calls, it's necessary to record the address of the branch
// instruction so that the debugger can show where the tail call occurred.
const MCSymbol *CallAddr =
IsTail ? getLabelBeforeInsn(TopLevelCallMI) : nullptr;
assert((IsTail || PCAddr) && "Non-tail call without return PC");
LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> "
<< (CalleeDecl ? CalleeDecl->getName()
: StringRef(MF.getSubtarget()
.getRegisterInfo()
->getName(CallReg)))
<< (IsTail ? " [IsTail]" : "") << "\n");
DIE &CallSiteDIE = CU.constructCallSiteEntryDIE(
- ScopeDIE, CalleeDIE, IsTail, PCAddr, CallAddr, CallReg);
+ ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg);
// Optionally emit call-site-param debug info.
if (emitDebugEntryValues()) {
ParamSet Params;
// Try to interpret values of call site parameters.
collectCallSiteParameters(&MI, Params);
CU.constructCallSiteParmEntryDIEs(CallSiteDIE, Params);
}
}
}
}
void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
if (!U.hasDwarfPubSections())
return;
U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
}
void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
DwarfCompileUnit &NewCU) {
DIE &Die = NewCU.getUnitDie();
StringRef FN = DIUnit->getFilename();
StringRef Producer = DIUnit->getProducer();
StringRef Flags = DIUnit->getFlags();
if (!Flags.empty() && !useAppleExtensionAttributes()) {
std::string ProducerWithFlags = Producer.str() + " " + Flags.str();
NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags);
} else
NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit->getSourceLanguage());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
StringRef SysRoot = DIUnit->getSysRoot();
if (!SysRoot.empty())
NewCU.addString(Die, dwarf::DW_AT_LLVM_sysroot, SysRoot);
StringRef SDK = DIUnit->getSDK();
if (!SDK.empty())
NewCU.addString(Die, dwarf::DW_AT_APPLE_sdk, SDK);
// Add DW_AT_str_offsets_base to the unit DIE, except for split units.
if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
NewCU.addStringOffsetsStart();
if (!useSplitDwarf()) {
NewCU.initStmtList();
// If we're using split dwarf the compilation dir is going to be in the
// skeleton CU and so we don't need to duplicate it here.
if (!CompilationDir.empty())
NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
addGnuPubAttributes(NewCU, Die);
}
if (useAppleExtensionAttributes()) {
if (DIUnit->isOptimized())
NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
StringRef Flags = DIUnit->getFlags();
if (!Flags.empty())
NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
if (unsigned RVer = DIUnit->getRuntimeVersion())
NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
}
if (DIUnit->getDWOId()) {
// This CU is either a clang module DWO or a skeleton CU.
NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
DIUnit->getDWOId());
if (!DIUnit->getSplitDebugFilename().empty()) {
// This is a prefabricated skeleton CU.
dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
? dwarf::DW_AT_dwo_name
: dwarf::DW_AT_GNU_dwo_name;
NewCU.addString(Die, attrDWOName, DIUnit->getSplitDebugFilename());
}
}
}
// Create new DwarfCompileUnit for the given metadata node with tag
// DW_TAG_compile_unit.
DwarfCompileUnit &
DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
if (auto *CU = CUMap.lookup(DIUnit))
return *CU;
CompilationDir = DIUnit->getDirectory();
auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
InfoHolder.addUnit(std::move(OwnedUnit));
for (auto *IE : DIUnit->getImportedEntities())
NewCU.addImportedEntity(IE);
// LTO with assembly output shares a single line table amongst multiple CUs.
// To avoid the compilation directory being ambiguous, let the line table
// explicitly describe the directory of all files, never relying on the
// compilation directory.
if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
Asm->OutStreamer->emitDwarfFile0Directive(
CompilationDir, DIUnit->getFilename(), getMD5AsBytes(DIUnit->getFile()),
DIUnit->getSource(), NewCU.getUniqueID());
if (useSplitDwarf()) {
NewCU.setSkeleton(constructSkeletonCU(NewCU));
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
} else {
finishUnitAttributes(DIUnit, NewCU);
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
}
+ // Create DIEs for function declarations used for call site debug info.
+ for (auto Scope : DIUnit->getRetainedTypes())
+ if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
+ NewCU.getOrCreateSubprogramDIE(SP);
+
CUMap.insert({DIUnit, &NewCU});
CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
}
void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
const DIImportedEntity *N) {
if (isa<DILocalScope>(N->getScope()))
return;
if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope()))
D->addChild(TheCU.constructImportedEntityDIE(N));
}
/// Sort and unique GVEs by comparing their fragment offset.
static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
llvm::sort(
GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
// Sort order: first null exprs, then exprs without fragment
// info, then sort by fragment offset in bits.
// FIXME: Come up with a more comprehensive comparator so
// the sorting isn't non-deterministic, and so the following
// std::unique call works correctly.
if (!A.Expr || !B.Expr)
return !!B.Expr;
auto FragmentA = A.Expr->getFragmentInfo();
auto FragmentB = B.Expr->getFragmentInfo();
if (!FragmentA || !FragmentB)
return !!FragmentB;
return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
});
GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
[](DwarfCompileUnit::GlobalExpr A,
DwarfCompileUnit::GlobalExpr B) {
return A.Expr == B.Expr;
}),
GVEs.end());
return GVEs;
}
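// For example, the expression list { fragment(32, 32), <null>, fragment(0, 32) }
// is reordered to { <null>, fragment(0, 32), fragment(32, 32) } before
// duplicates (compared by expression pointer identity) are dropped.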
// Emit all Dwarf sections that should come prior to the content. Create
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
void DwarfDebug::beginModule(Module *M) {
DebugHandlerBase::beginModule(M);
if (!Asm || !MMI->hasDebugInfo())
return;
unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
assert(MMI->hasDebugInfo() &&
"DebugInfoAvailabilty unexpectedly not initialized");
SingleCU = NumDebugCUs == 1;
DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
GVMap;
for (const GlobalVariable &Global : M->globals()) {
SmallVector<DIGlobalVariableExpression *, 1> GVs;
Global.getDebugInfo(GVs);
for (auto *GVE : GVs)
GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()});
}
// Create the symbol that designates the start of the unit's contribution
// to the string offsets table. In a split DWARF scenario, only the skeleton
// unit has the DW_AT_str_offsets_base attribute (and hence needs the symbol).
if (useSegmentedStringOffsetsTable())
(useSplitDwarf() ? SkeletonHolder : InfoHolder)
.setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base"));
// Create the symbols that designate the start of the DWARF v5 range list
// and locations list tables. They are located past the table headers.
if (getDwarfVersion() >= 5) {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.setRnglistsTableBaseSym(
Asm->createTempSymbol("rnglists_table_base"));
if (useSplitDwarf())
InfoHolder.setRnglistsTableBaseSym(
Asm->createTempSymbol("rnglists_dwo_table_base"));
}
// Create the symbol that points to the first entry following the debug
// address table (.debug_addr) header.
AddrPool.setLabel(Asm->createTempSymbol("addr_table_base"));
DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
for (DICompileUnit *CUNode : M->debug_compile_units()) {
// FIXME: Move local imported entities into a list attached to the
// subprogram, then this search won't be needed and a
// getImportedEntities().empty() test should go below with the rest.
bool HasNonLocalImportedEntities = llvm::any_of(
CUNode->getImportedEntities(), [](const DIImportedEntity *IE) {
return !isa<DILocalScope>(IE->getScope());
});
if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() &&
CUNode->getRetainedTypes().empty() &&
CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty())
continue;
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode);
// Global Variables.
for (auto *GVE : CUNode->getGlobalVariables()) {
// Don't bother adding DIGlobalVariableExpressions listed in the CU if we
// already know about the variable and it isn't adding a constant
// expression.
auto &GVMapEntry = GVMap[GVE->getVariable()];
auto *Expr = GVE->getExpression();
if (!GVMapEntry.size() || (Expr && Expr->isConstant()))
GVMapEntry.push_back({nullptr, Expr});
}
DenseSet<DIGlobalVariable *> Processed;
for (auto *GVE : CUNode->getGlobalVariables()) {
DIGlobalVariable *GV = GVE->getVariable();
if (Processed.insert(GV).second)
CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV]));
}
for (auto *Ty : CUNode->getEnumTypes()) {
// The enum types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
CU.getOrCreateTypeDIE(cast<DIType>(Ty));
}
for (auto *Ty : CUNode->getRetainedTypes()) {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
if (DIType *RT = dyn_cast<DIType>(Ty))
// There is no point in force-emitting a forward declaration.
CU.getOrCreateTypeDIE(RT);
}
// Emit imported_modules last so that the relevant context is already
// available.
for (auto *IE : CUNode->getImportedEntities())
constructAndAddImportedEntityDIE(CU, IE);
}
}
void DwarfDebug::finishEntityDefinitions() {
for (const auto &Entity : ConcreteEntities) {
DIE *Die = Entity->getDIE();
assert(Die);
// FIXME: Consider the time-space tradeoff of just storing the unit pointer
// in the ConcreteEntities list, rather than looking it up again here.
// DIE::getUnit isn't simple - it walks parent pointers, etc.
DwarfCompileUnit *Unit = CUDieMap.lookup(Die->getUnitDie());
assert(Unit);
Unit->finishEntityDefinition(Entity.get());
}
}
void DwarfDebug::finishSubprogramDefinitions() {
for (const DISubprogram *SP : ProcessedSPNodes) {
assert(SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug);
forBothCUs(
getOrCreateDwarfCompileUnit(SP->getUnit()),
[&](DwarfCompileUnit &CU) { CU.finishSubprogramDefinition(SP); });
}
}
void DwarfDebug::finalizeModuleInfo() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
finishSubprogramDefinitions();
finishEntityDefinitions();
// Include the DWO file name in the hash if there's more than one CU.
// This handles ThinLTO's situation where imported CUs may very easily be
// duplicates of the same CU partially imported into another ThinLTO unit.
StringRef DWOName;
if (CUMap.size() > 1)
DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile;
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
if (TheCU.getCUNode()->isDebugDirectivesOnly())
continue;
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
TheCU.constructContainingTypeDIEs();
// Add CU specific attributes if we need to add any.
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
auto *SkCU = TheCU.getSkeleton();
bool HasSplitUnit = SkCU && !TheCU.getUnitDie().children().empty();
if (HasSplitUnit) {
dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
? dwarf::DW_AT_dwo_name
: dwarf::DW_AT_GNU_dwo_name;
finishUnitAttributes(TheCU.getCUNode(), TheCU);
TheCU.addString(TheCU.getUnitDie(), attrDWOName,
Asm->TM.Options.MCOptions.SplitDwarfFile);
SkCU->addString(SkCU->getUnitDie(), attrDWOName,
Asm->TM.Options.MCOptions.SplitDwarfFile);
// Emit a unique identifier for this CU.
uint64_t ID =
DIEHash(Asm, &TheCU).computeCUSignature(DWOName, TheCU.getUnitDie());
if (getDwarfVersion() >= 5) {
TheCU.setDWOId(ID);
SkCU->setDWOId(ID);
} else {
TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
}
if (getDwarfVersion() < 5 && !SkeletonHolder.getRangeLists().empty()) {
const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
Sym, Sym);
}
} else if (SkCU) {
finishUnitAttributes(SkCU->getCUNode(), *SkCU);
}
// If we have code split among multiple sections or non-contiguous
// ranges of code then emit a DW_AT_ranges attribute on the unit that will
// remain in the .o file, otherwise add a DW_AT_low_pc.
// FIXME: We should use ranges to allow reordering of code, a la
// .subsections_via_symbols in mach-o. This would mean turning on
// ranges for all subprogram DIEs for mach-o.
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
if (unsigned NumRanges = TheCU.getRanges().size()) {
if (NumRanges > 1 && useRangesSection())
// A DW_AT_low_pc attribute may also be specified in combination with
// DW_AT_ranges to specify the default base address for use in
// location lists (see Section 2.6.2) and range lists (see Section
// 2.17.3).
U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
else
U.setBaseAddress(TheCU.getRanges().front().Begin);
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
if ((HasSplitUnit || getDwarfVersion() >= 5) && !AddrPool.isEmpty())
U.addAddrTableBase();
if (getDwarfVersion() >= 5) {
if (U.hasRangeLists())
U.addRnglistsBase();
if (!DebugLocs.getLists().empty()) {
if (!useSplitDwarf())
U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
DebugLocs.getSym(),
TLOF.getDwarfLoclistsSection()->getBeginSymbol());
}
}
auto *CUNode = cast<DICompileUnit>(P.first);
// If the compile unit has macros, emit "DW_AT_macro_info/DW_AT_macros"
// attribute.
if (CUNode->getMacros()) {
if (UseDebugMacroSection) {
if (useSplitDwarf())
TheCU.addSectionDelta(
TheCU.getUnitDie(), dwarf::DW_AT_macros, U.getMacroLabelBegin(),
TLOF.getDwarfMacroDWOSection()->getBeginSymbol());
else {
dwarf::Attribute MacrosAttr = getDwarfVersion() >= 5
? dwarf::DW_AT_macros
: dwarf::DW_AT_GNU_macros;
U.addSectionLabel(U.getUnitDie(), MacrosAttr, U.getMacroLabelBegin(),
TLOF.getDwarfMacroSection()->getBeginSymbol());
}
} else {
if (useSplitDwarf())
TheCU.addSectionDelta(
TheCU.getUnitDie(), dwarf::DW_AT_macro_info,
U.getMacroLabelBegin(),
TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol());
else
U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
U.getMacroLabelBegin(),
TLOF.getDwarfMacinfoSection()->getBeginSymbol());
}
}
}
// Emit all frontend-produced Skeleton CUs, i.e., Clang modules.
for (auto *CUNode : MMI->getModule()->debug_compile_units())
if (CUNode->getDWOId())
getOrCreateDwarfCompileUnit(CUNode);
// Compute DIE offsets and sizes.
InfoHolder.computeSizeAndOffsets();
if (useSplitDwarf())
SkeletonHolder.computeSizeAndOffsets();
}
// Emit all Dwarf sections that should come after the content.
void DwarfDebug::endModule() {
assert(CurFn == nullptr);
assert(CurMI == nullptr);
for (const auto &P : CUMap) {
auto &CU = *P.second;
CU.createBaseTypeDIEs();
}
// If we aren't actually generating debug info (see the check in beginModule,
// conditionalized on the presence of the llvm.dbg.cu metadata node), bail out.
if (!Asm || !MMI->hasDebugInfo())
return;
// Finalize the debug info for the module.
finalizeModuleInfo();
if (useSplitDwarf())
// Emit debug_loc.dwo/debug_loclists.dwo section.
emitDebugLocDWO();
else
// Emit debug_loc/debug_loclists section.
emitDebugLoc();
// Emit the corresponding abbreviations into an abbrev section.
emitAbbreviations();
// Emit all the DIEs into a debug info section.
emitDebugInfo();
// Emit info into a debug aranges section.
if (GenerateARangeSection)
emitDebugARanges();
// Emit info into a debug ranges section.
emitDebugRanges();
if (useSplitDwarf())
// Emit info into a debug macinfo.dwo section.
emitDebugMacinfoDWO();
else
// Emit info into a debug macinfo/macro section.
emitDebugMacinfo();
emitDebugStr();
if (useSplitDwarf()) {
emitDebugStrDWO();
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
emitDebugRangesDWO();
}
emitDebugAddr();
// Emit info into the dwarf accelerator table sections.
switch (getAccelTableKind()) {
case AccelTableKind::Apple:
emitAccelNames();
emitAccelObjC();
emitAccelNamespaces();
emitAccelTypes();
break;
case AccelTableKind::Dwarf:
emitAccelDebugNames();
break;
case AccelTableKind::None:
break;
case AccelTableKind::Default:
llvm_unreachable("Default should have already been resolved.");
}
// Emit the pubnames and pubtypes sections if requested.
emitDebugPubSections();
// clean up.
// FIXME: AbstractVariables.clear();
}
void DwarfDebug::ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
const DINode *Node,
const MDNode *ScopeNode) {
if (CU.getExistingAbstractEntity(Node))
return;
CU.createAbstractEntity(Node, LScopes.getOrCreateAbstractScope(
cast<DILocalScope>(ScopeNode)));
}
void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
const DINode *Node, const MDNode *ScopeNode) {
if (CU.getExistingAbstractEntity(Node))
return;
if (LexicalScope *Scope =
LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
CU.createAbstractEntity(Node, Scope);
}
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) {
SmallDenseMap<InlinedEntity, DbgVariable *> MFVars;
LLVM_DEBUG(dbgs() << "DwarfDebug: collecting variables from MF side table\n");
for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
"Expected inlined-at fields to agree");
InlinedEntity Var(VI.Var, VI.Loc->getInlinedAt());
Processed.insert(Var);
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
if (!Scope) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << VI.Var->getName()
<< ", no variable scope found\n");
continue;
}
ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
auto RegVar = std::make_unique<DbgVariable>(
cast<DILocalVariable>(Var.first), Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
LLVM_DEBUG(dbgs() << "Created DbgVariable for " << VI.Var->getName()
<< "\n");
if (DbgVariable *DbgVar = MFVars.lookup(Var))
DbgVar->addMMIEntry(*RegVar);
else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
MFVars.insert({Var, RegVar.get()});
ConcreteEntities.push_back(std::move(RegVar));
}
}
}
/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
/// enclosing lexical scope. The check ensures there are no other instructions
/// in the same lexical scope preceding the DBG_VALUE and that its range is
/// either open or otherwise rolls off the end of the scope.
static bool validThroughout(LexicalScopes &LScopes,
const MachineInstr *DbgValue,
const MachineInstr *RangeEnd,
const InstructionOrdering &Ordering) {
assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
auto MBB = DbgValue->getParent();
auto DL = DbgValue->getDebugLoc();
auto *LScope = LScopes.findLexicalScope(DL);
// Scope doesn't exist; this is a dead DBG_VALUE.
if (!LScope)
return false;
auto &LSRange = LScope->getRanges();
if (LSRange.size() == 0)
return false;
const MachineInstr *LScopeBegin = LSRange.front().first;
// If the scope starts before the DBG_VALUE then we may have a negative
// result. Otherwise the location is live coming into the scope and we
// can skip the following checks.
if (!Ordering.isBefore(DbgValue, LScopeBegin)) {
// Exit if the lexical scope begins outside of the current block.
if (LScopeBegin->getParent() != MBB)
return false;
MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
for (++Pred; Pred != MBB->rend(); ++Pred) {
if (Pred->getFlag(MachineInstr::FrameSetup))
break;
auto PredDL = Pred->getDebugLoc();
if (!PredDL || Pred->isMetaInstruction())
continue;
// Check whether the instruction preceding the DBG_VALUE is in the same
// (sub)scope as the DBG_VALUE.
if (DL->getScope() == PredDL->getScope())
return false;
auto *PredScope = LScopes.findLexicalScope(PredDL);
if (!PredScope || LScope->dominates(PredScope))
return false;
}
}
// If the range of the DBG_VALUE is open-ended, report success.
if (!RangeEnd)
return true;
// Single, constant DBG_VALUEs in the prologue are promoted to be live
// throughout the function. This is a hack, presumably for DWARF v2 and not
// necessarily correct. It would be much better to use a dbg.declare instead
// if we know the constant is live throughout the scope.
if (MBB->pred_empty() &&
all_of(DbgValue->debug_operands(),
[](const MachineOperand &Op) { return Op.isImm(); }))
return true;
// Test if the location terminates before the end of the scope.
const MachineInstr *LScopeEnd = LSRange.back().second;
if (Ordering.isBefore(RangeEnd, LScopeEnd))
return false;
// There's a single location which starts at the scope start, and ends at or
// after the scope end.
return true;
}
/// Build the location list for all DBG_VALUEs in the function that
/// describe the same variable. The resulting DebugLocEntries will have
/// strictly monotonically increasing begin addresses and will never
/// overlap. If the resulting list has only one entry that is valid
/// throughout the variable's scope, return true.
//
// See the definition of DbgValueHistoryMap::Entry for an explanation of the
// different kinds of history map entries. One thing to be aware of is that if
// a debug value is ended by another entry (rather than being valid until the
// end of the function), that entry's instruction may or may not be included in
// the range, depending on if the entry is a clobbering entry (it has an
// instruction that clobbers one or more preceding locations), or if it is an
// (overlapping) debug value entry. This distinction can be seen in the example
// below. The first debug value is ended by the clobbering entry 2, and the
// second and third debug values are ended by the overlapping debug value entry
// 4.
//
// Input:
//
// History map entries [type, end index, mi]
//
// 0 | [DbgValue, 2, DBG_VALUE $reg0, [...] (fragment 0, 32)]
// 1 | | [DbgValue, 4, DBG_VALUE $reg1, [...] (fragment 32, 32)]
// 2 | | [Clobber, $reg0 = [...], -, -]
// 3 | | [DbgValue, 4, DBG_VALUE 123, [...] (fragment 64, 32)]
// 4 [DbgValue, ~0, DBG_VALUE @g, [...] (fragment 0, 96)]
//
// Output [start, end) [Value...]:
//
// [0-1) [(reg0, fragment 0, 32)]
// [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)]
// [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)]
// [4-) [(@g, fragment 0, 96)]
bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const DbgValueHistoryMap::Entries &Entries) {
using OpenRange =
std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>;
SmallVector<OpenRange, 4> OpenRanges;
bool isSafeForSingleLocation = true;
const MachineInstr *StartDebugMI = nullptr;
const MachineInstr *EndMI = nullptr;
for (auto EB = Entries.begin(), EI = EB, EE = Entries.end(); EI != EE; ++EI) {
const MachineInstr *Instr = EI->getInstr();
// Remove all values that are no longer live.
size_t Index = std::distance(EB, EI);
erase_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; });
// If we are dealing with a clobbering entry, this iteration will result in
// a location list entry starting after the clobbering instruction.
const MCSymbol *StartLabel =
EI->isClobber() ? getLabelAfterInsn(Instr) : getLabelBeforeInsn(Instr);
assert(StartLabel &&
"Forgot label before/after instruction starting a range!");
const MCSymbol *EndLabel;
if (std::next(EI) == Entries.end()) {
const MachineBasicBlock &EndMBB = Asm->MF->back();
EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionIDNum()].EndLabel;
if (EI->isClobber())
EndMI = EI->getInstr();
}
else if (std::next(EI)->isClobber())
EndLabel = getLabelAfterInsn(std::next(EI)->getInstr());
else
EndLabel = getLabelBeforeInsn(std::next(EI)->getInstr());
assert(EndLabel && "Forgot label after instruction ending a range!");
if (EI->isDbgValue())
LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Instr << "\n");
// If this history map entry has a debug value, add that to the list of
// open ranges and check if its location is valid for a single value
// location.
if (EI->isDbgValue()) {
// Do not add undef debug values, as they are redundant information in
// the location list entries. An undef debug value results in an empty
// location description. If there are any non-undef fragments then padding pieces
// with empty location descriptions will automatically be inserted, and if
// all fragments are undef then the whole location list entry is
// redundant.
if (!Instr->isUndefDebugValue()) {
auto Value = getDebugLocValue(Instr);
OpenRanges.emplace_back(EI->getEndIndex(), Value);
// TODO: Add support for single value fragment locations.
if (Instr->getDebugExpression()->isFragment())
isSafeForSingleLocation = false;
if (!StartDebugMI)
StartDebugMI = Instr;
} else {
isSafeForSingleLocation = false;
}
}
// Location list entries with empty location descriptions are redundant
// information in DWARF, so do not emit those.
if (OpenRanges.empty())
continue;
// Omit entries with empty ranges as they do not have any effect in DWARF.
if (StartLabel == EndLabel) {
LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n");
continue;
}
SmallVector<DbgValueLoc, 4> Values;
for (auto &R : OpenRanges)
Values.push_back(R.second);
// With basic block sections, it is possible that the StartLabel and the
// Instr are not in the same section. This happens when the StartLabel is
// the function begin label and the dbg value appears in a basic block
// that is not the entry. In this case, the range needs to be split to
// span each individual section in the range from StartLabel to EndLabel.
if (Asm->MF->hasBBSections() && StartLabel == Asm->getFunctionBegin() &&
!Instr->getParent()->sameSection(&Asm->MF->front())) {
const MCSymbol *BeginSectionLabel = StartLabel;
for (const MachineBasicBlock &MBB : *Asm->MF) {
if (MBB.isBeginSection() && &MBB != &Asm->MF->front())
BeginSectionLabel = MBB.getSymbol();
if (MBB.sameSection(Instr->getParent())) {
DebugLoc.emplace_back(BeginSectionLabel, EndLabel, Values);
break;
}
if (MBB.isEndSection())
DebugLoc.emplace_back(BeginSectionLabel, MBB.getEndSymbol(), Values);
}
} else {
DebugLoc.emplace_back(StartLabel, EndLabel, Values);
}
// Attempt to coalesce the ranges of two otherwise identical
// DebugLocEntries.
auto CurEntry = DebugLoc.rbegin();
LLVM_DEBUG({
dbgs() << CurEntry->getValues().size() << " Values:\n";
for (auto &Value : CurEntry->getValues())
Value.dump();
dbgs() << "-----\n";
});
auto PrevEntry = std::next(CurEntry);
if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
DebugLoc.pop_back();
}
if (!isSafeForSingleLocation ||
!validThroughout(LScopes, StartDebugMI, EndMI, getInstOrdering()))
return false;
if (DebugLoc.size() == 1)
return true;
if (!Asm->MF->hasBBSections())
return false;
// Check here to see if the loclist can be merged into a single range. If not,
// we must keep the split loclists per section. This does exactly what
// MergeRanges does without sections. We don't actually merge the ranges
// as the split ranges must be kept intact if this cannot be collapsed
// into a single range.
const MachineBasicBlock *RangeMBB = nullptr;
if (DebugLoc[0].getBeginSym() == Asm->getFunctionBegin())
RangeMBB = &Asm->MF->front();
else
RangeMBB = Entries.begin()->getInstr()->getParent();
auto *CurEntry = DebugLoc.begin();
auto *NextEntry = std::next(CurEntry);
while (NextEntry != DebugLoc.end()) {
// Get the last machine basic block of this section.
while (!RangeMBB->isEndSection())
RangeMBB = RangeMBB->getNextNode();
if (!RangeMBB->getNextNode())
return false;
// CurEntry should end the current section and NextEntry should start
// the next section and the Values must match for these two ranges to be
// merged.
if (CurEntry->getEndSym() != RangeMBB->getEndSymbol() ||
NextEntry->getBeginSym() != RangeMBB->getNextNode()->getSymbol() ||
CurEntry->getValues() != NextEntry->getValues())
return false;
RangeMBB = RangeMBB->getNextNode();
CurEntry = NextEntry;
NextEntry = std::next(CurEntry);
}
return true;
}
DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
LexicalScope &Scope,
const DINode *Node,
const DILocation *Location,
const MCSymbol *Sym) {
ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode());
if (isa<const DILocalVariable>(Node)) {
ConcreteEntities.push_back(
std::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
Location));
InfoHolder.addScopeVariable(&Scope,
cast<DbgVariable>(ConcreteEntities.back().get()));
} else if (isa<const DILabel>(Node)) {
ConcreteEntities.push_back(
std::make_unique<DbgLabel>(cast<const DILabel>(Node),
Location, Sym));
InfoHolder.addScopeLabel(&Scope,
cast<DbgLabel>(ConcreteEntities.back().get()));
}
return ConcreteEntities.back().get();
}
// Find variables for each lexical scope.
void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
const DISubprogram *SP,
DenseSet<InlinedEntity> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(TheCU, Processed);
for (const auto &I : DbgValues) {
InlinedEntity IV = I.first;
if (Processed.count(IV))
continue;
// Instruction ranges, specifying where IV is accessible.
const auto &HistoryMapEntries = I.second;
// Try to find any non-empty variable location. Do not create a concrete
// entity if there are no locations.
if (!DbgValues.hasNonEmptyLocation(HistoryMapEntries))
continue;
LexicalScope *Scope = nullptr;
const DILocalVariable *LocalVar = cast<DILocalVariable>(IV.first);
if (const DILocation *IA = IV.second)
Scope = LScopes.findInlinedScope(LocalVar->getScope(), IA);
else
Scope = LScopes.findLexicalScope(LocalVar->getScope());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
Processed.insert(IV);
DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU,
*Scope, LocalVar, IV.second));
const MachineInstr *MInsn = HistoryMapEntries.front().getInstr();
assert(MInsn->isDebugValue() && "History must begin with debug value");
// Check if there is a single DBG_VALUE, valid throughout the var's scope.
// If the history map contains a single debug value, there may be an
// additional entry which clobbers the debug value.
size_t HistSize = HistoryMapEntries.size();
bool SingleValueWithClobber =
HistSize == 2 && HistoryMapEntries[1].isClobber();
if (HistSize == 1 || SingleValueWithClobber) {
const auto *End =
SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr;
if (validThroughout(LScopes, MInsn, End, getInstOrdering())) {
RegVar->initializeDbgValue(MInsn);
continue;
}
}
// Do not emit location lists if the .debug_loc section is disabled.
if (!useLocSection())
continue;
// Handle multiple DBG_VALUE instructions describing one variable.
DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn);
// Build the location list for this variable.
SmallVector<DebugLocEntry, 8> Entries;
bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries);
// Check whether buildLocationList managed to merge all locations to one
// that is valid throughout the variable's scope. If so, produce single
// value location.
if (isValidSingleLocation) {
RegVar->initializeDbgValue(Entries[0].getValues()[0]);
continue;
}
// If the variable has a DIBasicType, extract it. Basic types cannot have
// unique identifiers, so don't bother resolving the type with the
// identifier map.
const DIBasicType *BT = dyn_cast<DIBasicType>(
static_cast<const Metadata *>(LocalVar->getType()));
// Finalize the entry by lowering it into a DWARF bytestream.
for (auto &Entry : Entries)
Entry.finalize(*Asm, List, BT, TheCU);
}
// For each InlinedEntity collected from DBG_LABEL instructions, convert to
// DWARF-related DbgLabel.
for (const auto &I : DbgLabels) {
InlinedEntity IL = I.first;
const MachineInstr *MI = I.second;
if (MI == nullptr)
continue;
LexicalScope *Scope = nullptr;
const DILabel *Label = cast<DILabel>(IL.first);
// The scope could have an extra lexical block file.
const DILocalScope *LocalScope =
Label->getScope()->getNonLexicalBlockFileScope();
// Get the inlined DILocation if this is an inlined label.
if (const DILocation *IA = IL.second)
Scope = LScopes.findInlinedScope(LocalScope, IA);
else
Scope = LScopes.findLexicalScope(LocalScope);
// If label scope is not found then skip this label.
if (!Scope)
continue;
Processed.insert(IL);
/// At this point, the temporary label is created.
/// Save the temporary label in the DbgLabel entity so we can emit the
/// actual address when generating the DWARF DIE.
MCSymbol *Sym = getLabelBeforeInsn(MI);
createConcreteEntity(TheCU, *Scope, Label, IL.second, Sym);
}
// Collect info for variables/labels that were optimized out.
for (const DINode *DN : SP->getRetainedNodes()) {
if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
continue;
LexicalScope *Scope = nullptr;
if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
Scope = LScopes.findLexicalScope(DV->getScope());
} else if (auto *DL = dyn_cast<DILabel>(DN)) {
Scope = LScopes.findLexicalScope(DL->getScope());
}
if (Scope)
createConcreteEntity(TheCU, *Scope, DN, nullptr);
}
}
// Process beginning of an instruction.
void DwarfDebug::beginInstruction(const MachineInstr *MI) {
const MachineFunction &MF = *MI->getMF();
const auto *SP = MF.getFunction().getSubprogram();
bool NoDebug =
!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug;
// Delay slot support check.
auto delaySlotSupported = [](const MachineInstr &MI) {
if (!MI.isBundledWithSucc())
return false;
auto Suc = std::next(MI.getIterator());
(void)Suc;
// Ensure that the delay-slot instruction is a successor of the call instruction.
// Ex. CALL_INSTRUCTION {
// DELAY_SLOT_INSTRUCTION }
assert(Suc->isBundledWithPred() &&
"Call bundle instructions are out of order");
return true;
};
// When describing calls, we need a label for the call instruction.
if (!NoDebug && SP->areAllCallsDescribed() &&
MI->isCandidateForCallSiteEntry(MachineInstr::AnyInBundle) &&
(!MI->hasDelaySlot() || delaySlotSupported(*MI))) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool IsTail = TII->isTailCall(*MI);
// For tail calls, we need the address of the branch instruction for
// DW_AT_call_pc.
if (IsTail)
requestLabelBeforeInsn(MI);
// For non-tail calls, we need the return address for the call for
// DW_AT_call_return_pc. Under GDB tuning, this information is needed for
// tail calls as well.
requestLabelAfterInsn(MI);
}
DebugHandlerBase::beginInstruction(MI);
if (!CurMI)
return;
if (NoDebug)
return;
// Check if source location changes, but ignore DBG_VALUE and CFI locations.
// If the instruction is part of the function frame setup code, do not emit
// any line record, as there is no correspondence with any user code.
if (MI->isMetaInstruction() || MI->getFlag(MachineInstr::FrameSetup))
return;
const DebugLoc &DL = MI->getDebugLoc();
// When we emit a line-0 record, we don't update PrevInstLoc; so look at
// the last line number actually emitted, to see if it was line 0.
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
if (DL == PrevInstLoc) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
return;
// We have an explicit location, same as the previous location.
// But we might be coming back to it after a line 0 record.
if (LastAsmLine == 0 && DL.getLine() != 0) {
// Reinstate the source location but not marked as a statement.
const MDNode *Scope = DL.getScope();
recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0);
}
return;
}
if (!DL) {
// We have an unspecified location, which might want to be line 0.
// If we have already emitted a line-0 record, don't repeat it.
if (LastAsmLine == 0)
return;
// If user said Don't Do That, don't do that.
if (UnknownLocations == Disable)
return;
// See if we have a reason to emit a line-0 record now.
// Reasons to emit a line-0 record include:
// - User asked for it (UnknownLocations).
// - Instruction has a label, so it's referenced from somewhere else,
// possibly debug information; we want it to have a source location.
// - Instruction is at the top of a block; we don't want to inherit the
// location from the physically previous (maybe unrelated) block.
if (UnknownLocations == Enable || PrevLabel ||
(PrevInstBB && PrevInstBB != MI->getParent())) {
// Preserve the file and column numbers, if we can, to save space in
// the encoded line table.
// Do not update PrevInstLoc; it remembers the last non-0 line.
const MDNode *Scope = nullptr;
unsigned Column = 0;
if (PrevInstLoc) {
Scope = PrevInstLoc.getScope();
Column = PrevInstLoc.getCol();
}
recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0);
}
return;
}
// We have an explicit location, different from the previous location.
// Don't repeat a line-0 record, but otherwise emit the new location.
// (The new location might be an explicit line 0, which we do emit.)
if (DL.getLine() == 0 && LastAsmLine == 0)
return;
unsigned Flags = 0;
if (DL == PrologEndLoc) {
Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT;
PrologEndLoc = DebugLoc();
}
// If the line changed, we call that a new statement; unless we went to
// line 0 and came back, in which case it is not a new statement.
unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine;
if (DL.getLine() && DL.getLine() != OldLine)
Flags |= DWARF2_FLAG_IS_STMT;
const MDNode *Scope = DL.getScope();
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
// If we're not at line 0, remember this location.
if (DL.getLine())
PrevInstLoc = DL;
}
static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
for (const auto &MBB : *MF)
for (const auto &MI : MBB)
if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
MI.getDebugLoc())
return MI.getDebugLoc();
return DebugLoc();
}
/// Register a source line with debug info. Returns the unique label that was
/// emitted and which provides correspondence to the source line list.
static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col,
const MDNode *S, unsigned Flags, unsigned CUID,
uint16_t DwarfVersion,
ArrayRef<std::unique_ptr<DwarfCompileUnit>> DCUs) {
StringRef Fn;
unsigned FileNo = 1;
unsigned Discriminator = 0;
if (auto *Scope = cast_or_null<DIScope>(S)) {
Fn = Scope->getFilename();
if (Line != 0 && DwarfVersion >= 4)
if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
Discriminator = LBF->getDiscriminator();
FileNo = static_cast<DwarfCompileUnit &>(*DCUs[CUID])
.getOrCreateSourceID(Scope->getFile());
}
Asm.OutStreamer->emitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
Discriminator, Fn);
}
DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF,
unsigned CUID) {
// Get beginning of function.
if (DebugLoc PrologEndLoc = findPrologueEndLoc(&MF)) {
// Ensure the compile unit is created if the function is called before
// beginFunction().
(void)getOrCreateDwarfCompileUnit(
MF.getFunction().getSubprogram()->getUnit());
// We'd like to list the prologue as "not statements" but GDB behaves
// poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
const DISubprogram *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
::recordSourceLine(*Asm, SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT,
CUID, getDwarfVersion(), getUnits());
return PrologEndLoc;
}
return DebugLoc();
}
// Gather pre-function debug information. Assumes being called immediately
// after the function entry point has been emitted.
void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn = MF;
auto *SP = MF->getFunction().getSubprogram();
assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode());
if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
return;
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
// Set DwarfCompileUnitID in MCContext to the Compile Unit this function
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
else
Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
// Record beginning of function.
PrologEndLoc = emitInitialLocDirective(
*MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
}
void DwarfDebug::skippedNonDebugFunction() {
// If we don't have a subprogram for this function then there will be a hole
// in the range information. Keep note of this by setting the previously used
// CU to nullptr.
PrevCU = nullptr;
CurFn = nullptr;
}
// Gather and emit post-function debug information.
void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
const DISubprogram *SP = MF->getFunction().getSubprogram();
assert(CurFn == MF &&
"endFunction should be called with the same function as beginFunction");
// Set DwarfCompileUnitID in MCContext back to its default value.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
assert(!FnScope || SP == FnScope->getScopeNode());
DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
if (TheCU.getCUNode()->isDebugDirectivesOnly()) {
PrevLabel = nullptr;
CurFn = nullptr;
return;
}
DenseSet<InlinedEntity> Processed;
collectEntityInfo(TheCU, SP, Processed);
// Add the range of this function to the list of ranges for the CU.
// With basic block sections, add ranges for all basic block sections.
for (const auto &R : Asm->MBBSectionRanges)
TheCU.addRange({R.second.BeginLabel, R.second.EndLabel});
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
// is still needed as we need its source location.
if (!TheCU.getCUNode()->getDebugInfoForProfiling() &&
TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
assert(InfoHolder.getScopeVariables().empty());
PrevLabel = nullptr;
CurFn = nullptr;
return;
}
#ifndef NDEBUG
size_t NumAbstractScopes = LScopes.getAbstractScopesList().size();
#endif
// Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
auto *SP = cast<DISubprogram>(AScope->getScopeNode());
for (const DINode *DN : SP->getRetainedNodes()) {
if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
continue;
const MDNode *Scope = nullptr;
if (auto *DV = dyn_cast<DILocalVariable>(DN))
Scope = DV->getScope();
else if (auto *DL = dyn_cast<DILabel>(DN))
Scope = DL->getScope();
else
llvm_unreachable("Unexpected DI type!");
// Collect info for variables/labels that were optimized out.
ensureAbstractEntityIsCreated(TheCU, DN, Scope);
assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
&& "ensureAbstractEntityIsCreated inserted abstract scopes");
}
constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
ProcessedSPNodes.insert(SP);
DIE &ScopeDIE = TheCU.constructSubprogramScopeDIE(SP, FnScope);
if (auto *SkelCU = TheCU.getSkeleton())
if (!LScopes.getAbstractScopesList().empty() &&
TheCU.getCUNode()->getSplitDebugInlining())
SkelCU->constructSubprogramScopeDIE(SP, FnScope);
// Construct call site entries.
constructCallSiteEntryDIEs(*SP, TheCU, ScopeDIE, *MF);
// Clear debug info
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
InfoHolder.getScopeVariables().clear();
InfoHolder.getScopeLabels().clear();
PrevLabel = nullptr;
CurFn = nullptr;
}
// Register a source line with debug info. Returns the unique label that was
// emitted and which provides correspondence to the source line list.
void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
unsigned Flags) {
::recordSourceLine(*Asm, Line, Col, S, Flags,
Asm->OutStreamer->getContext().getDwarfCompileUnitID(),
getDwarfVersion(), getUnits());
}
//===----------------------------------------------------------------------===//
// Emit Methods
//===----------------------------------------------------------------------===//
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.emitUnits(/* UseOffsets */ false);
}
// Emit the abbreviation section.
void DwarfDebug::emitAbbreviations() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
}
void DwarfDebug::emitStringOffsetsTableHeader() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.getStringPool().emitStringOffsetsTableHeader(
*Asm, Asm->getObjFileLowering().getDwarfStrOffSection(),
Holder.getStringOffsetsStartSym());
}
template <typename AccelTableT>
void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section,
StringRef TableName) {
Asm->OutStreamer->SwitchSection(Section);
// Emit the full data.
emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol());
}
void DwarfDebug::emitAccelDebugNames() {
// Don't emit anything if we have no compilation units to index.
if (getUnits().empty())
return;
emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits());
}
// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
"Names");
}
// Emit objective C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
"ObjC");
}
// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
emitAccel(AccelNamespace,
Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
"namespac");
}
// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
"types");
}
// Public name handling.
// The format for the various pubnames:
//
// dwarf pubnames - offset/name pairs where the offset is the offset into the CU
// for the DIE that is named.
//
// gnu pubnames - offset/index value/name tuples where the offset is the offset
// into the CU and the index value is computed according to the type of value
// for the DIE that is named.
//
// For type units the offset is the offset of the skeleton DIE. For split dwarf
// it's the offset within the debug_info/debug_types dwo section, however, the
// reference in the pubname header doesn't change.
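//
// As an illustrative sketch (mirroring emitDebugPubSection below, not a
// normative layout): after the header, each entry is the DIE offset in the
// unit's length-or-offset size, then - for GNU style only - a one-byte
// kind/linkage descriptor, then the NUL-terminated name; the list ends with
// an offset of 0.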
/// computeIndexValue - Compute the gdb index value for the DIE and CU.
static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
const DIE *Die) {
// Entities that ended up only in a Type Unit reference the CU instead (since
// the pub entry has offsets within the CU there's no real offset that can be
// provided anyway). As it happens all such entities (namespaces and types,
// types only in C++ at that) are rendered as TYPE+EXTERNAL. If this turns out
// not to be true it would be necessary to persist this information from the
// point at which the entry is added to the index data structure - since by
// the time the index is built from that, the original type/namespace DIE in a
// type unit has already been destroyed so it can't be queried for properties
// like tag, etc.
if (Die->getTag() == dwarf::DW_TAG_compile_unit)
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE,
dwarf::GIEL_EXTERNAL);
dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
// We could have a specification DIE that has most of our knowledge;
// look for that now.
if (DIEValue SpecVal = Die->findAttribute(dwarf::DW_AT_specification)) {
DIE &SpecDIE = SpecVal.getDIEEntry().getEntry();
if (SpecDIE.findAttribute(dwarf::DW_AT_external))
Linkage = dwarf::GIEL_EXTERNAL;
} else if (Die->findAttribute(dwarf::DW_AT_external))
Linkage = dwarf::GIEL_EXTERNAL;
switch (Die->getTag()) {
case dwarf::DW_TAG_class_type:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_enumeration_type:
return dwarf::PubIndexEntryDescriptor(
dwarf::GIEK_TYPE,
dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage())
? dwarf::GIEL_EXTERNAL
: dwarf::GIEL_STATIC);
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_subrange_type:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC);
case dwarf::DW_TAG_namespace:
return dwarf::GIEK_TYPE;
case dwarf::DW_TAG_subprogram:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage);
case dwarf::DW_TAG_variable:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage);
case dwarf::DW_TAG_enumerator:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE,
dwarf::GIEL_STATIC);
default:
return dwarf::GIEK_NONE;
}
}
/// emitDebugPubSections - Emit visible names and types into debug pubnames and
/// pubtypes sections.
void DwarfDebug::emitDebugPubSections() {
for (const auto &NU : CUMap) {
DwarfCompileUnit *TheU = NU.second;
if (!TheU->hasDwarfPubSections())
continue;
bool GnuStyle = TheU->getCUNode()->getNameTableKind() ==
DICompileUnit::DebugNameTableKind::GNU;
Asm->OutStreamer->SwitchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
: Asm->getObjFileLowering().getDwarfPubNamesSection());
emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames());
Asm->OutStreamer->SwitchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
: Asm->getObjFileLowering().getDwarfPubTypesSection());
emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes());
}
}
void DwarfDebug::emitSectionReference(const DwarfCompileUnit &CU) {
if (useSectionsAsReferences())
Asm->emitDwarfOffset(CU.getSection()->getBeginSymbol(),
CU.getDebugSectionOffset());
else
Asm->emitDwarfSymbolReference(CU.getLabelBegin());
}
void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
DwarfCompileUnit *TheU,
const StringMap<const DIE *> &Globals) {
if (auto *Skeleton = TheU->getSkeleton())
TheU = Skeleton;
// Emit the header.
MCSymbol *EndLabel = Asm->emitDwarfUnitLength(
"pub" + Name, "Length of Public " + Name + " Info");
Asm->OutStreamer->AddComment("DWARF Version");
Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION);
Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
emitSectionReference(*TheU);
Asm->OutStreamer->AddComment("Compilation Unit Length");
Asm->emitDwarfLengthOrOffset(TheU->getLength());
// Emit the pubnames for this compilation unit.
for (const auto &GI : Globals) {
const char *Name = GI.getKeyData();
const DIE *Entity = GI.second;
Asm->OutStreamer->AddComment("DIE offset");
Asm->emitDwarfLengthOrOffset(Entity->getOffset());
if (GnuStyle) {
dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
Asm->OutStreamer->AddComment(
Twine("Attributes: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) +
", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
Asm->emitInt8(Desc.toBits());
}
Asm->OutStreamer->AddComment("External Name");
Asm->OutStreamer->emitBytes(StringRef(Name, GI.getKeyLength() + 1));
}
Asm->OutStreamer->AddComment("End Mark");
Asm->emitDwarfLengthOrOffset(0);
Asm->OutStreamer->emitLabel(EndLabel);
}
/// Emit null-terminated strings into a debug str section.
void DwarfDebug::emitDebugStr() {
MCSection *StringOffsetsSection = nullptr;
if (useSegmentedStringOffsetsTable()) {
emitStringOffsetsTableHeader();
StringOffsetsSection = Asm->getObjFileLowering().getDwarfStrOffSection();
}
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection(),
StringOffsetsSection, /* UseRelativeOffsets = */ true);
}
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocStream::Entry &Entry,
const DwarfCompileUnit *CU) {
auto &&Comments = DebugLocs.getComments(Entry);
auto Comment = Comments.begin();
auto End = Comments.end();
// The expressions are inserted into a byte stream rather early (see
// DwarfExpression::addExpression) so for those ops (e.g. DW_OP_convert) that
// need to reference a base_type DIE the offset of that DIE is not yet known.
// To deal with this we instead insert a placeholder early and then extract
// it here and replace it with the real reference.
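// Concretely, the placeholder is a ULEB128 padded to ULEB128PadSize bytes, so
// the real base-type DIE offset is re-emitted below with the same padding and
// the byte offsets of the surrounding operands remain valid.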
unsigned PtrSize = Asm->MAI->getCodePointerSize();
DWARFDataExtractor Data(StringRef(DebugLocs.getBytes(Entry).data(),
DebugLocs.getBytes(Entry).size()),
Asm->getDataLayout().isLittleEndian(), PtrSize);
DWARFExpression Expr(Data, PtrSize, Asm->OutContext.getDwarfFormat());
using Encoding = DWARFExpression::Operation::Encoding;
uint64_t Offset = 0;
for (auto &Op : Expr) {
assert(Op.getCode() != dwarf::DW_OP_const_type &&
"3 operand ops not yet supported");
Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
Offset++;
for (unsigned I = 0; I < 2; ++I) {
if (Op.getDescription().Op[I] == Encoding::SizeNA)
continue;
if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
uint64_t Offset =
CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset();
assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset won't fit");
Streamer.emitULEB128(Offset, "", ULEB128PadSize);
// Make sure comments stay aligned.
for (unsigned J = 0; J < ULEB128PadSize; ++J)
if (Comment != End)
Comment++;
} else {
for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
Streamer.emitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
}
Offset = Op.getOperandEndOffset(I);
}
assert(Offset == Op.getEndOffset());
}
}
void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
const DbgValueLoc &Value,
DwarfExpression &DwarfExpr) {
auto *DIExpr = Value.getExpression();
DIExpressionCursor ExprCursor(DIExpr);
DwarfExpr.addFragmentOffset(DIExpr);
// If the DIExpr is an Entry Value, we want to follow the same code path
// regardless of whether the DBG_VALUE is variadic or not.
if (DIExpr && DIExpr->isEntryValue()) {
// Entry values can only be a single register with no additional DIExpr,
// so just add it directly.
assert(Value.getLocEntries().size() == 1);
assert(Value.getLocEntries()[0].isLocation());
MachineLocation Location = Value.getLocEntries()[0].getLoc();
DwarfExpr.setLocation(Location, DIExpr);
DwarfExpr.beginEntryValueExpression(ExprCursor);
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, ExprCursor, Location.getReg()))
return;
return DwarfExpr.addExpression(std::move(ExprCursor));
}
// Regular entry.
auto EmitValueLocEntry = [&DwarfExpr, &BT,
&AP](const DbgValueLocEntry &Entry,
DIExpressionCursor &Cursor) -> bool {
if (Entry.isInt()) {
if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
BT->getEncoding() == dwarf::DW_ATE_signed_char))
DwarfExpr.addSignedConstant(Entry.getInt());
else
DwarfExpr.addUnsignedConstant(Entry.getInt());
} else if (Entry.isLocation()) {
MachineLocation Location = Entry.getLoc();
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return false;
} else if (Entry.isTargetIndexLocation()) {
TargetIndexLocation Loc = Entry.getTargetIndexLocation();
// TODO: TargetIndexLocation is target-independent. Currently only the
// WebAssembly-specific encoding is supported.
assert(AP.TM.getTargetTriple().isWasm());
DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
} else if (Entry.isConstantFP()) {
if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() &&
!Cursor) {
DwarfExpr.addConstantFP(Entry.getConstantFP()->getValueAPF(), AP);
} else if (Entry.getConstantFP()
->getValueAPF()
.bitcastToAPInt()
.getBitWidth() <= 64 /*bits*/) {
DwarfExpr.addUnsignedConstant(
Entry.getConstantFP()->getValueAPF().bitcastToAPInt());
} else {
LLVM_DEBUG(
dbgs() << "Skipped DwarfExpression creation for ConstantFP of size"
<< Entry.getConstantFP()
->getValueAPF()
.bitcastToAPInt()
.getBitWidth()
<< " bits\n");
return false;
}
}
return true;
};
if (!Value.isVariadic()) {
if (!EmitValueLocEntry(Value.getLocEntries()[0], ExprCursor))
return;
DwarfExpr.addExpression(std::move(ExprCursor));
return;
}
// If any of the location entries are registers with the value 0, then the
// location is undefined.
if (any_of(Value.getLocEntries(), [](const DbgValueLocEntry &Entry) {
return Entry.isLocation() && !Entry.getLoc().getReg();
}))
return;
DwarfExpr.addExpression(
std::move(ExprCursor),
[EmitValueLocEntry, &Value](unsigned Idx,
DIExpressionCursor &Cursor) -> bool {
return EmitValueLocEntry(Value.getLocEntries()[Idx], Cursor);
});
}
void DebugLocEntry::finalize(const AsmPrinter &AP,
DebugLocStream::ListBuilder &List,
const DIBasicType *BT,
DwarfCompileUnit &TheCU) {
assert(!Values.empty() &&
"location list entries without values are redundant");
assert(Begin != End && "unexpected location list entry with empty range");
DebugLocStream::EntryBuilder Entry(List, Begin, End);
BufferByteStreamer Streamer = Entry.getStreamer();
DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer, TheCU);
const DbgValueLoc &Value = Values[0];
if (Value.isFragment()) {
// Emit all fragments that belong to the same variable and range.
assert(llvm::all_of(Values, [](DbgValueLoc P) {
return P.isFragment();
}) && "all values are expected to be fragments");
assert(llvm::is_sorted(Values) && "fragments are expected to be sorted");
for (const auto &Fragment : Values)
DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
} else {
assert(Values.size() == 1 && "only fragments may have >1 value");
DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr);
}
DwarfExpr.finalize();
if (DwarfExpr.TagOffset)
List.setTagOffset(*DwarfExpr.TagOffset);
}
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
const DwarfCompileUnit *CU) {
// Emit the size.
Asm->OutStreamer->AddComment("Loc expr size");
if (getDwarfVersion() >= 5)
Asm->emitULEB128(DebugLocs.getBytes(Entry).size());
else if (DebugLocs.getBytes(Entry).size() <= std::numeric_limits<uint16_t>::max())
Asm->emitInt16(DebugLocs.getBytes(Entry).size());
else {
// The entry is too big to fit into 16 bits; drop it, as there is nothing we
// can do.
Asm->emitInt16(0);
return;
}
// Emit the entry.
APByteStreamer Streamer(*Asm);
emitDebugLocEntry(Streamer, Entry, CU);
}
// Emit the header of a DWARF 5 range list table. Returns the symbol
// that designates the end of the table for the caller to emit when the table is
// complete.
static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
const DwarfFile &Holder) {
MCSymbol *TableEnd = mcdwarf::emitListsTableHeaderStart(*Asm->OutStreamer);
Asm->OutStreamer->AddComment("Offset entry count");
Asm->emitInt32(Holder.getRangeLists().size());
Asm->OutStreamer->emitLabel(Holder.getRnglistsTableBaseSym());
for (const RangeSpanList &List : Holder.getRangeLists())
Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(),
Asm->getDwarfOffsetByteSize());
return TableEnd;
}
// Emit the header of a DWARF 5 locations list table. Returns the symbol that
// designates the end of the table for the caller to emit when the table is
// complete.
static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
const DwarfDebug &DD) {
MCSymbol *TableEnd = mcdwarf::emitListsTableHeaderStart(*Asm->OutStreamer);
const auto &DebugLocs = DD.getDebugLocs();
Asm->OutStreamer->AddComment("Offset entry count");
Asm->emitInt32(DebugLocs.getLists().size());
Asm->OutStreamer->emitLabel(DebugLocs.getSym());
for (const auto &List : DebugLocs.getLists())
Asm->emitLabelDifference(List.Label, DebugLocs.getSym(),
Asm->getDwarfOffsetByteSize());
return TableEnd;
}
template <typename Ranges, typename PayloadEmitter>
static void emitRangeList(
DwarfDebug &DD, AsmPrinter *Asm, MCSymbol *Sym, const Ranges &R,
const DwarfCompileUnit &CU, unsigned BaseAddressx, unsigned OffsetPair,
unsigned StartxLength, unsigned EndOfList,
StringRef (*StringifyEnum)(unsigned),
bool ShouldUseBaseAddress,
PayloadEmitter EmitPayload) {
auto Size = Asm->MAI->getCodePointerSize();
bool UseDwarf5 = DD.getDwarfVersion() >= 5;
// Emit our symbol so we can find the beginning of the range.
Asm->OutStreamer->emitLabel(Sym);
// Gather all the ranges that apply to the same section so they can share
// a base address entry.
MapVector<const MCSection *, std::vector<decltype(&*R.begin())>> SectionRanges;
for (const auto &Range : R)
SectionRanges[&Range.Begin->getSection()].push_back(&Range);
const MCSymbol *CUBase = CU.getBaseAddress();
bool BaseIsSet = false;
for (const auto &P : SectionRanges) {
auto *Base = CUBase;
if (!Base && ShouldUseBaseAddress) {
const MCSymbol *Begin = P.second.front()->Begin;
const MCSymbol *NewBase = DD.getSectionLabel(&Begin->getSection());
if (!UseDwarf5) {
Base = NewBase;
BaseIsSet = true;
Asm->OutStreamer->emitIntValue(-1, Size);
Asm->OutStreamer->AddComment(" base address");
Asm->OutStreamer->emitSymbolValue(Base, Size);
} else if (NewBase != Begin || P.second.size() > 1) {
// Only use a base address if
// * the existing pool address doesn't match (NewBase != Begin)
// * or, there's more than one entry to share the base address
Base = NewBase;
BaseIsSet = true;
Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx));
Asm->emitInt8(BaseAddressx);
Asm->OutStreamer->AddComment(" base address index");
Asm->emitULEB128(DD.getAddressPool().getIndex(Base));
}
} else if (BaseIsSet && !UseDwarf5) {
BaseIsSet = false;
assert(!Base);
Asm->OutStreamer->emitIntValue(-1, Size);
Asm->OutStreamer->emitIntValue(0, Size);
}
for (const auto *RS : P.second) {
const MCSymbol *Begin = RS->Begin;
const MCSymbol *End = RS->End;
assert(Begin && "Range without a begin symbol?");
assert(End && "Range without an end symbol?");
if (Base) {
if (UseDwarf5) {
// Emit offset_pair when we have a base.
Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair));
Asm->emitInt8(OffsetPair);
Asm->OutStreamer->AddComment(" starting offset");
Asm->emitLabelDifferenceAsULEB128(Begin, Base);
Asm->OutStreamer->AddComment(" ending offset");
Asm->emitLabelDifferenceAsULEB128(End, Base);
} else {
Asm->emitLabelDifference(Begin, Base, Size);
Asm->emitLabelDifference(End, Base, Size);
}
} else if (UseDwarf5) {
Asm->OutStreamer->AddComment(StringifyEnum(StartxLength));
Asm->emitInt8(StartxLength);
Asm->OutStreamer->AddComment(" start index");
Asm->emitULEB128(DD.getAddressPool().getIndex(Begin));
Asm->OutStreamer->AddComment(" length");
Asm->emitLabelDifferenceAsULEB128(End, Begin);
} else {
Asm->OutStreamer->emitSymbolValue(Begin, Size);
Asm->OutStreamer->emitSymbolValue(End, Size);
}
EmitPayload(*RS);
}
}
if (UseDwarf5) {
Asm->OutStreamer->AddComment(StringifyEnum(EndOfList));
Asm->emitInt8(EndOfList);
} else {
// Terminate the list with two 0 values.
Asm->OutStreamer->emitIntValue(0, Size);
Asm->OutStreamer->emitIntValue(0, Size);
}
}
// Handles emission of both debug_loclists / debug_loclists.dwo.
static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) {
emitRangeList(DD, Asm, List.Label, DD.getDebugLocs().getEntries(List),
*List.CU, dwarf::DW_LLE_base_addressx,
dwarf::DW_LLE_offset_pair, dwarf::DW_LLE_startx_length,
dwarf::DW_LLE_end_of_list, llvm::dwarf::LocListEncodingString,
/* ShouldUseBaseAddress */ true,
[&](const DebugLocStream::Entry &E) {
DD.emitDebugLocEntryLocation(E, List.CU);
});
}
void DwarfDebug::emitDebugLocImpl(MCSection *Sec) {
if (DebugLocs.getLists().empty())
return;
Asm->OutStreamer->SwitchSection(Sec);
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5)
TableEnd = emitLoclistsTableHeader(Asm, *this);
for (const auto &List : DebugLocs.getLists())
emitLocList(*this, Asm, List);
if (TableEnd)
Asm->OutStreamer->emitLabel(TableEnd);
}
// Emit locations into the .debug_loc/.debug_loclists section.
void DwarfDebug::emitDebugLoc() {
emitDebugLocImpl(
getDwarfVersion() >= 5
? Asm->getObjFileLowering().getDwarfLoclistsSection()
: Asm->getObjFileLowering().getDwarfLocSection());
}
// Emit locations into the .debug_loc.dwo/.debug_loclists.dwo section.
void DwarfDebug::emitDebugLocDWO() {
if (getDwarfVersion() >= 5) {
emitDebugLocImpl(
Asm->getObjFileLowering().getDwarfLoclistsDWOSection());
return;
}
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocDWOSection());
Asm->OutStreamer->emitLabel(List.Label);
for (const auto &Entry : DebugLocs.getEntries(List)) {
// GDB only supports startx_length in pre-standard split-DWARF.
// (in v5 standard loclists, it currently* /only/ supports base_address +
// offset_pair, so the implementations can't really share much since they
// need to use different representations)
// * as of October 2018, at least
//
// In v5 (see emitLocList), this uses SectionLabels to reuse existing
// addresses in the address pool to minimize object size/relocations.
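//
// Each entry below is therefore: a DW_LLE_startx_length opcode, a ULEB128
// address-pool index for the start address, a fixed 4-byte length, and the
// counted location description emitted by emitDebugLocEntryLocation().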
Asm->emitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.Begin);
Asm->emitULEB128(idx);
// Also the pre-standard encoding is slightly different, emitting this as
// an address-length entry here, but it's a ULEB128 in DWARFv5 loclists.
Asm->emitLabelDifference(Entry.End, Entry.Begin, 4);
emitDebugLocEntryLocation(Entry, List.CU);
}
Asm->emitInt8(dwarf::DW_LLE_end_of_list);
}
}
struct ArangeSpan {
const MCSymbol *Start, *End;
};
// Emit a debug aranges section, containing a CU lookup for any
// address we can tie back to a CU.
void DwarfDebug::emitDebugARanges() {
// Provides a unique id per text section.
MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
// Filter labels by section.
for (const SymbolCU &SCU : ArangeLabels) {
if (SCU.Sym->isInSection()) {
// Make a note of this symbol and its section.
MCSection *Section = &SCU.Sym->getSection();
if (!Section->getKind().isMetadata())
SectionMap[Section].push_back(SCU);
} else {
// Some symbols (e.g. common/bss on mach-o) can have no section but still
// appear in the output. This is inconvenient, as we rely on sections to
// build arange spans; we can do without them, but it's awkward.
SectionMap[nullptr].push_back(SCU);
}
}
DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans;
for (auto &I : SectionMap) {
MCSection *Section = I.first;
SmallVector<SymbolCU, 8> &List = I.second;
if (List.size() < 1)
continue;
// If we have no section (e.g. common), just write out
// individual spans for each symbol.
if (!Section) {
for (const SymbolCU &Cur : List) {
ArangeSpan Span;
Span.Start = Cur.Sym;
Span.End = nullptr;
assert(Cur.CU);
Spans[Cur.CU].push_back(Span);
}
continue;
}
// Sort the symbols by offset within the section.
llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) {
unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
// Symbols with no order assigned should be placed at the end.
// (e.g. section end labels)
if (IA == 0)
return false;
if (IB == 0)
return true;
return IA < IB;
});
// Insert a final terminator.
List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section)));
// Build spans between each label.
const MCSymbol *StartSym = List[0].Sym;
for (size_t n = 1, e = List.size(); n < e; n++) {
const SymbolCU &Prev = List[n - 1];
const SymbolCU &Cur = List[n];
// Try and build the longest span we can within the same CU.
if (Cur.CU != Prev.CU) {
ArangeSpan Span;
Span.Start = StartSym;
Span.End = Cur.Sym;
assert(Prev.CU);
Spans[Prev.CU].push_back(Span);
StartSym = Cur.Sym;
}
}
}
// Start the dwarf aranges section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
unsigned PtrSize = Asm->MAI->getCodePointerSize();
// Build a list of CUs used.
std::vector<DwarfCompileUnit *> CUs;
for (const auto &it : Spans) {
DwarfCompileUnit *CU = it.first;
CUs.push_back(CU);
}
// Sort the CU list (again, to ensure consistent output order).
llvm::sort(CUs, [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
return A->getUniqueID() < B->getUniqueID();
});
// Emit an arange table for each CU we used.
for (DwarfCompileUnit *CU : CUs) {
std::vector<ArangeSpan> &List = Spans[CU];
// Describe the skeleton CU's offset and length, not the dwo file's.
if (auto *Skel = CU->getSkeleton())
CU = Skel;
// Emit size of content not including length itself.
unsigned ContentSize =
sizeof(int16_t) + // DWARF ARange version number
Asm->getDwarfOffsetByteSize() + // Offset of CU in the .debug_info
// section
sizeof(int8_t) + // Pointer Size (in bytes)
sizeof(int8_t); // Segment Size (in bytes)
unsigned TupleSize = PtrSize * 2;
// 7.20 in the Dwarf specs requires the table to be aligned to a tuple.
unsigned Padding = offsetToAlignment(
Asm->getUnitLengthFieldByteSize() + ContentSize, Align(TupleSize));
ContentSize += Padding;
ContentSize += (List.size() + 1) * TupleSize;
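// For example, with 8-byte pointers (TupleSize = 16) and a DWARF32 unit, the
// 4-byte length field plus the 8-byte header ends at offset 12, so 4 bytes of
// 0xff padding bring the first tuple to offset 16.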
// For each compile unit, write the list of spans it covers.
Asm->emitDwarfUnitLength(ContentSize, "Length of ARange Set");
Asm->OutStreamer->AddComment("DWARF Arange version number");
Asm->emitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer->AddComment("Offset Into Debug Info Section");
emitSectionReference(*CU);
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->emitInt8(PtrSize);
Asm->OutStreamer->AddComment("Segment Size (in bytes)");
Asm->emitInt8(0);
Asm->OutStreamer->emitFill(Padding, 0xff);
for (const ArangeSpan &Span : List) {
Asm->emitLabelReference(Span.Start, PtrSize);
// Calculate the size as the distance from the span start to its end.
if (Span.End) {
Asm->emitLabelDifference(Span.End, Span.Start, PtrSize);
} else {
// For symbols without an end marker (e.g. common), we
// write a single arange entry containing just that one symbol.
uint64_t Size = SymSize[Span.Start];
if (Size == 0)
Size = 1;
Asm->OutStreamer->emitIntValue(Size, PtrSize);
}
}
Asm->OutStreamer->AddComment("ARange terminator");
Asm->OutStreamer->emitIntValue(0, PtrSize);
Asm->OutStreamer->emitIntValue(0, PtrSize);
}
}
/// Emit a single range list. We handle both DWARF v5 and earlier.
static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm,
const RangeSpanList &List) {
emitRangeList(DD, Asm, List.Label, List.Ranges, *List.CU,
dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair,
dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list,
llvm::dwarf::RangeListEncodingString,
List.CU->getCUNode()->getRangesBaseAddress() ||
DD.getDwarfVersion() >= 5,
[](auto) {});
}
void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section) {
if (Holder.getRangeLists().empty())
return;
assert(useRangesSection());
assert(!CUMap.empty());
assert(llvm::any_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
return !Pair.second->getCUNode()->isDebugDirectivesOnly();
}));
Asm->OutStreamer->SwitchSection(Section);
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5)
TableEnd = emitRnglistsTableHeader(Asm, Holder);
for (const RangeSpanList &List : Holder.getRangeLists())
emitRangeList(*this, Asm, List);
if (TableEnd)
Asm->OutStreamer->emitLabel(TableEnd);
}
/// Emit address ranges into the .debug_ranges section or into the DWARF v5
/// .debug_rnglists section.
void DwarfDebug::emitDebugRanges() {
const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
emitDebugRangesImpl(Holder,
getDwarfVersion() >= 5
? Asm->getObjFileLowering().getDwarfRnglistsSection()
: Asm->getObjFileLowering().getDwarfRangesSection());
}
void DwarfDebug::emitDebugRangesDWO() {
emitDebugRangesImpl(InfoHolder,
Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
}
/// Emit the header of a DWARF 5 macro section, or the GNU extension for
/// DWARF 4.
static void emitMacroHeader(AsmPrinter *Asm, const DwarfDebug &DD,
const DwarfCompileUnit &CU, uint16_t DwarfVersion) {
enum HeaderFlagMask {
#define HANDLE_MACRO_FLAG(ID, NAME) MACRO_FLAG_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
};
Asm->OutStreamer->AddComment("Macro information version");
Asm->emitInt16(DwarfVersion >= 5 ? DwarfVersion : 4);
// We emit the line-offset flag unconditionally here, since the line offset is
// almost always present.
if (Asm->isDwarf64()) {
Asm->OutStreamer->AddComment("Flags: 64 bit, debug_line_offset present");
Asm->emitInt8(MACRO_FLAG_OFFSET_SIZE | MACRO_FLAG_DEBUG_LINE_OFFSET);
} else {
Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present");
Asm->emitInt8(MACRO_FLAG_DEBUG_LINE_OFFSET);
}
Asm->OutStreamer->AddComment("debug_line_offset");
if (DD.useSplitDwarf())
Asm->emitDwarfLengthOrOffset(0);
else
Asm->emitDwarfSymbolReference(CU.getLineTableStartSym());
}
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
for (auto *MN : Nodes) {
if (auto *M = dyn_cast<DIMacro>(MN))
emitMacro(*M);
else if (auto *F = dyn_cast<DIMacroFile>(MN))
emitMacroFile(*F, U);
else
llvm_unreachable("Unexpected DI type!");
}
}
void DwarfDebug::emitMacro(DIMacro &M) {
StringRef Name = M.getName();
StringRef Value = M.getValue();
// There should be one space between the macro name and the macro value in
// define entries. In undef entries, only the macro name is emitted.
std::string Str = Value.empty() ? Name.str() : (Name + " " + Value).str();
if (UseDebugMacroSection) {
if (getDwarfVersion() >= 5) {
unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
? dwarf::DW_MACRO_define_strx
: dwarf::DW_MACRO_undef_strx;
Asm->OutStreamer->AddComment(dwarf::MacroString(Type));
Asm->emitULEB128(Type);
Asm->OutStreamer->AddComment("Line Number");
Asm->emitULEB128(M.getLine());
Asm->OutStreamer->AddComment("Macro String");
Asm->emitULEB128(
InfoHolder.getStringPool().getIndexedEntry(*Asm, Str).getIndex());
} else {
unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
? dwarf::DW_MACRO_GNU_define_indirect
: dwarf::DW_MACRO_GNU_undef_indirect;
Asm->OutStreamer->AddComment(dwarf::GnuMacroString(Type));
Asm->emitULEB128(Type);
Asm->OutStreamer->AddComment("Line Number");
Asm->emitULEB128(M.getLine());
Asm->OutStreamer->AddComment("Macro String");
Asm->emitDwarfSymbolReference(
InfoHolder.getStringPool().getEntry(*Asm, Str).getSymbol());
}
} else {
Asm->OutStreamer->AddComment(dwarf::MacinfoString(M.getMacinfoType()));
Asm->emitULEB128(M.getMacinfoType());
Asm->OutStreamer->AddComment("Line Number");
Asm->emitULEB128(M.getLine());
Asm->OutStreamer->AddComment("Macro String");
Asm->OutStreamer->emitBytes(Str);
Asm->emitInt8('\0');
}
}
void DwarfDebug::emitMacroFileImpl(
DIMacroFile &MF, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile,
StringRef (*MacroFormToString)(unsigned Form)) {
Asm->OutStreamer->AddComment(MacroFormToString(StartFile));
Asm->emitULEB128(StartFile);
Asm->OutStreamer->AddComment("Line Number");
Asm->emitULEB128(MF.getLine());
Asm->OutStreamer->AddComment("File Number");
DIFile &F = *MF.getFile();
if (useSplitDwarf())
Asm->emitULEB128(getDwoLineTable(U)->getFile(
F.getDirectory(), F.getFilename(), getMD5AsBytes(&F),
Asm->OutContext.getDwarfVersion(), F.getSource()));
else
Asm->emitULEB128(U.getOrCreateSourceID(&F));
handleMacroNodes(MF.getElements(), U);
Asm->OutStreamer->AddComment(MacroFormToString(EndFile));
Asm->emitULEB128(EndFile);
}
void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
// DWARFv5 macro and DWARFv4 macinfo share some common encodings,
// so for readability/uniformity we emit those explicitly.
assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
if (UseDebugMacroSection)
emitMacroFileImpl(
F, U, dwarf::DW_MACRO_start_file, dwarf::DW_MACRO_end_file,
(getDwarfVersion() >= 5) ? dwarf::MacroString : dwarf::GnuMacroString);
else
emitMacroFileImpl(F, U, dwarf::DW_MACINFO_start_file,
dwarf::DW_MACINFO_end_file, dwarf::MacinfoString);
}
void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
auto *SkCU = TheCU.getSkeleton();
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
auto *CUNode = cast<DICompileUnit>(P.first);
DIMacroNodeArray Macros = CUNode->getMacros();
if (Macros.empty())
continue;
Asm->OutStreamer->SwitchSection(Section);
Asm->OutStreamer->emitLabel(U.getMacroLabelBegin());
if (UseDebugMacroSection)
emitMacroHeader(Asm, *this, U, getDwarfVersion());
handleMacroNodes(Macros, U);
Asm->OutStreamer->AddComment("End Of Macro List Mark");
Asm->emitInt8(0);
}
}
/// Emit macros into a debug macinfo/macro section.
void DwarfDebug::emitDebugMacinfo() {
auto &ObjLower = Asm->getObjFileLowering();
emitDebugMacinfoImpl(UseDebugMacroSection
? ObjLower.getDwarfMacroSection()
: ObjLower.getDwarfMacinfoSection());
}
void DwarfDebug::emitDebugMacinfoDWO() {
auto &ObjLower = Asm->getObjFileLowering();
emitDebugMacinfoImpl(UseDebugMacroSection
? ObjLower.getDwarfMacroDWOSection()
: ObjLower.getDwarfMacinfoDWOSection());
}
// DWARF5 Experimental Separate Dwarf emitters.
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU) {
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
addGnuPubAttributes(*NewU, Die);
SkeletonHolder.addUnit(std::move(NewU));
}
DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder,
UnitKind::Skeleton);
DwarfCompileUnit &NewCU = *OwnedUnit;
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
NewCU.initStmtList();
if (useSegmentedStringOffsetsTable())
NewCU.addStringOffsetsStart();
initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
return NewCU;
}
// Emit the .debug_info.dwo section for separated dwarf. This contains the
// compile units that would normally be in debug_info.
void DwarfDebug::emitDebugInfoDWO() {
assert(useSplitDwarf() && "No split dwarf debug info?");
// Don't emit relocations into the dwo file.
InfoHolder.emitUnits(/* UseOffsets */ true);
}
// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
// abbreviations for the .debug_info.dwo section.
void DwarfDebug::emitDebugAbbrevDWO() {
assert(useSplitDwarf() && "No split dwarf?");
InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection());
}
void DwarfDebug::emitDebugLineDWO() {
assert(useSplitDwarf() && "No split dwarf?");
SplitTypeUnitFileTable.Emit(
*Asm->OutStreamer, MCDwarfLineTableParams(),
Asm->getObjFileLowering().getDwarfLineDWOSection());
}
void DwarfDebug::emitStringOffsetsTableHeaderDWO() {
assert(useSplitDwarf() && "No split dwarf?");
InfoHolder.getStringPool().emitStringOffsetsTableHeader(
*Asm, Asm->getObjFileLowering().getDwarfStrOffDWOSection(),
InfoHolder.getStringOffsetsStartSym());
}
// Emit the .debug_str.dwo section for separated dwarf. This contains the
// string section and is identical in format to traditional .debug_str
// sections.
void DwarfDebug::emitDebugStrDWO() {
if (useSegmentedStringOffsetsTable())
emitStringOffsetsTableHeaderDWO();
assert(useSplitDwarf() && "No split dwarf?");
MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection();
InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
OffSec, /* UseRelativeOffsets = */ false);
}
// Emit address pool.
void DwarfDebug::emitDebugAddr() {
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
}
MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
if (!useSplitDwarf())
return nullptr;
const DICompileUnit *DIUnit = CU.getCUNode();
SplitTypeUnitFileTable.maybeSetRootFile(
DIUnit->getDirectory(), DIUnit->getFilename(),
getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());
return &SplitTypeUnitFileTable;
}
uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
MD5 Hash;
Hash.update(Identifier);
// ... take the least significant 8 bytes and return those. Our MD5
// implementation always returns its results in little endian, so we actually
// need the "high" word.
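// (Result.high() gives the second half of the digest, read little-endian,
// which is the value we want here.)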
MD5::MD5Result Result;
Hash.final(Result);
return Result.high();
}
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
StringRef Identifier, DIE &RefDie,
const DICompositeType *CTy) {
// Fast path: if we're already building some type units and one has used the
// address pool, we know we're going to throw away all this work anyway, so
// don't bother building dependent types.
if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
return;
auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
if (!Ins.second) {
CU.addDIETypeSignature(RefDie, Ins.first->second);
return;
}
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);
NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
CU.getLanguage());
uint64_t Signature = makeTypeSignature(Identifier);
NewTU.setTypeSignature(Signature);
Ins.first->second = Signature;
if (useSplitDwarf()) {
MCSection *Section =
getDwarfVersion() <= 4
? Asm->getObjFileLowering().getDwarfTypesDWOSection()
: Asm->getObjFileLowering().getDwarfInfoDWOSection();
NewTU.setSection(Section);
} else {
MCSection *Section =
getDwarfVersion() <= 4
? Asm->getObjFileLowering().getDwarfTypesSection(Signature)
: Asm->getObjFileLowering().getDwarfInfoSection(Signature);
NewTU.setSection(Section);
// Non-split type units reuse the compile unit's line table.
CU.applyStmtList(UnitDie);
}
// Add DW_AT_str_offsets_base to the type unit DIE, but not for split type
// units.
if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
NewTU.addStringOffsetsStart();
NewTU.setType(NewTU.createTypeDIE(CTy));
if (TopLevelType) {
auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction);
TypeUnitsUnderConstruction.clear();
// Types referencing entries in the address table cannot be placed in type
// units.
if (AddrPool.hasBeenUsed()) {
// Remove all the types built while building this type.
// This is pessimistic as some of these types might not be dependent on
// the type that used an address.
for (const auto &TU : TypeUnitsToAdd)
TypeSignatures.erase(TU.second);
// Construct this type in the CU directly.
// This is inefficient because all the dependent types will be rebuilt
// from scratch, including building them in type units, discovering that
// they depend on addresses, throwing them out and rebuilding them.
CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy));
return;
}
// If the type wasn't dependent on fission addresses, finish adding the type
// and all its dependent types.
for (auto &TU : TypeUnitsToAdd) {
InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get());
InfoHolder.emitUnit(TU.first.get(), useSplitDwarf());
}
}
CU.addDIETypeSignature(RefDie, Signature);
}
DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
: DD(DD),
TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) {
DD->TypeUnitsUnderConstruction.clear();
DD->AddrPool.resetUsedFlag();
}
DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
DD->AddrPool.resetUsedFlag(AddrPoolUsed);
}
DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
return NonTypeUnitContext(this);
}
// Add the Name along with its companion DIE to the appropriate accelerator
// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for
// AccelTableKind::Apple, we use the table we got as an argument). If
// accelerator tables are disabled, this function does nothing.
template <typename DataT>
void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
AccelTable<DataT> &AppleAccel, StringRef Name,
const DIE &Die) {
if (getAccelTableKind() == AccelTableKind::None)
return;
if (getAccelTableKind() != AccelTableKind::Apple &&
CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default)
return;
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
DwarfStringPoolEntryRef Ref = Holder.getStringPool().getEntry(*Asm, Name);
switch (getAccelTableKind()) {
case AccelTableKind::Apple:
AppleAccel.addName(Ref, Die);
break;
case AccelTableKind::Dwarf:
AccelDebugNames.addName(Ref, Die);
break;
case AccelTableKind::Default:
llvm_unreachable("Default should have already been resolved.");
case AccelTableKind::None:
llvm_unreachable("None handled above");
}
}
void DwarfDebug::addAccelName(const DICompileUnit &CU, StringRef Name,
const DIE &Die) {
addAccelNameImpl(CU, AccelNames, Name, Die);
}
void DwarfDebug::addAccelObjC(const DICompileUnit &CU, StringRef Name,
const DIE &Die) {
// ObjC names go only into the Apple accelerator tables.
if (getAccelTableKind() == AccelTableKind::Apple)
addAccelNameImpl(CU, AccelObjC, Name, Die);
}
void DwarfDebug::addAccelNamespace(const DICompileUnit &CU, StringRef Name,
const DIE &Die) {
addAccelNameImpl(CU, AccelNamespace, Name, Die);
}
void DwarfDebug::addAccelType(const DICompileUnit &CU, StringRef Name,
const DIE &Die, char Flags) {
addAccelNameImpl(CU, AccelTypes, Name, Die);
}
uint16_t DwarfDebug::getDwarfVersion() const {
return Asm->OutStreamer->getContext().getDwarfVersion();
}
dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const {
if (Asm->getDwarfVersion() >= 4)
return dwarf::Form::DW_FORM_sec_offset;
assert((!Asm->isDwarf64() || (Asm->getDwarfVersion() == 3)) &&
"DWARF64 is not defined prior DWARFv3");
return Asm->isDwarf64() ? dwarf::Form::DW_FORM_data8
: dwarf::Form::DW_FORM_data4;
}
const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
auto I = SectionLabels.find(S);
if (I == SectionLabels.end())
return nullptr;
return I->second;
}
void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
if (SectionLabels.insert(std::make_pair(&S->getSection(), S)).second)
if (useSplitDwarf() || getDwarfVersion() >= 5)
AddrPool.getIndex(S);
}
Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const {
assert(File);
if (getDwarfVersion() < 5)
return None;
Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
return None;
// Convert the string checksum to an MD5Result for the streamer.
// The verifier validates the checksum so we assume it's okay.
// An MD5 checksum is 16 bytes.
std::string ChecksumString = fromHex(Checksum->Value);
MD5::MD5Result CKMem;
std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
return CKMem;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 6356a65b50d3..b55be799b6bc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -1,858 +1,855 @@
//===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing dwarf debug info into asm files.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#include "AddressPool.h"
#include "DebugLocStream.h"
#include "DebugLocEntry.h"
#include "DwarfFile.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
namespace llvm {
class AsmPrinter;
class ByteStreamer;
class DIE;
class DwarfCompileUnit;
class DwarfExpression;
class DwarfTypeUnit;
class DwarfUnit;
class LexicalScope;
class MachineFunction;
class MCSection;
class MCSymbol;
class Module;
//===----------------------------------------------------------------------===//
/// This class is defined as the common parent of DbgVariable and DbgLabel
/// so that it can leverage polymorphism to share common code between
/// DbgVariable and DbgLabel.
class DbgEntity {
const DINode *Entity;
const DILocation *InlinedAt;
DIE *TheDIE = nullptr;
unsigned SubclassID;
public:
enum DbgEntityKind {
DbgVariableKind,
DbgLabelKind
};
DbgEntity(const DINode *N, const DILocation *IA, unsigned ID)
: Entity(N), InlinedAt(IA), SubclassID(ID) {}
virtual ~DbgEntity() {}
/// Accessors.
/// @{
const DINode *getEntity() const { return Entity; }
const DILocation *getInlinedAt() const { return InlinedAt; }
DIE *getDIE() const { return TheDIE; }
unsigned getDbgEntityID() const { return SubclassID; }
/// @}
void setDIE(DIE &D) { TheDIE = &D; }
static bool classof(const DbgEntity *N) {
switch (N->getDbgEntityID()) {
default:
return false;
case DbgVariableKind:
case DbgLabelKind:
return true;
}
}
};
//===----------------------------------------------------------------------===//
/// This class is used to track local variable information.
///
/// Variables can be created from allocas, in which case they're generated from
/// the MMI table. Such variables can have multiple expressions and frame
/// indices.
///
/// Variables can be created from \c DBG_VALUE instructions. Those whose
/// location changes over time use \a DebugLocListIndex, while those with a
/// single location use \a ValueLoc and (optionally) a single entry of \a Expr.
///
/// Variables that have been optimized out use none of these fields.
class DbgVariable : public DbgEntity {
/// Index of the entry list in DebugLocs.
unsigned DebugLocListIndex = ~0u;
/// DW_OP_LLVM_tag_offset value from DebugLocs.
Optional<uint8_t> DebugLocListTagOffset;
/// Single value location description.
std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
struct FrameIndexExpr {
int FI;
const DIExpression *Expr;
};
mutable SmallVector<FrameIndexExpr, 1>
FrameIndexExprs; /// Frame index + expression.
public:
/// Construct a DbgVariable.
///
/// Creates a variable without any DW_AT_location. Call \a initializeMMI()
/// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions.
DbgVariable(const DILocalVariable *V, const DILocation *IA)
: DbgEntity(V, IA, DbgVariableKind) {}
/// Initialize from the MMI table.
void initializeMMI(const DIExpression *E, int FI) {
assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!ValueLoc.get() && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
assert(FI != std::numeric_limits<int>::max() && "Expected valid index");
FrameIndexExprs.push_back({FI, E});
}
// Initialize variable's location.
void initializeDbgValue(DbgValueLoc Value) {
assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!ValueLoc && "Already initialized?");
assert(!Value.getExpression()->isFragment() && "Fragments not supported.");
ValueLoc = std::make_unique<DbgValueLoc>(Value);
if (auto *E = ValueLoc->getExpression())
if (E->getNumElements())
FrameIndexExprs.push_back({0, E});
}
/// Initialize from a DBG_VALUE instruction.
void initializeDbgValue(const MachineInstr *DbgValue);
// Accessors.
const DILocalVariable *getVariable() const {
return cast<DILocalVariable>(getEntity());
}
const DIExpression *getSingleExpression() const {
assert(ValueLoc.get() && FrameIndexExprs.size() <= 1);
return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
}
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; }
Optional<uint8_t> getDebugLocListTagOffset() const { return DebugLocListTagOffset; }
StringRef getName() const { return getVariable()->getName(); }
const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
/// Get the FI entries, sorted by fragment offset.
ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
void addMMIEntry(const DbgVariable &V);
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
// FIXME: Why don't we just infer this tag and store it all along?
if (getVariable()->isParameter())
return dwarf::DW_TAG_formal_parameter;
return dwarf::DW_TAG_variable;
}
/// Return true if DbgVariable is artificial.
bool isArtificial() const {
if (getVariable()->isArtificial())
return true;
if (getType()->isArtificial())
return true;
return false;
}
bool isObjectPointer() const {
if (getVariable()->isObjectPointer())
return true;
if (getType()->isObjectPointer())
return true;
return false;
}
bool hasComplexAddress() const {
assert(ValueLoc.get() && "Expected DBG_VALUE, not MMI variable");
assert((FrameIndexExprs.empty() ||
(FrameIndexExprs.size() == 1 &&
FrameIndexExprs[0].Expr->getNumElements())) &&
"Invalid Expr for DBG_VALUE");
return !FrameIndexExprs.empty();
}
const DIType *getType() const;
static bool classof(const DbgEntity *N) {
return N->getDbgEntityID() == DbgVariableKind;
}
};
//===----------------------------------------------------------------------===//
/// This class is used to track label information.
///
/// Labels are collected from \c DBG_LABEL instructions.
class DbgLabel : public DbgEntity {
const MCSymbol *Sym; /// Symbol before DBG_LABEL instruction.
public:
/// We need MCSymbol information to generate DW_AT_low_pc.
DbgLabel(const DILabel *L, const DILocation *IA, const MCSymbol *Sym = nullptr)
: DbgEntity(L, IA, DbgLabelKind), Sym(Sym) {}
/// Accessors.
/// @{
const DILabel *getLabel() const { return cast<DILabel>(getEntity()); }
const MCSymbol *getSymbol() const { return Sym; }
StringRef getName() const { return getLabel()->getName(); }
/// @}
/// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
return dwarf::DW_TAG_label;
}
static bool classof(const DbgEntity *N) {
return N->getDbgEntityID() == DbgLabelKind;
}
};
/// Used for tracking debug info about call site parameters.
class DbgCallSiteParam {
private:
unsigned Register; ///< Parameter register at the callee entry point.
DbgValueLoc Value; ///< Corresponding location for the parameter value at
///< the call site.
public:
DbgCallSiteParam(unsigned Reg, DbgValueLoc Val)
: Register(Reg), Value(Val) {
assert(Reg && "Parameter register cannot be undef");
}
unsigned getRegister() const { return Register; }
DbgValueLoc getValue() const { return Value; }
};
/// Collection used for storing debug call site parameters.
using ParamSet = SmallVector<DbgCallSiteParam, 4>;
/// Helper used to pair up a symbol and its DWARF compile unit.
struct SymbolCU {
SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
const MCSymbol *Sym;
DwarfCompileUnit *CU;
};
/// The kind of accelerator tables we should emit.
enum class AccelTableKind {
Default, ///< Platform default.
None, ///< None.
Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc.
Dwarf, ///< DWARF v5 .debug_names.
};
/// Collects and handles dwarf debug information.
class DwarfDebug : public DebugHandlerBase {
/// All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
/// Maps MDNode with its corresponding DwarfCompileUnit.
MapVector<const MDNode *, DwarfCompileUnit *> CUMap;
/// Maps a CU DIE with its corresponding DwarfCompileUnit.
DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap;
/// List of all labels used in aranges generation.
std::vector<SymbolCU> ArangeLabels;
/// Size of each symbol emitted (for those symbols that have a specific size).
DenseMap<const MCSymbol *, uint64_t> SymSize;
/// Collection of concrete variables/labels.
SmallVector<std::unique_ptr<DbgEntity>, 64> ConcreteEntities;
/// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
/// can refer to them in spite of insertions into this list.
DebugLocStream DebugLocs;
/// This is a collection of subprogram MDNodes that are processed to
/// create DIEs.
SetVector<const DISubprogram *, SmallVector<const DISubprogram *, 16>,
SmallPtrSet<const DISubprogram *, 16>>
ProcessedSPNodes;
/// If nonnull, stores the current machine function we're processing.
const MachineFunction *CurFn = nullptr;
/// If nonnull, stores the CU in which the previous subprogram was contained.
const DwarfCompileUnit *PrevCU = nullptr;
/// As an optimization, there is no need to emit an entry in the directory
/// table for the same directory as DW_AT_comp_dir.
StringRef CompilationDir;
/// Holder for the file specific debug information.
DwarfFile InfoHolder;
/// Holders for the various debug information flags that we might need to
/// have exposed. See accessor functions below for description.
/// Map from MDNodes for user-defined types to their type signatures. Also
/// used to keep track of which types we have emitted type units for.
DenseMap<const MDNode *, uint64_t> TypeSignatures;
DenseMap<const MCSection *, const MCSymbol *> SectionLabels;
SmallVector<
std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
TypeUnitsUnderConstruction;
/// Whether to use the GNU TLS opcode (instead of the standard opcode).
bool UseGNUTLSOpcode;
/// Whether to use DWARF 2 bitfields (instead of the DWARF 4 format).
bool UseDWARF2Bitfields;
/// Whether to emit all linkage names, or just abstract subprograms.
bool UseAllLinkageNames;
/// Use inlined strings.
bool UseInlineStrings = false;
/// Allow emission of .debug_ranges section.
bool UseRangesSection = true;
/// True if the sections themselves must be used as references and no
/// temporary symbols should be created inside DWARF sections.
bool UseSectionsAsReferences = false;
/// Allow emission of the .debug_loc section.
bool UseLocSection = true;
/// Generate DWARF v4 type units.
bool GenerateTypeUnits;
/// Emit a .debug_macro section instead of .debug_macinfo.
bool UseDebugMacroSection;
/// Avoid using DW_OP_convert due to consumer incompatibilities.
bool EnableOpConvert;
public:
enum class MinimizeAddrInV5 {
Default,
Disabled,
Ranges,
Expressions,
Form,
};
private:
/// Force the use of DW_AT_ranges even for single-entry range lists.
MinimizeAddrInV5 MinimizeAddr = MinimizeAddrInV5::Disabled;
/// DWARF5 Experimental Options
/// @{
AccelTableKind TheAccelTableKind;
bool HasAppleExtensionAttributes;
bool HasSplitDwarf;
/// Whether to generate the DWARF v5 string offsets table.
/// It consists of a series of contributions, each preceded by a header.
/// The pre-DWARF v5 string offsets table for split dwarf is, in contrast,
/// a monolithic sequence of string offsets.
bool UseSegmentedStringOffsetsTable;
/// Enable production of call site parameters needed to print the debug entry
/// values. Useful for testing purposes when a debugger does not support the
/// feature yet.
bool EmitDebugEntryValues;
/// Separated Dwarf Variables
/// In general these will all be for bits that are left in the
/// original object file, rather than things that are meant
/// to be in the .dwo sections.
/// Holder for the skeleton information.
DwarfFile SkeletonHolder;
/// Store file names for type units under fission in a line table
/// header that will be emitted into debug_line.dwo.
// FIXME: replace this with a map from comp_dir to table so that we
// can emit multiple tables during LTO each of which uses directory
// 0, referencing the comp_dir of all the type units that use it.
MCDwarfDwoLineTable SplitTypeUnitFileTable;
/// @}
/// True iff there is only a single CU in this module.
bool SingleCU;
bool IsDarwin;
/// Map for tracking Fortran deferred CHARACTER lengths.
DenseMap<const DIStringType *, unsigned> StringTypeLocMap;
AddressPool AddrPool;
/// Accelerator tables.
AccelTable<DWARF5AccelTableData> AccelDebugNames;
AccelTable<AppleAccelTableOffsetData> AccelNames;
AccelTable<AppleAccelTableOffsetData> AccelObjC;
AccelTable<AppleAccelTableOffsetData> AccelNamespace;
AccelTable<AppleAccelTableTypeData> AccelTypes;
/// Identify a debugger for "tuning" the debug info.
///
/// The "tuning" should be used to set defaults for individual feature flags
/// in DwarfDebug; if a given feature has a more specific command-line option,
/// that option should take precedence over the tuning.
DebuggerKind DebuggerTuning = DebuggerKind::Default;
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() {
return InfoHolder.getUnits();
}
using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
void ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
const DINode *Node,
const MDNode *Scope);
void ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
const DINode *Node,
const MDNode *Scope);
DbgEntity *createConcreteEntity(DwarfCompileUnit &TheCU,
LexicalScope &Scope,
const DINode *Node,
const DILocation *Location,
const MCSymbol *Sym = nullptr);
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
- /// Construct a DIE for the subprogram definition \p SP and return it.
- DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP);
-
/// Construct DIEs for call site entries describing the calls in \p MF.
void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
DIE &ScopeDIE, const MachineFunction &MF);
template <typename DataT>
void addAccelNameImpl(const DICompileUnit &CU, AccelTable<DataT> &AppleAccel,
StringRef Name, const DIE &Die);
void finishEntityDefinitions();
void finishSubprogramDefinitions();
/// Finish off debug information after all functions have been
/// processed.
void finalizeModuleInfo();
/// Emit the debug info section.
void emitDebugInfo();
/// Emit the abbreviation section.
void emitAbbreviations();
/// Emit the string offsets table header.
void emitStringOffsetsTableHeader();
/// Emit a specified accelerator table.
template <typename AccelTableT>
void emitAccel(AccelTableT &Accel, MCSection *Section, StringRef TableName);
/// Emit DWARF v5 accelerator table.
void emitAccelDebugNames();
/// Emit visible names into a hashed accelerator table section.
void emitAccelNames();
/// Emit objective C classes and categories into a hashed
/// accelerator table section.
void emitAccelObjC();
/// Emit namespace dies into a hashed accelerator table.
void emitAccelNamespaces();
/// Emit type dies into a hashed accelerator table.
void emitAccelTypes();
/// Emit visible names and types into debug pubnames and pubtypes sections.
void emitDebugPubSections();
void emitDebugPubSection(bool GnuStyle, StringRef Name,
DwarfCompileUnit *TheU,
const StringMap<const DIE *> &Globals);
/// Emit null-terminated strings into a debug str section.
void emitDebugStr();
/// Emit variable locations into a debug loc section.
void emitDebugLoc();
/// Emit variable locations into a debug loc dwo section.
void emitDebugLocDWO();
void emitDebugLocImpl(MCSection *Sec);
/// Emit address ranges into a debug aranges section.
void emitDebugARanges();
/// Emit address ranges into a debug ranges section.
void emitDebugRanges();
void emitDebugRangesDWO();
void emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section);
/// Emit macros into a debug macinfo section.
void emitDebugMacinfo();
/// Emit macros into a debug macinfo.dwo section.
void emitDebugMacinfoDWO();
void emitDebugMacinfoImpl(MCSection *Section);
void emitMacro(DIMacro &M);
void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U);
void emitMacroFileImpl(DIMacroFile &F, DwarfCompileUnit &U,
unsigned StartFile, unsigned EndFile,
StringRef (*MacroFormToString)(unsigned Form));
void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U);
/// DWARF 5 Experimental Split Dwarf Emitters
/// Initialize common features of skeleton units.
void initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU);
/// Construct the split debug info compile unit for the debug info section.
/// In DWARF v5, the skeleton unit DIE may have the following attributes:
/// DW_AT_addr_base, DW_AT_comp_dir, DW_AT_dwo_name, DW_AT_high_pc,
/// DW_AT_low_pc, DW_AT_ranges, DW_AT_stmt_list, and DW_AT_str_offsets_base.
/// Prior to DWARF v5 it may also have DW_AT_GNU_dwo_id. DW_AT_GNU_dwo_name
/// is used instead of DW_AT_dwo_name, DW_AT_GNU_addr_base instead of
/// DW_AT_addr_base, and DW_AT_GNU_ranges_base instead of DW_AT_rnglists_base.
DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
/// Emit the debug info dwo section.
void emitDebugInfoDWO();
/// Emit the debug abbrev dwo section.
void emitDebugAbbrevDWO();
/// Emit the debug line dwo section.
void emitDebugLineDWO();
/// Emit the dwo stringoffsets table header.
void emitStringOffsetsTableHeaderDWO();
/// Emit the debug str dwo section.
void emitDebugStrDWO();
/// Emit DWO addresses.
void emitDebugAddr();
/// Flags to let the linker know we have emitted new style pubnames. Only
/// emit it here if we don't have a skeleton CU for split dwarf.
void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const;
/// Create new DwarfCompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit);
void finishUnitAttributes(const DICompileUnit *DIUnit,
DwarfCompileUnit &NewCU);
/// Construct imported_module or imported_declaration DIE.
void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
const DIImportedEntity *N);
/// Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
/// source line list.
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
/// Populate LexicalScope entries with variables' info.
void collectEntityInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
DenseSet<InlinedEntity> &ProcessedVars);
/// Build the location list for all DBG_VALUEs in the
/// function that describe the same variable. If the resulting
/// list has only one entry that is valid for entire variable's
/// scope return true.
bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const DbgValueHistoryMap::Entries &Entries);
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
DenseSet<InlinedEntity> &P);
/// Emit the reference to the section.
void emitSectionReference(const DwarfCompileUnit &CU);
protected:
/// Gather pre-function debug information.
void beginFunctionImpl(const MachineFunction *MF) override;
/// Gather and emit post-function debug information.
void endFunctionImpl(const MachineFunction *MF) override;
void skippedNonDebugFunction() override;
public:
//===--------------------------------------------------------------------===//
// Main entry points.
//
DwarfDebug(AsmPrinter *A);
~DwarfDebug() override;
/// Emit all Dwarf sections that should come prior to the
/// content.
void beginModule(Module *M) override;
/// Emit all Dwarf sections that should come after the content.
void endModule() override;
/// Emits the initial debug location directive.
DebugLoc emitInitialLocDirective(const MachineFunction &MF, unsigned CUID);
/// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
/// Perform an MD5 checksum of \p Identifier and return the lower 64 bits.
static uint64_t makeTypeSignature(StringRef Identifier);
/// Add a DIE to the set of types that we're going to pull into
/// type units.
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
DIE &Die, const DICompositeType *CTy);
class NonTypeUnitContext {
DwarfDebug *DD;
decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
bool AddrPoolUsed;
friend class DwarfDebug;
NonTypeUnitContext(DwarfDebug *DD);
public:
NonTypeUnitContext(NonTypeUnitContext&&) = default;
~NonTypeUnitContext();
};
NonTypeUnitContext enterNonTypeUnitContext();
/// Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
/// For symbols that have a size designated (e.g. common symbols),
/// this tracks that size.
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {
SymSize[Sym] = Size;
}
/// Returns whether we should emit all DW_AT_[MIPS_]linkage_name.
/// If not, we still might emit linkage names in certain cases.
bool useAllLinkageNames() const { return UseAllLinkageNames; }
/// Returns whether to use DW_OP_GNU_push_tls_address instead of the standard
/// DW_OP_form_tls_address opcode.
bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
/// Returns whether to use the DWARF2 format for bitfields instead of the
/// DWARF4 format.
bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; }
/// Returns whether to use inline strings.
bool useInlineStrings() const { return UseInlineStrings; }
/// Returns whether ranges section should be emitted.
bool useRangesSection() const { return UseRangesSection; }
/// Returns whether range encodings should be used for single entry range
/// lists.
bool alwaysUseRanges() const {
return MinimizeAddr == MinimizeAddrInV5::Ranges;
}
// Returns whether novel exprloc addrx+offset encodings should be used to
// reduce debug_addr size.
bool useAddrOffsetExpressions() const {
return MinimizeAddr == MinimizeAddrInV5::Expressions;
}
// Returns whether addrx+offset LLVM extension form should be used to reduce
// debug_addr size.
bool useAddrOffsetForm() const {
return MinimizeAddr == MinimizeAddrInV5::Form;
}
/// Returns whether to use sections as labels rather than temp symbols.
bool useSectionsAsReferences() const {
return UseSectionsAsReferences;
}
/// Returns whether .debug_loc section should be emitted.
bool useLocSection() const { return UseLocSection; }
/// Returns whether to generate DWARF v4 type units.
bool generateTypeUnits() const { return GenerateTypeUnits; }
// Experimental DWARF5 features.
/// Returns what kind (if any) of accelerator tables to emit.
AccelTableKind getAccelTableKind() const { return TheAccelTableKind; }
bool useAppleExtensionAttributes() const {
return HasAppleExtensionAttributes;
}
/// Returns whether or not to change the current debug info for the
/// split dwarf proposal support.
bool useSplitDwarf() const { return HasSplitDwarf; }
/// Returns whether to generate a string offsets table with (possibly shared)
/// contributions from each CU and type unit. This implies the use of
/// DW_FORM_strx* indirect references with DWARF v5 and beyond. Note that
/// DW_FORM_GNU_str_index is also an indirect reference, but it is used with
/// a pre-DWARF v5 implementation of split DWARF sections, which uses a
/// monolithic string offsets table.
bool useSegmentedStringOffsetsTable() const {
return UseSegmentedStringOffsetsTable;
}
bool emitDebugEntryValues() const {
return EmitDebugEntryValues;
}
bool useOpConvert() const {
return EnableOpConvert;
}
bool shareAcrossDWOCUs() const;
/// Returns the Dwarf Version.
uint16_t getDwarfVersion() const;
/// Returns a suitable DWARF form to represent a section offset, i.e.
/// * DW_FORM_sec_offset for DWARF version >= 4;
/// * DW_FORM_data8 for 64-bit DWARFv3;
/// * DW_FORM_data4 for 32-bit DWARFv3 and DWARFv2.
dwarf::Form getDwarfSectionOffsetForm() const;
/// Returns the previous CU that was being updated.
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
/// Returns the entries for the .debug_loc section.
const DebugLocStream &getDebugLocs() const { return DebugLocs; }
/// Emit an entry for the debug loc section. This can be used to
/// handle an entry that's going to be emitted into the debug loc section.
void emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocStream::Entry &Entry,
const DwarfCompileUnit *CU);
/// Emit the location for a debug loc entry, including the size header.
void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
const DwarfCompileUnit *CU);
void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP,
DIE &Die);
AddressPool &getAddressPool() { return AddrPool; }
void addAccelName(const DICompileUnit &CU, StringRef Name, const DIE &Die);
void addAccelObjC(const DICompileUnit &CU, StringRef Name, const DIE &Die);
void addAccelNamespace(const DICompileUnit &CU, StringRef Name,
const DIE &Die);
void addAccelType(const DICompileUnit &CU, StringRef Name, const DIE &Die,
char Flags);
const MachineFunction *getCurrentFunction() const { return CurFn; }
/// A helper function to check whether the DIE for a given Scope is
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
/// Find the matching DwarfCompileUnit for the given CU DIE.
DwarfCompileUnit *lookupCU(const DIE *Die) { return CUDieMap.lookup(Die); }
const DwarfCompileUnit *lookupCU(const DIE *Die) const {
return CUDieMap.lookup(Die);
}
unsigned getStringTypeLoc(const DIStringType *ST) const {
return StringTypeLocMap.lookup(ST);
}
void addStringTypeLoc(const DIStringType *ST, unsigned Loc) {
assert(ST);
if (Loc)
StringTypeLocMap[ST] = Loc;
}
/// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
///
/// Returns whether we are "tuning" for a given debugger.
/// @{
bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
bool tuneForDBX() const { return DebuggerTuning == DebuggerKind::DBX; }
/// @}
const MCSymbol *getSectionLabel(const MCSection *S);
void insertSectionLabel(const MCSymbol *S);
static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
const DbgValueLoc &Value,
DwarfExpression &DwarfExpr);
/// If the \p File has an MD5 checksum, return it as an MD5Result
/// allocated in the MCContext.
Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
};
} // end namespace llvm
#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 344d30fad347..9d7b3d6e1891 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1,1817 +1,1818 @@
//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for constructing a dwarf compile unit.
//
//===----------------------------------------------------------------------===//
#include "DwarfUnit.h"
#include "AddressPool.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP,
DwarfCompileUnit &CU, DIELoc &DIE)
: DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), OutDIE(DIE) {}
void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Op);
}
void DIEDwarfExpression::emitSigned(int64_t Value) {
CU.addSInt(getActiveDIE(), dwarf::DW_FORM_sdata, Value);
}
void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
CU.addUInt(getActiveDIE(), dwarf::DW_FORM_udata, Value);
}
void DIEDwarfExpression::emitData1(uint8_t Value) {
CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Value);
}
void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
CU.addBaseTypeRef(getActiveDIE(), Idx);
}
void DIEDwarfExpression::enableTemporaryBuffer() {
assert(!IsBuffering && "Already buffering?");
IsBuffering = true;
}
void DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
unsigned DIEDwarfExpression::getTemporaryBufferSize() {
return TmpDIE.ComputeSize(&AP);
}
void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); }
bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
llvm::Register MachineReg) {
return MachineReg == TRI.getFrameRegister(*AP.MF);
}
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
: DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU),
IndexTyDie(nullptr) {}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
MCDwarfDwoLineTable *SplitLineTable)
: DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU),
SplitLineTable(SplitLineTable) {
}
DwarfUnit::~DwarfUnit() {
for (DIEBlock *B : DIEBlocks)
B->~DIEBlock();
for (DIELoc *L : DIELocs)
L->~DIELoc();
}
int64_t DwarfUnit::getDefaultLowerBound() const {
switch (getLanguage()) {
default:
break;
// The languages below have valid values in all DWARF versions.
case dwarf::DW_LANG_C:
case dwarf::DW_LANG_C89:
case dwarf::DW_LANG_C_plus_plus:
return 0;
case dwarf::DW_LANG_Fortran77:
case dwarf::DW_LANG_Fortran90:
return 1;
// The languages below have valid values only if the DWARF version >= 3.
case dwarf::DW_LANG_C99:
case dwarf::DW_LANG_ObjC:
case dwarf::DW_LANG_ObjC_plus_plus:
if (DD->getDwarfVersion() >= 3)
return 0;
break;
case dwarf::DW_LANG_Fortran95:
if (DD->getDwarfVersion() >= 3)
return 1;
break;
// Starting with DWARF v4, all defined languages have valid values.
case dwarf::DW_LANG_D:
case dwarf::DW_LANG_Java:
case dwarf::DW_LANG_Python:
case dwarf::DW_LANG_UPC:
if (DD->getDwarfVersion() >= 4)
return 0;
break;
case dwarf::DW_LANG_Ada83:
case dwarf::DW_LANG_Ada95:
case dwarf::DW_LANG_Cobol74:
case dwarf::DW_LANG_Cobol85:
case dwarf::DW_LANG_Modula2:
case dwarf::DW_LANG_Pascal83:
case dwarf::DW_LANG_PLI:
if (DD->getDwarfVersion() >= 4)
return 1;
break;
// The languages below are new in DWARF v5.
case dwarf::DW_LANG_BLISS:
case dwarf::DW_LANG_C11:
case dwarf::DW_LANG_C_plus_plus_03:
case dwarf::DW_LANG_C_plus_plus_11:
case dwarf::DW_LANG_C_plus_plus_14:
case dwarf::DW_LANG_Dylan:
case dwarf::DW_LANG_Go:
case dwarf::DW_LANG_Haskell:
case dwarf::DW_LANG_OCaml:
case dwarf::DW_LANG_OpenCL:
case dwarf::DW_LANG_RenderScript:
case dwarf::DW_LANG_Rust:
case dwarf::DW_LANG_Swift:
if (DD->getDwarfVersion() >= 5)
return 0;
break;
case dwarf::DW_LANG_Fortran03:
case dwarf::DW_LANG_Fortran08:
case dwarf::DW_LANG_Julia:
case dwarf::DW_LANG_Modula3:
if (DD->getDwarfVersion() >= 5)
return 1;
break;
}
return -1;
}
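// Illustrative sketch (not part of this patch): one plausible way a caller
// might consume getDefaultLowerBound(). The assumption here is that an
// explicit DW_AT_lower_bound is only worth emitting when it differs from the
// language default; the actual policy lives in the subrange construction code
// and may differ in detail.
static bool shouldEmitLowerBound(int64_t Bound, int64_t DefaultLowerBound) {
  // -1 means "no known default for this language/version", so always emit.
  if (DefaultLowerBound == -1)
    return true;
  return Bound != DefaultLowerBound;
}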
/// Check whether the DIE for this MDNode can be shared across CUs.
bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
- // When the MDNode can be part of the type system (this includes subprogram
- // declarations *and* subprogram definitions, even local definitions), the
- // DIE must be shared across CUs.
+ // When the MDNode can be part of the type system, the DIE can be shared
+ // across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
// cross-CU DIE sharing is used in LTO and removes type redundancy at that
// level already) but may be implementable for some value in projects
// building multiple independent libraries with LTO and then linking those
// together.
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return false;
- return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits();
+ return (isa<DIType>(D) ||
+ (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
+ !DD->generateTypeUnits();
}
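// Illustrative sketch (not part of this patch): the new sharing rule above,
// restated over plain booleans to make the change easy to see. Before this
// patch, subprogram definitions were shareable too; now only types and
// subprogram *declarations* are. The dwo-unit / shareAcrossDWOCUs() check is
// left out for brevity, since it is unchanged.
static bool shareableAcrossCUs(bool IsType, bool IsSubprogram,
                               bool IsDefinition, bool GeneratingTypeUnits) {
  if (GeneratingTypeUnits)
    return false;
  return IsType || (IsSubprogram && !IsDefinition);
}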
DIE *DwarfUnit::getDIE(const DINode *D) const {
if (isShareableAcrossCUs(D))
return DU->getDIE(D);
return MDNodeToDieMap.lookup(D);
}
void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) {
if (isShareableAcrossCUs(Desc)) {
DU->insertDIE(Desc, D);
return;
}
MDNodeToDieMap.insert(std::make_pair(Desc, D));
}
void DwarfUnit::insertDIE(DIE *D) {
MDNodeToDieMap.insert(std::make_pair(nullptr, D));
}
void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
if (DD->getDwarfVersion() >= 4)
addAttribute(Die, Attribute, dwarf::DW_FORM_flag_present, DIEInteger(1));
else
addAttribute(Die, Attribute, dwarf::DW_FORM_flag, DIEInteger(1));
}
void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, uint64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(false, Integer);
assert(Form != dwarf::DW_FORM_implicit_const &&
"DW_FORM_implicit_const is used only for signed integers");
addAttribute(Die, Attribute, *Form, DIEInteger(Integer));
}
void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
uint64_t Integer) {
addUInt(Block, (dwarf::Attribute)0, Form, Integer);
}
void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, int64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(true, Integer);
addAttribute(Die, Attribute, *Form, DIEInteger(Integer));
}
void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
int64_t Integer) {
addSInt(Die, (dwarf::Attribute)0, Form, Integer);
}
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
if (CUNode->isDebugDirectivesOnly())
return;
if (DD->useInlineStrings()) {
addAttribute(Die, Attribute, dwarf::DW_FORM_string,
new (DIEValueAllocator)
DIEInlineString(String, DIEValueAllocator));
return;
}
dwarf::Form IxForm =
isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp;
auto StringPoolEntry =
useSegmentedStringOffsetsTable() || IxForm == dwarf::DW_FORM_GNU_str_index
? DU->getStringPool().getIndexedEntry(*Asm, String)
: DU->getStringPool().getEntry(*Asm, String);
// For DWARF v5 and beyond, use the smallest strx? form possible.
if (useSegmentedStringOffsetsTable()) {
IxForm = dwarf::DW_FORM_strx1;
unsigned Index = StringPoolEntry.getIndex();
if (Index > 0xffffff)
IxForm = dwarf::DW_FORM_strx4;
else if (Index > 0xffff)
IxForm = dwarf::DW_FORM_strx3;
else if (Index > 0xff)
IxForm = dwarf::DW_FORM_strx2;
}
addAttribute(Die, Attribute, IxForm, DIEString(StringPoolEntry));
}
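// Illustrative sketch (not part of this patch): the strx form selection above,
// as a standalone helper. The index is a slot in the string offsets table, so
// the smallest form whose unsigned range still covers the index is chosen:
// one byte up to 0xff, two up to 0xffff, three up to 0xffffff, four otherwise.
#include <cstdint>
static const char *smallestStrxForm(uint32_t Index) {
  if (Index > 0xffffff)
    return "DW_FORM_strx4";
  if (Index > 0xffff)
    return "DW_FORM_strx3";
  if (Index > 0xff)
    return "DW_FORM_strx2";
  return "DW_FORM_strx1";
}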
void DwarfUnit::addLabel(DIEValueList &Die, dwarf::Attribute Attribute,
dwarf::Form Form, const MCSymbol *Label) {
addAttribute(Die, Attribute, Form, DIELabel(Label));
}
void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
addLabel(Die, (dwarf::Attribute)0, Form, Label);
}
void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
uint64_t Integer) {
addUInt(Die, Attribute, DD->getDwarfSectionOffsetForm(), Integer);
}
unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
if (!SplitLineTable)
return getCU().getOrCreateSourceID(File);
if (!UsedLineTable) {
UsedLineTable = true;
// This is a split type unit that needs a line table.
addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
}
return SplitLineTable->getFile(
File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
Asm->OutContext.getDwarfVersion(), File->getSource());
}
void DwarfUnit::addPoolOpAddress(DIEValueList &Die, const MCSymbol *Label) {
bool UseAddrOffsetFormOrExpressions =
DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions();
const MCSymbol *Base = nullptr;
if (Label->isInSection() && UseAddrOffsetFormOrExpressions)
Base = DD->getSectionLabel(&Label->getSection());
uint32_t Index = DD->getAddressPool().getIndex(Base ? Base : Label);
if (DD->getDwarfVersion() >= 5) {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx);
addUInt(Die, dwarf::DW_FORM_addrx, Index);
} else {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
addUInt(Die, dwarf::DW_FORM_GNU_addr_index, Index);
}
if (Base && Base != Label) {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_const4u);
addLabelDelta(Die, (dwarf::Attribute)0, Label, Base);
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
}
}
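// Illustrative sketch (not part of this patch): the shape of the expression
// addPoolOpAddress() emits, written out as text. Index is the address-pool
// slot of the base section label (or of the label itself when no base is
// used); when a base is used, the label's offset from it is folded in with
// DW_OP_const4u / DW_OP_plus.
#include <string>
static std::string describePoolAddress(bool DwarfV5OrLater, unsigned Index,
                                       bool HasBaseOffset) {
  std::string Ops = DwarfV5OrLater ? "DW_OP_addrx " : "DW_OP_GNU_addr_index ";
  Ops += std::to_string(Index);
  if (HasBaseOffset)
    Ops += ", DW_OP_const4u (Label - Base), DW_OP_plus";
  return Ops;
}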
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
if (DD->getDwarfVersion() >= 5) {
addPoolOpAddress(Die, Sym);
return;
}
if (DD->useSplitDwarf()) {
addPoolOpAddress(Die, Sym);
return;
}
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
addLabel(Die, dwarf::DW_FORM_addr, Sym);
}
void DwarfUnit::addLabelDelta(DIEValueList &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
addAttribute(Die, Attribute, dwarf::DW_FORM_data4,
new (DIEValueAllocator) DIEDelta(Hi, Lo));
}
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) {
addDIEEntry(Die, Attribute, DIEEntry(Entry));
}
void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) {
// Flag the type unit reference as a declaration so that if it contains
// members (implicit special members, static data member definitions, member
// declarations for definitions in this CU, etc) consumers don't get confused
// and think this is a full definition.
addFlag(Die, dwarf::DW_AT_declaration);
addAttribute(Die, dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8,
DIEInteger(Signature));
}
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIEEntry Entry) {
const DIEUnit *CU = Die.getUnit();
const DIEUnit *EntryCU = Entry.getEntry().getUnit();
if (!CU)
// We assume that Die belongs to this CU, if it is not linked to any CU yet.
CU = getUnitDie().getUnit();
if (!EntryCU)
EntryCU = getUnitDie().getUnit();
addAttribute(Die, Attribute,
EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
Entry);
}
DIE &DwarfUnit::createAndAddDIE(dwarf::Tag Tag, DIE &Parent, const DINode *N) {
DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, Tag));
if (N)
insertDIE(N, &Die);
return Die;
}
void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
Loc->ComputeSize(Asm);
DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
addAttribute(Die, Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
}
void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
DIEBlock *Block) {
Block->ComputeSize(Asm);
DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
addAttribute(Die, Attribute, Form, Block);
}
void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
DIEBlock *Block) {
addBlock(Die, Attribute, Block->BestForm(), Block);
}
void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
if (Line == 0)
return;
unsigned FileID = getOrCreateSourceID(File);
addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) {
assert(V);
addSourceLine(Die, V->getLine(), V->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) {
assert(G);
addSourceLine(Die, G->getLine(), G->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {
assert(SP);
addSourceLine(Die, SP->getLine(), SP->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DILabel *L) {
assert(L);
addSourceLine(Die, L->getLine(), L->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {
assert(Ty);
addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
assert(Ty);
addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) {
// Pass this down to addConstantValue as an unsigned bag of bits.
addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true);
}
void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI,
const DIType *Ty) {
addConstantValue(Die, CI->getValue(), Ty);
}
void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) {
addConstantValue(Die, DD->isUnsignedDIType(Ty), Val);
}
void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
// FIXME: This is a bit conservative/simple - it always emits negative values
// sign-extended to 64 bits rather than minimizing the number of bytes.
addUInt(Die, dwarf::DW_AT_const_value,
Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val);
}
void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) {
addConstantValue(Die, Val, DD->isUnsignedDIType(Ty));
}
void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
unsigned CIBitWidth = Val.getBitWidth();
if (CIBitWidth <= 64) {
addConstantValue(Die, Unsigned,
Unsigned ? Val.getZExtValue() : Val.getSExtValue());
return;
}
DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
// Get the raw data form of the large APInt.
const uint64_t *Ptr64 = Val.getRawData();
int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
bool LittleEndian = Asm->getDataLayout().isLittleEndian();
// Output the constant to DWARF one byte at a time.
for (int i = 0; i < NumBytes; i++) {
uint8_t c;
if (LittleEndian)
c = Ptr64[i / 8] >> (8 * (i & 7));
else
c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
addUInt(*Block, dwarf::DW_FORM_data1, c);
}
addBlock(Die, dwarf::DW_AT_const_value, Block);
}
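// Illustrative sketch (not part of this patch): the byte-extraction loop
// above, applied to a value held as an array of 64-bit words (least
// significant word first, as APInt::getRawData() returns). For a
// little-endian target the bytes stream out least significant first; for a
// big-endian target the indexing is mirrored.
#include <cstdint>
#include <vector>
static std::vector<uint8_t> rawBytes(const uint64_t *Words, int NumBytes,
                                     bool LittleEndian) {
  std::vector<uint8_t> Out;
  for (int I = 0; I < NumBytes; ++I) {
    int Idx = LittleEndian ? I : NumBytes - 1 - I;
    Out.push_back(uint8_t(Words[Idx / 8] >> (8 * (Idx & 7))));
  }
  return Out;
}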
void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
if (!LinkageName.empty())
addString(Die,
DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
: dwarf::DW_AT_MIPS_linkage_name,
GlobalValue::dropLLVMManglingEscape(LinkageName));
}
void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) {
// Add template parameters.
for (const auto *Element : TParams) {
if (auto *TTP = dyn_cast<DITemplateTypeParameter>(Element))
constructTemplateTypeParameterDIE(Buffer, TTP);
else if (auto *TVP = dyn_cast<DITemplateValueParameter>(Element))
constructTemplateValueParameterDIE(Buffer, TVP);
}
}
/// Add thrown types.
void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) {
for (const auto *Ty : ThrownTypes) {
DIE &TT = createAndAddDIE(dwarf::DW_TAG_thrown_type, Die);
addType(TT, cast<DIType>(Ty));
}
}
DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
if (!Context || isa<DIFile>(Context))
return &getUnitDie();
if (auto *T = dyn_cast<DIType>(Context))
return getOrCreateTypeDIE(T);
if (auto *NS = dyn_cast<DINamespace>(Context))
return getOrCreateNameSpace(NS);
if (auto *SP = dyn_cast<DISubprogram>(Context))
return getOrCreateSubprogramDIE(SP);
if (auto *M = dyn_cast<DIModule>(Context))
return getOrCreateModule(M);
return getDIE(Context);
}
DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
auto *Context = Ty->getScope();
DIE *ContextDIE = getOrCreateContextDIE(Context);
if (DIE *TyDIE = getDIE(Ty))
return TyDIE;
// Create new type.
DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
updateAcceleratorTables(Context, Ty, TyDIE);
return &TyDIE;
}
DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
const DIType *Ty) {
// Create new type.
DIE &TyDIE = createAndAddDIE(Ty->getTag(), ContextDIE, Ty);
updateAcceleratorTables(Context, Ty, TyDIE);
if (auto *BT = dyn_cast<DIBasicType>(Ty))
constructTypeDIE(TyDIE, BT);
else if (auto *ST = dyn_cast<DIStringType>(Ty))
constructTypeDIE(TyDIE, ST);
else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
constructTypeDIE(TyDIE, STy);
else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
if (DD->generateTypeUnits() && !Ty->isForwardDecl() &&
(Ty->getRawName() || CTy->getRawIdentifier())) {
// Skip updating the accelerator tables since this is not the full type.
if (MDString *TypeId = CTy->getRawIdentifier())
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
else {
auto X = DD->enterNonTypeUnitContext();
finishNonUnitTypeDIE(TyDIE, CTy);
}
return &TyDIE;
}
constructTypeDIE(TyDIE, CTy);
} else {
constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
}
return &TyDIE;
}
DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
if (!TyNode)
return nullptr;
auto *Ty = cast<DIType>(TyNode);
// DW_TAG_restrict_type is not supported in DWARF2
if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
// DW_TAG_atomic_type is not supported in DWARF < 5
if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5)
return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
auto *Context = Ty->getScope();
DIE *ContextDIE = getOrCreateContextDIE(Context);
assert(ContextDIE);
if (DIE *TyDIE = getDIE(Ty))
return TyDIE;
return static_cast<DwarfUnit *>(ContextDIE->getUnit())
->createTypeDIE(Context, *ContextDIE, Ty);
}
void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
const DIType *Ty, const DIE &TyDIE) {
if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
bool IsImplementation = false;
if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
// A runtime language of 0 actually means C/C++, and any non-negative
// value is some version of Objective-C/C++.
IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
}
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags);
if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
isa<DINamespace>(Context) || isa<DICommonBlock>(Context))
addGlobalType(Ty, TyDIE, Context);
}
}
void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
dwarf::Attribute Attribute) {
assert(Ty && "Trying to add a type that doesn't exist?");
addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty)));
}
std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
if (!Context)
return "";
// FIXME: Decide whether to implement this for non-C++ languages.
if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage()))
return "";
std::string CS;
SmallVector<const DIScope *, 1> Parents;
while (!isa<DICompileUnit>(Context)) {
Parents.push_back(Context);
if (const DIScope *S = Context->getScope())
Context = S;
else
// Structure, etc. types will have a NULL context if they're at the top
// level.
break;
}
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) {
StringRef Name = Ctx->getName();
if (Name.empty() && isa<DINamespace>(Ctx))
Name = "(anonymous namespace)";
if (!Name.empty()) {
CS += Name;
CS += "::";
}
}
return CS;
}
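// Illustrative sketch (not part of this patch): the same scope-prefix
// construction over a plain list of scope names, innermost first (as the walk
// above collects them before reverse-iterating). Unnamed scopes contribute
// nothing in this simplified version; the real code substitutes
// "(anonymous namespace)" for unnamed namespaces.
#include <string>
#include <vector>
static std::string scopePrefix(const std::vector<std::string> &InnermostFirst) {
  std::string CS;
  for (auto It = InnermostFirst.rbegin(); It != InnermostFirst.rend(); ++It)
    if (!It->empty())
      CS += *It + "::";
  return CS;
}
// e.g. scopePrefix({"Inner", "Outer", "ns"}) == "ns::Outer::Inner::"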
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
// Get core information.
StringRef Name = BTy->getName();
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
// An unspecified type only has a name attribute.
if (BTy->getTag() == dwarf::DW_TAG_unspecified_type)
return;
if (BTy->getTag() != dwarf::DW_TAG_string_type)
addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
BTy->getEncoding());
uint64_t Size = BTy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (BTy->isBigEndian())
addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_big);
else if (BTy->isLittleEndian())
addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little);
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) {
// Get core information.
StringRef Name = STy->getName();
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
if (DIVariable *Var = STy->getStringLength()) {
if (auto *VarDIE = getDIE(Var))
addDIEEntry(Buffer, dwarf::DW_AT_string_length, *VarDIE);
} else if (DIExpression *Expr = STy->getStringLengthExp()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
// This is to describe the memory location of the
// length of a Fortran deferred length string, so
// lock it down as such.
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(Expr);
addBlock(Buffer, dwarf::DW_AT_string_length, DwarfExpr.finalize());
} else {
uint64_t Size = STy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
}
if (STy->getEncoding()) {
// For eventual Unicode support.
addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
STy->getEncoding());
}
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// Get core information.
StringRef Name = DTy->getName();
uint64_t Size = DTy->getSizeInBits() >> 3;
uint16_t Tag = Buffer.getTag();
// Map to main type, void will not have a type.
const DIType *FromTy = DTy->getBaseType();
if (FromTy)
addType(Buffer, FromTy);
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
// If alignment is specified for a typedef, create and insert a DW_AT_alignment
// attribute in the DW_TAG_typedef DIE.
if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) {
uint32_t AlignInBytes = DTy->getAlignInBytes();
if (AlignInBytes > 0)
addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
}
// Add size if non-zero (derived types might be zero-sized.)
if (Size && Tag != dwarf::DW_TAG_pointer_type
&& Tag != dwarf::DW_TAG_ptr_to_member_type
&& Tag != dwarf::DW_TAG_reference_type
&& Tag != dwarf::DW_TAG_rvalue_reference_type)
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
*getOrCreateTypeDIE(cast<DIDerivedType>(DTy)->getClassType()));
// Add source line info if available and the type is not a forward declaration.
if (!DTy->isForwardDecl())
addSourceLine(Buffer, DTy);
// If DWARF address space value is other than None, add it. The IR
// verifier checks that DWARF address space only exists for pointer
// or reference types.
if (DTy->getDWARFAddressSpace())
addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
DTy->getDWARFAddressSpace().getValue());
}
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
for (unsigned i = 1, N = Args.size(); i < N; ++i) {
const DIType *Ty = Args[i];
if (!Ty) {
assert(i == N-1 && "Unspecified parameter must be the last argument");
createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
} else {
DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
addType(Arg, Ty);
if (Ty->isArtificial())
addFlag(Arg, dwarf::DW_AT_artificial);
}
}
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
// Add return type. A void return won't have a type.
auto Elements = cast<DISubroutineType>(CTy)->getTypeArray();
if (Elements.size())
if (auto RTy = Elements[0])
addType(Buffer, RTy);
bool isPrototyped = true;
if (Elements.size() == 2 && !Elements[1])
isPrototyped = false;
constructSubprogramArguments(Buffer, Elements);
// Add prototype flag if we're dealing with a C language and the function has
// been prototyped.
uint16_t Language = getLanguage();
if (isPrototyped &&
(Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
addFlag(Buffer, dwarf::DW_AT_prototyped);
// Add a DW_AT_calling_convention if this has an explicit convention.
if (CTy->getCC() && CTy->getCC() != dwarf::DW_CC_normal)
addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
CTy->getCC());
if (CTy->isLValueReference())
addFlag(Buffer, dwarf::DW_AT_reference);
if (CTy->isRValueReference())
addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// Add name if not anonymous or intermediate type.
StringRef Name = CTy->getName();
uint64_t Size = CTy->getSizeInBits() >> 3;
uint16_t Tag = Buffer.getTag();
switch (Tag) {
case dwarf::DW_TAG_array_type:
constructArrayTypeDIE(Buffer, CTy);
break;
case dwarf::DW_TAG_enumeration_type:
constructEnumTypeDIE(Buffer, CTy);
break;
case dwarf::DW_TAG_variant_part:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_class_type: {
// Emit the discriminator for a variant part.
DIDerivedType *Discriminator = nullptr;
if (Tag == dwarf::DW_TAG_variant_part) {
Discriminator = CTy->getDiscriminator();
if (Discriminator) {
// DWARF says:
// If the variant part has a discriminant, the discriminant is
// represented by a separate debugging information entry which is
// a child of the variant part entry.
DIE &DiscMember = constructMemberDIE(Buffer, Discriminator);
addDIEEntry(Buffer, dwarf::DW_AT_discr, DiscMember);
}
}
// Add template parameters to a class, structure or union types.
if (Tag == dwarf::DW_TAG_class_type ||
Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
addTemplateParams(Buffer, CTy->getTemplateParams());
// Add elements to structure type.
DINodeArray Elements = CTy->getElements();
for (const auto *Element : Elements) {
if (!Element)
continue;
if (auto *SP = dyn_cast<DISubprogram>(Element))
getOrCreateSubprogramDIE(SP);
else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
if (DDTy->getTag() == dwarf::DW_TAG_friend) {
DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
addType(ElemDie, DDTy->getBaseType(), dwarf::DW_AT_friend);
} else if (DDTy->isStaticMember()) {
getOrCreateStaticMemberDIE(DDTy);
} else if (Tag == dwarf::DW_TAG_variant_part) {
// When emitting a variant part, wrap each member in
// DW_TAG_variant.
DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer);
if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
if (DD->isUnsignedDIType(Discriminator->getBaseType()))
addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue());
else
addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue());
}
constructMemberDIE(Variant, DDTy);
} else {
constructMemberDIE(Buffer, DDTy);
}
} else if (auto *Property = dyn_cast<DIObjCProperty>(Element)) {
DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer);
StringRef PropertyName = Property->getName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
if (Property->getType())
addType(ElemDie, Property->getType());
addSourceLine(ElemDie, Property);
StringRef GetterName = Property->getGetterName();
if (!GetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
StringRef SetterName = Property->getSetterName();
if (!SetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
if (unsigned PropertyAttributes = Property->getAttributes())
addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,
PropertyAttributes);
} else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
if (Composite->getTag() == dwarf::DW_TAG_variant_part) {
DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer);
constructTypeDIE(VariantPart, Composite);
}
}
}
if (CTy->isAppleBlockExtension())
addFlag(Buffer, dwarf::DW_AT_APPLE_block);
if (CTy->getExportSymbols())
addFlag(Buffer, dwarf::DW_AT_export_symbols);
// This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
// inside C++ composite types to point to the base class with the vtable.
// Rust uses DW_AT_containing_type to link a vtable to the type
// for which it was created.
if (auto *ContainingType = CTy->getVTableHolder())
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
*getOrCreateTypeDIE(ContainingType));
if (CTy->isObjcClassComplete())
addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
// Add the type's non-standard calling convention.
// DW_CC_pass_by_value/DW_CC_pass_by_reference are introduced in DWARF 5.
if (!Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= 5) {
uint8_t CC = 0;
if (CTy->isTypePassByValue())
CC = dwarf::DW_CC_pass_by_value;
else if (CTy->isTypePassByReference())
CC = dwarf::DW_CC_pass_by_reference;
if (CC)
addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
CC);
}
break;
}
default:
break;
}
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
if (Tag == dwarf::DW_TAG_enumeration_type ||
Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
Tag == dwarf::DW_TAG_union_type) {
// Add size if non-zero (derived types might be zero-sized).
// Ignore the size if it's a non-enum forward decl.
// TODO: Do we care about size for enum forward declarations?
if (Size &&
(!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type))
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
else if (!CTy->isForwardDecl())
// Add zero size if it is not a forward declaration.
addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0);
// If we're a forward decl, say so.
if (CTy->isForwardDecl())
addFlag(Buffer, dwarf::DW_AT_declaration);
// Add source line info if available.
if (!CTy->isForwardDecl())
addSourceLine(Buffer, CTy);
// No harm in adding the runtime language to the declaration.
unsigned RLang = CTy->getRuntimeLang();
if (RLang)
addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
RLang);
// Add align info if available.
if (uint32_t AlignInBytes = CTy->getAlignInBytes())
addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
}
}
void DwarfUnit::constructTemplateTypeParameterDIE(
DIE &Buffer, const DITemplateTypeParameter *TP) {
DIE &ParamDIE =
createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
// Add the type if it exists; it could be void and therefore have no type.
if (TP->getType())
addType(ParamDIE, TP->getType());
if (!TP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
if (TP->isDefault() && (DD->getDwarfVersion() >= 5))
addFlag(ParamDIE, dwarf::DW_AT_default_value);
}
void DwarfUnit::constructTemplateValueParameterDIE(
DIE &Buffer, const DITemplateValueParameter *VP) {
DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer);
// Add the type if there is one; template template parameters and template
// parameter packs will not have a type.
if (VP->getTag() == dwarf::DW_TAG_template_value_parameter)
addType(ParamDIE, VP->getType());
if (!VP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
if (VP->isDefault() && (DD->getDwarfVersion() >= 5))
addFlag(ParamDIE, dwarf::DW_AT_default_value);
if (Metadata *Val = VP->getValue()) {
if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
addConstantValue(ParamDIE, CI, VP->getType());
else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
// We cannot describe the location of dllimport'd entities: the
// computation of their address requires loads from the IAT.
if (!GV->hasDLLImportStorageClass()) {
// For declaration non-type template parameters (such as global values
// and functions)
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addOpAddress(*Loc, Asm->getSymbol(GV));
// Emit DW_OP_stack_value to use the address as the immediate value of
// the parameter, rather than a pointer to it.
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
}
} else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) {
assert(isa<MDString>(Val));
addString(ParamDIE, dwarf::DW_AT_GNU_template_name,
cast<MDString>(Val)->getString());
} else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
addTemplateParams(ParamDIE, cast<MDTuple>(Val));
}
}
}
DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(NS->getScope());
if (DIE *NDie = getDIE(NS))
return NDie;
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
StringRef Name = NS->getName();
if (!Name.empty())
addString(NDie, dwarf::DW_AT_name, NS->getName());
else
Name = "(anonymous namespace)";
DD->addAccelNamespace(*CUNode, Name, NDie);
addGlobalName(Name, NDie, NS->getScope());
if (NS->getExportSymbols())
addFlag(NDie, dwarf::DW_AT_export_symbols);
return &NDie;
}
DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(M->getScope());
if (DIE *MDie = getDIE(M))
return MDie;
DIE &MDie = createAndAddDIE(dwarf::DW_TAG_module, *ContextDIE, M);
if (!M->getName().empty()) {
addString(MDie, dwarf::DW_AT_name, M->getName());
addGlobalName(M->getName(), MDie, M->getScope());
}
if (!M->getConfigurationMacros().empty())
addString(MDie, dwarf::DW_AT_LLVM_config_macros,
M->getConfigurationMacros());
if (!M->getIncludePath().empty())
addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());
if (!M->getAPINotesFile().empty())
addString(MDie, dwarf::DW_AT_LLVM_apinotes, M->getAPINotesFile());
if (M->getFile())
addUInt(MDie, dwarf::DW_AT_decl_file, None,
getOrCreateSourceID(M->getFile()));
if (M->getLineNo())
addUInt(MDie, dwarf::DW_AT_decl_line, None, M->getLineNo());
if (M->getIsDecl())
addFlag(MDie, dwarf::DW_AT_declaration);
return &MDie;
}
DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE (as is the case for member function
// declarations).
DIE *ContextDIE =
Minimal ? &getUnitDie() : getOrCreateContextDIE(SP->getScope());
if (DIE *SPDie = getDIE(SP))
return SPDie;
if (auto *SPDecl = SP->getDeclaration()) {
if (!Minimal) {
// Add subprogram definitions to the CU die directly.
ContextDIE = &getUnitDie();
// Build the decl now to ensure it precedes the definition.
getOrCreateSubprogramDIE(SPDecl);
}
}
// DW_TAG_inlined_subroutine may refer to this DIE.
DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
// Stop here and fill this in later, depending on whether or not this
// subprogram turns out to have inlined instances.
if (SP->isDefinition())
return &SPDie;
static_cast<DwarfUnit *>(SPDie.getUnit())
->applySubprogramAttributes(SP, SPDie);
return &SPDie;
}
bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
DIE &SPDie, bool Minimal) {
DIE *DeclDie = nullptr;
StringRef DeclLinkageName;
if (auto *SPDecl = SP->getDeclaration()) {
if (!Minimal) {
DITypeRefArray DeclArgs, DefinitionArgs;
DeclArgs = SPDecl->getType()->getTypeArray();
DefinitionArgs = SP->getType()->getTypeArray();
if (DeclArgs.size() && DefinitionArgs.size())
if (DefinitionArgs[0] != NULL && DeclArgs[0] != DefinitionArgs[0])
addType(SPDie, DefinitionArgs[0]);
DeclDie = getDIE(SPDecl);
assert(DeclDie && "This DIE should've already been constructed when the "
"definition DIE was created in "
"getOrCreateSubprogramDIE");
// Look at the Decl's linkage name only if we emitted it.
if (DD->useAllLinkageNames())
DeclLinkageName = SPDecl->getLinkageName();
unsigned DeclID = getOrCreateSourceID(SPDecl->getFile());
unsigned DefID = getOrCreateSourceID(SP->getFile());
if (DeclID != DefID)
addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
if (SP->getLine() != SPDecl->getLine())
addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine());
}
}
// Add function template parameters.
addTemplateParams(SPDie, SP->getTemplateParams());
// Add the linkage name if we have one and it isn't in the Decl.
StringRef LinkageName = SP->getLinkageName();
assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
LinkageName == DeclLinkageName) &&
"decl has a linkage name and it is different");
if (DeclLinkageName.empty() &&
// Always emit it for abstract subprograms.
(DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP)))
addLinkageName(SPDie, LinkageName);
if (!DeclDie)
return false;
// Refer to the function declaration where all the other attributes will be
// found.
addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie);
return true;
}
void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool SkipSPAttributes) {
// If -fdebug-info-for-profiling is enabled, we need to emit the subprogram
// and its source location.
bool SkipSPSourceLocation = SkipSPAttributes &&
!CUNode->getDebugInfoForProfiling();
if (!SkipSPSourceLocation)
if (applySubprogramDefinitionAttributes(SP, SPDie, SkipSPAttributes))
return;
// Constructors and operators for anonymous aggregates do not have names.
if (!SP->getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP->getName());
if (!SkipSPSourceLocation)
addSourceLine(SPDie, SP);
// Skip the rest of the attributes under -gmlt to save space.
if (SkipSPAttributes)
return;
// Add the prototype if we have a prototype and we have a C like
// language.
uint16_t Language = getLanguage();
if (SP->isPrototyped() &&
(Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
if (SP->isObjCDirect())
addFlag(SPDie, dwarf::DW_AT_APPLE_objc_direct);
unsigned CC = 0;
DITypeRefArray Args;
if (const DISubroutineType *SPTy = SP->getType()) {
Args = SPTy->getTypeArray();
CC = SPTy->getCC();
}
// Add a DW_AT_calling_convention if this has an explicit convention.
if (CC && CC != dwarf::DW_CC_normal)
addUInt(SPDie, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, CC);
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
if (Args.size())
if (auto Ty = Args[0])
addType(SPDie, Ty);
unsigned VK = SP->getVirtuality();
if (VK) {
addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
if (SP->getVirtualIndex() != -1u) {
DIELoc *Block = getDIELoc();
addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
}
ContainingTypeMap.insert(std::make_pair(&SPDie, SP->getContainingType()));
}
if (!SP->isDefinition()) {
addFlag(SPDie, dwarf::DW_AT_declaration);
// Add arguments. Do not add arguments for subprogram definition. They will
// be handled while processing variables.
constructSubprogramArguments(SPDie, Args);
}
addThrownTypes(SPDie, SP->getThrownTypes());
if (SP->isArtificial())
addFlag(SPDie, dwarf::DW_AT_artificial);
if (!SP->isLocalToUnit())
addFlag(SPDie, dwarf::DW_AT_external);
if (DD->useAppleExtensionAttributes()) {
if (SP->isOptimized())
addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
if (unsigned isa = Asm->getISAEncoding())
addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
}
if (SP->isLValueReference())
addFlag(SPDie, dwarf::DW_AT_reference);
if (SP->isRValueReference())
addFlag(SPDie, dwarf::DW_AT_rvalue_reference);
if (SP->isNoReturn())
addFlag(SPDie, dwarf::DW_AT_noreturn);
if (SP->isProtected())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (SP->isPrivate())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
else if (SP->isPublic())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP->isExplicit())
addFlag(SPDie, dwarf::DW_AT_explicit);
if (SP->isMainSubprogram())
addFlag(SPDie, dwarf::DW_AT_main_subprogram);
if (SP->isPure())
addFlag(SPDie, dwarf::DW_AT_pure);
if (SP->isElemental())
addFlag(SPDie, dwarf::DW_AT_elemental);
if (SP->isRecursive())
addFlag(SPDie, dwarf::DW_AT_recursive);
if (DD->getDwarfVersion() >= 5 && SP->isDeleted())
addFlag(SPDie, dwarf::DW_AT_deleted);
}
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
DIE *IndexTy) {
DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy);
// The LowerBound value defines the lower bound, which is typically zero for
// C/C++. The Count value is the number of elements. Values are 64 bit. If
// Count == -1 then the array is unbounded and we do not emit
// DW_AT_lower_bound and DW_AT_count attributes.
int64_t DefaultLowerBound = getDefaultLowerBound();
auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DISubrange::BoundType Bound) -> void {
if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
if (auto *VarDIE = getDIE(BV))
addDIEEntry(DW_Subrange, Attr, *VarDIE);
} else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(BE);
addBlock(DW_Subrange, Attr, DwarfExpr.finalize());
} else if (auto *BI = Bound.dyn_cast<ConstantInt *>()) {
if (Attr == dwarf::DW_AT_count) {
if (BI->getSExtValue() != -1)
addUInt(DW_Subrange, Attr, None, BI->getSExtValue());
} else if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
BI->getSExtValue() != DefaultLowerBound)
addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue());
}
};
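// Illustrative example (not from this change): for a C array such as
// `int a[10]`, the front end typically provides a constant count of 10 with
// the default lower bound of 0, so DW_AT_count is emitted and
// DW_AT_lower_bound is omitted as redundant.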
AddBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound());
AddBoundTypeEntry(dwarf::DW_AT_count, SR->getCount());
AddBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound());
AddBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride());
}
void DwarfUnit::constructGenericSubrangeDIE(DIE &Buffer,
const DIGenericSubrange *GSR,
DIE *IndexTy) {
DIE &DwGenericSubrange =
createAndAddDIE(dwarf::DW_TAG_generic_subrange, Buffer);
addDIEEntry(DwGenericSubrange, dwarf::DW_AT_type, *IndexTy);
int64_t DefaultLowerBound = getDefaultLowerBound();
auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DIGenericSubrange::BoundType Bound) -> void {
if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
if (auto *VarDIE = getDIE(BV))
addDIEEntry(DwGenericSubrange, Attr, *VarDIE);
} else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
if (BE->isConstant() &&
DIExpression::SignedOrUnsignedConstant::SignedConstant ==
*BE->isConstant()) {
if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
static_cast<int64_t>(BE->getElement(1)) != DefaultLowerBound)
addSInt(DwGenericSubrange, Attr, dwarf::DW_FORM_sdata,
BE->getElement(1));
} else {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(BE);
addBlock(DwGenericSubrange, Attr, DwarfExpr.finalize());
}
}
};
AddBoundTypeEntry(dwarf::DW_AT_lower_bound, GSR->getLowerBound());
AddBoundTypeEntry(dwarf::DW_AT_count, GSR->getCount());
AddBoundTypeEntry(dwarf::DW_AT_upper_bound, GSR->getUpperBound());
AddBoundTypeEntry(dwarf::DW_AT_byte_stride, GSR->getStride());
}
DIE *DwarfUnit::getIndexTyDie() {
if (IndexTyDie)
return IndexTyDie;
// Construct an integer type to use for indexes.
IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie());
StringRef Name = "__ARRAY_SIZE_TYPE__";
addString(*IndexTyDie, dwarf::DW_AT_name, Name);
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_unsigned);
DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0);
return IndexTyDie;
}
/// Returns true if the vector's size differs from the sum of sizes of elements
/// the user specified. This can occur if the vector has been rounded up to
/// fit memory alignment constraints.
static bool hasVectorBeenPadded(const DICompositeType *CTy) {
assert(CTy && CTy->isVector() && "Composite type is not a vector");
const uint64_t ActualSize = CTy->getSizeInBits();
// Obtain the size of each element in the vector.
DIType *BaseTy = CTy->getBaseType();
assert(BaseTy && "Unknown vector element type.");
const uint64_t ElementSize = BaseTy->getSizeInBits();
// Locate the number of elements in the vector.
const DINodeArray Elements = CTy->getElements();
assert(Elements.size() == 1 &&
Elements[0]->getTag() == dwarf::DW_TAG_subrange_type &&
"Invalid vector element array, expected one element of type subrange");
const auto Subrange = cast<DISubrange>(Elements[0]);
const auto NumVecElements =
Subrange->getCount()
? Subrange->getCount().get<ConstantInt *>()->getSExtValue()
: 0;
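// Illustrative example (assumed, not from this change): a <3 x float> vector
// has NumVecElements * ElementSize == 96 bits, but a target may round its
// storage size up to ActualSize == 128 bits for alignment, in which case this
// returns true and the padded byte size is emitted instead.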
// Ensure we found the element count and that the actual size is wide
// enough to contain the requested size.
assert(ActualSize >= (NumVecElements * ElementSize) && "Invalid vector size");
return ActualSize != (NumVecElements * ElementSize);
}
void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (CTy->isVector()) {
addFlag(Buffer, dwarf::DW_AT_GNU_vector);
if (hasVectorBeenPadded(CTy))
addUInt(Buffer, dwarf::DW_AT_byte_size, None,
CTy->getSizeInBits() / CHAR_BIT);
}
if (DIVariable *Var = CTy->getDataLocation()) {
if (auto *VarDIE = getDIE(Var))
addDIEEntry(Buffer, dwarf::DW_AT_data_location, *VarDIE);
} else if (DIExpression *Expr = CTy->getDataLocationExp()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(Expr);
addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize());
}
if (DIVariable *Var = CTy->getAssociated()) {
if (auto *VarDIE = getDIE(Var))
addDIEEntry(Buffer, dwarf::DW_AT_associated, *VarDIE);
} else if (DIExpression *Expr = CTy->getAssociatedExp()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(Expr);
addBlock(Buffer, dwarf::DW_AT_associated, DwarfExpr.finalize());
}
if (DIVariable *Var = CTy->getAllocated()) {
if (auto *VarDIE = getDIE(Var))
addDIEEntry(Buffer, dwarf::DW_AT_allocated, *VarDIE);
} else if (DIExpression *Expr = CTy->getAllocatedExp()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(Expr);
addBlock(Buffer, dwarf::DW_AT_allocated, DwarfExpr.finalize());
}
if (auto *RankConst = CTy->getRankConst()) {
addSInt(Buffer, dwarf::DW_AT_rank, dwarf::DW_FORM_sdata,
RankConst->getSExtValue());
} else if (auto *RankExpr = CTy->getRankExp()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(RankExpr);
addBlock(Buffer, dwarf::DW_AT_rank, DwarfExpr.finalize());
}
// Emit the element type.
addType(Buffer, CTy->getBaseType());
// Get an anonymous type for index type.
// FIXME: This type should be passed down from the front end
// as different languages may have different sizes for indexes.
DIE *IdxTy = getIndexTyDie();
// Add subranges to array type.
DINodeArray Elements = CTy->getElements();
for (DINode *E : Elements) {
// FIXME: Should this really be such a loose cast?
if (auto *Element = dyn_cast_or_null<DINode>(E)) {
if (Element->getTag() == dwarf::DW_TAG_subrange_type)
constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy);
else if (Element->getTag() == dwarf::DW_TAG_generic_subrange)
constructGenericSubrangeDIE(Buffer, cast<DIGenericSubrange>(Element),
IdxTy);
}
}
}
void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
const DIType *DTy = CTy->getBaseType();
bool IsUnsigned = DTy && DD->isUnsignedDIType(DTy);
if (DTy) {
if (DD->getDwarfVersion() >= 3)
addType(Buffer, DTy);
if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagEnumClass))
addFlag(Buffer, dwarf::DW_AT_enum_class);
}
auto *Context = CTy->getScope();
bool IndexEnumerators = !Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
isa<DINamespace>(Context) || isa<DICommonBlock>(Context);
DINodeArray Elements = CTy->getElements();
// Add enumerators to enumeration type.
for (const DINode *E : Elements) {
auto *Enum = dyn_cast_or_null<DIEnumerator>(E);
if (Enum) {
DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
StringRef Name = Enum->getName();
addString(Enumerator, dwarf::DW_AT_name, Name);
addConstantValue(Enumerator, Enum->getValue(), IsUnsigned);
if (IndexEnumerators)
addGlobalName(Name, Enumerator, Context);
}
}
}
void DwarfUnit::constructContainingTypeDIEs() {
for (auto &P : ContainingTypeMap) {
DIE &SPDie = *P.first;
const DINode *D = P.second;
if (!D)
continue;
DIE *NDie = getDIE(D);
if (!NDie)
continue;
addDIEEntry(SPDie, dwarf::DW_AT_containing_type, *NDie);
}
}
DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer);
StringRef Name = DT->getName();
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
if (DIType *Resolved = DT->getBaseType())
addType(MemberDie, Resolved);
addSourceLine(MemberDie, DT);
if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) {
// For C++, virtual base classes are not at a fixed offset. Use the following
// expression to extract the appropriate offset from the vtable.
// BaseAddr = ObAddr + *((*ObAddr) - Offset)
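// Reading the opcode sequence below: DW_OP_dup copies the object address,
// DW_OP_deref loads the vtable pointer, DW_OP_constu/DW_OP_minus step back
// by Offset to the virtual-base offset slot, DW_OP_deref loads that offset,
// and DW_OP_plus adds it to the duplicated object address.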
DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc;
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits());
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie);
} else {
uint64_t Size = DT->getSizeInBits();
uint64_t FieldSize = DD->getBaseTypeSize(DT);
uint32_t AlignInBytes = DT->getAlignInBytes();
uint64_t OffsetInBytes;
bool IsBitfield = FieldSize && Size != FieldSize;
if (IsBitfield) {
// Handle bitfield, assume bytes are 8 bits.
if (DD->useDWARF2Bitfields())
addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
uint64_t Offset = DT->getOffsetInBits();
// We can't use DT->getAlignInBits() here: AlignInBits for member type
// is non-zero if and only if alignment was forced (e.g. _Alignas()),
// which can't be done with bitfields. Thus we use FieldSize here.
uint32_t AlignInBits = FieldSize;
uint32_t AlignMask = ~(AlignInBits - 1);
// The bits from the start of the storage unit to the start of the field.
uint64_t StartBitOffset = Offset - (Offset & AlignMask);
// The byte offset of the field's aligned storage unit inside the struct.
OffsetInBytes = (Offset - StartBitOffset) / 8;
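// Worked example (illustrative): with FieldSize == 32 and Offset == 37,
// AlignMask == ~31, StartBitOffset == 37 - (37 & ~31) == 5, and
// OffsetInBytes == (37 - 5) / 8 == 4, i.e. the field lives in the second
// 32-bit storage unit of the record.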
if (DD->useDWARF2Bitfields()) {
uint64_t HiMark = (Offset + FieldSize) & AlignMask;
uint64_t FieldOffset = (HiMark - FieldSize);
Offset -= FieldOffset;
// Maybe we need to work from the other end.
if (Asm->getDataLayout().isLittleEndian())
Offset = FieldSize - (Offset + Size);
addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset);
OffsetInBytes = FieldOffset >> 3;
} else {
addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, None, Offset);
}
} else {
// This is not a bitfield.
OffsetInBytes = DT->getOffsetInBits() / 8;
if (AlignInBytes)
addUInt(MemberDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
}
if (DD->getDwarfVersion() <= 2) {
DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc;
addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie);
} else if (!IsBitfield || DD->useDWARF2Bitfields()) {
// In DWARF v3, DW_FORM_data4/8 in DW_AT_data_member_location are
// interpreted as location-list pointers. Interpreting constants as
// pointers is not expected, so we use DW_FORM_udata to encode the
// constants here.
if (DD->getDwarfVersion() == 3)
addUInt(MemberDie, dwarf::DW_AT_data_member_location,
dwarf::DW_FORM_udata, OffsetInBytes);
else
addUInt(MemberDie, dwarf::DW_AT_data_member_location, None,
OffsetInBytes);
}
}
if (DT->isProtected())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (DT->isPrivate())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
else if (DT->isPublic())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT->isVirtual())
addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
// Objective-C properties.
if (DINode *PNode = DT->getObjCProperty())
if (DIE *PDie = getDIE(PNode))
addAttribute(MemberDie, dwarf::DW_AT_APPLE_property,
dwarf::DW_FORM_ref4, DIEEntry(*PDie));
if (DT->isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
return MemberDie;
}
DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
if (!DT)
return nullptr;
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(DT->getScope());
assert(dwarf::isType(ContextDIE->getTag()) &&
"Static member should belong to a type.");
if (DIE *StaticMemberDIE = getDIE(DT))
return StaticMemberDIE;
DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT);
const DIType *Ty = DT->getBaseType();
addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName());
addType(StaticMemberDIE, Ty);
addSourceLine(StaticMemberDIE, DT);
addFlag(StaticMemberDIE, dwarf::DW_AT_external);
addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
// FIXME: We could omit private if the parent is a class_type, and
// public if the parent is something else.
if (DT->isProtected())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (DT->isPrivate())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
else if (DT->isPublic())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant()))
addConstantValue(StaticMemberDIE, CI, Ty);
if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant()))
addConstantFPValue(StaticMemberDIE, CFP);
if (uint32_t AlignInBytes = DT->getAlignInBytes())
addUInt(StaticMemberDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
return &StaticMemberDIE;
}
void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
if (!DD->useSectionsAsReferences())
EndLabel = Asm->emitDwarfUnitLength(
isDwoUnit() ? "debug_info_dwo" : "debug_info", "Length of Unit");
else
Asm->emitDwarfUnitLength(getHeaderSize() + getUnitDie().getSize(),
"Length of Unit");
Asm->OutStreamer->AddComment("DWARF version number");
unsigned Version = DD->getDwarfVersion();
Asm->emitInt16(Version);
// DWARF v5 reorders the address size and adds a unit type.
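// (For reference: a v4 unit header is length, version, abbrev offset,
// address size; a v5 header is length, version, unit type, address size,
// abbrev offset.)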
if (Version >= 5) {
Asm->OutStreamer->AddComment("DWARF Unit Type");
Asm->emitInt8(UT);
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->emitInt8(Asm->MAI->getCodePointerSize());
}
// We share one abbreviations table across all units so it's always at the
// start of the section. Use a relocatable offset where needed to ensure
// linking doesn't invalidate that offset.
Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (UseOffsets)
Asm->emitDwarfLengthOrOffset(0);
else
Asm->emitDwarfSymbolReference(
TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
if (Version <= 4) {
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->emitInt8(Asm->MAI->getCodePointerSize());
}
}
void DwarfTypeUnit::emitHeader(bool UseOffsets) {
DwarfUnit::emitCommonHeader(UseOffsets,
DD->useSplitDwarf() ? dwarf::DW_UT_split_type
: dwarf::DW_UT_type);
Asm->OutStreamer->AddComment("Type Signature");
Asm->OutStreamer->emitIntValue(TypeSignature, sizeof(TypeSignature));
Asm->OutStreamer->AddComment("Type DIE Offset");
// In a skeleton type unit there is no type DIE so emit a zero offset.
Asm->emitDwarfLengthOrOffset(Ty ? Ty->getOffset() : 0);
}
void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
addAttribute(Die, Attribute, DD->getDwarfSectionOffsetForm(),
new (DIEValueAllocator) DIEDelta(Hi, Lo));
}
void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label, const MCSymbol *Sec) {
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label);
else
addSectionDelta(Die, Attribute, Label, Sec);
}
bool DwarfTypeUnit::isDwoUnit() const {
// Since there are no skeleton type units, all type units are dwo type units
// when split DWARF is being used.
return DD->useSplitDwarf();
}
void DwarfTypeUnit::addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) {
getCU().addGlobalNameForTypeUnit(Name, Context);
}
void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {
getCU().addGlobalTypeUnitType(Ty, Context);
}
const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const {
if (!Asm->MAI->doesDwarfUseRelocationsAcrossSections())
return nullptr;
if (isDwoUnit())
return nullptr;
return getSection()->getBeginSymbol();
}
void DwarfUnit::addStringOffsetsStart() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
addSectionLabel(getUnitDie(), dwarf::DW_AT_str_offsets_base,
DU->getStringOffsetsStartSym(),
TLOF.getDwarfStrOffSection()->getBeginSymbol());
}
void DwarfUnit::addRnglistsBase() {
assert(DD->getDwarfVersion() >= 5 &&
"DW_AT_rnglists_base requires DWARF version 5 or later");
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
addSectionLabel(getUnitDie(), dwarf::DW_AT_rnglists_base,
DU->getRnglistsTableBaseSym(),
TLOF.getDwarfRnglistsSection()->getBeginSymbol());
}
void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
addFlag(D, dwarf::DW_AT_declaration);
StringRef Name = CTy->getName();
if (!Name.empty())
addString(D, dwarf::DW_AT_name, Name);
getCU().createTypeDIE(CTy);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
index 4bbc3d163089..248ef6c23974 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -1,541 +1,540 @@
//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// Insert hardware loop intrinsics into loops which are deemed profitable by
/// the target, by querying TargetTransformInfo. A hardware loop consists of
/// two intrinsics: one, outside the loop, to set the loop iteration count and
/// another, in the exit block, to decrement the counter. The decremented value
/// can either be carried through the loop via a phi or handled in some opaque
/// way by the target.
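///
/// A rough sketch of the resulting IR shape (illustrative only; the exact
/// intrinsics used depend on the target and on the options below):
///
///   preheader:
///     call void @llvm.set.loop.iterations.i32(i32 %count)
///     br label %header
///   ...
///   latch:
///     %cont = call i1 @llvm.loop.decrement.i32(i32 1)
///     br i1 %cont, label %header, label %exit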
///
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#define DEBUG_TYPE "hardware-loops"
#define HW_LOOPS_NAME "Hardware Loop Insertion"
using namespace llvm;
static cl::opt<bool>
ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
cl::desc("Force hardware loops intrinsics to be inserted"));
static cl::opt<bool>
ForceHardwareLoopPHI(
"force-hardware-loop-phi", cl::Hidden, cl::init(false),
cl::desc("Force hardware loop counter to be updated through a phi"));
static cl::opt<bool>
ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
cl::desc("Force allowance of nested hardware loops"));
static cl::opt<unsigned>
LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
cl::desc("Set the loop decrement value"));
static cl::opt<unsigned>
CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
cl::desc("Set the loop counter bitwidth"));
static cl::opt<bool>
ForceGuardLoopEntry(
"force-hardware-loop-guard", cl::Hidden, cl::init(false),
cl::desc("Force generation of loop guard intrinsic"));
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
#ifndef NDEBUG
static void debugHWLoopFailure(const StringRef DebugMsg,
Instruction *I) {
dbgs() << "HWLoops: " << DebugMsg;
if (I)
dbgs() << ' ' << *I;
else
dbgs() << '.';
dbgs() << '\n';
}
#endif
static OptimizationRemarkAnalysis
createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
Value *CodeRegion = L->getHeader();
DebugLoc DL = L->getStartLoc();
if (I) {
CodeRegion = I->getParent();
// If there is no debug location attached to the instruction, fall back to
// using the loop's.
if (I->getDebugLoc())
DL = I->getDebugLoc();
}
OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
R << "hardware-loop not created: ";
return R;
}
namespace {
void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
LLVM_DEBUG(debugHWLoopFailure(Msg, I));
ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
}
using TTI = TargetTransformInfo;
class HardwareLoops : public FunctionPass {
public:
static char ID;
HardwareLoops() : FunctionPass(ID) {
initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
// Try to convert the given Loop into a hardware loop.
bool TryConvertLoop(Loop *L);
// Given that the target believes the loop to be profitable, try to
// convert it.
bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
private:
ScalarEvolution *SE = nullptr;
LoopInfo *LI = nullptr;
const DataLayout *DL = nullptr;
OptimizationRemarkEmitter *ORE = nullptr;
const TargetTransformInfo *TTI = nullptr;
DominatorTree *DT = nullptr;
bool PreserveLCSSA = false;
AssumptionCache *AC = nullptr;
TargetLibraryInfo *LibInfo = nullptr;
Module *M = nullptr;
bool MadeChange = false;
};
class HardwareLoop {
// Expand the trip count scev into a value that we can use.
Value *InitLoopCount();
// Insert the set_loop_iteration intrinsic.
Value *InsertIterationSetup(Value *LoopCountInit);
// Insert the loop_decrement intrinsic.
void InsertLoopDec();
// Insert the loop_decrement_reg intrinsic.
Instruction *InsertLoopRegDec(Value *EltsRem);
// If the target requires the counter value to be updated in the loop,
// insert a phi to hold the value. The intended purpose is for use by
// loop_decrement_reg.
PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
// Create a new cmp, that checks the returned value of loop_decrement*,
// and update the exit branch to use it.
void UpdateBranch(Value *EltsRem);
public:
HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
const DataLayout &DL,
OptimizationRemarkEmitter *ORE) :
SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
ExitCount(Info.ExitCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
LoopDecrement(Info.LoopDecrement),
UsePHICounter(Info.CounterInReg),
UseLoopGuard(Info.PerformEntryTest) { }
void Create();
private:
ScalarEvolution &SE;
const DataLayout &DL;
OptimizationRemarkEmitter *ORE = nullptr;
Loop *L = nullptr;
Module *M = nullptr;
const SCEV *ExitCount = nullptr;
Type *CountType = nullptr;
BranchInst *ExitBranch = nullptr;
Value *LoopDecrement = nullptr;
bool UsePHICounter = false;
bool UseLoopGuard = false;
BasicBlock *BeginBB = nullptr;
};
}
char HardwareLoops::ID = 0;
bool HardwareLoops::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DL = &F.getParent()->getDataLayout();
ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
M = F.getParent();
for (Loop *L : *LI)
if (L->isOutermost())
TryConvertLoop(L);
return MadeChange;
}
// Return true if the search should stop, which will be when an inner loop is
// converted and the parent loop doesn't support containing a hardware loop.
bool HardwareLoops::TryConvertLoop(Loop *L) {
// Process nested loops first.
bool AnyChanged = false;
for (Loop *SL : *L)
AnyChanged |= TryConvertLoop(SL);
if (AnyChanged) {
reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
ORE, L);
return true; // Stop search.
}
LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
HardwareLoopInfo HWLoopInfo(L);
if (!HWLoopInfo.canAnalyze(*LI)) {
reportHWLoopFailure("cannot analyze loop, irreducible control flow",
"HWLoopCannotAnalyze", ORE, L);
return false;
}
if (!ForceHardwareLoops &&
!TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
reportHWLoopFailure("it's not profitable to create a hardware-loop",
"HWLoopNotProfitable", ORE, L);
return false;
}
// Allow overriding of the counter width and loop decrement value.
if (CounterBitWidth.getNumOccurrences())
HWLoopInfo.CountType =
IntegerType::get(M->getContext(), CounterBitWidth);
if (LoopDecrement.getNumOccurrences())
HWLoopInfo.LoopDecrement =
ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
MadeChange |= TryConvertLoop(HWLoopInfo);
return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
}
bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
Loop *L = HWLoopInfo.L;
LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
ForceHardwareLoopPHI)) {
// TODO: there can be many reasons a loop is not considered a
// candidate, so we should let isHardwareLoopCandidate fill in the
// reason and then report a better message here.
reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
return false;
}
assert(
(HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
"Hardware Loop must have set exit info.");
BasicBlock *Preheader = L->getLoopPreheader();
// If we don't have a preheader, then insert one.
if (!Preheader)
Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
if (!Preheader)
return false;
HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
HWLoop.Create();
++NumHWLoops;
return true;
}
void HardwareLoop::Create() {
LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
Value *LoopCountInit = InitLoopCount();
if (!LoopCountInit) {
reportHWLoopFailure("could not safely create a loop count expression",
"HWLoopNotSafe", ORE, L);
return;
}
Value *Setup = InsertIterationSetup(LoopCountInit);
if (UsePHICounter || ForceHardwareLoopPHI) {
Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
Value *EltsRem = InsertPHICounter(Setup, LoopDec);
LoopDec->setOperand(0, EltsRem);
UpdateBranch(LoopDec);
} else
InsertLoopDec();
// Run through the basic blocks of the loop and see if any of them have dead
// PHIs that can be removed.
for (auto I : L->blocks())
DeleteDeadPHIs(I);
}
static bool CanGenerateTest(Loop *L, Value *Count) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader->getSinglePredecessor())
return false;
BasicBlock *Pred = Preheader->getSinglePredecessor();
if (!isa<BranchInst>(Pred->getTerminator()))
return false;
auto *BI = cast<BranchInst>(Pred->getTerminator());
if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
return false;
// Check that the icmp is checking for equality of Count and zero and that
// a non-zero value results in entering the loop.
auto ICmp = cast<ICmpInst>(BI->getCondition());
LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
if (!ICmp->isEquality())
return false;
auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
return false;
};
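// Illustrative pattern this is looking for (assumed, not from this change):
//   %cmp = icmp eq i32 %count, 0
//   br i1 %cmp, label %exit, label %preheader
// i.e. an equality compare of Count against zero whose non-zero edge enters
// the loop.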
if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
return false;
unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
if (BI->getSuccessor(SuccIdx) != Preheader)
return false;
return true;
}
Value *HardwareLoop::InitLoopCount() {
LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
// Can we replace a conditional branch with an intrinsic that sets the
// loop counter and tests that it is not zero?
SCEVExpander SCEVE(SE, DL, "loopcnt");
-
if (!ExitCount->getType()->isPointerTy() &&
- ExitCount->getType() != CountType)
+ ExitCount->getType() != CountType)
ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
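// The +1 converts the SCEV exit (backedge-taken) count into the total number
// of iterations expected by the set/test intrinsics inserted below.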
// If we're trying to use the 'test and set' form of the intrinsic, we need
// to replace a conditional branch that is controlling entry to the loop. It
// is likely (guaranteed?) that the preheader has an unconditional branch to
// the loop header, so also check if it has a single predecessor.
if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
- SE.getZero(ExitCount->getType()))) {
+ SE.getZero(ExitCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
UseLoopGuard |= ForceGuardLoopEntry;
} else
UseLoopGuard = false;
BasicBlock *BB = L->getLoopPreheader();
if (UseLoopGuard && BB->getSinglePredecessor() &&
cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
BasicBlock *Predecessor = BB->getSinglePredecessor();
// If it's not safe to create a while loop, then don't force it and create a
// do-while loop instead.
if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE))
UseLoopGuard = false;
else
BB = Predecessor;
}
if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
<< *ExitCount << "\n");
return nullptr;
}
Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
BB->getTerminator());
// FIXME: We've expanded Count where we hope to insert the counter-setting
// intrinsic. But, in the case of the 'test and set' form, we may fall back to
// just the 'set' form, in which case the insertion block is most likely
// different. It means there will be instruction(s) in a block that possibly
// aren't needed. The isLoopEntryGuardedByCond check is trying to avoid this
// issue, but it doesn't appear to work in all cases.
UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
<< " - Expanded Count in " << BB->getName() << "\n"
<< " - Will insert set counter intrinsic into: "
<< BeginBB->getName() << "\n");
return Count;
}
Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
Type *Ty = LoopCountInit->getType();
bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
Intrinsic::ID ID = UseLoopGuard
? (UsePhi ? Intrinsic::test_start_loop_iterations
: Intrinsic::test_set_loop_iterations)
: (UsePhi ? Intrinsic::start_loop_iterations
: Intrinsic::set_loop_iterations);
Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
// Use the return value of the intrinsic to control the entry of the loop.
if (UseLoopGuard) {
assert((isa<BranchInst>(BeginBB->getTerminator()) &&
cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
"Expected conditional branch");
Value *SetCount =
UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
LoopGuard->setCondition(SetCount);
if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
LoopGuard->swapSuccessors();
}
LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
<< "\n");
if (UsePhi && UseLoopGuard)
LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
return !UsePhi ? LoopCountInit : LoopSetup;
}
void HardwareLoop::InsertLoopDec() {
IRBuilder<> CondBuilder(ExitBranch);
Function *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
LoopDecrement->getType());
Value *Ops[] = { LoopDecrement };
Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
Value *OldCond = ExitBranch->getCondition();
ExitBranch->setCondition(NewCond);
// The false branch must exit the loop.
if (!L->contains(ExitBranch->getSuccessor(0)))
ExitBranch->swapSuccessors();
// The old condition may be dead now, and may have even created a dead PHI
// (the original induction variable).
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
}
Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
IRBuilder<> CondBuilder(ExitBranch);
Function *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
{ EltsRem->getType() });
Value *Ops[] = { EltsRem, LoopDecrement };
Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
return cast<Instruction>(Call);
}
PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = ExitBranch->getParent();
IRBuilder<> Builder(Header->getFirstNonPHI());
PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
Index->addIncoming(NumElts, Preheader);
Index->addIncoming(EltsRem, Latch);
LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
return Index;
}
void HardwareLoop::UpdateBranch(Value *EltsRem) {
IRBuilder<> CondBuilder(ExitBranch);
Value *NewCond =
CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
Value *OldCond = ExitBranch->getCondition();
ExitBranch->setCondition(NewCond);
// The false branch must exit the loop.
if (!L->contains(ExitBranch->getSuccessor(0)))
ExitBranch->swapSuccessors();
// The old condition may be dead now, and may have even created a dead PHI
// (the original induction variable).
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
}
INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4f730b2cf372..dc245f0d7b16 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1,23435 +1,23436 @@
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "dagcombine"
STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Enable DAG combiner's use of IR alias analysis"));
static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
cl::desc("Enable DAG combiner's use of TBAA"));
#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
cl::desc("Only use DAG-combiner alias analysis in this"
" function"));
#endif
/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
cl::desc("Bypass the profitability model of load slicing"),
cl::init(false));
static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
static cl::opt<bool>
EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
cl::desc("DAG combiner enable merging multiple stores "
"into a wider store"));
static cl::opt<unsigned> TokenFactorInlineLimit(
"combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
cl::desc("Limit the number of operands to inline for Token Factors"));
static cl::opt<unsigned> StoreMergeDependenceLimit(
"combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
cl::desc("Limit the number of times for the same StoreNode and RootNode "
"to bail out in store merging dependence check"));
static cl::opt<bool> EnableReduceLoadOpStoreWidth(
"combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
cl::desc("DAG cominber enable reducing the width of load/op/store "
"sequence"));
static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
"combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
cl::desc("DAG cominber enable load/<replace bytes>/store with "
"a narrower store"));
namespace {
class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
const SelectionDAGTargetInfo *STI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
bool LegalDAG = false;
bool LegalOperations = false;
bool LegalTypes = false;
bool ForCodeSize;
bool DisableGenericCombines;
/// Worklist of all of the nodes that need to be simplified.
///
/// This must behave as a stack -- new nodes to process are pushed onto the
/// back and when processing we pop off of the back.
///
/// The worklist will not contain duplicates but may contain null entries
/// due to nodes being deleted from the underlying DAG.
SmallVector<SDNode *, 64> Worklist;
/// Mapping from an SDNode to its position on the worklist.
///
/// This is used to find and remove nodes from the worklist (by nulling
/// them) when they are deleted from the underlying DAG. It relies on
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
/// This records all nodes attempted to be added to the worklist since we
/// considered a new worklist entry. As we do not add duplicate nodes
/// to the worklist, this is different from the tail of the worklist.
SmallSetVector<SDNode *, 32> PruningList;
/// Set of nodes which have been combined (at least once).
///
/// This is used to allow us to reliably add any operands of a DAG node
/// which have not yet been combined to the worklist.
SmallPtrSet<SDNode *, 32> CombinedNodes;
/// Map from candidate StoreNode to the pair of RootNode and count.
/// The count is used to track how many times we have seen the StoreNode
/// with the same RootNode bail out in dependence check. If we have seen
/// the bail out for the same pair many times over a limit, we won't
/// consider the StoreNode with the same RootNode as store merging
/// candidate again.
DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis *AA;
/// When an instruction is simplified, add all users of the instruction to
/// the work lists because they might get more simplified now.
void AddUsersToWorklist(SDNode *N) {
for (SDNode *Node : N->uses())
AddToWorklist(Node);
}
/// Convenient shorthand to add a node and all of its users to the worklist.
void AddToWorklistWithUsers(SDNode *N) {
AddUsersToWorklist(N);
AddToWorklist(N);
}
// Prune potentially dangling nodes. This is called after
// any visit to a node, but should also be called during a visit after any
// failed combine which may have created a DAG node.
void clearAddedDanglingWorklistEntries() {
// Check any nodes added to the worklist to see if they are prunable.
while (!PruningList.empty()) {
auto *N = PruningList.pop_back_val();
if (N->use_empty())
recursivelyDeleteUnusedNodes(N);
}
}
SDNode *getNextWorklistEntry() {
// Before we do any work, remove nodes that are not in use.
clearAddedDanglingWorklistEntries();
SDNode *N = nullptr;
// The Worklist holds the SDNodes in order, but it may contain null
// entries.
while (!N && !Worklist.empty()) {
N = Worklist.pop_back_val();
}
if (N) {
bool GoodWorklistEntry = WorklistMap.erase(N);
(void)GoodWorklistEntry;
assert(GoodWorklistEntry &&
"Found a worklist entry without a corresponding map entry!");
}
return N;
}
/// Call the node-specific routine that folds each particular type of node.
SDValue visit(SDNode *N);
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()),
STI(D.getSubtarget().getSelectionDAGInfo()),
Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
ForCodeSize = DAG.shouldOptForSize();
DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
MaximumLegalStoreInBits = 0;
// We use the minimum store size here, since that's all we can guarantee
// for the scalable vector types.
for (MVT VT : MVT::all_valuetypes())
if (EVT(VT).isSimple() && VT != MVT::Other &&
TLI.isTypeLegal(EVT(VT)) &&
VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
}
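// For illustration: on a target whose widest legal type is a 128-bit vector
// such as v4i32, the loop above leaves MaximumLegalStoreInBits at 128; for
// scalable vector types only the known minimum size is counted.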
void ConsiderForPruning(SDNode *N) {
// Mark this for potential pruning.
PruningList.insert(N);
}
/// Add N to the worklist, making sure its instance is at the back (next to be
/// processed).
void AddToWorklist(SDNode *N) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Deleted Node added to Worklist");
// Skip handle nodes as they can't usefully be combined and confuse the
// zero-use deletion strategy.
if (N->getOpcode() == ISD::HANDLENODE)
return;
ConsiderForPruning(N);
if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
Worklist.push_back(N);
}
/// Remove all instances of N from the worklist.
void removeFromWorklist(SDNode *N) {
CombinedNodes.erase(N);
PruningList.remove(N);
StoreRootCountMap.erase(N);
auto It = WorklistMap.find(N);
if (It == WorklistMap.end())
return; // Not in the worklist.
// Null out the entry rather than erasing it to avoid a linear operation.
Worklist[It->second] = nullptr;
WorklistMap.erase(It);
}
void deleteAndRecombine(SDNode *N);
bool recursivelyDeleteUnusedNodes(SDNode *N);
/// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
/// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
return CombineTo(N, &Res, 1, AddTo);
}
/// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
bool AddTo = true) {
SDValue To[] = { Res0, Res1 };
return CombineTo(N, To, 2, AddTo);
}
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
unsigned MaximumLegalStoreInBits;
/// Check the specified integer node value to see if it can be simplified or
/// if things it uses can be simplified by bit propagation.
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, DemandedBits);
}
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
KnownBits Known;
if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
return false;
// Revisit the node.
AddToWorklist(Op.getNode());
CommitTargetLoweringOpt(TLO);
return true;
}
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the
/// elements. If so, return true.
bool SimplifyDemandedVectorElts(SDValue Op) {
// TODO: For now just pretend it cannot be simplified.
if (Op.getValueType().isScalableVector())
return false;
unsigned NumElts = Op.getValueType().getVectorNumElements();
APInt DemandedElts = APInt::getAllOnesValue(NumElts);
return SimplifyDemandedVectorElts(Op, DemandedElts);
}
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
bool AssumeSingleUse = false);
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
bool AssumeSingleUse = false);
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
// Scalars have size 0 to distinguish from singleton vectors.
SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
/// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
/// load.
///
/// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
/// \param InVecVT type of the input vector to EVE with bitcasts resolved.
/// \param EltNo index of the vector element to load.
/// \param OriginalLoad load that EVE came from to be replaced.
/// \returns EVE on success SDValue() on failure.
SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue EltNo,
LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
SDValue PromoteIntBinOp(SDValue Op);
SDValue PromoteIntShiftOp(SDValue Op);
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
/// Call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
SDValue combine(SDNode *N);
// Visitation implementation - Implement dag node combining for different
// node types. The semantics are as follows:
// Return Value:
// SDValue.getNode() == 0 - No change was made
// SDValue.getNode() == N - N was replaced, is dead and has been handled.
// otherwise - N should be replaced by the returned Operand.
//
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
SDValue visitADDLike(SDNode *N);
SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
SDValue visitADDSAT(SDNode *N);
SDValue visitSUBSAT(SDNode *N);
SDValue visitADDC(SDNode *N);
SDValue visitADDO(SDNode *N);
SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitSUBC(SDNode *N);
SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitADDCARRY(SDNode *N);
SDValue visitSADDO_CARRY(SDNode *N);
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitSUBCARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitUDIV(SDNode *N);
SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitMULO(SDNode *N);
SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitOR(SDNode *N);
SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitFunnelShift(SDNode *N);
SDValue visitRotate(SDNode *N);
SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
SDValue visitBITREVERSE(SDNode *N);
SDValue visitCTLZ(SDNode *N);
SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSETCCCARRY(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitAssertExt(SDNode *N);
SDValue visitAssertAlign(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitSTRICT_FADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
SDValue visitFPOW(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
SDValue visitFP_TO_UINT(SDNode *N);
SDValue visitFP_ROUND(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
SDValue visitFCEIL(SDNode *N);
SDValue visitFTRUNC(SDNode *N);
SDValue visitFFLOOR(SDNode *N);
SDValue visitFMINNUM(SDNode *N);
SDValue visitFMAXNUM(SDNode *N);
SDValue visitFMINIMUM(SDNode *N);
SDValue visitFMAXIMUM(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
SDValue visitSTORE(SDNode *N);
SDValue visitLIFETIME_END(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitSCALAR_TO_VECTOR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);
SDValue visitMGATHER(SDNode *N);
SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
bool reassociationCanBreakAddressingModePattern(unsigned Opc,
const SDLoc &DL, SDValue N0,
SDValue N1);
SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1);
SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
SDValue visitShiftByConstant(SDNode *N);
SDValue foldSelectOfConstants(SDNode *N);
SDValue foldVSelectOfConstants(SDNode *N);
SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
SDValue convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC);
SDValue foldSignChangeInBitcast(SDNode *N);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldSelectOfBinops(SDNode *N);
SDValue foldSextSetcc(SDNode *N);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
SDValue unfoldMaskedMerge(SDNode *N);
SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans);
SDValue rebuildSetCC(SDValue N);
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC, bool MatchStrict = false) const;
bool isOneUseSetCC(SDValue N) const;
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal);
SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
SDValue mergeTruncStores(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecTruncToBitCast(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
SDValue VecIn2, unsigned LeftIdx,
bool DidSplitVec);
SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
bool isAlias(SDNode *Op0, SDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
/// Try to replace a store and any possibly adjacent stores on
/// consecutive chains with better chains. Return true only if St is
/// replaced.
///
/// Notice that other chains may still be replaced even if the function
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
// Helper for findBetterNeighborChains. Walk up the store chain and add
// additional chained stores that do not overlap and can be parallelized.
bool parallelizeChainedStores(StoreSDNode *St);
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
// Ptr to the mem node.
LSBaseSDNode *MemNode;
// Offset from the base ptr.
int64_t OffsetFromBase;
MemOpLink(LSBaseSDNode *N, int64_t Offset)
: MemNode(N), OffsetFromBase(Offset) {}
};
// Classify the origin of a stored value.
enum class StoreSource { Unknown, Constant, Extract, Load };
StoreSource getStoreSource(SDValue StoreVal) {
switch (StoreVal.getOpcode()) {
case ISD::Constant:
case ISD::ConstantFP:
return StoreSource::Constant;
case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR:
return StoreSource::Extract;
case ISD::LOAD:
return StoreSource::Load;
default:
return StoreSource::Unknown;
}
}
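// For example, a store of (Constant 42) or (ConstantFP 1.0) classifies as
// StoreSource::Constant, a store of (extract_vector_elt v, i) as
// StoreSource::Extract, and a store of a freshly loaded value as
// StoreSource::Load; anything else is Unknown and is not considered by the
// per-source merging paths below.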
/// This is a helper function for visitMUL to check the profitability
/// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
/// MulNode is the original multiply, AddNode is (add x, c1),
/// and ConstNode is c2.
bool isMulAddWithConstProfitable(SDNode *MulNode,
SDValue &AddNode,
SDValue &ConstNode);
/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
/// the type of the loaded value to be extended.
bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
EVT LoadResultTy, EVT &ExtVT);
/// Helper function to calculate whether the given Load/Store can have its
/// width reduced to ExtVT.
bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
EVT &MemVT, unsigned ShAmt = 0);
/// Used by BackwardsPropagateMask to find suitable loads.
bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
SmallPtrSetImpl<SDNode*> &NodesWithConsts,
ConstantSDNode *Mask, SDNode *&NodeToMask);
/// Attempt to propagate a given AND node back to load leaves so that they
/// can be combined into narrow loads.
bool BackwardsPropagateMask(SDNode *N);
/// Helper function for mergeConsecutiveStores which merges the component
/// store chains.
SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores);
/// This is a helper function for mergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store introducing
/// bitcasts if necessary. \return True if a merged store was created.
bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector,
bool UseTrunc);
/// This is a helper function for mergeConsecutiveStores. Stores that
/// potentially may be merged with St are placed in StoreNodes. RootNode is
/// a chain predecessor to all store candidates.
void getStoreMergeCandidates(StoreSDNode *St,
SmallVectorImpl<MemOpLink> &StoreNodes,
SDNode *&Root);
/// Helper function for mergeConsecutiveStores. Checks if candidate stores
/// have indirect dependency through their operands. RootNode is the
/// predecessor to all stores calculated by getStoreMergeCandidates and is
/// used to prune the dependency check. \return True if safe to merge.
bool checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
SDNode *RootNode);
/// This is a helper function for mergeConsecutiveStores. Given a list of
/// store candidates, find the first N that are consecutive in memory.
/// Returns 0 if there are not at least 2 consecutive stores to try merging.
unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
int64_t ElementSizeBytes) const;
/// This is a helper function for mergeConsecutiveStores. It is used for
/// store chains that are composed entirely of constant values.
bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumConsecutiveStores,
EVT MemVT, SDNode *Root, bool AllowVectors);
/// This is a helper function for mergeConsecutiveStores. It is used for
/// store chains that are composed entirely of extracted vector elements.
/// When extracting multiple vector elements, try to store them in one
/// vector store rather than a sequence of scalar stores.
bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumConsecutiveStores, EVT MemVT,
SDNode *Root);
/// This is a helper function for mergeConsecutiveStores. It is used for
/// store chains that are composed entirely of loaded values.
bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumConsecutiveStores, EVT MemVT,
SDNode *Root, bool AllowVectors,
bool IsNonTemporalStore, bool IsNonTemporalLoad);
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return true if stores were merged.
bool mergeConsecutiveStores(StoreSDNode *St);
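// Illustrative example for mergeConsecutiveStores above: four adjacent i8
// constant stores to p, p+1, p+2 and p+3 may be rewritten as a single i32
// store of the combined constant, when the wider (integer or vector) store is
// acceptable per the helpers above.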
/// Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
///
/// \p N needs to be a truncation and its first operand an AND. Other
/// requirements are checked by the function (e.g. that the trunc is
/// single-use); if any are not met, an empty SDValue is returned.
SDValue distributeTruncateThroughAnd(SDNode *N);
/// Helper function to determine whether the target supports operation
/// given by \p Opcode for type \p VT, that is, whether the operation
/// is legal or custom before legalizing operations, and whether it is
/// legal (but not custom) after legalization.
bool hasOperation(unsigned Opcode, EVT VT) {
return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
}
public:
/// Runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
SelectionDAG &getDAG() const { return DAG; }
/// Returns a type large enough to hold any valid shift amount - before type
/// legalization these can be huge.
EVT getShiftAmountTy(EVT LHSTy) {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
}
/// This method returns true if we are running before type legalization or
/// if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
if (!LegalTypes) return true;
return TLI.isTypeLegal(VT);
}
/// Convenience wrapper around TargetLowering::getSetCCResultType
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
SDValue OrigLoad, SDValue ExtLoad,
ISD::NodeType ExtType);
};
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
explicit WorklistRemover(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
void NodeDeleted(SDNode *N, SDNode *E) override {
DC.removeFromWorklist(N);
}
};
class WorklistInserter : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
explicit WorklistInserter(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
// FIXME: Ideally we could add N to the worklist, but this causes exponential
// compile time costs in large DAGs, e.g. Halide.
void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
((DAGCombiner*)DC)->AddToWorklist(N);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}
bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}
void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
void DAGCombiner::deleteAndRecombine(SDNode *N) {
removeFromWorklist(N);
// If the operands of this node are only used by the node, they will now be
// dead. Make sure to re-visit them and recursively delete dead nodes.
for (const SDValue &Op : N->ops())
// For an operand generating multiple values, one of the values may
// become dead allowing further simplification (e.g. split index
// arithmetic from an indexed load).
if (Op->hasOneUse() || Op->getNumValues() > 1)
AddToWorklist(Op.getNode());
DAG.DeleteNode(N);
}
// APInts must be the same size for most operations; this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
LHS = LHS.zextOrSelf(Bits);
RHS = RHS.zextOrSelf(Bits);
}
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC, bool MatchStrict) const {
if (N.getOpcode() == ISD::SETCC) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(2);
return true;
}
if (MatchStrict &&
(N.getOpcode() == ISD::STRICT_FSETCC ||
N.getOpcode() == ISD::STRICT_FSETCCS)) {
LHS = N.getOperand(1);
RHS = N.getOperand(2);
CC = N.getOperand(3);
return true;
}
if (N.getOpcode() != ISD::SELECT_CC ||
!TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
!TLI.isConstFalseVal(N.getOperand(3).getNode()))
return false;
if (TLI.getBooleanContents(N.getValueType()) ==
TargetLowering::UndefinedBooleanContent)
return false;
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(4);
return true;
}
/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
SDValue N0, N1, N2;
if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
return true;
return false;
}
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
if (!ScalarTy.isSimple())
return false;
uint64_t MaskForTy = 0ULL;
switch (ScalarTy.getSimpleVT().SimpleTy) {
case MVT::i8:
MaskForTy = 0xFFULL;
break;
case MVT::i16:
MaskForTy = 0xFFFFULL;
break;
case MVT::i32:
MaskForTy = 0xFFFFFFFFULL;
break;
default:
return false;
}
APInt Val;
if (ISD::isConstantSplatVector(N, Val))
return Val.getLimitedValue() == MaskForTy;
return false;
}
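// For example, a constant splat vector whose elements are all 0xFFFF matches
// ScalarTy == MVT::i16, i.e. the splat acts as a full mask for an i16 value.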
// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
return false;
unsigned BitWidth = N.getScalarValueSizeInBits();
for (const SDValue &Op : N->op_values()) {
if (Op.isUndef())
continue;
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
(Const->isOpaque() && NoOpaques))
return false;
}
return true;
}
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
if (V.getOpcode() != ISD::BUILD_VECTOR)
return false;
return isConstantOrConstantVector(V, NoOpaques) ||
ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
// Determine whether it is safe to split the index off of this indexed load,
// i.e. splitting is enabled and the index is not an opaque target constant.
static bool canSplitIdx(LoadSDNode *LD) {
return MaySplitLoadIndex &&
(LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
!cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
const SDLoc &DL,
SDValue N0,
SDValue N1) {
// Currently this only tries to ensure we don't undo the GEP splits done by
// CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
// we check if the following transformation would be problematic:
// (load/store (add, (add, x, offset1), offset2)) ->
// (load/store (add, x, offset1+offset2)).
if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
return false;
if (N0.hasOneUse())
return false;
auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N1);
if (!C1 || !C2)
return false;
const APInt &C1APIntVal = C1->getAPIntValue();
const APInt &C2APIntVal = C2->getAPIntValue();
if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
return false;
const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
if (CombinedValueIntVal.getBitWidth() > 64)
return false;
const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
for (SDNode *Node : N0->uses()) {
auto LoadStore = dyn_cast<MemSDNode>(Node);
if (LoadStore) {
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
EVT VT = LoadStore->getMemoryVT();
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
continue;
// Would x[offset1+offset2] still be a legal addressing mode?
AM.BaseOffs = CombinedValue;
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
return true;
}
}
return false;
}
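// Rough sketch of the check above: for each memory user of the inner add it
// first asks whether "base + offset2" is even a legal addressing mode (if not,
// nothing can be lost), and then whether "base + offset1 + offset2" would
// still be legal; reassociation is reported as potentially harmful only when
// the combined offset is the one that stops being encodable.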
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
if (N0.getOpcode() != Opc)
return SDValue();
if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
if (SDValue OpNode =
DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
return SDValue();
}
if (N0.hasOneUse()) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
if (!OpNode.getNode())
return SDValue();
return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
return SDValue();
}
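// Examples: (add (add x, 3), 5) folds to (add x, 8) via the constant path,
// while (add (add x, 3), y) with a single-use inner add becomes
// (add (add x, y), 3), pushing the constant outward.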
// Try to reassociate commutative binops.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags) {
assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
// Floating-point reassociation is not allowed without loose FP math.
if (N0.getValueType().isFloatingPoint() ||
N1.getValueType().isFloatingPoint())
if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
return SDValue();
if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
return Combined;
if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
return Combined;
return SDValue();
}
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
To[0].getNode()->dump(&DAG);
dbgs() << " and " << NumTo - 1 << " other values\n");
for (unsigned i = 0, e = NumTo; i != e; ++i)
assert((!To[i].getNode() ||
N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
if (To[i].getNode()) {
AddToWorklist(To[i].getNode());
AddUsersToWorklist(To[i].getNode());
}
}
}
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (N->use_empty())
deleteAndRecombine(N);
return SDValue(N, 0);
}
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace the old value with the new one.
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
dbgs() << '\n');
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
AddToWorklistWithUsers(TLO.New.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (TLO.Old.getNode()->use_empty())
deleteAndRecombine(TLO.Old.getNode());
}
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
KnownBits Known;
if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
AssumeSingleUse))
return false;
// Revisit the node.
AddToWorklist(Op.getNode());
CommitTargetLoweringOpt(TLO);
return true;
}
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
const APInt &DemandedElts,
bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownUndef, KnownZero;
if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
TLO, 0, AssumeSingleUse))
return false;
// Revisit the node.
AddToWorklist(Op.getNode());
CommitTargetLoweringOpt(TLO);
return true;
}
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
SDLoc DL(Load);
EVT VT = Load->getValueType(0);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
Trunc.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
deleteAndRecombine(Load);
AddToWorklist(Trunc.getNode());
}
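// In effect, every value use of the old load now goes through a TRUNCATE of
// the promoted (wider) load's value, and the old load's chain result is
// rewired to the promoted load's chain, so both data and chain dependencies
// are preserved.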
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = false;
SDLoc DL(Op);
if (ISD::isUNINDEXEDLoad(Op.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
: LD->getExtensionType();
Replace = true;
return DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
}
unsigned Opc = Op.getOpcode();
switch (Opc) {
default: break;
case ISD::AssertSext:
if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
break;
case ISD::AssertZext:
if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
break;
case ISD::Constant: {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, DL, PVT, Op);
}
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
return SDValue();
return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
return SDValue();
EVT OldVT = Op.getValueType();
SDLoc DL(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
return SDValue();
AddToWorklist(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
DAG.getValueType(OldVT));
}
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
EVT OldVT = Op.getValueType();
SDLoc DL(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
return SDValue();
AddToWorklist(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
SDLoc DL(Op);
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
// We are always replacing N0/N1's use in N and only need additional
// replacements if there are additional uses.
// Note: We are checking uses of the *nodes* (SDNode) rather than values
// (SDValue) here because the node may reference multiple values
// (for example, the chain value of a load node).
Replace0 &= !N0->hasOneUse();
Replace1 &= (N0 != N1) && !N1->hasOneUse();
// Combine Op here so it is preserved past replacements.
CombineTo(Op.getNode(), RV);
// If operands have a use ordering, make sure we deal with
// predecessor first.
if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
std::swap(N0, N1);
std::swap(NN0, NN1);
}
if (Replace0) {
AddToWorklist(NN0.getNode());
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
}
if (Replace1) {
AddToWorklist(NN1.getNode());
ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
}
return Op;
}
return SDValue();
}
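// Illustration (using the x86 example from the comments): an i16 ADD the
// target marks as undesirable is rebuilt roughly as
// (trunc (add (ext a), (ext b))) in i32, and any operand that was promoted by
// widening a load is patched up via ReplaceLoadWithPromotedLoad.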
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
bool Replace = false;
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
if (Opc == ISD::SRA)
N0 = SExtPromoteOperand(N0, PVT);
else if (Opc == ISD::SRL)
N0 = ZExtPromoteOperand(N0, PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
if (!N0.getNode())
return SDValue();
SDLoc DL(Op);
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
// Deal with Op being deleted.
if (Op && Op.getOpcode() != ISD::DELETED_NODE)
return RV;
}
return SDValue();
}
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
}
return SDValue();
}
bool DAGCombiner::PromoteLoad(SDValue Op) {
if (!LegalOperations)
return false;
if (!ISD::isUNINDEXEDLoad(Op.getNode()))
return false;
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return false;
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return false;
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
SDLoc DL(Op);
SDNode *N = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
: LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
deleteAndRecombine(N);
AddToWorklist(Result.getNode());
return true;
}
return false;
}
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
if (!N->use_empty())
return false;
SmallSetVector<SDNode *, 16> Nodes;
Nodes.insert(N);
do {
N = Nodes.pop_back_val();
if (!N)
continue;
if (N->use_empty()) {
for (const SDValue &ChildN : N->op_values())
Nodes.insert(ChildN.getNode());
removeFromWorklist(N);
DAG.DeleteNode(N);
} else {
AddToWorklist(N);
}
} while (!Nodes.empty());
return true;
}
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//
void DAGCombiner::Run(CombineLevel AtLevel) {
// Set the instance variables, so that the various visit routines may use them.
Level = AtLevel;
LegalDAG = Level >= AfterLegalizeDAG;
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
WorklistInserter AddNodes(*this);
// Add all the dag nodes to the worklist.
for (SDNode &Node : DAG.allnodes())
AddToWorklist(&Node);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
// While we have a valid worklist entry node, try to combine it.
while (SDNode *N = getNextWorklistEntry()) {
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
// reduced number of uses, allowing other xforms.
if (recursivelyDeleteUnusedNodes(N))
continue;
WorklistRemover DeadNodes(*this);
// If this combine is running after legalizing the DAG, re-legalize any
// nodes pulled off the worklist.
if (LegalDAG) {
SmallSetVector<SDNode *, 16> UpdatedNodes;
bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
for (SDNode *LN : UpdatedNodes)
AddToWorklistWithUsers(LN);
if (!NIsValid)
continue;
}
LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
// Add any operands of the new node which have not yet been combined to the
// worklist as well. Because the worklist uniques things already, this
// won't repeatedly process the same operand.
CombinedNodes.insert(N);
for (const SDValue &ChildN : N->op_values())
if (!CombinedNodes.count(ChildN.getNode()))
AddToWorklist(ChildN.getNode());
SDValue RV = combine(N);
if (!RV.getNode())
continue;
++NodesCombined;
// If we get back the same node we passed in, rather than a new node or
// zero, we know that the node must have defined multiple values and
// CombineTo was used. Since CombineTo takes care of the worklist
// mechanics for us, we have no work to do in this case.
if (RV.getNode() == N)
continue;
assert(N->getOpcode() != ISD::DELETED_NODE &&
RV.getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
DAG.ReplaceAllUsesWith(N, &RV);
}
// Push the new node and any users onto the worklist. Omit this if the
// new node is the EntryToken (e.g. if a store managed to get optimized
// out), because re-visiting the EntryToken and its users will not uncover
// any additional opportunities, but there may be a large number of such
// users, potentially causing compile time explosion.
if (RV.getOpcode() != ISD::EntryToken) {
AddToWorklist(RV.getNode());
AddUsersToWorklist(RV.getNode());
}
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node. This will also take care of adding any
// operands which have lost a user to the worklist.
recursivelyDeleteUnusedNodes(N);
}
// If the root changed (e.g. it was a dead load), update the root.
DAG.setRoot(Dummy.getValue());
DAG.RemoveDeadNodes();
}
SDValue DAGCombiner::visit(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::TokenFactor: return visitTokenFactor(N);
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::SADDSAT:
case ISD::UADDSAT: return visitADDSAT(N);
case ISD::SSUBSAT:
case ISD::USUBSAT: return visitSUBSAT(N);
case ISD::ADDC: return visitADDC(N);
case ISD::SADDO:
case ISD::UADDO: return visitADDO(N);
case ISD::SUBC: return visitSUBC(N);
case ISD::SSUBO:
case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::ADDCARRY: return visitADDCARRY(N);
case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::SUBCARRY: return visitSUBCARRY(N);
case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT: return visitMULFIX(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
case ISD::SREM:
case ISD::UREM: return visitREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO:
case ISD::UMULO: return visitMULO(N);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX: return visitIMINMAX(N);
case ISD::AND: return visitAND(N);
case ISD::OR: return visitOR(N);
case ISD::XOR: return visitXOR(N);
case ISD::SHL: return visitSHL(N);
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
case ISD::FSHL:
case ISD::FSHR: return visitFunnelShift(N);
case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
case ISD::CTLZ: return visitCTLZ(N);
case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
case ISD::SETCCCARRY: return visitSETCCCARRY(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::AssertSext:
case ISD::AssertZext: return visitAssertExt(N);
case ISD::AssertAlign: return visitAssertAlign(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
case ISD::FPOW: return visitFPOW(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
case ISD::FP_ROUND: return visitFP_ROUND(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
case ISD::FFLOOR: return visitFFLOOR(N);
case ISD::FMINNUM: return visitFMINNUM(N);
case ISD::FMAXNUM: return visitFMAXNUM(N);
case ISD::FMINIMUM: return visitFMINIMUM(N);
case ISD::FMAXIMUM: return visitFMAXIMUM(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
case ISD::STORE: return visitSTORE(N);
case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
case ISD::MGATHER: return visitMGATHER(N);
case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
case ISD::LIFETIME_END: return visitLIFETIME_END(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
case ISD::FREEZE: return visitFREEZE(N);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
}
return SDValue();
}
SDValue DAGCombiner::combine(SDNode *N) {
SDValue RV;
if (!DisableGenericCombines)
RV = visit(N);
// If nothing happened, try a target-specific DAG combine.
if (!RV.getNode()) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned NULL!");
if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
// Expose the DAG combiner to the target combiner impls.
TargetLowering::DAGCombinerInfo
DagCombineInfo(DAG, Level, false, this);
RV = TLI.PerformDAGCombine(N, DagCombineInfo);
}
}
// If still nothing happened, try promoting the operation.
if (!RV.getNode()) {
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
RV = PromoteIntBinOp(SDValue(N, 0));
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
RV = PromoteIntShiftOp(SDValue(N, 0));
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
RV = PromoteExtend(SDValue(N, 0));
break;
case ISD::LOAD:
if (PromoteLoad(SDValue(N, 0)))
RV = SDValue(N, 0);
break;
}
}
// If N is a commutative binary node, try to eliminate it if the commuted
// version is already present in the DAG.
if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Constant operands are canonicalized to RHS.
if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
SDValue Ops[] = {N1, N0};
SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
N->getFlags());
if (CSENode)
return SDValue(CSENode, 0);
}
}
return RV;
}
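// The final block above acts as a cheap CSE for commutative ops: when visiting
// e.g. (or x, y) while an identical (or y, x) with the same flags already
// exists in the DAG, the existing node is returned instead of keeping both.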
/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
if (unsigned NumOps = N->getNumOperands()) {
if (N->getOperand(0).getValueType() == MVT::Other)
return N->getOperand(0);
if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
return N->getOperand(NumOps-1);
for (unsigned i = 1; i < NumOps-1; ++i)
if (N->getOperand(i).getValueType() == MVT::Other)
return N->getOperand(i);
}
return SDValue();
}
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// If N has two operands, where one has an input chain equal to the other,
// the 'other' chain is redundant.
if (N->getNumOperands() == 2) {
if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
return N->getOperand(0);
if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
return N->getOperand(1);
}
// Don't simplify token factors if optnone.
if (OptLevel == CodeGenOpt::None)
return SDValue();
// Don't simplify the token factor if the node itself has too many operands.
if (N->getNumOperands() > TokenFactorInlineLimit)
return SDValue();
// If the sole user is a token factor, we should make sure we have a
// chance to merge them together. This prevents TF chains from inhibiting
// optimizations.
if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
AddToWorklist(*(N->use_begin()));
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
bool Changed = false; // If we should replace this token factor.
// Start out with this token factor.
TFs.push_back(N);
// Iterate through the token factors. The TFs list grows when new token
// factors are encountered.
for (unsigned i = 0; i < TFs.size(); ++i) {
// Limit number of nodes to inline, to avoid quadratic compile times.
// We have to add the outstanding Token Factors to Ops, otherwise we might
// drop Ops from the resulting Token Factors.
if (Ops.size() > TokenFactorInlineLimit) {
for (unsigned j = i; j < TFs.size(); j++)
Ops.emplace_back(TFs[j], 0);
// Drop unprocessed Token Factors from TFs, so we do not add them to the
// combiner worklist later.
TFs.resize(i);
break;
}
SDNode *TF = TFs[i];
// Check each of the operands.
for (const SDValue &Op : TF->op_values()) {
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
// redundant.
Changed = true;
break;
case ISD::TokenFactor:
if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
// Queue up for processing.
TFs.push_back(Op.getNode());
Changed = true;
break;
}
LLVM_FALLTHROUGH;
default:
// Only add if it isn't already in the list.
if (SeenOps.insert(Op.getNode()).second)
Ops.push_back(Op);
else
Changed = true;
break;
}
}
}
// Re-visit inlined Token Factors, to clean them up in case they have been
// removed. Skip the first Token Factor, as this is the current node.
for (unsigned i = 1, e = TFs.size(); i < e; i++)
AddToWorklist(TFs[i]);
// Remove Nodes that are chained to another node in the list. Do so
// by walking up chains breadth-first, stopping when we've seen
// another operand. In general we must climb to the EntryNode, but we can exit
// early if we find all remaining work is associated with just one operand as
// no further pruning is possible.
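// For example, if one of the Ops, L, is reached while walking up the chain of
// another Op, S, then S already orders after L, so L is redundant and can be
// dropped from the replacement TokenFactor.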
// List of nodes to search through and original Ops from which they originate.
SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
SmallPtrSet<SDNode *, 16> SeenChains;
bool DidPruneOps = false;
unsigned NumLeftToConsider = 0;
for (const SDValue &Op : Ops) {
Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
OpWorkCount.push_back(1);
}
auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
// If this is an Op, we can remove the op from the list. Re-mark any
// search associated with it as being from the current OpNumber.
if (SeenOps.contains(Op)) {
Changed = true;
DidPruneOps = true;
unsigned OrigOpNumber = 0;
while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
OrigOpNumber++;
assert((OrigOpNumber != Ops.size()) &&
"expected to find TokenFactor Operand");
// Re-mark worklist from OrigOpNumber to OpNumber
for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
if (Worklist[i].second == OrigOpNumber) {
Worklist[i].second = OpNumber;
}
}
OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
OpWorkCount[OrigOpNumber] = 0;
NumLeftToConsider--;
}
// Add if it's a new chain
if (SeenChains.insert(Op).second) {
OpWorkCount[OpNumber]++;
Worklist.push_back(std::make_pair(Op, OpNumber));
}
};
for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
// We need to consider at least 2 Ops to be able to prune anything.
if (NumLeftToConsider <= 1)
break;
auto CurNode = Worklist[i].first;
auto CurOpNumber = Worklist[i].second;
assert((OpWorkCount[CurOpNumber] > 0) &&
"Node should not appear in worklist");
switch (CurNode->getOpcode()) {
case ISD::EntryToken:
// Hitting EntryToken is the only way for the search to terminate without
// hitting another operand's search. Prevent us from marking this operand
// considered.
NumLeftToConsider++;
break;
case ISD::TokenFactor:
for (const SDValue &Op : CurNode->op_values())
AddToWorklist(i, Op.getNode(), CurOpNumber);
break;
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
case ISD::CopyFromReg:
case ISD::CopyToReg:
AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
break;
default:
if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
break;
}
OpWorkCount[CurOpNumber]--;
if (OpWorkCount[CurOpNumber] == 0)
NumLeftToConsider--;
}
// If we've changed things around then replace token factor.
if (Changed) {
SDValue Result;
if (Ops.empty()) {
// The entry token is the only possible outcome.
Result = DAG.getEntryNode();
} else {
if (DidPruneOps) {
SmallVector<SDValue, 8> PrunedOps;
// Keep only the Ops that were not reached while walking up another Op's
// chain; the others are already ordered by a remaining Op.
for (const SDValue &Op : Ops) {
if (SeenChains.count(Op.getNode()) == 0)
PrunedOps.push_back(Op);
}
Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
} else {
Result = DAG.getTokenFactor(SDLoc(N), Ops);
}
}
return Result;
}
return SDValue();
}
/// MERGE_VALUES can always be eliminated.
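/// Each result value i of the node is simply replaced with its corresponding
/// operand i, so the MERGE_VALUES node itself can always be deleted.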
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
WorklistRemover DeadNodes(*this);
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
// First add the users of this node to the work list so that they
// can be tried again once they have new operands.
AddUsersToWorklist(N);
do {
// Do as a single replacement to avoid rewalking use lists.
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
Ops.push_back(N->getOperand(i));
DAG.ReplaceAllUsesWith(N, Ops.data());
} while (!N->use_empty());
deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer; otherwise return nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
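/// For example, if N is (add BasePtr, 16) and 'Use' is a store addressed by N,
/// this asks the target whether a [reg + 16] addressing mode is legal for the
/// store's memory VT and address space.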
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
const TargetLowering &TLI) {
EVT VT;
unsigned AS;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
VT = LD->getMemoryVT();
AS = LD->getAddressSpace();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
VT = ST->getMemoryVT();
AS = ST->getAddressSpace();
} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
VT = LD->getMemoryVT();
AS = LD->getAddressSpace();
} else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
VT = ST->getMemoryVT();
AS = ST->getAddressSpace();
} else
return false;
TargetLowering::AddrMode AM;
if (N->getOpcode() == ISD::ADD) {
AM.HasBaseReg = true;
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
AM.BaseOffs = Offset->getSExtValue();
else
// [reg +/- reg]
AM.Scale = 1;
} else if (N->getOpcode() == ISD::SUB) {
AM.HasBaseReg = true;
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
AM.BaseOffs = -Offset->getSExtValue();
else
// [reg +/- reg]
AM.Scale = 1;
} else
return false;
return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
VT.getTypeForEVT(*DAG.getContext()), AS);
}
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
"Unexpected binary operator");
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
// TODO: Handle ISD::SELECT_CC.
unsigned SelOpNo = 0;
SDValue Sel = BO->getOperand(0);
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
SelOpNo = 1;
Sel = BO->getOperand(1);
}
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
return SDValue();
SDValue CT = Sel.getOperand(1);
if (!isConstantOrConstantVector(CT, true) &&
!DAG.isConstantFPBuildVectorOrConstantFP(CT))
return SDValue();
SDValue CF = Sel.getOperand(2);
if (!isConstantOrConstantVector(CF, true) &&
!DAG.isConstantFPBuildVectorOrConstantFP(CF))
return SDValue();
// Bail out if any constants are opaque because we can't constant fold those.
// The exception is "and" and "or" with either 0 or -1, in which case we can
// propagate non-constant operands into the select. I.e.:
// and (select Cond, 0, -1), X --> select Cond, 0, X
// or X, (select Cond, -1, 0) --> select Cond, -1, X
auto BinOpcode = BO->getOpcode();
bool CanFoldNonConst =
(BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
(isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
(isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!CanFoldNonConst &&
!isConstantOrConstantVector(CBO, true) &&
!DAG.isConstantFPBuildVectorOrConstantFP(CBO))
return SDValue();
EVT VT = BO->getValueType(0);
// We have a select-of-constants followed by a binary operator with a
// constant. Eliminate the binop by pulling the constant math into the select.
// Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
SDLoc DL(Sel);
SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
: DAG.getNode(BinOpcode, DL, VT, CT, CBO);
if (!CanFoldNonConst && !NewCT.isUndef() &&
!isConstantOrConstantVector(NewCT, true) &&
!DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
return SDValue();
SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
: DAG.getNode(BinOpcode, DL, VT, CF, CBO);
if (!CanFoldNonConst && !NewCF.isUndef() &&
!isConstantOrConstantVector(NewCF, true) &&
!DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
SelectOp->setFlags(BO->getFlags());
return SelectOp;
}
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Expecting add or sub");
// Match a constant operand and a zext operand for the math instruction:
// add Z, C
// sub C, Z
bool IsAdd = N->getOpcode() == ISD::ADD;
SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
auto *CN = dyn_cast<ConstantSDNode>(C);
if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
// Match the zext operand as a setcc of a boolean.
if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
Z.getOperand(0).getValueType() != MVT::i1)
return SDValue();
// Match the compare as: setcc (X & 1), 0, eq.
SDValue SetCC = Z.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
SetCC.getOperand(0).getOpcode() != ISD::AND ||
!isOneConstant(SetCC.getOperand(0).getOperand(1)))
return SDValue();
// We are adding/subtracting a constant and an inverted low bit. Turn that
// into a subtract/add of the low bit with incremented/decremented constant:
// add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
// sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
EVT VT = C.getValueType();
SDLoc DL(N);
SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Expecting add or sub");
// We need a constant operand for the add/sub, and the other operand is a
// logical shift right: add (srl), C or sub C, (srl).
bool IsAdd = N->getOpcode() == ISD::ADD;
SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
ShiftOp.getOpcode() != ISD::SRL)
return SDValue();
// The shift must be of a 'not' value.
SDValue Not = ShiftOp.getOperand(0);
if (!Not.hasOneUse() || !isBitwiseNot(Not))
return SDValue();
// The shift must be moving the sign bit to the least-significant-bit.
EVT VT = ShiftOp.getValueType();
SDValue ShAmt = ShiftOp.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
return SDValue();
// Eliminate the 'not' by adjusting the shift and add/sub constant:
// add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
// sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
SDLoc DL(N);
auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
if (SDValue NewC =
DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
{ConstantOp, DAG.getConstant(1, DL, VT)}))
return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
return SDValue();
}
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
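/// For example, (or x, c) computes the same value as (add x, c) when x and c
/// share no set bits, so the ADD folds below remain valid for such nodes.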
SDValue DAGCombiner::visitADDLike(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (add x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
}
// fold (add x, undef) -> undef
if (N0.isUndef())
return N0;
if (N1.isUndef())
return N1;
if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// canonicalize constant to RHS
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
// fold (add c1, c2) -> c1+c2
return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
}
// fold (add x, 0) -> x
if (isNullConstant(N1))
return N0;
if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
// fold ((A-c1)+c2) -> (A+(c2-c1))
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
SDValue Sub =
DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
assert(Sub && "Constant folding failed");
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
}
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
SDValue Add =
DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
assert(Add && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
}
// add (sext i1 X), 1 -> zext (not i1 X)
// We don't transform this pattern:
// add (zext i1 X), -1 -> sext (not i1 X)
// because most (?) targets generate better code for the zext form.
if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
isOneOrOneSplat(N1)) {
SDValue X = N0.getOperand(0);
if ((!LegalOperations ||
(TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
X.getScalarValueSizeInBits() == 1) {
SDValue Not = DAG.getNOT(DL, X, X.getValueType());
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
}
}
// Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
// equivalent to (add x, c0).
if (N0.getOpcode() == ISD::OR &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
{N1, N0.getOperand(1)}))
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
}
}
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// reassociate add
if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
return RADD;
// Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
// equivalent to (add x, c).
auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
return DAG.getNode(ISD::ADD, DL, VT,
DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
N0.getOperand(1));
}
return SDValue();
};
if (SDValue Add = ReassociateAddOr(N0, N1))
return Add;
if (SDValue Add = ReassociateAddOr(N1, N0))
return Add;
}
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
return N1.getOperand(0);
// fold ((B-A)+A) -> B
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
// fold ((A-B)+(C-A)) -> (C-B)
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
N0.getOperand(0) == N1.getOperand(1))
return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N0.getOperand(1));
// fold ((A-B)+(B-C)) -> (A-C)
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
N0.getOperand(1) == N1.getOperand(0))
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
N1.getOperand(1));
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N1.getOperand(1).getOperand(1));
// fold (A+(B-(C+A))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(1))
return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N1.getOperand(1).getOperand(0));
// fold (A+((B-A)+or-C)) to (B+or-C)
if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
N1.getOperand(0).getOpcode() == ISD::SUB &&
N0 == N1.getOperand(0).getOperand(1))
return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
N1.getOperand(1));
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
// fold (add (umax X, C), -C) --> (usubsat X, C)
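// This is valid because umax(X, C) - C is X - C when X >= C and 0 otherwise,
// which is exactly the definition of usubsat(X, C).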
if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
return (!Max && !Op) ||
(Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
};
if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
/*AllowUndefs*/ true))
return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
N0.getOperand(1));
}
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
if (isOneOrOneSplat(N1)) {
// fold (add (xor a, -1), 1) -> (sub 0, a)
if (isBitwiseNot(N0))
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
N0.getOperand(0));
// fold (add (add (xor a, -1), b), 1) -> (sub b, a)
if (N0.getOpcode() == ISD::ADD) {
SDValue A, Xor;
if (isBitwiseNot(N0.getOperand(0))) {
A = N0.getOperand(1);
Xor = N0.getOperand(0);
} else if (isBitwiseNot(N0.getOperand(1))) {
A = N0.getOperand(0);
Xor = N0.getOperand(1);
}
if (Xor)
return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
}
// Look for:
// add (add x, y), 1
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
N0.getOpcode() == ISD::ADD) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
}
}
// (x - y) + -1 -> add (xor y, -1), x
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
isAllOnesOrAllOnesSplat(N1)) {
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
}
if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
return Combined;
if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
return Combined;
return SDValue();
}
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
if (SDValue Combined = visitADDLike(N))
return Combined;
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
return V;
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
// Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
const APInt &C0 = N0->getConstantOperandAPInt(0);
const APInt &C1 = N1->getConstantOperandAPInt(0);
return DAG.getVScale(DL, VT, C0 + C1);
}
// fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
if ((N0.getOpcode() == ISD::ADD) &&
(N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
(N1.getOpcode() == ISD::VSCALE)) {
const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
const APInt &VS1 = N1->getConstantOperandAPInt(0);
SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
}
// Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
if (N0.getOpcode() == ISD::STEP_VECTOR &&
N1.getOpcode() == ISD::STEP_VECTOR) {
const APInt &C0 = N0->getConstantOperandAPInt(0);
const APInt &C1 = N1->getConstantOperandAPInt(0);
APInt NewStep = C0 + C1;
return DAG.getStepVector(DL, VT, NewStep);
}
// Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
if ((N0.getOpcode() == ISD::ADD) &&
(N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
(N1.getOpcode() == ISD::STEP_VECTOR)) {
const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
const APInt &SV1 = N1->getConstantOperandAPInt(0);
APInt NewStep = SV0 + SV1;
SDValue SV = DAG.getStepVector(DL, VT, NewStep);
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
}
return SDValue();
}
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
// TODO SimplifyVBinOp
// fold (add_sat x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
}
// fold (add_sat x, undef) -> -1
if (N0.isUndef() || N1.isUndef())
return DAG.getAllOnesConstant(DL, VT);
if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// canonicalize constant to RHS
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(Opcode, DL, VT, N1, N0);
// fold (add_sat c1, c2) -> c3
return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
}
// fold (add_sat x, 0) -> x
if (isNullConstant(N1))
return N0;
// If it cannot overflow, transform into an add.
if (Opcode == ISD::UADDSAT)
if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
return SDValue();
}
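/// Peel away TRUNCATE/ZERO_EXTEND/AND-with-1 wrappers introduced by
/// legalization and return V if it is the carry result (result #1) of a
/// legal-or-custom ADDCARRY/SUBCARRY/UADDO/USUBO node that is either masked to
/// a single bit or uses zero-or-one boolean contents; otherwise return an
/// empty SDValue.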
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
bool Masked = false;
// First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
while (true) {
if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
V = V.getOperand(0);
continue;
}
if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
Masked = true;
V = V.getOperand(0);
continue;
}
break;
}
// If this is not a carry, return.
if (V.getResNo() != 1)
return SDValue();
if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
return SDValue();
EVT VT = V.getNode()->getValueType(0);
if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
return SDValue();
// If the result is masked, then no matter what kind of bool it is we can
// return. If it isn't, then we need to make sure the bool is represented as
// either 0 or 1 and not some other value.
if (Masked ||
TLI.getBooleanContents(V.getValueType()) ==
TargetLoweringBase::ZeroOrOneBooleanContent)
return V;
return SDValue();
}
/// Given the operands of an add/sub operation, see if the 2nd operand is a
/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
/// the opcode and bypass the mask operation.
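/// This works because if the mask's source X is known to be 0 or -1, then
/// (and X, 1) equals -X, so adding the masked value is equivalent to
/// subtracting X (and vice versa for subtraction).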
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
SelectionDAG &DAG, const SDLoc &DL) {
if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
return SDValue();
EVT VT = N0.getValueType();
if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
return SDValue();
// add N0, (and (AssertSext X, i1), 1) --> sub N0, X
// sub N0, (and (AssertSext X, i1), 1) --> add N0, X
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
}
/// Helper for doing combines based on N0 and N1 being added to each other.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
SDNode *LocReference) {
EVT VT = N0.getValueType();
SDLoc DL(LocReference);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N0,
DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
return V;
// Look for:
// add (add x, 1), y
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
}
// Hoist one-use subtraction by non-opaque constant:
// (x - C) + y -> (x + y) - C
// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
}
// Hoist one-use subtraction from non-opaque constant:
// (C - x) + y -> (y - x) + C
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
}
// If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
// rather than 'add 0/-1' (the zext should get folded).
// add (sext i1 Y), X --> sub X, (zext i1 Y)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
// add X, (sextinreg Y i1) -> sub X, (and Y 1)
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
}
}
// (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
N1.getResNo() == 0)
return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
N0, N1.getOperand(0), N1.getOperand(2));
// (add X, Carry) -> (addcarry X, 0, Carry)
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
if (SDValue Carry = getAsCarry(TLI, N1))
return DAG.getNode(ISD::ADDCARRY, DL,
DAG.getVTList(VT, Carry.getValueType()), N0,
DAG.getConstant(0, DL, VT), Carry);
return SDValue();
}
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// canonicalize constant to RHS.
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
DL, MVT::Glue));
// If it cannot overflow, transform into an add.
if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
return SDValue();
}
/**
* Flips a boolean if it is cheaper to compute. If the Force parameter is set,
* then the flip also occurs if computing the inverse is the same cost.
* This function returns an empty SDValue in case it cannot flip the boolean
* without increasing the cost of the computation. If you want to flip a boolean
* no matter what, use DAG.getLogicalNOT.
*/
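// For example, with ZeroOrOneBooleanContent a boolean is stored as 0 or 1, so
// V = (xor X, 1) is the logical negation of X and X itself can be returned as
// the flipped value at no extra cost.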
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
const TargetLowering &TLI,
bool Force) {
if (Force && isa<ConstantSDNode>(V))
return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
if (V.getOpcode() != ISD::XOR)
return SDValue();
ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
if (!Const)
return SDValue();
EVT VT = V.getValueType();
bool IsFlip = false;
switch(TLI.getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
IsFlip = Const->isOne();
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
IsFlip = Const->isAllOnesValue();
break;
case TargetLowering::UndefinedBooleanContent:
IsFlip = (Const->getAPIntValue() & 0x01) == 1;
break;
}
if (IsFlip)
return V.getOperand(0);
if (Force)
return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
return SDValue();
}
SDValue DAGCombiner::visitADDO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
bool IsSigned = (ISD::SADDO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getUNDEF(CarryVT));
// canonicalize constant to RHS.
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
// fold (addo x, 0) -> x + no carry out
if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
if (!IsSigned) {
// If it cannot overflow, transform into an add.
if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
// fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
DAG.getConstant(0, DL, VT), N0.getOperand(0));
return CombineTo(
N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
if (SDValue Combined = visitUADDOLike(N0, N1, N))
return Combined;
if (SDValue Combined = visitUADDOLike(N1, N0, N))
return Combined;
}
return SDValue();
}
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N0.getValueType();
if (VT.isVector())
return SDValue();
// (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
// If Y + 1 cannot overflow.
if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
SDValue Y = N1.getOperand(0);
SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
N1.getOperand(2));
}
// (uaddo X, Carry) -> (addcarry X, 0, Carry)
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
if (SDValue Carry = getAsCarry(TLI, N1))
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
DAG.getConstant(0, SDLoc(N), VT), Carry);
return SDValue();
}
SDValue DAGCombiner::visitADDE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// canonicalize constant to RHS
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
N1, N0, CarryIn);
// fold (adde x, y, false) -> (addc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
return SDValue();
}
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
SDLoc DL(N);
// canonicalize constant to RHS
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
// fold (addcarry x, y, false) -> (uaddo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
}
// fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
if (isNullConstant(N0) && isNullConstant(N1)) {
EVT VT = N0.getValueType();
EVT CarryVT = CarryIn.getValueType();
SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
AddToWorklist(CarryExt.getNode());
return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
DAG.getConstant(1, DL, VT)),
DAG.getConstant(0, DL, CarryVT));
}
if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
return Combined;
if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
return Combined;
return SDValue();
}
SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
SDLoc DL(N);
// canonicalize constant to RHS
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
// fold (saddo_carry x, y, false) -> (saddo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
}
return SDValue();
}
/**
* If we are facing some sort of diamond carry propagation pattern, try to
* break it up to generate something like:
* (addcarry X, 0, (addcarry A, B, Z):Carry)
*
* The end result is usually an increase in the number of operations required,
* but because the carry is now linearized, other transforms can kick in and
* optimize the DAG.
*
* Patterns typically look something like
*            (uaddo A, B)
*             /       \
*          Carry      Sum
*            |          \
*            |    (addcarry *, 0, Z)
*            |       /
*             \   Carry
*              |  /
*            (addcarry X, *, *)
*
* But numerous variations exist. Our goal is to identify A, B, X and Z and
* produce a combine with a single path for carry propagation.
*/
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
SDValue X, SDValue Carry0, SDValue Carry1,
SDNode *N) {
if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
return SDValue();
if (Carry1.getOpcode() != ISD::UADDO)
return SDValue();
SDValue Z;
/**
* First look for a suitable Z. It will present itself in the form of
* (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
*/
if (Carry0.getOpcode() == ISD::ADDCARRY &&
isNullConstant(Carry0.getOperand(1))) {
Z = Carry0.getOperand(2);
} else if (Carry0.getOpcode() == ISD::UADDO &&
isOneConstant(Carry0.getOperand(1))) {
EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
} else {
// We couldn't find a suitable Z.
return SDValue();
}
auto cancelDiamond = [&](SDValue A,SDValue B) {
SDLoc DL(N);
SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
Combiner.AddToWorklist(NewY.getNode());
return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
DAG.getConstant(0, DL, X.getValueType()),
NewY.getValue(1));
};
/**
* (uaddo A, B)
* |
* Sum
* |
* (addcarry *, 0, Z)
*/
if (Carry0.getOperand(0) == Carry1.getValue(0)) {
return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
}
/**
* (addcarry A, 0, Z)
* |
* Sum
* |
* (uaddo *, B)
*/
if (Carry1.getOperand(0) == Carry0.getValue(0)) {
return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
}
if (Carry1.getOperand(1) == Carry0.getValue(0)) {
return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
}
return SDValue();
}
// If we are facing some sort of diamond carry/borrow in/out pattern try to
// match patterns like:
//
//          (uaddo A, B)            CarryIn
//            |  \                     |
//            |   \                    |
//    PartialSum   PartialCarryOutX   /
//            |        |             /
//            |    ____|____________/
//            |   /    |
//     (uaddo *, *)    \________
//       |  \                   \
//       |   \                   |
//       |  PartialCarryOutY     |
//       |   \                   |
//       |    \                 /
//  AddCarrySum |         ______/
//              |        /
//    CarryOut = (or *, *)
//
// And generate ADDCARRY (or SUBCARRY) with two result values:
//
// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
//
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
const TargetLowering &TLI, SDValue Carry0,
SDValue Carry1, SDNode *N) {
if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
return SDValue();
unsigned Opcode = Carry0.getOpcode();
if (Opcode != Carry1.getOpcode())
return SDValue();
if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
return SDValue();
// Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
// carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
// the above ASCII art.)
if (Carry1.getOperand(0) != Carry0.getValue(0) &&
Carry1.getOperand(1) != Carry0.getValue(0))
std::swap(Carry0, Carry1);
if (Carry1.getOperand(0) != Carry0.getValue(0) &&
Carry1.getOperand(1) != Carry0.getValue(0))
return SDValue();
// The carry-in value must be on the right-hand side for subtraction.
unsigned CarryInOperandNum =
Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
return SDValue();
SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
return SDValue();
// Verify that the carry/borrow in is plausibly a carry/borrow bit.
// TODO: make getAsCarry() aware of how partial carries are merged.
if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
CarryIn = CarryIn.getOperand(0);
if (CarryIn.getValueType() != MVT::i1)
return SDValue();
SDLoc DL(N);
SDValue Merged =
DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
Carry0.getOperand(1), CarryIn);
// Note that because we have proven that the result of the UADDO/USUBO of A
// and B feeds into the UADDO/USUBO that handles the carry/borrow in, if the
// first UADDO/USUBO overflows, the second one cannot. For example, consider
// 8-bit numbers where 0xFF is the maximum value.
//
// 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
// 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
//
// This is important because it means that OR and XOR can be used to merge
// carry flags; and that AND can return a constant zero.
//
// TODO: match other operations that can merge flags (ADD, etc)
DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
if (N->getOpcode() == ISD::AND)
return DAG.getConstant(0, DL, MVT::i1);
return Merged.getValue(1);
}
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
// fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
if (isBitwiseNot(N0))
if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
SDLoc DL(N);
SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
N0.getOperand(0), NotC);
return CombineTo(
N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
// Iff the flag result is dead:
// (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
// Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
// or the dependency between the instructions.
if ((N0.getOpcode() == ISD::ADD ||
(N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
N0.getValue(1) != CarryIn)) &&
isNullConstant(N1) && !N->hasAnyUseOfValue(1))
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
N0.getOperand(0), N0.getOperand(1), CarryIn);
/**
* When one of the addcarry arguments is itself a carry, we may be facing
* a diamond carry propagation. In that case we try to transform the DAG
* to ensure linear carry propagation if that is possible.
*/
if (auto Y = getAsCarry(TLI, N1)) {
// Because both are carries, Y and Z can be swapped.
if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
return R;
if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
return R;
}
return SDValue();
}
// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
// clamp/truncation if necessary.
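// For example, with SrcVT = i32 and DstVT = i16, if the upper 16 bits of LHS
// are known to be zero this emits
// (usubsat (trunc LHS), (trunc (umin RHS, 0xFFFF))).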
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &DL) {
assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
"Illegal truncation");
if (DstVT == SrcVT)
return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
// If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
// clamping RHS.
APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
DstVT.getScalarSizeInBits());
if (!DAG.MaskedValueIsZero(LHS, UpperBits))
return SDValue();
SDValue SatLimit =
DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
DstVT.getScalarSizeInBits()),
DL, SrcVT);
RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
}
// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
// usubsat(a,b), optionally as a truncated type.
SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
if (N->getOpcode() != ISD::SUB ||
!(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
return SDValue();
EVT SubVT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// Try to find umax(a,b) - b or a - umin(a,b) patterns
// that may be converted to usubsat(a,b).
if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
SDValue MaxLHS = Op0.getOperand(0);
SDValue MaxRHS = Op0.getOperand(1);
if (MaxLHS == Op1)
return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
if (MaxRHS == Op1)
return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
}
if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
SDValue MinLHS = Op1.getOperand(0);
SDValue MinRHS = Op1.getOperand(1);
if (MinLHS == Op0)
return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
if (MinRHS == Op0)
return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
}
// sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
if (Op1.getOpcode() == ISD::TRUNCATE &&
Op1.getOperand(0).getOpcode() == ISD::UMIN &&
Op1.getOperand(0).hasOneUse()) {
SDValue MinLHS = Op1.getOperand(0).getOperand(0);
SDValue MinRHS = Op1.getOperand(0).getOperand(1);
if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
DAG, SDLoc(N));
if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
DAG, SDLoc(N));
}
return SDValue();
}
// Since it may not be valid to emit a fold to zero for vector initializers,
// check if we can before folding.
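// Specifically, for vector types the zero constant is only emitted if we are
// before legalization or BUILD_VECTOR is legal for VT; otherwise an empty
// SDValue is returned.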
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
SelectionDAG &DAG, bool LegalOperations) {
if (!VT.isVector())
return DAG.getConstant(0, DL, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
return DAG.getConstant(0, DL, VT);
return SDValue();
}
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
}
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c3
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
// fold (sub x, c) -> (add x, -c)
if (N1C) {
return DAG.getNode(ISD::ADD, DL, VT, N0,
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
if (isNullOrNullSplat(N0)) {
unsigned BitWidth = VT.getScalarSizeInBits();
// Right-shifting everything out but the sign bit followed by negation is
// the same as flipping arithmetic/logical shift type without the negation:
// -(X >>u 31) -> (X >>s 31)
// -(X >>s 31) -> (X >>u 31)
if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
}
}
// 0 - X --> 0 if the sub is NUW.
if (N->getFlags().hasNoUnsignedWrap())
return N0;
if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
// N1 is either 0 or the minimum signed value. If the sub is NSW, then
// N1 must be 0 because negating the minimum signed value is undefined.
if (N->getFlags().hasNoSignedWrap())
return N0;
// 0 - X --> X if X is 0 or the minimum signed value.
return N1;
}
// Convert 0 - abs(x).
SDValue Result;
if (N1->getOpcode() == ISD::ABS &&
!TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
TLI.expandABS(N1.getNode(), Result, DAG, true))
return Result;
// Fold neg(splat(neg(x))) -> splat(x)
if (VT.isVector()) {
SDValue N1S = DAG.getSplatValue(N1, true);
if (N1S && N1S.getOpcode() == ISD::SUB &&
isNullConstant(N1S.getOperand(0))) {
if (VT.isScalableVector())
return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
}
}
}
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
if (isAllOnesOrAllOnesSplat(N0))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold (A - (0-B)) -> A+B
if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
return N1.getOperand(1);
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold (A+C1)-C2 -> A+(C1-C2)
if (N0.getOpcode() == ISD::ADD &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
SDValue NewC =
DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
}
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD) {
SDValue N11 = N1.getOperand(1);
if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
}
}
// fold (A-C1)-C2 -> A-(C1+C2)
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
SDValue NewC =
DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
}
// fold (c1-A)-c2 -> (c1-c2)-A
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
SDValue NewC =
DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
N0.getOperand(1).getOpcode() == ISD::ADD) &&
N0.getOperand(1).getOperand(0) == N1)
return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(1));
// fold ((A+(C+B))-B) -> A+C
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(0));
// fold ((A-(B-C))-C) -> A-B
if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(0));
// fold (A-(B-C)) -> A+(C-B)
if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
return DAG.getNode(ISD::ADD, DL, VT, N0,
DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
N1.getOperand(0)));
// A - (A & B) -> A & (~B)
if (N1.getOpcode() == ISD::AND) {
SDValue A = N1.getOperand(0);
SDValue B = N1.getOperand(1);
if (A != N0)
std::swap(A, B);
if (A == N0 &&
(N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
SDValue InvB =
DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::AND, DL, VT, A, InvB);
}
}
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1));
return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
}
if (N1.getOperand(1).getOpcode() == ISD::SUB &&
isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
N1.getOperand(0),
N1.getOperand(1).getOperand(1));
return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
}
}
// If either operand of a sub is undef, the result is undef
if (N0.isUndef())
return N0;
if (N1.isUndef())
return N1;
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
return V;
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
return V;
if (SDValue V = foldSubToUSubSat(VT, N))
return V;
// (x - y) - 1 -> add (xor y, -1), x
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
}
// Look for:
// sub y, (xor x, -1)
// And if the target does not like this form then turn into:
// add (add x, y), 1
if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
}
// Hoist one-use addition by non-opaque constant:
// (x + C) - y -> (x - y) + C
if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
}
// y - (x + C) -> (y - x) - C
if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
}
// (x - C) - y -> (x - y) - C
// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
}
// (C - x) - y -> C - (x + y)
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
}
// If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
// rather than 'sub 0/1' (the sext should get folded).
// sub X, (zext i1 Y) --> add X, (sext i1 Y)
if (N1.getOpcode() == ISD::ZERO_EXTEND &&
N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
TLI.getBooleanContents(VT) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
}
// fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
SDValue S0 = N1.getOperand(0);
if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
}
}
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
// fold (sub Sym, c) -> Sym-c
if (N1C && GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
GA->getOffset() -
(uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
DL, VT);
}
// sub X, (sextinreg Y i1) -> add X, (and Y 1)
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
}
}
// canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
if (N1.getOpcode() == ISD::VSCALE) {
const APInt &IntVal = N1.getConstantOperandAPInt(0);
return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
}
// canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
APInt NewStep = -N1.getConstantOperandAPInt(0);
return DAG.getNode(ISD::ADD, DL, VT, N0,
DAG.getStepVector(DL, VT, NewStep));
}
// Prefer an add for more folding potential and possibly better codegen:
// sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
SDValue ShAmt = N1.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
if (ShAmtC &&
ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
}
}
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
// (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
if (SDValue Carry = getAsCarry(TLI, N0)) {
SDValue X = N1;
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
return DAG.getNode(ISD::ADDCARRY, DL,
DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
Carry);
}
}
return SDValue();
}
SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
// TODO SimplifyVBinOp
// fold (sub_sat x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
}
// fold (sub_sat x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
// fold (sub_sat x, x) -> 0
if (N0 == N1)
return DAG.getConstant(0, DL, VT);
// fold (sub_sat c1, c2) -> c3
if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
return C;
// fold (sub_sat x, 0) -> x
if (isNullConstant(N1))
return N0;
return SDValue();
}
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// If the flag result is dead, turn this into an SUB.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, 0) -> x + no borrow
if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (isAllOnesConstant(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
return SDValue();
}
SDValue DAGCombiner::visitSUBO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
bool IsSigned = (ISD::SSUBO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
// If the flag result is dead, turn this into an SUB.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
DAG.getUNDEF(CarryVT));
// fold (subo x, x) -> 0 + no borrow
if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
DAG.getConstant(0, DL, CarryVT));
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
// fold (subo x, c) -> (addo x, -c)
if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
// fold (subo x, 0) -> x + no borrow
if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
DAG.getConstant(0, DL, CarryVT));
return SDValue();
}
SDValue DAGCombiner::visitSUBE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// fold (sube x, y, false) -> (subc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
return SDValue();
}
SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// fold (subcarry x, y, false) -> (usubo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
}
return SDValue();
}
SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// fold (ssubo_carry x, y, false) -> (ssubo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
}
return SDValue();
}
// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// UMULFIXSAT here.
SDValue DAGCombiner::visitMULFIX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue Scale = N->getOperand(2);
EVT VT = N0.getValueType();
// fold (mulfix x, undef, scale) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
// Canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
// fold (mulfix x, 0, scale) -> 0
if (isNullConstant(N1))
return DAG.getConstant(0, SDLoc(N), VT);
return SDValue();
}
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
// fold (mul x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
APInt ConstValue1;
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
assert((!N1IsConst ||
ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
"Splat APInt should be element width");
} else {
N1IsConst = isa<ConstantSDNode>(N1);
if (N1IsConst) {
ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
}
}
// fold (mul c1, c2) -> c1*c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
return C;
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1.isNullValue())
return N1;
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1.isOneValue())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue()) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
}
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) &&
(!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
SDLoc DL(N);
SDValue LogBase2 = BuildLogBase2(N1, DL);
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT),
DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(Log2Val, DL,
getShiftAmountTy(N0.getValueType()))));
}
// Try to transform:
// (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
// mul x, (2^N + 1) --> add (shl x, N), x
// mul x, (2^N - 1) --> sub (shl x, N), x
// Examples: x * 33 --> (x << 5) + x
// x * 15 --> (x << 4) - x
// x * -33 --> -((x << 5) + x)
// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
// (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
// mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
// mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
// Examples: x * 0x8800 --> (x << 15) + (x << 11)
// x * 0xf800 --> (x << 16) - (x << 11)
// x * -0x8800 --> -((x << 15) + (x << 11))
// x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
unsigned MathOp = ISD::DELETED_NODE;
APInt MulC = ConstValue1.abs();
// The constant `2` should be treated as (2^0 + 1).
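// (Otherwise its trailing zero would be stripped first and x * 2 would
// decompose as (sub (shl x, 2), (shl x, 1)) instead of the cheaper
// (add (shl x, 0), x).)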
unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
MulC.lshrInPlace(TZeros);
if ((MulC - 1).isPowerOf2())
MathOp = ISD::ADD;
else if ((MulC + 1).isPowerOf2())
MathOp = ISD::SUB;
if (MathOp != ISD::DELETED_NODE) {
unsigned ShAmt =
MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
ShAmt += TZeros;
assert(ShAmt < VT.getScalarSizeInBits() &&
"multiply-by-constant generated out of bounds shift");
SDLoc DL(N);
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
SDValue R =
TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(TZeros, DL, VT)))
: DAG.getNode(MathOp, DL, VT, Shl, N0);
if (ConstValue1.isNegative())
R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
return R;
}
}
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
if (N0.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
if (isConstantOrConstantVector(C3))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
SDValue Sh(nullptr, 0), Y(nullptr, 0);
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N0.getOperand(1)) &&
N0.getNode()->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N1.getOperand(1)) &&
N1.getNode()->hasOneUse()) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
N0.getOpcode() == ISD::ADD &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
return DAG.getNode(ISD::ADD, SDLoc(N), VT,
DAG.getNode(ISD::MUL, SDLoc(N0), VT,
N0.getOperand(0), N1),
DAG.getNode(ISD::MUL, SDLoc(N1), VT,
N0.getOperand(1), N1));
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(SDLoc(N), VT, C0 * C1);
}
// Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
APInt MulVal;
if (N0.getOpcode() == ISD::STEP_VECTOR)
if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
APInt NewStep = C0 * MulVal;
return DAG.getStepVector(SDLoc(N), VT, NewStep);
}
// Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x, elementwise,
// into and(x, mask).
// We can replace vectors with '0' and '1' factors with a clearing mask.
if (VT.isFixedLengthVector()) {
unsigned NumElts = VT.getVectorNumElements();
SmallBitVector ClearMask;
ClearMask.reserve(NumElts);
auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
if (!V || V->isNullValue()) {
ClearMask.push_back(true);
return true;
}
ClearMask.push_back(false);
return V->isOne();
};
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
SDLoc DL(N);
EVT LegalSVT = N1.getOperand(0).getValueType();
SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
for (unsigned I = 0; I != NumElts; ++I)
if (ClearMask[I])
Mask[I] = Zero;
return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
}
}
// reassociate mul
if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
return SDValue();
}
/// Return true if divmod libcall is available.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
EVT NodeType = Node->getValueType(0);
if (!NodeType.isSimple())
return false;
switch (NodeType.getSimpleVT().SimpleTy) {
default: return false; // No libcall for vector types.
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
}
return TLI.getLibcallName(LC) != nullptr;
}
/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
if (Node->use_empty())
return SDValue(); // This is a dead node, leave it alone.
unsigned Opcode = Node->getOpcode();
bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
// DIVREM can still work on non-legal types if it is lowered to a lib call.
EVT VT = Node->getValueType(0);
if (VT.isVector() || !VT.isInteger())
return SDValue();
if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
return SDValue();
// If DIVREM is going to get expanded into a libcall,
// but there is no libcall available, then don't combine.
if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
!isDivRemLibcallAvailable(Node, isSigned, TLI))
return SDValue();
// If div is legal, it's better to do the normal expansion
unsigned OtherOpcode = 0;
if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
if (TLI.isOperationLegalOrCustom(Opcode, VT))
return SDValue();
} else {
OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
return SDValue();
}
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDValue combined;
for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
User->use_empty())
continue;
// Convert the other matching node(s), too;
// otherwise, the DIVREM may get target-legalized into something
// target-specific that we won't be able to recognize.
unsigned UserOpc = User->getOpcode();
if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
User->getOperand(0) == Op0 &&
User->getOperand(1) == Op1) {
if (!combined) {
if (UserOpc == OtherOpcode) {
SDVTList VTs = DAG.getVTList(VT, VT);
combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
} else if (UserOpc == DivRemOpc) {
combined = SDValue(User, 0);
} else {
assert(UserOpc == Opcode);
continue;
}
}
if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
CombineTo(User, combined);
else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
CombineTo(User, combined.getValue(1));
}
}
return combined;
}
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
unsigned Opc = N->getOpcode();
bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// X / undef -> undef
// X % undef -> undef
// X / 0 -> undef
// X % 0 -> undef
// NOTE: This includes vectors where any divisor element is zero/undef.
if (DAG.isUndef(Opc, {N0, N1}))
return DAG.getUNDEF(VT);
// undef / X -> 0
// undef % X -> 0
if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
// 0 / X -> 0
// 0 % X -> 0
ConstantSDNode *N0C = isConstOrConstSplat(N0);
if (N0C && N0C->isNullValue())
return N0;
// X / X -> 1
// X % X -> 0
if (N0 == N1)
return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
// X / 1 -> X
// X % 1 -> 0
// If this is a boolean op (single-bit element type), we can't have
// division-by-zero or remainder-by-zero, so assume the divisor is 1.
// TODO: Similarly, if we're zero-extending a boolean divisor, then assume
// it's a 1.
if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
return SDValue();
}
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
SDLoc DL(N);
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
return C;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
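// (No other dividend has magnitude >= |MIN_SIGNED|, so the quotient is 0
// unless X == MIN_SIGNED, where it is 1.)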
if (N1C && N1C->getAPIntValue().isMinSignedValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
if (SDValue V = simplifyDivRem(N, DAG))
return V;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
if (SDValue V = visitSDIVLike(N0, N1, N)) {
// If the corresponding remainder node exists, update its users with
// (Dividend - (Quotient * Divisor)).
if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
{ N0, N1 })) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(Mul.getNode());
AddToWorklist(Sub.getNode());
CombineTo(RemNode, Sub);
}
return V;
}
// sdiv, srem -> sdivrem
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
// true. Otherwise, we break the simplification logic in visitREM().
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue DivRem = useDivRem(N))
return DivRem;
return SDValue();
}
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
unsigned BitWidth = VT.getScalarSizeInBits();
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
if (C->isNullValue() || C->isOpaque())
return false;
if (C->getAPIntValue().isPowerOf2())
return true;
if ((-C->getAPIntValue()).isPowerOf2())
return true;
return false;
};
// fold (sdiv X, pow2) -> simple ops after legalize
// FIXME: We check for the exact bit here because the generic lowering gives
// better results in that case. The target-specific lowering should learn how
// to handle exact sdivs efficiently.
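// For example, (sdiv i32 X, 4) becomes the usual round-toward-zero sequence:
//   Sign = sra X, 31; Bias = srl Sign, 30 (3 when X < 0, else 0);
//   Res  = sra (add X, Bias), 2
// with the selects below additionally handling divisors of 1/-1 and negative
// powers of two.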
if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
// Create constants that are functions of the shift amount value.
EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
if (!isConstantOrConstantVector(Inexact))
return SDValue();
// Splat the sign bit into the register
SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
AddToWorklist(Sign.getNode());
// Add (N0 < 0) ? abs(pow2) - 1 : 0 so the shift below rounds toward zero.
SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
AddToWorklist(Srl.getNode());
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
AddToWorklist(Add.getNode());
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
AddToWorklist(Sra.getNode());
// Special case: (sdiv X, 1) -> X
// Special Case: (sdiv X, -1) -> 0-X
SDValue One = DAG.getConstant(1, DL, VT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
// If dividing by a positive value, we're done. Otherwise, the result must
// be negated.
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
// FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
return Res;
}
// If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isConstantOrConstantVector(N1) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
return SDValue();
}
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
SDLoc DL(N);
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
return C;
// fold (udiv X, -1) -> select(X == -1, 1, 0)
if (N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
if (SDValue V = simplifyDivRem(N, DAG))
return V;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
if (SDValue V = visitUDIVLike(N0, N1, N)) {
// If the corresponding remainder node exists, update its users with
// (Dividend - (Quotient * Divisor)).
if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
{ N0, N1 })) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(Mul.getNode());
AddToWorklist(Sub.getNode());
CombineTo(RemNode, Sub);
}
return V;
}
// udiv, urem -> udivrem
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
// true. Otherwise, we break the simplification logic in visitREM().
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue DivRem = useDivRem(N))
return DivRem;
return SDValue();
}
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
SDValue LogBase2 = BuildLogBase2(N1, DL);
AddToWorklist(LogBase2.getNode());
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
AddToWorklist(Trunc.getNode());
return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
}
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
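// e.g. (udiv x, (shl 4, y)) -> (srl x, (add y, 2))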
if (N1.getOpcode() == ISD::SHL) {
SDValue N10 = N1.getOperand(0);
if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N10)) {
SDValue LogBase2 = BuildLogBase2(N10, DL);
AddToWorklist(LogBase2.getNode());
EVT ADDVT = N1.getOperand(1).getValueType();
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
AddToWorklist(Trunc.getNode());
SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
}
}
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isConstantOrConstantVector(N1) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
return SDValue();
}
// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
bool isSigned = (Opcode == ISD::SREM);
SDLoc DL(N);
// fold (rem c1, c2) -> c1%c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// fold (urem X, -1) -> select(X == -1, 0, x)
if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(0, DL, VT), N0);
if (SDValue V = simplifyDivRem(N, DAG))
return V;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
if (isSigned) {
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
if (DAG.isKnownToBeAPowerOfTwo(N1)) {
// fold (urem x, pow2) -> (and x, pow2-1)
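// e.g. (urem x, 8) -> (and x, 7)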
SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
if (N1.getOpcode() == ISD::SHL &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
// Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
// speculative DIV must not cause a DIVREM conversion. We guard against this
// by skipping the simplification if isIntDivCheap(). When div is not cheap,
// combine will not return a DIVREM. Regardless, checking cheapness here
// makes sense since the simplification results in fatter code.
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
SDValue OptimizedDiv =
isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
if (OptimizedDiv.getNode()) {
// If the equivalent Div node also exists, update its users.
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
{ N0, N1 }))
CombineTo(DivNode, OptimizedDiv);
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(OptimizedDiv.getNode());
AddToWorklist(Mul.getNode());
return Sub;
}
}
// srem, urem -> sdivrem, udivrem
if (SDValue DivRem = useDivRem(N))
return DivRem.getValue(1);
return SDValue();
}
SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (VT.isVector()) {
// fold (mulhs x, 0) -> 0
// do not return N0/N1, because undef node may exist.
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, DL, VT);
}
// fold (mulhs c1, c2)
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
return C;
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
// fold (mulhs x, 1) -> (sra x, size(x)-1)
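// (the high half of sext(x) * 1 is just the sign bits of x)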
if (isOneConstant(N1))
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
// fold (mulhs x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
!VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
DAG.getConstant(SimpleSize, DL,
getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
return SDValue();
}
SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (VT.isVector()) {
// fold (mulhu x, 0) -> 0
// do not return N0/N1, because undef node may exist.
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, DL, VT);
}
// fold (mulhu c1, c2)
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
return C;
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
// fold (mulhu x, 1) -> 0
if (isOneConstant(N1))
return DAG.getConstant(0, DL, N0.getValueType());
// fold (mulhu x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
unsigned NumEltBits = VT.getScalarSizeInBits();
SDValue LogBase2 = BuildLogBase2(N1, DL);
SDValue SRLAmt = DAG.getNode(
ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
}
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
!VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
DAG.getConstant(SimpleSize, DL,
getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
return SDValue();
}
/// Perform optimizations common to nodes that compute two values. LoOp and
/// HiOp give the opcodes for the two computations that are being performed.
/// Return the simplified value if a simplification was made, or a null
/// SDValue otherwise.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp) {
// If the high half is not needed, just compute the low half.
bool HiExists = N->hasAnyUseOfValue(1);
if (!HiExists && (!LegalOperations ||
TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
return CombineTo(N, Res, Res);
}
// If the low half is not needed, just compute the high half.
bool LoExists = N->hasAnyUseOfValue(0);
if (!LoExists && (!LegalOperations ||
TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
return CombineTo(N, Res, Res);
}
// If both halves are used, return as it is.
if (LoExists && HiExists)
return SDValue();
// If the two computed results can be simplified separately, separate them.
if (LoExists) {
SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
AddToWorklist(Lo.getNode());
SDValue LoOpt = combine(Lo.getNode());
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
return CombineTo(N, LoOpt, LoOpt);
}
if (HiExists) {
SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
AddToWorklist(Hi.getNode());
SDValue HiOpt = combine(Hi.getNode());
if (HiOpt.getNode() && HiOpt != Hi &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
return CombineTo(N, HiOpt, HiOpt);
}
return SDValue();
}
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
// If the type twice as wide is legal, transform this into a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
DAG.getConstant(SimpleSize, DL,
getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
return CombineTo(N, Lo, Hi);
}
}
return SDValue();
}
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
// (umul_lohi N0, 0) -> (0, 0)
if (isNullConstant(N->getOperand(1))) {
SDValue Zero = DAG.getConstant(0, DL, VT);
return CombineTo(N, Zero, Zero);
}
// (umul_lohi N0, 1) -> (N0, 0)
if (isOneConstant(N->getOperand(1))) {
SDValue Zero = DAG.getConstant(0, DL, VT);
return CombineTo(N, N->getOperand(0), Zero);
}
// If the type twice as wide is legal, transform this into a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
DAG.getConstant(SimpleSize, DL,
getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
return CombineTo(N, Lo, Hi);
}
}
return SDValue();
}
SDValue DAGCombiner::visitMULO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
bool IsSigned = (ISD::SMULO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold operation with constant operands.
// TODO: Move this to FoldConstantArithmetic when it supports nodes with
// multiple results.
if (N0C && N1C) {
bool Overflow;
APInt Result =
IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
: N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
return CombineTo(N, DAG.getConstant(Result, DL, VT),
DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
}
// canonicalize constant to RHS.
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
// fold (mulo x, 0) -> 0 + no carry out
if (isNullOrNullSplat(N1))
return CombineTo(N, DAG.getConstant(0, DL, VT),
DAG.getConstant(0, DL, CarryVT));
// (mulo x, 2) -> (addo x, x)
if (N1C && N1C->getAPIntValue() == 2)
return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
N->getVTList(), N0, N0);
if (IsSigned) {
// A 1 bit SMULO overflows if both inputs are 1.
if (VT.getScalarSizeInBits() == 1) {
SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
return CombineTo(N, And,
DAG.getSetCC(DL, CarryVT, And,
DAG.getConstant(0, DL, VT), ISD::SETNE));
}
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
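// For example, for i32: if each operand is known to fit in 16 signed bits
// (at least 17 sign bits), the product fits in 32 bits and cannot overflow.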
unsigned SignBits = DAG.ComputeNumSignBits(N0);
if (SignBits > 1)
SignBits += DAG.ComputeNumSignBits(N1);
if (SignBits > VT.getScalarSizeInBits() + 1)
return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
} else {
KnownBits N1Known = DAG.computeKnownBits(N1);
KnownBits N0Known = DAG.computeKnownBits(N0);
bool Overflow;
(void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
if (!Overflow)
return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
}
return SDValue();
}
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Opcode = N->getOpcode();
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold operation with constant operands.
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
// If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
if (!TLI.isOperationLegal(Opcode, VT) &&
(N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
unsigned AltOpcode;
switch (Opcode) {
case ISD::SMIN: AltOpcode = ISD::UMIN; break;
case ISD::SMAX: AltOpcode = ISD::UMAX; break;
case ISD::UMIN: AltOpcode = ISD::SMIN; break;
case ISD::UMAX: AltOpcode = ISD::SMAX; break;
default: llvm_unreachable("Unknown MINMAX opcode");
}
if (TLI.isOperationLegal(AltOpcode, VT))
return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
}
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
/// If this is a bitwise logic instruction and both operands have the same
/// opcode, try to sink the other opcode after the logic instruction.
SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned LogicOpcode = N->getOpcode();
unsigned HandOpcode = N0.getOpcode();
assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
LogicOpcode == ISD::XOR) && "Expected logic opcode");
assert(HandOpcode == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
if (N0.getNumOperands() == 0)
return SDValue();
// FIXME: We should check number of uses of the operands to not increase
// the instruction count for all transforms.
// Handle size-changing casts.
SDValue X = N0.getOperand(0);
SDValue Y = N1.getOperand(0);
EVT XVT = X.getValueType();
SDLoc DL(N);
if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
HandOpcode == ISD::SIGN_EXTEND) {
// If both operands have other uses, this transform would create extra
// instructions without eliminating anything.
if (!N0.hasOneUse() && !N1.hasOneUse())
return SDValue();
// We need matching integer source types.
if (XVT != Y.getValueType())
return SDValue();
// Don't create an illegal op during or after legalization. Don't ever
// create an unsupported vector op.
if ((VT.isVector() || LegalOperations) &&
!TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
return SDValue();
// Avoid infinite looping with PromoteIntBinOp.
// TODO: Should we apply desirable/legal constraints to all opcodes?
if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
!TLI.isTypeDesirableForOp(LogicOpcode, XVT))
return SDValue();
// logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
// logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
if (HandOpcode == ISD::TRUNCATE) {
// If both operands have other uses, this transform would create extra
// instructions without eliminating anything.
if (!N0.hasOneUse() && !N1.hasOneUse())
return SDValue();
// We need matching source types.
if (XVT != Y.getValueType())
return SDValue();
// Don't create an illegal op during or after legalization.
if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
return SDValue();
// Be extra careful sinking truncate. If it's free, there's no benefit in
// widening a binop. Also, don't create a logic op on an illegal type.
if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
return SDValue();
if (!TLI.isTypeLegal(XVT))
return SDValue();
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
// For binops SHL/SRL/SRA/AND:
// logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
N0.getOperand(1) == N1.getOperand(1)) {
// If either operand has other uses, this transform is not an improvement.
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
}
// Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
if (HandOpcode == ISD::BSWAP) {
// If either operand has other uses, this transform is not an improvement.
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
// Only perform this optimization up until type legalization, before
// LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
// adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
// we don't want to undo this promotion.
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
Level <= AfterLegalizeTypes) {
// Input types must be integer and the same.
if (XVT.isInteger() && XVT == Y.getValueType() &&
!(VT.isVector() && TLI.isTypeLegal(VT) &&
!XVT.isVector() && !TLI.isTypeLegal(XVT))) {
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
}
// Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
// Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
// If both shuffles use the same mask, and both shuffle within a single
// vector, then it is worthwhile to move the swizzle after the operation.
// The type-legalizer generates this pattern when loading illegal
// vector types from memory. In many cases this allows additional shuffle
// optimizations.
// There are other cases where moving the shuffle after the xor/and/or
// is profitable even if shuffles don't perform a swizzle.
// If both shuffles use the same mask, and both shuffles have the same first
// or second operand, then it might still be profitable to move the shuffle
// after the xor/and/or operation.
if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
assert(X.getValueType() == Y.getValueType() &&
"Inputs to shuffles are not the same type");
// Check that both shuffles use the same mask. The masks are known to be of
// the same length because the result vector type is the same.
// Check also that shuffles have only one use to avoid introducing extra
// instructions.
if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
!SVN0->getMask().equals(SVN1->getMask()))
return SDValue();
// Don't try to fold this node if it requires introducing a
// build vector of all zeros that might be illegal at this stage.
SDValue ShOp = N0.getOperand(1);
if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
N0.getOperand(0), N1.getOperand(0));
return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
}
// Don't try to fold this node if it requires introducing a
// build vector of all zeros that might be illegal at this stage.
ShOp = N0.getOperand(0);
if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
N1.getOperand(1));
return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
}
}
return SDValue();
}
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL) {
SDValue LL, LR, RL, RR, N0CC, N1CC;
if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
!isSetCCEquivalent(N1, RL, RR, N1CC))
return SDValue();
assert(N0.getValueType() == N1.getValueType() &&
"Unexpected operand types for bitwise logic op");
assert(LL.getValueType() == LR.getValueType() &&
RL.getValueType() == RR.getValueType() &&
"Unexpected operand types for setcc");
// If we're here post-legalization or the logic op type is not i1, the logic
// op type must match a setcc result type. Also, all folds require new
// operations on the left and right operands, so those types must match.
EVT VT = N0.getValueType();
EVT OpVT = LL.getValueType();
if (LegalOperations || VT.getScalarType() != MVT::i1)
if (VT != getSetCCResultType(OpVT))
return SDValue();
if (OpVT != RL.getValueType())
return SDValue();
ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
bool IsInteger = OpVT.isInteger();
if (LR == RR && CC0 == CC1 && IsInteger) {
bool IsZero = isNullOrNullSplat(LR);
bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
// All bits clear?
bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
// All sign bits clear?
bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
// Any bits set?
bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
// Any sign bits set?
bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
// (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
// (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
// (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
// (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
AddToWorklist(Or.getNode());
return DAG.getSetCC(DL, VT, Or, LR, CC1);
}
// All bits set?
bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
// All sign bits set?
bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
// Any bits clear?
bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
// Any sign bits clear?
bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
// (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
// (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
// (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
// (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
AddToWorklist(And.getNode());
return DAG.getSetCC(DL, VT, And, LR, CC1);
}
}
// TODO: What is the 'or' equivalent of this fold?
// (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
IsInteger && CC0 == ISD::SETNE &&
((isNullConstant(LR) && isAllOnesConstant(RR)) ||
(isAllOnesConstant(LR) && isNullConstant(RR)))) {
SDValue One = DAG.getConstant(1, DL, OpVT);
SDValue Two = DAG.getConstant(2, DL, OpVT);
SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
AddToWorklist(Add.getNode());
return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
}
// Try more general transforms if the predicates match and the only user of
// the compares is the 'and' or 'or'.
if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
N0.hasOneUse() && N1.hasOneUse()) {
// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
// or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
SDValue Zero = DAG.getConstant(0, DL, OpVT);
return DAG.getSetCC(DL, VT, Or, Zero, CC1);
}
// Turn compare of constants whose difference is 1 bit into add+and+setcc.
// TODO - support non-uniform vector amounts.
if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
// Match a shared variable operand and 2 non-opaque constant operands.
ConstantSDNode *C0 = isConstOrConstSplat(LR);
ConstantSDNode *C1 = isConstOrConstSplat(RR);
if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
const APInt &CMax =
APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
const APInt &CMin =
APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
// The difference of the constants must be a single bit.
if ((CMax - CMin).isPowerOf2()) {
// and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
// setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
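// e.g. and (setne X, 4), (setne X, 6) --> setne (and (sub X, 4), ~2), 0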
SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
SDValue Zero = DAG.getConstant(0, DL, OpVT);
return DAG.getSetCC(DL, VT, And, Zero, CC0);
}
}
}
}
// Canonicalize equivalent operands to LL == RL.
if (LL == RR && LR == RL) {
CC1 = ISD::getSetCCSwappedOperands(CC1);
std::swap(RL, RR);
}
// (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
// (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
if (LL == RL && LR == RR) {
ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
: ISD::getSetCCOrOperation(CC0, CC1, OpVT);
if (NewCC != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
TLI.isOperationLegal(ISD::SETCC, OpVT))))
return DAG.getSetCC(DL, VT, LL, LR, NewCC);
}
return SDValue();
}
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
SDLoc DL(N);
// fold (and x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
return V;
+ // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
- VT.getSizeInBits() <= 64) {
+ VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
// immediate for an add, but it is legal if its top c2 bits are set,
// transform the ADD so the immediate doesn't need to be materialized
// in a register.
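// (This is safe: setting the top c2 bits of the add immediate only changes
// the top c2 bits of the sum, and those are cleared by the AND anyway since
// (lshr y, c2) has zeros there.)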
APInt ADDC = ADDI->getAPIntValue();
APInt SRLC = SRLI->getAPIntValue();
if (ADDC.getMinSignedBits() <= 64 &&
SRLC.ult(VT.getSizeInBits()) &&
!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
SRLC.getZExtValue());
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
ADDC |= Mask;
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
SDLoc DL0(N0);
SDValue NewAdd =
DAG.getNode(ISD::ADD, DL0, VT,
N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
CombineTo(N0.getNode(), NewAdd);
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
}
}
}
}
}
// Reduce bit extract of low half of an integer to the narrower type.
// (and (srl i64:x, K), KMask) ->
// (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
unsigned Size = VT.getSizeInBits();
const APInt &AndMask = CAnd->getAPIntValue();
unsigned ShiftBits = CShift->getZExtValue();
// Bail out, this node will probably disappear anyway.
if (ShiftBits == 0)
return SDValue();
unsigned MaskBits = AndMask.countTrailingOnes();
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
if (AndMask.isMask() &&
// Required bits must not span the two halves of the integer and
// must fit in the half size type.
(ShiftBits + MaskBits <= Size / 2) &&
TLI.isNarrowingProfitable(VT, HalfVT) &&
TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
TLI.isTruncateFree(VT, HalfVT) &&
TLI.isZExtFree(HalfVT, VT)) {
// The isNarrowingProfitable check is to avoid regressions on PPC and
// AArch64, which match a few 64-bit bit insert / bit extract patterns
// on downstream users of this. Those patterns could probably be
// extended to handle extensions mixed in.
SDValue SL(N0);
assert(MaskBits <= Size);
// Extracting the highest bit of the low half.
EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
N0.getOperand(0));
SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
}
}
}
}
return SDValue();
}
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
EVT LoadResultTy, EVT &ExtVT) {
if (!AndC->getAPIntValue().isMask())
return false;
unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT LoadedVT = LoadN->getMemoryVT();
if (ExtVT == LoadedVT &&
(!LegalOperations ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
// ZEXTLOAD will match without needing to change the size of the value being
// loaded.
return true;
}
// Do not change the width of a volatile or atomic load.
if (!LoadN->isSimple())
return false;
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
return false;
if (LegalOperations &&
!TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
return false;
if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
return false;
return true;
}
bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
ISD::LoadExtType ExtType, EVT &MemVT,
unsigned ShAmt) {
if (!LDST)
return false;
// Only allow byte offsets.
if (ShAmt % 8)
return false;
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
if (!MemVT.isRound())
return false;
// Don't change the width of a volatile or atomic load.
if (!LDST->isSimple())
return false;
EVT LdStMemVT = LDST->getMemoryVT();
// Bail out when changing the scalable property, since we can't be sure that
// we're actually narrowing here.
if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
return false;
// Verify that we are actually reducing a load width here.
if (LdStMemVT.bitsLT(MemVT))
return false;
// Ensure that this isn't going to produce an unsupported memory access.
if (ShAmt) {
assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
const unsigned ByteShAmt = ShAmt / 8;
const Align LDSTAlign = LDST->getAlign();
const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
LDST->getAddressSpace(), NarrowAlign,
LDST->getMemOperand()->getFlags()))
return false;
}
// It's not possible to generate a constant of extended or untyped type.
EVT PtrType = LDST->getBasePtr().getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
return false;
if (isa<LoadSDNode>(LDST)) {
LoadSDNode *Load = cast<LoadSDNode>(LDST);
// Don't transform one with multiple uses, this would require adding a new
// load.
if (!SDValue(Load, 0).hasOneUse())
return false;
if (LegalOperations &&
!TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
return false;
// For the transform to be legal, the load must produce only two values
// (the value loaded and the chain). Don't transform a pre-increment
// load, for example, which produces an extra value. Otherwise the
// transformation is not equivalent, and the downstream logic to replace
// uses gets things wrong.
if (Load->getNumValues() > 2)
return false;
// If the load that we're shrinking is an extload and we're not just
// discarding the extension we can't simply shrink the load. Bail.
// TODO: It would be possible to merge the extensions in some cases.
if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
return false;
if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
return false;
} else {
assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
StoreSDNode *Store = cast<StoreSDNode>(LDST);
// Can't write outside the original store
if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
return false;
if (LegalOperations &&
!TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
return false;
}
return true;
}
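/// Recursively walk the operands of N (a tree of AND/OR/XOR nodes, extensions
/// and loads), collecting loads that the mask would allow to be narrowed.
/// OR/XOR nodes whose constant operand is not fully covered by the mask are
/// recorded in NodesWithConsts so the constant can be re-masked later. At
/// most one other node is tolerated; it is returned in NodeToMask so the
/// caller can apply the mask to it explicitly.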
bool DAGCombiner::SearchForAndLoads(SDNode *N,
SmallVectorImpl<LoadSDNode*> &Loads,
SmallPtrSetImpl<SDNode*> &NodesWithConsts,
ConstantSDNode *Mask,
SDNode *&NodeToMask) {
// Recursively search through the operands, looking for loads which can be
// narrowed.
for (SDValue Op : N->op_values()) {
if (Op.getValueType().isVector())
return false;
// Some constants may need fixing up later if they are too large.
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
(Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
NodesWithConsts.insert(N);
continue;
}
if (!Op.hasOneUse())
return false;
switch(Op.getOpcode()) {
case ISD::LOAD: {
auto *Load = cast<LoadSDNode>(Op);
EVT ExtVT;
if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
// ZEXTLOAD is already small enough.
if (Load->getExtensionType() == ISD::ZEXTLOAD &&
ExtVT.bitsGE(Load->getMemoryVT()))
continue;
// Use LE to convert equal sized loads to zext.
if (ExtVT.bitsLE(Load->getMemoryVT()))
Loads.push_back(Load);
continue;
}
return false;
}
case ISD::ZERO_EXTEND:
case ISD::AssertZext: {
unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT VT = Op.getOpcode() == ISD::AssertZext ?
cast<VTSDNode>(Op.getOperand(1))->getVT() :
Op.getOperand(0).getValueType();
// We can accept extending nodes if the mask is wider than or equal in
// width to the original type.
if (ExtVT.bitsGE(VT))
continue;
break;
}
case ISD::OR:
case ISD::XOR:
case ISD::AND:
if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
NodeToMask))
return false;
continue;
}
// Allow one node which will be masked along with any loads found.
if (NodeToMask)
return false;
// Also ensure that the node to be masked only produces one data result.
NodeToMask = Op.getNode();
if (NodeToMask->getNumValues() > 1) {
bool HasValue = false;
for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
if (VT != MVT::Glue && VT != MVT::Other) {
if (HasValue) {
NodeToMask = nullptr;
return false;
}
HasValue = true;
}
}
assert(HasValue && "Node to be masked has no data result?");
}
}
return true;
}
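/// Try to push the AND's mask back up the tree to the leaf loads so that the
/// loads can be narrowed and the AND itself removed. Roughly, e.g.:
///   (and (or (load i32 p), (load i32 q)), 0xff)
/// can become an OR of two narrowed zero-extending loads, at which point the
/// explicit AND is no longer needed.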
bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Mask)
return false;
if (!Mask->getAPIntValue().isMask())
return false;
// No need to do anything if the and directly uses a load.
if (isa<LoadSDNode>(N->getOperand(0)))
return false;
SmallVector<LoadSDNode*, 8> Loads;
SmallPtrSet<SDNode*, 2> NodesWithConsts;
SDNode *FixupNode = nullptr;
if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
if (Loads.size() == 0)
return false;
LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
SDValue MaskOp = N->getOperand(1);
// If it exists, fix up the single node we allow in the tree that needs
// masking.
if (FixupNode) {
LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
FixupNode->getValueType(0),
SDValue(FixupNode, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
if (And.getOpcode() == ISD::AND)
DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
}
// Narrow any constants that need it.
for (auto *LogicN : NodesWithConsts) {
SDValue Op0 = LogicN->getOperand(0);
SDValue Op1 = LogicN->getOperand(1);
if (isa<ConstantSDNode>(Op0))
std::swap(Op0, Op1);
SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
Op1, MaskOp);
DAG.UpdateNodeOperands(LogicN, Op0, And);
}
// Create narrow loads.
for (auto *Load : Loads) {
LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
SDValue(Load, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
if (And.getOpcode() == ISD::AND)
And = SDValue(
DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
SDValue NewLoad = ReduceLoadWidth(And.getNode());
assert(NewLoad &&
"Shouldn't be masking the load if it can't be narrowed");
CombineTo(Load, NewLoad, NewLoad.getValue(1));
}
DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
return true;
}
return false;
}
// Unfold
// x & (-1 'logical shift' y)
// To
// (x 'opposite logical shift' y) 'logical shift' y
// if it is better for performance.
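//
// For example (illustrative only):
//   (and x, (shl -1, y)) --> (shl (srl x, y), y)
//   (and x, (srl -1, y)) --> (srl (shl x, y), y)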
SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
assert(N->getOpcode() == ISD::AND);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Do we actually prefer shifts over mask?
if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
return SDValue();
// Try to match (-1 '[outer] logical shift' y)
unsigned OuterShift;
unsigned InnerShift; // The opposite direction to the OuterShift.
SDValue Y; // Shift amount.
auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
if (!M.hasOneUse())
return false;
OuterShift = M->getOpcode();
if (OuterShift == ISD::SHL)
InnerShift = ISD::SRL;
else if (OuterShift == ISD::SRL)
InnerShift = ISD::SHL;
else
return false;
if (!isAllOnesConstant(M->getOperand(0)))
return false;
Y = M->getOperand(1);
return true;
};
SDValue X;
if (matchMask(N1))
X = N0;
else if (matchMask(N0))
X = N1;
else
return SDValue();
SDLoc DL(N);
EVT VT = N->getValueType(0);
// tmp = x 'opposite logical shift' y
SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
// ret = tmp 'logical shift' y
SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
return T1;
}
/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
/// For a target with a bit test, this is expected to become test + set and save
/// at least 1 instruction.
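/// For example, with C == 3 (illustrative):
///   and (not (srl X, 3)), 1 --> (and X, 8) == 0
/// with the setcc result then zero-extended or truncated to the result type.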
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
// This is probably not worthwhile without a supported type.
EVT VT = And->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(VT))
return SDValue();
// Look through an optional extension and find a 'not'.
// TODO: Should we favor test+set even without the 'not' op?
SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
if (Not.getOpcode() == ISD::ANY_EXTEND)
Not = Not.getOperand(0);
if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
return SDValue();
// Look through an optional truncation. The source operand may not be the same
// type as the original 'and', but that is ok because we are masking off
// everything but the low bit.
SDValue Srl = Not.getOperand(0);
if (Srl.getOpcode() == ISD::TRUNCATE)
Srl = Srl.getOperand(0);
// Match a shift-right by constant.
if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
!isa<ConstantSDNode>(Srl.getOperand(1)))
return SDValue();
// We might have looked through casts that make this transform invalid.
// TODO: If the source type is wider than the result type, do the mask and
// compare in the source type.
const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
unsigned VTBitWidth = VT.getSizeInBits();
if (ShiftAmt.uge(VTBitWidth))
return SDValue();
// Turn this into a bit-test pattern using mask op + setcc:
// and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
SDLoc DL(And);
SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Mask = DAG.getConstant(
APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
return DAG.getZExtOrTrunc(Setcc, DL, VT);
}
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
// x & x --> x
if (N0 == N1)
return N0;
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
return N1;
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
return N0;
// fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
N0.hasOneUse() && N1.hasOneUse()) {
EVT LoadVT = MLoad->getMemoryVT();
EVT ExtVT = VT;
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
// For this AND to be a zero extension of the masked load, the elements
// of the BuildVec must mask the bottom bits of the extended element
// type.
if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
uint64_t ElementSize =
LoadVT.getVectorElementType().getScalarSizeInBits();
if (Splat->getAPIntValue().isMask(ElementSize)) {
return DAG.getMaskedLoad(
ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
ISD::ZEXTLOAD, MLoad->isExpandingLoad());
}
}
}
}
}
// fold (and c1, c2) -> c1&c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
if (isAllOnesConstant(N1))
return N0;
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// reassociate and
if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
// Try to convert a constant mask AND into a shuffle clear mask.
if (VT.isVector())
if (SDValue Shuffle = XformToShuffleWithZero(N))
return Shuffle;
if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
return Combined;
// fold (and (or x, C), D) -> D if (C & D) == D
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
};
if (N0.getOpcode() == ISD::OR &&
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
N0.getValueType(), N0Op0);
// Replace uses of the AND with uses of the Zero extend node.
CombineTo(N, Zext);
// We actually want to replace all uses of the any_extend with the
// zero_extend, to avoid duplicating things. This will later cause this
// AND to be folded.
CombineTo(N0.getNode(), Zext);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
// (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
// already be zero by virtue of the width of the base type of the load.
//
// the 'X' node here can either be nothing or an extract_vector_elt to catch
// more cases.
if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
N0.getOperand(0).getOpcode() == ISD::LOAD &&
N0.getOperand(0).getResNo() == 0) ||
(N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
N0 : N0.getOperand(0) );
// Get the constant (if applicable) the zero'th operand is being ANDed with.
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
APInt Constant = APInt::getNullValue(1);
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
SplatBitSize, HasAnyUndefs);
if (IsSplat) {
// Undef bits can contribute to a possible optimisation if set, so
// set them.
SplatValue |= SplatUndef;
// The splat value may be something like "0x00FFFFFF", which means 0 for
// the first vector value and FF for the rest, repeating. We need a mask
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
// If the splat value has been compressed to a bitlength lower
// than the size of the vector lane, we need to re-expand it to
// the lane size.
if (EltBitWidth > SplatBitSize)
for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
// multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
if ((SplatBitSize % EltBitWidth) == 0) {
Constant = APInt::getAllOnesValue(EltBitWidth);
for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
}
}
// If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
// actually legal and isn't going to get expanded, else this is a false
// optimisation.
bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
Load->getValueType(0),
Load->getMemoryVT());
// Resize the constant to the same size as the original memory access before
// extension. If it is still the AllOnesValue then this AND is completely
// unneeded.
Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
bool B;
switch (Load->getExtensionType()) {
default: B = false; break;
case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
case ISD::ZEXTLOAD:
case ISD::NON_EXTLOAD: B = true; break;
}
if (B && Constant.isAllOnesValue()) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
// Fold the AND away. NewLoad may get replaced immediately.
CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
if (Load->getExtensionType() == ISD::EXTLOAD) {
NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
Load->getValueType(0), SDLoc(Load),
Load->getChain(), Load->getBasePtr(),
Load->getOffset(), Load->getMemoryVT(),
Load->getMemOperand());
// Replace uses of the EXTLOAD with the new ZEXTLOAD.
if (Load->getNumValues() == 3) {
// PRE/POST_INC loads have 3 values.
SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
NewLoad.getValue(2) };
CombineTo(Load, To, 3, true);
} else {
CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
}
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (and (masked_gather x)) -> (zext_masked_gather x)
if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
EVT MemVT = GN0->getMemoryVT();
EVT ScalarVT = MemVT.getScalarType();
if (SDValue(GN0, 0).hasOneUse() &&
isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
SDValue ZExtLoad = DAG.getMaskedGather(
DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
CombineTo(N, ZExtLoad);
AddToWorklist(ZExtLoad.getNode());
// Avoid recheck of N.
return SDValue(N, 0);
}
}
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
// fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
(N0.getOpcode() == ISD::ANY_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::LOAD))) {
if (SDValue Res = ReduceLoadWidth(N)) {
LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
AddToWorklist(N);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
return SDValue(N, 0);
}
}
if (LegalTypes) {
// Attempt to propagate the AND back up to the leaves which, if they're
// loads, can be combined to narrow loads and the AND node can be removed.
// Perform after legalization so that extend nodes will already be
// combined into the loads.
if (BackwardsPropagateMask(N))
return SDValue(N, 0);
}
if (SDValue Combined = visitANDLike(N0, N1, N))
return Combined;
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
// Masking the negated extension of a boolean is just the zero-extended
// boolean:
// and (sub 0, zext(bool X)), 1 --> zext(bool X)
// and (sub 0, sext(bool X)), 1 --> zext(bool X)
//
// Note: the SimplifyDemandedBits fold below can make an information-losing
// transform, and then we have no way to find this better fold.
if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
if (isNullOrNullSplat(N0.getOperand(0))) {
SDValue SubRHS = N0.getOperand(1);
if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
return SubRHS;
if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
}
}
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
// fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
(ISD::isEXTLoad(N0.getNode()) ||
(ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
unsigned ExtBitSize = N1.getScalarValueSizeInBits();
unsigned MemBitSize = MemVT.getScalarSizeInBits();
APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
if (DAG.MaskedValueIsZero(N1, ExtBits) &&
((!LegalOperations && LN0->isSimple()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
LN0->getBasePtr(), MemVT, LN0->getMemOperand());
AddToWorklist(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
N0.getOperand(1), false))
return BSwap;
}
if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
return Shifts;
if (TLI.hasBitTest(N0, N1))
if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
return V;
// Recognize the following pattern:
//
// AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
//
// where bitmask is a mask that clears the upper bits of AndVT. The
// number of bits in bitmask must be a power of two.
auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
if (LHS->getOpcode() != ISD::SIGN_EXTEND)
return false;
auto *C = dyn_cast<ConstantSDNode>(RHS);
if (!C)
return false;
if (!C->getAPIntValue().isMask(
LHS.getOperand(0).getValueType().getFixedSizeInBits()))
return false;
return true;
};
// Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
if (IsAndZeroExtMask(N0, N1))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
return SDValue();
}
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
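/// For example, for i32 (one of several accepted forms):
///   (or (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8))
///     --> (srl (bswap a), 16)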
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits) {
if (!LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
return SDValue();
if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
// Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
bool LookPassAnd0 = false;
bool LookPassAnd1 = false;
if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
std::swap(N0, N1);
if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() == ISD::AND) {
if (!N0.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
// Also handle 0xffff since the LHS is guaranteed to have zeros there.
// This is needed for X86.
if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
N01C->getZExtValue() != 0xFFFF))
return SDValue();
N0 = N0.getOperand(0);
LookPassAnd0 = true;
}
if (N1.getOpcode() == ISD::AND) {
if (!N1.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C || N11C->getZExtValue() != 0xFF)
return SDValue();
N1 = N1.getOperand(0);
LookPassAnd1 = true;
}
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N01C || !N11C)
return SDValue();
if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
return SDValue();
// Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
SDValue N00 = N0->getOperand(0);
if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
if (!N00.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
if (!N001C || N001C->getZExtValue() != 0xFF)
return SDValue();
N00 = N00.getOperand(0);
LookPassAnd0 = true;
}
SDValue N10 = N1->getOperand(0);
if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
if (!N10.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
// Also allow 0xFFFF since the bits will be shifted out. This is needed
// for X86.
if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
N101C->getZExtValue() != 0xFFFF))
return SDValue();
N10 = N10.getOperand(0);
LookPassAnd1 = true;
}
if (N00 != N10)
return SDValue();
// Make sure everything beyond the low halfword gets set to zero since the SRL
// 16 will clear the top bits.
unsigned OpSizeInBits = VT.getSizeInBits();
if (DemandHighBits && OpSizeInBits > 16) {
// If the left-shift isn't masked out then the only way this is a bswap is
// if all bits beyond the low 8 are 0. In that case the entire pattern
// reduces to a left shift anyway: leave it for other parts of the combiner.
if (!LookPassAnd0)
return SDValue();
// However, if the right shift isn't masked out then it might be because
// it's not needed. See if we can spot that too.
if (!LookPassAnd1 &&
!DAG.MaskedValueIsZero(
N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
return SDValue();
}
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
if (OpSizeInBits > 16) {
SDLoc DL(N);
Res = DAG.getNode(ISD::SRL, DL, VT, Res,
DAG.getConstant(OpSizeInBits - 16, DL,
getShiftAmountTy(VT)));
}
return Res;
}
/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
if (!N.getNode()->hasOneUse())
return false;
unsigned Opc = N.getOpcode();
if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
return false;
SDValue N0 = N.getOperand(0);
unsigned Opc0 = N0.getOpcode();
if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
return false;
ConstantSDNode *N1C = nullptr;
// SHL or SRL: look upstream for AND mask operand
if (Opc == ISD::AND)
N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
else if (Opc0 == ISD::AND)
N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!N1C)
return false;
unsigned MaskByteOffset;
switch (N1C->getZExtValue()) {
default:
return false;
case 0xFF: MaskByteOffset = 0; break;
case 0xFF00: MaskByteOffset = 1; break;
case 0xFFFF:
// In case demanded bits didn't clear the bits that will be shifted out.
// This is needed for X86.
if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
MaskByteOffset = 1;
break;
}
return false;
case 0xFF0000: MaskByteOffset = 2; break;
case 0xFF000000: MaskByteOffset = 3; break;
}
// Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
if (Opc == ISD::AND) {
if (MaskByteOffset == 0 || MaskByteOffset == 2) {
// (x >> 8) & 0xff
// (x >> 8) & 0xff0000
if (Opc0 != ISD::SRL)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
} else {
// (x << 8) & 0xff00
// (x << 8) & 0xff000000
if (Opc0 != ISD::SHL)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
}
} else if (Opc == ISD::SHL) {
// (x & 0xff) << 8
// (x & 0xff0000) << 8
if (MaskByteOffset != 0 && MaskByteOffset != 2)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
} else { // Opc == ISD::SRL
// (x & 0xff00) >> 8
// (x & 0xff000000) >> 8
if (MaskByteOffset != 1 && MaskByteOffset != 3)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
}
if (Parts[MaskByteOffset])
return false;
Parts[MaskByteOffset] = N0.getOperand(0).getNode();
return true;
}
// Match 2 elements of a packed halfword bswap.
static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
if (N.getOpcode() == ISD::OR)
return isBSwapHWordElement(N.getOperand(0), Parts) &&
isBSwapHWordElement(N.getOperand(1), Parts);
if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
if (!C || C->getAPIntValue() != 16)
return false;
Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
return true;
}
return false;
}
// Match this pattern:
// (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
// And rewrite this to:
// (rotr (bswap A), 16)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
SelectionDAG &DAG, SDNode *N, SDValue N0,
SDValue N1, EVT VT, EVT ShiftAmountTy) {
assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
"MatchBSwapHWordOrAndAnd: expecting i32");
if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
return SDValue();
// TODO: this is too restrictive; lifting this restriction requires more tests
if (!N0->hasOneUse() || !N1->hasOneUse())
return SDValue();
ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
if (!Mask0 || !Mask1)
return SDValue();
if (Mask0->getAPIntValue() != 0xff00ff00 ||
Mask1->getAPIntValue() != 0x00ff00ff)
return SDValue();
SDValue Shift0 = N0.getOperand(0);
SDValue Shift1 = N1.getOperand(0);
if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
return SDValue();
ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
if (!ShiftAmt0 || !ShiftAmt1)
return SDValue();
if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
return SDValue();
if (Shift0.getOperand(0) != Shift1.getOperand(0))
return SDValue();
SDLoc DL(N);
SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
}
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::i32)
return SDValue();
if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
getShiftAmountTy(VT)))
return BSwap;
// Try again with commuted operands.
if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
getShiftAmountTy(VT)))
return BSwap;
// Look for either
// (or (bswaphpair), (bswaphpair))
// (or (or (bswaphpair), (and)), (and))
// (or (or (and), (bswaphpair)), (and))
SDNode *Parts[4] = {};
if (isBSwapHWordPair(N0, Parts)) {
// (or (or (and), (and)), (or (and), (and)))
if (!isBSwapHWordPair(N1, Parts))
return SDValue();
} else if (N0.getOpcode() == ISD::OR) {
// (or (or (or (and), (and)), (and)), (and))
if (!isBSwapHWordElement(N1, Parts))
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
!(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
return SDValue();
} else
return SDValue();
// Make sure the parts are all coming from the same node.
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
return SDValue();
SDLoc DL(N);
SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
SDValue(Parts[0], 0));
// Result of the bswap should be rotated by 16. If it's not legal, then
// do (x << 16) | (x >> 16).
SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
return DAG.getNode(ISD::OR, DL, VT,
DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
SDLoc DL(N);
// fold (or x, undef) -> -1
if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
return DAG.getAllOnesConstant(DL, VT);
if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
return V;
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
// Don't increase # computations.
(N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
// We can only do this xform if we know that bits from X that are set in C2
// but not in C1 are already zero. Likewise for Y.
if (const ConstantSDNode *N0O1C =
getAsNonOpaqueConstant(N0.getOperand(1))) {
if (const ConstantSDNode *N1O1C =
getAsNonOpaqueConstant(N1.getOperand(1))) {
const APInt &LHSMask = N0O1C->getAPIntValue();
const APInt &RHSMask = N1O1C->getAPIntValue();
if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(0), N1.getOperand(0));
return DAG.getNode(ISD::AND, DL, VT, X,
DAG.getConstant(LHSMask | RHSMask, DL, VT));
}
}
}
}
// (or (and X, M), (and X, N)) -> (and X, (or M, N))
if (N0.getOpcode() == ISD::AND &&
N1.getOpcode() == ISD::AND &&
N0.getOperand(0) == N1.getOperand(0) &&
// Don't increase # computations.
(N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(1), N1.getOperand(1));
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
}
return SDValue();
}
/// OR combines for which the commuted variant will be tried as well.
static SDValue visitORCommutative(
SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N0.getValueType();
if (N0.getOpcode() == ISD::AND) {
// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
// fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
}
return SDValue();
}
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
// x | x --> x
if (N0 == N1)
return N0;
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
// fold (or x, -1) -> -1, vector edition
if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
// do not return N0, because undef node may exist in N0
return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
if (isa<ShuffleVectorSDNode>(N0) &&
isa<ShuffleVectorSDNode>(N1) &&
// Avoid folding a node with illegal type.
TLI.isTypeLegal(VT)) {
bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
// Ensure both shuffles have a zero input.
if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
bool CanFold = true;
int NumElts = VT.getVectorNumElements();
SmallVector<int, 4> Mask(NumElts);
for (int i = 0; i != NumElts; ++i) {
int M0 = SV0->getMaskElt(i);
int M1 = SV1->getMaskElt(i);
// Determine if either index is pointing to a zero vector.
bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
// If one element is zero and the other side is undef, keep undef.
// This also handles the case that both are undef.
if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
Mask[i] = -1;
continue;
}
// Make sure only one of the elements is zero.
if (M0Zero == M1Zero) {
CanFold = false;
break;
}
assert((M0 >= 0 || M1 >= 0) && "Undef index!");
// We have a zero and non-zero element. If the non-zero came from
// SV0 make the index a LHS index. If it came from SV1, make it
// a RHS index. We need to mod by NumElts because we don't care
// which operand it came from in the original shuffles.
Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
}
if (CanFold) {
SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
SDValue LegalShuffle =
TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
Mask, DAG);
if (LegalShuffle)
return LegalShuffle;
}
}
}
}
// fold (or c1, c2) -> c1|c2
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
// fold (or x, 0) -> x
if (isNullConstant(N1))
return N0;
// fold (or x, -1) -> -1
if (isAllOnesConstant(N1))
return N1;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;
if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
return Combined;
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
return BSwap;
if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
return BSwap;
// reassociate or
if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) != 0 or c1/c2 are undef.
auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
};
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
{N1, N0.getOperand(1)})) {
SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
AddToWorklist(IOR.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
}
}
if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
return Combined;
if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
return Combined;
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
// See if this is some rotate idiom.
if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
return Rot;
if (SDValue Load = MatchLoadCombine(N))
return Load;
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// If OR can be rewritten into ADD, try combines based on ADD.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
if (SDValue Combined = visitADDLike(N))
return Combined;
return SDValue();
}
static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
if (Op.getOpcode() == ISD::AND &&
DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
Mask = Op.getOperand(1);
return Op.getOperand(0);
}
return Op;
}
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
SDValue &Mask) {
Op = stripConstantMask(DAG, Op, Mask);
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
Shift = Op;
return true;
}
return false;
}
/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv. This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
/// (or (add v v) (shrl v bitwidth-1)):
/// expands (add v v) -> (shl v 1)
///
/// (or (mul v c0) (shrl (mul v c1) c2)):
/// expands (mul v c0) -> (shl (mul v c1) c3)
///
/// (or (udiv v c0) (shl (udiv v c1) c2)):
/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
/// (or (shl v c0) (shrl (shl v c1) c2)):
/// expands (shl v c0) -> (shl (shl v c1) c3)
///
/// (or (shrl v c0) (shl (shrl v c1) c2)):
/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
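///
/// E.g. (illustrative), for a 32-bit v with c0=24, c1=3, c2=29:
///   (or (mul v 24) (shrl (mul v 3) 29))
/// expands (mul v 24) -> (shl (mul v 3) 3), since 24 == 3 << 3 and
/// 3 + 29 == 32, allowing a rotate of (mul v 3) to be formed.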
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
SDValue ExtractFrom, SDValue &Mask,
const SDLoc &DL) {
assert(OppShift && ExtractFrom && "Empty SDValue");
assert(
(OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
"Existing shift must be valid as a rotate half");
ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
// Value and Type of the shift.
SDValue OppShiftLHS = OppShift.getOperand(0);
EVT ShiftedVT = OppShiftLHS.getValueType();
// Amount of the existing shift.
ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
// (add v v) -> (shl v 1)
// TODO: Should this be a general DAG canonicalization?
if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
ExtractFrom.getOpcode() == ISD::ADD &&
ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
ExtractFrom.getOperand(0) == OppShiftLHS &&
OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
DAG.getShiftAmountConstant(1, ShiftedVT, DL));
// Preconditions:
// (or (op0 v c0) (shiftl/r (op0 v c1) c2))
//
// Find opcode of the needed shift to be extracted from (op0 v c0).
unsigned Opcode = ISD::DELETED_NODE;
bool IsMulOrDiv = false;
// Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
// opcode or its arithmetic (mul or udiv) variant.
auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
return false;
Opcode = NeededShift;
return true;
};
// op0 must be either the needed shift opcode or the mul/udiv equivalent
// that the needed shift can be extracted from.
if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
(OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
return SDValue();
// op0 must be the same opcode on both sides, have the same LHS argument,
// and produce the same value type.
if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
ShiftedVT != ExtractFrom.getValueType())
return SDValue();
// Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
// Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
ConstantSDNode *ExtractFromCst =
isConstOrConstSplat(ExtractFrom.getOperand(1));
// TODO: We should be able to handle non-uniform constant vectors for these values
// Check that we have constant values.
if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
!OppLHSCst || !OppLHSCst->getAPIntValue() ||
!ExtractFromCst || !ExtractFromCst->getAPIntValue())
return SDValue();
// Compute the shift amount we need to extract to complete the rotate.
const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
if (OppShiftCst->getAPIntValue().ugt(VTWidth))
return SDValue();
APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
// Normalize the bitwidth of the two mul/udiv/shift constant operands.
APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
APInt OppLHSAmt = OppLHSCst->getAPIntValue();
zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
// Now try extract the needed shift from the ExtractFrom op and see if the
// result matches up with the existing shift's LHS op.
if (IsMulOrDiv) {
// Op to extract from is a mul or udiv by a constant.
// Check:
// c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
// c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
NeededShiftAmt.getZExtValue());
APInt ResultAmt;
APInt Rem;
APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
if (Rem != 0 || ResultAmt != OppLHSAmt)
return SDValue();
} else {
// Op to extract from is a shift by a constant.
// Check:
// c2 - (bitwidth(op0 v c0) - c1) == c0
if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
ExtractFromAmt.getBitWidth()))
return SDValue();
}
// Return the expanded shift op that should allow a rotate to be formed.
EVT ShiftVT = OppShift.getOperand(1).getValueType();
EVT ResVT = ExtractFrom.getValueType();
SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
//
// The IsRotate flag should be set when the LHS of both shifts is the same.
// Otherwise if matching a general funnel shift, it should be clear.
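//
// E.g. with EltSize == 32, Neg == (sub 32, Pos) satisfies the relation, so
// (or (shift1 X, (sub 32, Pos)), (shift2 X, Pos)) can be treated as a rotate
// of X by Pos in direction shift2.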
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
SelectionDAG &DAG, bool IsRotate) {
// If EltSize is a power of 2 then:
//
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
// (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
//
// So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
// for the stronger condition:
//
// Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
//
// for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
// we can just replace Neg with Neg' for the rest of the function.
//
// In other cases we check for the even stronger condition:
//
// Neg == EltSize - Pos [B]
//
// for all Neg and Pos. Note that the (or ...) then invokes undefined
// behavior if Pos == 0 (and consequently Neg == EltSize).
//
// We could actually use [A] whenever EltSize is a power of 2, but the
// only extra cases that it would match are those uninteresting ones
// where Neg and Pos are never in range at the same time. E.g. for
// EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
// as well as (sub 32, Pos), but:
//
// (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
//
// always invokes undefined behavior for 32-bit X.
//
// Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
//
// NOTE: We can only do this when matching an AND and not a general
// funnel shift.
unsigned MaskLoBits = 0;
if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
unsigned Bits = Log2_64(EltSize);
if (NegC->getAPIntValue().getActiveBits() <= Bits &&
((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
Neg = Neg.getOperand(0);
MaskLoBits = Bits;
}
}
}
// Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
if (Neg.getOpcode() != ISD::SUB)
return false;
ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
if (!NegC)
return false;
SDValue NegOp1 = Neg.getOperand(1);
// On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
// Pos'. The truncation is redundant for the purpose of the equality.
if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
MaskLoBits))
Pos = Pos.getOperand(0);
}
}
// The condition we need is now:
//
// (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
//
// If NegOp1 == Pos then we need:
//
// EltSize & Mask == NegC & Mask
//
// (because "x & Mask" is a truncation and distributes through subtraction).
//
// We also need to account for a potential truncation of NegOp1 if the amount
// has already been legalized to a shift amount type.
APInt Width;
if ((Pos == NegOp1) ||
(NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
Width = NegC->getAPIntValue();
// Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
// Then the condition we want to prove becomes:
//
// (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
//
// which, again because "x & Mask" is a truncation, becomes:
//
// NegC & Mask == (EltSize - PosC) & Mask
// EltSize & Mask == (NegC + PosC) & Mask
else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
Width = PosC->getAPIntValue() + NegC->getAPIntValue();
else
return false;
} else
return false;
// Now we just need to check that EltSize & Mask == Width & Mask.
if (MaskLoBits)
// EltSize & Mask is 0 since Mask is EltSize - 1.
return Width.getLoBits(MaskLoBits) == 0;
return Width == EltSize;
}
// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
SDValue Neg, SDValue InnerPos,
SDValue InnerNeg, unsigned PosOpcode,
unsigned NegOpcode, const SDLoc &DL) {
// fold (or (shl x, (*ext y)),
// (srl x, (*ext (sub 32, y)))) ->
// (rotl x, y) or (rotr x, (sub 32, y))
//
// fold (or (shl x, (*ext (sub 32, y))),
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
/*IsRotate*/ true)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg);
}
return SDValue();
}
// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
// TODO: Merge with MatchRotatePosNeg.
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
SDValue Neg, SDValue InnerPos,
SDValue InnerNeg, unsigned PosOpcode,
unsigned NegOpcode, const SDLoc &DL) {
EVT VT = N0.getValueType();
unsigned EltBits = VT.getScalarSizeInBits();
// fold (or (shl x0, (*ext y)),
// (srl x1, (*ext (sub 32, y)))) ->
// (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
//
// fold (or (shl x0, (*ext (sub 32, y))),
// (srl x1, (*ext y))) ->
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
HasPos ? Pos : Neg);
}
// When matching the shift+xor cases, we can't easily use the xor'd shift
// amount, so for now just use the PosOpcode case if it's legal.
// TODO: When can we use the NegOpcode case?
if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
if (Op.getOpcode() != BinOpc)
return false;
ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
return Cst && (Cst->getAPIntValue() == Imm);
};
// fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
// -> (fshl x0, x1, y)
if (IsBinOpImm(N1, ISD::SRL, 1) &&
IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
InnerPos == InnerNeg.getOperand(0) &&
TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
}
// fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
// -> (fshr x0, x1, y)
if (IsBinOpImm(N0, ISD::SHL, 1) &&
IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
InnerNeg == InnerPos.getOperand(0) &&
TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
}
// fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
// -> (fshr x0, x1, y)
// TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
InnerNeg == InnerPos.getOperand(0) &&
TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
}
}
return SDValue();
}
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// Must be a legal type. Expanded and promoted types won't work with rotates.
EVT VT = LHS.getValueType();
if (!TLI.isTypeLegal(VT))
return SDValue();
// The target must have at least one rotate/funnel flavor.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
bool HasFSHL = hasOperation(ISD::FSHL, VT);
bool HasFSHR = hasOperation(ISD::FSHR, VT);
if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
// Check for truncated rotate.
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
assert(LHS.getValueType() == RHS.getValueType());
if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
}
}
// Match "(X shl/srl V1) & V2" where V2 may not be present.
SDValue LHSShift; // The shift.
SDValue LHSMask; // AND value if any.
matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
SDValue RHSShift; // The shift.
SDValue RHSMask; // AND value if any.
matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
// If neither side matched a rotate half, bail
if (!LHSShift && !RHSShift)
return SDValue();
// InstCombine may have combined a constant shl, srl, mul, or udiv with one
// side of the rotate, so try to handle that here. In all cases we need to
// pass the matched shift from the opposite side to compute the opcode and
// needed shift amount to extract. We still want to do this if both sides
// matched a rotate half because one half may be a potential overshift that
// can be broken down (i.e. if InstCombine merged two shl or srl ops into a
// single one).
// Have LHS side of the rotate, try to extract the needed shift from the RHS.
if (LHSShift)
if (SDValue NewRHSShift =
extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
RHSShift = NewRHSShift;
// Have RHS side of the rotate, try to extract the needed shift from the LHS.
if (RHSShift)
if (SDValue NewLHSShift =
extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
LHSShift = NewLHSShift;
// If a side is still missing, nothing else we can do.
if (!RHSShift || !LHSShift)
return SDValue();
// At this point we've matched or extracted a shift op on each side.
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
if (!IsRotate && !(HasFSHL || HasFSHR))
return SDValue(); // Requires funnel shift support.
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
std::swap(LHSShift, RHSShift);
std::swap(LHSMask, RHSMask);
}
unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
SDValue RHSShiftArg = RHSShift.getOperand(0);
SDValue RHSShiftAmt = RHSShift.getOperand(1);
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
// fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
// fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
// iff C1+C2 == EltSizeInBits
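// For illustration, with 32-bit elements:
//   (or (shl x, 24), (srl x, 8)) has C1 + C2 == 24 + 8 == 32, so it becomes
//   (rotl x, 24) (equivalently (rotr x, 8)), target support permitting.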
auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Res;
if (IsRotate && (HasROTL || HasROTR))
Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
HasROTL ? LHSShiftAmt : RHSShiftAmt);
else
Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
SDValue Mask = AllOnes;
if (LHSMask.getNode()) {
SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
}
if (RHSMask.getNode()) {
SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
}
Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
}
return Res;
}
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
return SDValue();
// If the shift amount is sign/zext/any-extended just peel it off.
SDValue LExtOp0 = LHSShiftAmt;
SDValue RExtOp0 = RHSShiftAmt;
if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
(RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
LExtOp0 = LHSShiftAmt.getOperand(0);
RExtOp0 = RHSShiftAmt.getOperand(0);
}
if (IsRotate && (HasROTL || HasROTR)) {
SDValue TryL =
MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
RExtOp0, ISD::ROTL, ISD::ROTR, DL);
if (TryL)
return TryL;
SDValue TryR =
MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
LExtOp0, ISD::ROTR, ISD::ROTL, DL);
if (TryR)
return TryR;
}
SDValue TryL =
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
if (TryL)
return TryL;
SDValue TryR =
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
if (TryR)
return TryR;
return SDValue();
}
namespace {
/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
// For constant zero providers Load is set to nullptr. For memory providers
// Load represents the node which loads the byte from memory.
// ByteOffset is the offset of the byte in the value produced by the load.
LoadSDNode *Load = nullptr;
unsigned ByteOffset = 0;
ByteProvider() = default;
static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
return ByteProvider(Load, ByteOffset);
}
static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
bool isConstantZero() const { return !Load; }
bool isMemory() const { return Load; }
bool operator==(const ByteProvider &Other) const {
return Other.Load == Load && Other.ByteOffset == ByteOffset;
}
private:
ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
: Load(Load), ByteOffset(ByteOffset) {}
};
} // end anonymous namespace
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For every value except the root of the expression, verifies that the value
/// has exactly one use; if it does not, returns None. This way, if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
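///
/// For illustration, given
///   Op = (or (zext i16 (load A) to i32), (shl (zext i16 (load B) to i32), 16))
/// bytes 0-1 of Op come from load A (byte offsets 0 and 1 within A) and bytes
/// 2-3 come from load B: for each byte, the shl or the zext branch contributes
/// constant zero on one side of the 'or', so every byte resolves to a single
/// memory provider.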
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
bool Root = false) {
// Typical i64 by i8 pattern requires recursion up to 8 calls depth
if (Depth == 10)
return None;
if (!Root && !Op.hasOneUse())
return None;
assert(Op.getValueType().isScalarInteger() && "can't handle other types");
unsigned BitWidth = Op.getValueSizeInBits();
if (BitWidth % 8 != 0)
return None;
unsigned ByteWidth = BitWidth / 8;
assert(Index < ByteWidth && "invalid index requested");
(void) ByteWidth;
switch (Op.getOpcode()) {
case ISD::OR: {
auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
if (!LHS)
return None;
auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
if (!RHS)
return None;
if (LHS->isConstantZero())
return RHS;
if (RHS->isConstantZero())
return LHS;
return None;
}
case ISD::SHL: {
auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!ShiftOp)
return None;
uint64_t BitShift = ShiftOp->getZExtValue();
if (BitShift % 8 != 0)
return None;
uint64_t ByteShift = BitShift / 8;
return Index < ByteShift
? ByteProvider::getConstantZero()
: calculateByteProvider(Op->getOperand(0), Index - ByteShift,
Depth + 1);
}
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND: {
SDValue NarrowOp = Op->getOperand(0);
unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
if (NarrowBitWidth % 8 != 0)
return None;
uint64_t NarrowByteWidth = NarrowBitWidth / 8;
if (Index >= NarrowByteWidth)
return Op.getOpcode() == ISD::ZERO_EXTEND
? Optional<ByteProvider>(ByteProvider::getConstantZero())
: None;
return calculateByteProvider(NarrowOp, Index, Depth + 1);
}
case ISD::BSWAP:
return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
Depth + 1);
case ISD::LOAD: {
auto L = cast<LoadSDNode>(Op.getNode());
if (!L->isSimple() || L->isIndexed())
return None;
unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
if (NarrowBitWidth % 8 != 0)
return None;
uint64_t NarrowByteWidth = NarrowBitWidth / 8;
if (Index >= NarrowByteWidth)
return L->getExtensionType() == ISD::ZEXTLOAD
? Optional<ByteProvider>(ByteProvider::getConstantZero())
: None;
return ByteProvider::getMemory(L, Index);
}
}
return None;
}
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
return i;
}
static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
return BW - i - 1;
}
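// For example, with BW == 4 the little-endian byte order is {0, 1, 2, 3} while
// the big-endian byte order is {3, 2, 1, 0}.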
// Check if the byte offsets we are looking at match either a big or a
// little endian value load. Return true for big endian, false for little
// endian, and None if the match failed.
static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
int64_t FirstOffset) {
// The endian can be decided only when it is 2 bytes at least.
unsigned Width = ByteOffsets.size();
if (Width < 2)
return None;
bool BigEndian = true, LittleEndian = true;
for (unsigned i = 0; i < Width; i++) {
int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
if (!BigEndian && !LittleEndian)
return None;
}
assert((BigEndian != LittleEndian) &&
       "It should be either big endian or little endian");
return BigEndian;
}
static SDValue stripTruncAndExt(SDValue Value) {
switch (Value.getOpcode()) {
case ISD::TRUNCATE:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND:
return stripTruncAndExt(Value.getOperand(0));
}
return Value;
}
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
///
/// Assuming little endian target:
/// i8 *p = ...
/// i32 val = ...
/// p[0] = (val >> 0) & 0xFF;
/// p[1] = (val >> 8) & 0xFF;
/// p[2] = (val >> 16) & 0xFF;
/// p[3] = (val >> 24) & 0xFF;
/// =>
/// *((i32)p) = val;
///
/// i8 *p = ...
/// i32 val = ...
/// p[0] = (val >> 24) & 0xFF;
/// p[1] = (val >> 16) & 0xFF;
/// p[2] = (val >> 8) & 0xFF;
/// p[3] = (val >> 0) & 0xFF;
/// =>
/// *((i32)p) = BSWAP(val);
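///
/// For illustration, a two-store variant handled via the rotate path (a
/// sketch, assuming a little endian target):
///   i16 *p = ...
///   i32 val = ...
///   p[0] = (val >> 16) & 0xFFFF;
///   p[1] = (val >> 0) & 0xFFFF;
///   =>
///   *((i32)p) = ROTR(val, 16);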
SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// The matching looks for "store (trunc x)" patterns that appear early but are
// likely to be replaced by truncating store nodes during combining.
// TODO: If there is evidence that running this later would help, this
// limitation could be removed. Legality checks may need to be added
// for the created store and optional bswap/rotate.
if (LegalOperations)
return SDValue();
// We only handle merging simple stores of 1-4 bytes.
// TODO: Allow unordered atomics when wider type is legal (see D66309)
EVT MemVT = N->getMemoryVT();
if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
!N->isSimple() || N->isIndexed())
return SDValue();
// Collect all of the stores in the chain.
SDValue Chain = N->getChain();
SmallVector<StoreSDNode *, 8> Stores = {N};
while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
// All stores must be the same size to ensure that we are writing all of the
// bytes in the wide value.
// TODO: We could allow multiple sizes by tracking each stored byte.
if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
}
// There is no reason to continue if we do not have at least a pair of stores.
if (Stores.size() < 2)
return SDValue();
// Handle simple types only.
LLVMContext &Context = *DAG.getContext();
unsigned NumStores = Stores.size();
unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
unsigned WideNumBits = NumStores * NarrowNumBits;
EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
return SDValue();
// Check if all bytes of the source value that we are looking at are stored
// to the same base address. Collect offsets from Base address into OffsetMap.
SDValue SourceValue;
SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
Optional<BaseIndexOffset> Base;
for (auto Store : Stores) {
// All the stores store different parts of the combined wide value. A truncate
// is required to get the partial value.
SDValue Trunc = Store->getValue();
if (Trunc.getOpcode() != ISD::TRUNCATE)
return SDValue();
// Other than the first/last part, a shift operation is required to get the
// offset.
int64_t Offset = 0;
SDValue WideVal = Trunc.getOperand(0);
if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
isa<ConstantSDNode>(WideVal.getOperand(1))) {
// The shift amount must be a constant multiple of the narrow type.
// It is translated to the offset address in the wide source value "y".
//
// x = srl y, ShiftAmtC
// i8 z = trunc x
// store z, ...
uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
if (ShiftAmtC % NarrowNumBits != 0)
return SDValue();
Offset = ShiftAmtC / NarrowNumBits;
WideVal = WideVal.getOperand(0);
}
// Stores must share the same source value with different offsets.
// Truncate and extends should be stripped to get the single source value.
if (!SourceValue)
SourceValue = WideVal;
else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
return SDValue();
else if (SourceValue.getValueType() != WideVT) {
if (WideVal.getValueType() == WideVT ||
WideVal.getScalarValueSizeInBits() >
SourceValue.getScalarValueSizeInBits())
SourceValue = WideVal;
// Give up if the source value type is smaller than the store size.
if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
return SDValue();
}
// Stores must share the same base address.
BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
// Remember the first store.
if (ByteOffsetFromBase < FirstOffset) {
FirstStore = Store;
FirstOffset = ByteOffsetFromBase;
}
// Map the offset in the combined value to the store's byte offset, and
// return early if this offset has already been set.
if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
return SDValue();
OffsetMap[Offset] = ByteOffsetFromBase;
}
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
assert(FirstStore && "First store must be set");
// Check that a store of the wide type is both allowed and fast on the target
const DataLayout &Layout = DAG.getDataLayout();
bool Fast = false;
bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
*FirstStore->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
// Check if the pieces of the value are going to the expected places in memory
// to merge the stores.
auto checkOffsets = [&](bool MatchLittleEndian) {
if (MatchLittleEndian) {
for (unsigned i = 0; i != NumStores; ++i)
if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
return false;
} else { // MatchBigEndian by reversing loop counter.
for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
return false;
}
return true;
};
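// For illustration, with NumStores == 4, NarrowNumBits == 8 and FirstOffset == 0,
// a little-endian match requires OffsetMap == {0, 1, 2, 3} and a big-endian
// match requires OffsetMap == {3, 2, 1, 0}.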
// Check if the offsets line up for the native data layout of this target.
bool NeedBswap = false;
bool NeedRotate = false;
if (!checkOffsets(Layout.isLittleEndian())) {
// Special-case: check if byte offsets line up for the opposite endian.
if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
NeedBswap = true;
else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
NeedRotate = true;
else
return SDValue();
}
SDLoc DL(N);
if (WideVT != SourceValue.getValueType()) {
assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
"Unexpected store value to merge");
SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
}
// Before legalize we can introduce illegal bswaps/rotates which will be later
// converted to an explicit bswap sequence. This way we end up with a single
// store and byte shuffling instead of several stores and byte shuffling.
if (NeedBswap) {
SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
} else if (NeedRotate) {
assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
}
SDValue NewStore =
DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
FirstStore->getPointerInfo(), FirstStore->getAlign());
// Rely on other DAG combine rules to remove the other individual stores.
DAG.ReplaceAllUsesWith(N, NewStore.getNode());
return NewStore;
}
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
///
/// Assuming little endian target:
/// i8 *a = ...
/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
/// i32 val = *((i32)a)
///
/// i8 *a = ...
/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
/// i32 val = BSWAP(*((i32)a))
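///
/// For illustration, a partially-zero variant is folded to a zero-extending
/// load (again assuming a little endian target):
///   i8 *a = ...
///   i32 val = a[0] | (a[1] << 8)
///   =>
///   i32 val = ZEXT(*((i16)a))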
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
/// t25: i32 = add t4, Constant:i32<2>
/// t26: i64 = sign_extend t25
/// t27: i64 = add t2, t26
/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
/// t29: i32 = zero_extend t28
/// t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
assert(N->getOpcode() == ISD::OR &&
"Can only match load combining against OR nodes");
// Handles simple types only
EVT VT = N->getValueType(0);
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
unsigned ByteWidth = VT.getSizeInBits() / 8;
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
assert(LoadBitWidth % 8 == 0 &&
"can only analyze providers for individual bytes not bit");
unsigned LoadByteWidth = LoadBitWidth / 8;
return IsBigEndianTarget
? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
: littleEndianByteAt(LoadByteWidth, P.ByteOffset);
};
Optional<BaseIndexOffset> Base;
SDValue Chain;
SmallPtrSet<LoadSDNode *, 8> Loads;
Optional<ByteProvider> FirstByteProvider;
int64_t FirstOffset = INT64_MAX;
// Check if all the bytes of the OR we are looking at are loaded from the same
// base address. Collect byte offsets from Base address in ByteOffsets.
SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
unsigned ZeroExtendedBytes = 0;
for (int i = ByteWidth - 1; i >= 0; --i) {
auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
if (!P)
return SDValue();
if (P->isConstantZero()) {
// It's OK for the N most significant bytes to be 0, we can just
// zero-extend the load.
if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
return SDValue();
continue;
}
assert(P->isMemory() && "provenance should either be memory or zero");
LoadSDNode *L = P->Load;
assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
!L->isIndexed() &&
"Must be enforced by calculateByteProvider");
assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
// All loads must share the same chain
SDValue LChain = L->getChain();
if (!Chain)
Chain = LChain;
else if (Chain != LChain)
return SDValue();
// Loads must share the same base address
BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
// Calculate the offset of the current byte from the base address
ByteOffsetFromBase += MemoryByteOffset(*P);
ByteOffsets[i] = ByteOffsetFromBase;
// Remember the first byte load
if (ByteOffsetFromBase < FirstOffset) {
FirstByteProvider = P;
FirstOffset = ByteOffsetFromBase;
}
Loads.insert(L);
}
assert(!Loads.empty() && "All the bytes of the value must be loaded from "
"memory, so there must be at least one load which produces the value");
assert(Base && "Base address of the accessed memory location must be set");
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
bool NeedsZext = ZeroExtendedBytes > 0;
EVT MemVT =
EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
if (!MemVT.isSimple())
return SDValue();
// Before legalize we can introduce too wide illegal loads which will be later
// split into legal sized loads. This enables us to combine i64 load by i8
// patterns to a couple of i32 loads on 32 bit targets.
if (LegalOperations &&
!TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
MemVT))
return SDValue();
// Check if the bytes of the OR we are looking at match either a big or a
// little endian value load
Optional<bool> IsBigEndian = isBigEndian(
makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
if (!IsBigEndian.hasValue())
return SDValue();
assert(FirstByteProvider && "must be set");
// Ensure that the first byte is loaded from zero offset of the first load.
// So the combined value can be loaded from the first load address.
if (MemoryByteOffset(*FirstByteProvider) != 0)
return SDValue();
LoadSDNode *FirstLoad = FirstByteProvider->Load;
// The node we are looking at matches the pattern; check if we can
// replace it with a single (possibly zero-extended) load and bswap + shift if
// needed.
// If the load needs byte swap check if the target supports it
bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single
// load and byte shuffling instead of several loads and byte shuffling.
// We do not introduce illegal bswaps when zero-extending as this tends to
// introduce too many arithmetic instructions.
if (NeedsBswap && (LegalOperations || NeedsZext) &&
!TLI.isOperationLegal(ISD::BSWAP, VT))
return SDValue();
// If we need to bswap and zero extend, we have to insert a shift. Check that
// it is legal.
if (NeedsBswap && NeedsZext && LegalOperations &&
!TLI.isOperationLegal(ISD::SHL, VT))
return SDValue();
// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;
bool Allowed =
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
*FirstLoad->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
SDValue NewLoad =
DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
Chain, FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
// Transfer chain users from old loads to the new load.
for (LoadSDNode *L : Loads)
DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
if (!NeedsBswap)
return NewLoad;
SDValue ShiftedLoad =
NeedsZext
? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
SDLoc(N), LegalOperations))
: NewLoad;
return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
// If the target has andn, bsl, or a similar bit-select instruction,
// we want to unfold masked merge, with canonical pattern of:
//   |     A     |  |B|
//   ((x ^ y) & m) ^ y
//    |  D  |
// Into:
// (x & m) | (y & ~m)
// If y is a constant, and the 'andn' does not work with immediates,
// we unfold into a different pattern:
// ~(~x & m) & (m | y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
// the very least that breaks andnpd / andnps patterns, and because those
// patterns are simplified in IR and shouldn't be created in the DAG
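// For illustration, with x = 0b1010, y = 0b0110, m = 0b1100:
//   ((x ^ y) & m) ^ y  ==  (0b1100 & 0b1100) ^ 0b0110  ==  0b1010
//   (x & m) | (y & ~m) ==   0b1000 | 0b0010             ==  0b1010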
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
assert(N->getOpcode() == ISD::XOR);
// Don't touch 'not' (i.e. where y = -1).
if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
return SDValue();
EVT VT = N->getValueType(0);
// There are 3 commutable operators in the pattern,
// so we have to deal with 8 possible variants of the basic pattern.
SDValue X, Y, M;
auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
if (And.getOpcode() != ISD::AND || !And.hasOneUse())
return false;
SDValue Xor = And.getOperand(XorIdx);
if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
return false;
SDValue Xor0 = Xor.getOperand(0);
SDValue Xor1 = Xor.getOperand(1);
// Don't touch 'not' (i.e. where y = -1).
if (isAllOnesOrAllOnesSplat(Xor1))
return false;
if (Other == Xor0)
std::swap(Xor0, Xor1);
if (Other != Xor1)
return false;
X = Xor0;
Y = Xor1;
M = And.getOperand(XorIdx ? 0 : 1);
return true;
};
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
!matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
return SDValue();
// Don't do anything if the mask is constant. This should not be reachable.
// InstCombine should have already unfolded this pattern, and DAGCombiner
// probably shouldn't produce it either.
if (isa<ConstantSDNode>(M.getNode()))
return SDValue();
// We can transform if the target has AndNot
if (!TLI.hasAndNot(M))
return SDValue();
SDLoc DL(N);
// If Y is a constant, check that 'andn' works with immediates.
if (!TLI.hasAndNot(Y)) {
assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
// If not, we need to do a bit more work to make sure andn is still used.
SDValue NotX = DAG.getNOT(DL, X, VT);
SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
}
SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
SDValue NotM = DAG.getNOT(DL, M, VT);
SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
// fold (xor x, undef) -> undef
if (N0.isUndef())
return N0;
if (N1.isUndef())
return N1;
// fold (xor c1, c2) -> c1^c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// reassociate xor
if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
// fold !(x cc y) -> (x !cc y)
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) &&
isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
LHS.getValueType());
if (!LegalOperations ||
TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
switch (N0Opcode) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
case ISD::SELECT_CC:
return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
N0.getOperand(3), NotCC);
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
if (N0.hasOneUse()) {
// FIXME Can we handle multiple uses? Could we token factor the chain
// results from the new/old setcc?
SDValue SetCC =
DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
CombineTo(N, SetCC);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
recursivelyDeleteUnusedNodes(N0.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
break;
}
}
}
}
// fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
SDValue V = N0.getOperand(0);
SDLoc DL0(N0);
V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
DAG.getConstant(1, DL0, V.getValueType()));
AddToWorklist(V.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
(N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
return DAG.getNode(NewOpcode, DL, VT, N00, N01);
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
if (isAllOnesConstant(N1) && N0.hasOneUse() &&
(N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
return DAG.getNode(NewOpcode, DL, VT, N00, N01);
}
}
// fold (not (neg x)) -> (add X, -1)
// FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
// Y is a constant or the subtract has a single use.
if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
isNullConstant(N0.getOperand(0))) {
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
DAG.getAllOnesConstant(DL, VT));
}
// fold (not (add X, -1)) -> (neg X)
if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
N0.getOperand(0));
}
// fold (xor (and x, y), y) -> (and (not x), y)
if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
SDValue X = N0.getOperand(0);
SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
AddToWorklist(NotX.getNode());
return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
}
if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
ConstantSDNode *XorC = isConstOrConstSplat(N1);
ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
unsigned BitWidth = VT.getScalarSizeInBits();
if (XorC && ShiftC) {
// Don't crash on an oversized shift. We cannot guarantee that a bogus
// shift has been simplified to undef.
uint64_t ShiftAmt = ShiftC->getLimitedValue();
if (ShiftAmt < BitWidth) {
APInt Ones = APInt::getAllOnesValue(BitWidth);
Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
if (XorC->getAPIntValue() == Ones) {
// If the xor constant is a shifted -1, do a 'not' before the shift:
// xor (X << ShiftC), XorC --> (not X) << ShiftC
// xor (X >> ShiftC), XorC --> (not X) >> ShiftC
SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
}
}
}
}
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
SDValue S0 = S.getOperand(0);
if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
return DAG.getNode(ISD::ABS, DL, VT, S0);
}
}
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (xor (shl 1, x), -1) -> (rotl ~1, x)
// Here is a concrete example of this equivalence:
// i16 x == 14
// i16 shl == 1 << 14 == 16384 == 0b0100000000000000
// i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
//
// =>
//
// i16 ~1 == 0b1111111111111110
// i16 rol(~1, 14) == 0b1011111111111111
//
// Some additional tips to help conceptualize this transform:
// - Try to see the operation as placing a single zero in a value of all ones.
// - There exists no value for x which would allow the result to contain zero.
// - Values of x larger than the bitwidth are undefined and do not require a
// consistent result.
// - Pushing the zero left requires shifting one-bits in from the right.
// A rotate left of ~1 is a nice way of achieving the desired result.
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
N0.getOperand(1));
}
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0Opcode == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
// Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
if (SDValue MM = unfoldMaskedMerge(N))
return MM;
// Simplify the expression using non-local knowledge.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
return Combined;
return SDValue();
}
/// If we have a shift-by-constant of a bitwise logic op that itself has a
/// shift-by-constant operand with identical opcode, we may be able to convert
/// that into 2 independent shifts followed by the logic op. This is a
/// throughput improvement.
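///
/// For illustration: (shl (xor (shl X, 2), Y), 3) -> (xor (shl X, 5), (shl Y, 3)),
/// provided the logic op and the inner shift each have no other uses.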
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
// Match a one-use bitwise logic op.
SDValue LogicOp = Shift->getOperand(0);
if (!LogicOp.hasOneUse())
return SDValue();
unsigned LogicOpcode = LogicOp.getOpcode();
if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
LogicOpcode != ISD::XOR)
return SDValue();
// Find a matching one-use shift by constant.
unsigned ShiftOpcode = Shift->getOpcode();
SDValue C1 = Shift->getOperand(1);
ConstantSDNode *C1Node = isConstOrConstSplat(C1);
assert(C1Node && "Expected a shift with constant operand");
const APInt &C1Val = C1Node->getAPIntValue();
auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
const APInt *&ShiftAmtVal) {
if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
return false;
ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
if (!ShiftCNode)
return false;
// Capture the shifted operand and shift amount value.
ShiftOp = V.getOperand(0);
ShiftAmtVal = &ShiftCNode->getAPIntValue();
// Shift amount types do not have to match their operand type, so check that
// the constants are the same width.
if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
return false;
// The fold is not valid if the sum of the shift values exceeds bitwidth.
if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
return false;
return true;
};
// Logic ops are commutative, so check each operand for a match.
SDValue X, Y;
const APInt *C0Val;
if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
Y = LogicOp.getOperand(1);
else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
Y = LogicOp.getOperand(0);
else
return SDValue();
// shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
SDLoc DL(Shift);
EVT VT = Shift->getValueType(0);
EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
}
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// We are looking for: (shift being one of shl/sra/srl)
/// shift (binop X, C0), C1
/// And want to transform into:
/// binop (shift X, C1), (shift C0, C1)
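///
/// For illustration: (shl (and X, 0xF0), 8) -> (and (shl X, 8), 0xF000).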
SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
// Do not turn a 'not' into a regular xor.
if (isBitwiseNot(N->getOperand(0)))
return SDValue();
// The inner binop must be one-use, since we want to replace it.
SDValue LHS = N->getOperand(0);
if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
return SDValue();
// TODO: This is limited to early combining because it may reveal regressions
// otherwise. But since we just checked a target hook to see if this is
// desirable, that should have filtered out cases where this interferes
// with some other pattern matching.
if (!LegalTypes)
if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
return R;
// We want to pull some binops through shifts, so that we have (and (shift))
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
// thing happens with address calculations, so it's important to canonicalize
// it.
switch (LHS.getOpcode()) {
default:
return SDValue();
case ISD::OR:
case ISD::XOR:
case ISD::AND:
break;
case ISD::ADD:
if (N->getOpcode() != ISD::SHL)
return SDValue(); // only shl(add) not sr[al](add).
break;
}
// We require the RHS of the binop to be a constant and not opaque as well.
ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
if (!BinOpCst)
return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant
// or is copy/select. Enable this in other cases when we figure out that it is
// actually profitable.
SDValue BinOpLHSVal = LHS.getOperand(0);
bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
BinOpLHSVal.getOpcode() == ISD::SRA ||
BinOpLHSVal.getOpcode() == ISD::SRL) &&
isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
BinOpLHSVal.getOpcode() == ISD::SELECT;
if (!IsShiftByConstant && !IsCopyOrSelect)
return SDValue();
if (IsCopyOrSelect && N->hasOneUse())
return SDValue();
// Fold the constants, shifting the binop RHS by the shift amount.
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
N->getOperand(1));
assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
N->getOperand(1));
return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
}
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
assert(N->getOpcode() == ISD::TRUNCATE);
assert(N->getOperand(0).getOpcode() == ISD::AND);
// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
EVT TruncVT = N->getValueType(0);
if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
SDValue N01 = N->getOperand(0).getOperand(1);
if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
SDLoc DL(N);
SDValue N00 = N->getOperand(0).getOperand(0);
SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
AddToWorklist(Trunc00.getNode());
AddToWorklist(Trunc01.getNode());
return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
}
}
return SDValue();
}
SDValue DAGCombiner::visitRotate(SDNode *N) {
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
unsigned Bitsize = VT.getScalarSizeInBits();
// fold (rot x, 0) -> x
if (isNullOrNullSplat(N1))
return N0;
// fold (rot x, c) -> x iff (c % BitSize) == 0
if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
if (DAG.MaskedValueIsZero(N1, ModuloMask))
return N0;
}
// fold (rot x, c) -> (rot x, c % BitSize)
bool OutOfRange = false;
auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
OutOfRange |= C->getAPIntValue().uge(Bitsize);
return true;
};
if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
EVT AmtVT = N1.getValueType();
SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
if (SDValue Amt =
DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
}
// rot i16 X, 8 --> bswap X
auto *RotAmtC = isConstOrConstSplat(N1);
if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
return DAG.getNode(ISD::BSWAP, dl, VT, N0);
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
}
unsigned NextOp = N0.getOpcode();
// fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
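// For illustration, on i32: (rotl (rotl x, 5), 10) -> (rotl x, 15) and
// (rotl (rotr x, 5), 10) -> (rotl x, 5).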
if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
EVT ShiftVT = C1->getValueType(0);
bool SameSide = (N->getOpcode() == NextOp);
unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
CombinedShiftNorm);
}
}
}
return SDValue();
}
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue V = DAG.simplifyShift(N0, N1))
return V;
EVT VT = N0.getValueType();
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
// If setcc produces all-one true value then:
// (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
if (N1CV && N1CV->isConstant()) {
if (N0.getOpcode() == ISD::AND) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
if (SDValue C =
DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
}
}
}
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (shl c1, c2) -> c1<<c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
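// For illustration, on i32: (shl (shl x, 8), 16) -> (shl x, 24), while
// (shl (shl x, 24), 16) -> 0 because 24 + 16 >= 32.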
if (N0.getOpcode() == ISD::SHL) {
auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).uge(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
return DAG.getConstant(0, SDLoc(N), VT);
auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).ult(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
}
}
// fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
// For this to be valid, the second form must not preserve any of the bits
// that are shifted out by the inner shift in the first form. This means
// the outer shift size must be >= the number of bits added by the ext.
// As a corollary, we don't care what kind of ext it is.
if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND) &&
N0.getOperand(0).getOpcode() == ISD::SHL) {
SDValue N0Op0 = N0.getOperand(0);
SDValue InnerShiftAmt = N0Op0.getOperand(1);
EVT InnerVT = N0Op0.getValueType();
uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return c2.uge(OpSizeInBits - InnerBitwidth) &&
(c1 + c2).uge(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true))
return DAG.getConstant(0, SDLoc(N), VT);
auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return c2.uge(OpSizeInBits - InnerBitwidth) &&
(c1 + c2).ult(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
SDLoc DL(N);
SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
}
}
// fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
// Only fold this if the inner zext has no other uses to avoid increasing
// the total number of instructions.
if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
SDValue N0Op0 = N0.getOperand(0);
SDValue InnerShiftAmt = N0Op0.getOperand(1);
auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2);
return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
};
if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
SDLoc DL(N);
EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
AddToWorklist(NewSHL.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
}
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
// TODO - support non-uniform vector shift amounts.
if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
N0->getFlags().hasExact()) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t C1 = N0C1->getZExtValue();
uint64_t C2 = N1C->getZExtValue();
SDLoc DL(N);
if (C1 <= C2)
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
DAG.getConstant(C2 - C1, DL, ShiftVT));
return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
DAG.getConstant(C1 - C2, DL, ShiftVT));
}
}
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
//                               (and (srl x, (sub c1, c2)), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
// TODO - drop hasOneUse requirement if c1 == c2?
// TODO - support non-uniform vector shift amounts.
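// For illustration, on i8: (shl (srl x, 4), 2) -> (and (srl x, 2), 0x3C).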
if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
uint64_t c1 = N0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
SDValue Shift;
if (c2 > c1) {
Mask <<= c2 - c1;
SDLoc DL(N);
Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
DAG.getConstant(c2 - c1, DL, ShiftVT));
} else {
Mask.lshrInPlace(c1 - c2);
SDLoc DL(N);
Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
DAG.getConstant(c1 - c2, DL, ShiftVT));
}
SDLoc DL(N0);
return DAG.getNode(ISD::AND, DL, VT, Shift,
DAG.getConstant(Mask, DL, VT));
}
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
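// For illustration, on i32: (shl (sra x, 24), 24) -> (and x, 0xFF000000).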
if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
isConstantOrConstantVector(N1, /* No Opaques */ true)) {
SDLoc DL(N);
SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
}
// fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
// fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
N0.getNode()->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
TLI.isDesirableToCommuteWithShift(N, Level)) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
AddToWorklist(Shl0.getNode());
AddToWorklist(Shl1.getNode());
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
}
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
if (isConstantOrConstantVector(Shl))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
}
if (N1C && !N1C->isOpaque())
if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
// Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(SDLoc(N), VT, C0 << C1);
}
// Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
APInt ShlVal;
if (N0.getOpcode() == ISD::STEP_VECTOR)
if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
if (ShlVal.ult(C0.getBitWidth())) {
APInt NewStep = C0 << ShlVal;
return DAG.getStepVector(SDLoc(N), VT, NewStep);
}
}
return SDValue();
}
// Transform a right shift of a multiply into a multiply-high.
// Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
"SRL or SRA node is required here!");
// Check the shift amount. Proceed with the transformation if the shift
// amount is constant.
ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
if (!ShiftAmtSrc)
return SDValue();
SDLoc DL(N);
// The operation feeding into the shift must be a multiply.
SDValue ShiftOperand = N->getOperand(0);
if (ShiftOperand.getOpcode() != ISD::MUL)
return SDValue();
// Both operands must be equivalent extend nodes.
SDValue LeftOp = ShiftOperand.getOperand(0);
SDValue RightOp = ShiftOperand.getOperand(1);
bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
return SDValue();
EVT WideVT1 = LeftOp.getValueType();
EVT WideVT2 = RightOp.getValueType();
(void)WideVT2;
// Proceed with the transformation if the wide types match.
assert((WideVT1 == WideVT2) &&
"Cannot have a multiply node with two different operand types.");
EVT NarrowVT = LeftOp.getOperand(0).getValueType();
// Check that the two extend nodes are the same type.
if (NarrowVT != RightOp.getOperand(0).getValueType())
return SDValue();
// Proceed with the transformation if the wide type is twice as large
// as the narrow type.
unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
return SDValue();
// Check the shift amount with the narrow type size.
// Proceed with the transformation if the shift amount is the width
// of the narrow type.
unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
if (ShiftAmt != NarrowVTSize)
return SDValue();
// If the operation feeding into the MUL is a sign extend (sext),
// we use mulhs. Otherwise, zero extends (zext) use mulhu.
unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
// Combine to mulh if mulh is legal/custom for the narrow type on the target.
if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
return SDValue();
SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
RightOp.getOperand(0));
return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
: DAG.getZExtOrTrunc(Result, DL, WideVT1));
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue V = DAG.simplifyShift(N0, N1))
return V;
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// Arithmetic shifting an all-sign-bit value is a no-op.
// fold (sra 0, x) -> 0
// fold (sra -1, x) -> -1
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
return N0;
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (sra c1, c2) -> (sra c1, c2)
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (sra (shl x, c1), c1) -> sext_inreg, for some c1, if the target supports
// sext_inreg.
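// For illustration, on i32: (sra (shl x, 24), 24) -> (sign_extend_inreg x, i8),
// i.e. the low 8 bits of x sign-extended to 32 bits.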
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
if (VT.isVector())
ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
VT.getVectorElementCount());
if (!LegalOperations ||
TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
TargetLowering::Legal)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
N0.getOperand(0), DAG.getValueType(ExtVT));
// Even if we can't convert to sext_inreg, we might be able to remove
// this shift pair if the input is already sign extended.
if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
return N0.getOperand(0);
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
// clamp (add c1, c2) to max shift.
if (N0.getOpcode() == ISD::SRA) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
EVT ShiftSVT = ShiftVT.getScalarType();
SmallVector<SDValue, 16> ShiftValues;
auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
APInt Sum = c1 + c2;
unsigned ShiftSum =
Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
return true;
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
SDValue ShiftValue;
if (N1.getOpcode() == ISD::BUILD_VECTOR)
ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(ShiftValues.size() == 1 &&
"Expected matchBinaryPredicate to return one element for "
"SPLAT_VECTORs");
ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
} else
ShiftValue = ShiftValues[0];
return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
}
}
// fold (sra (shl X, m), (sub result_size, n))
// -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
// result_size - n != m.
// If truncate is free for the target, sext(shl) is likely to result in better
// code.
if (N0.getOpcode() == ISD::SHL && N1C) {
// Get the two constants of the shifts, CN0 = m, CN = n.
const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
if (N01C) {
LLVMContext &Ctx = *DAG.getContext();
// Determine what the truncate's result bitsize and type would be.
EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
if (VT.isVector())
TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
// Determine the residual right-shift amount.
int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
// If the shift is not a no-op (in which case this should be just a sign
// extend already), the type we truncate to is legal, sign_extend is legal
// on that type, and the truncate to that type is both legal and free,
// perform the transform.
if ((ShiftAmt > 0) &&
TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
SDLoc DL(N);
SDValue Amt = DAG.getConstant(ShiftAmt, DL,
getShiftAmountTy(N0.getOperand(0).getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
N0.getOperand(0), Amt);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
Shift);
return DAG.getNode(ISD::SIGN_EXTEND, DL,
N->getValueType(0), Trunc);
}
}
}
// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
// sra (add (shl X, N1C), AddC), N1C -->
// sext (add (trunc X to (width - N1C)), AddC')
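// Illustrative example (assuming i32 values and N1C = 16):
// (sra (add (shl X, 16), 0x30000), 16) --> (sext (add (trunc X to i16), 3)),
// where AddC' = (0x30000 >> 16) truncated to i16 = 3.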
if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
N0.getOperand(0).getOpcode() == ISD::SHL &&
N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
SDValue Shl = N0.getOperand(0);
// Determine what the truncate's type would be and ask the target if that
// is a free operation.
LLVMContext &Ctx = *DAG.getContext();
unsigned ShiftAmt = N1C->getZExtValue();
EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
if (VT.isVector())
TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
// TODO: The simple type check probably belongs in the default hook
// implementation and/or target-specific overrides (because
// non-simple types likely require masking when legalized), but that
// restriction may conflict with other transforms.
if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
TLI.isTruncateFree(VT, TruncVT)) {
SDLoc DL(N);
SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
return DAG.getSExtOrTrunc(Add, DL, VT);
}
}
}
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
}
// fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
// fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
// if c1 is equal to the number of bits the trunc removes
// TODO - support non-uniform vector shift amounts.
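// Illustrative example (assuming x : i64 truncated to i32, so c1 = 32):
// (sra (trunc (srl x, 32)), 5) --> (trunc (sra x, 37)).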
if (N0.getOpcode() == ISD::TRUNCATE &&
(N0.getOperand(0).getOpcode() == ISD::SRL ||
N0.getOperand(0).getOpcode() == ISD::SRA) &&
N0.getOperand(0).hasOneUse() &&
N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
EVT LargeVT = N0Op0.getValueType();
unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
if (LargeShift->getAPIntValue() == TruncBits) {
SDLoc DL(N);
SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
getShiftAmountTy(LargeVT));
SDValue SRA =
DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
}
}
}
// Simplify, based on bits shifted out of the LHS.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// If the sign bit is known to be zero, switch this to a SRL.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
if (N1C && !N1C->isOpaque())
if (SDValue NewSRA = visitShiftByConstant(N))
return NewSRA;
// Try to transform this shift into a multiply-high if
// it matches the appropriate pattern detected in combineShiftToMULH.
if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
return MULH;
return SDValue();
}
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue V = DAG.simplifyShift(N0, N1))
return V;
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (srl c1, c2) -> c1 >>u c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
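// Illustrative example (assuming i32 values): (srl (srl x, 10), 5) becomes
// (srl x, 15), while (srl (srl x, 20), 20) folds to the constant 0.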
if (N0.getOpcode() == ISD::SRL) {
auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).uge(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
return DAG.getConstant(0, SDLoc(N), VT);
auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).ult(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
}
}
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
SDValue InnerShift = N0.getOperand(0);
// TODO - support non-uniform vector shift amounts.
if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
uint64_t c1 = N001C->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
EVT InnerShiftVT = InnerShift.getValueType();
EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
// srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
// This is only valid if OpSizeInBits + c1 == the size of the inner shift.
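// Illustrative example (assuming x : i64 truncated to i32, so c1 = 32):
// (srl (trunc (srl x, 32)), 8) --> (trunc (srl x, 40)).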
if (c1 + OpSizeInBits == InnerShiftSize) {
SDLoc DL(N);
if (c1 + c2 >= InnerShiftSize)
return DAG.getConstant(0, DL, VT);
SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
InnerShift.getOperand(0), NewShiftAmt);
return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
}
// In the more general case, we can clear the high bits after the shift:
// srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
if (N0.hasOneUse() && InnerShift.hasOneUse() &&
c1 + c2 < InnerShiftSize) {
SDLoc DL(N);
SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
InnerShift.getOperand(0), NewShiftAmt);
SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
OpSizeInBits - c2),
DL, InnerShiftVT);
SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
}
}
}
// fold (srl (shl x, c), c) -> (and x, cst2)
// TODO - (srl (shl x, c1), c2).
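// Illustrative example (assuming i32 values): (srl (shl x, 24), 24)
// becomes (and x, 255).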
if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
SDLoc DL(N);
SDValue Mask =
DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
AddToWorklist(Mask.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
// TODO - support non-uniform vector shift amounts.
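// Illustrative example (assuming x : i16 any-extended to i32):
// (srl (any_extend x), 4) --> (and (any_extend (srl x, 4)), 0x0FFFFFFF).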
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
EVT SmallVT = N0.getOperand(0).getValueType();
unsigned BitSize = SmallVT.getScalarSizeInBits();
if (N1C->getAPIntValue().uge(BitSize))
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
uint64_t ShiftAmt = N1C->getZExtValue();
SDLoc DL0(N0);
SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
N0.getOperand(0),
DAG.getConstant(ShiftAmt, DL0,
getShiftAmountTy(SmallVT)));
AddToWorklist(SmallShift.getNode());
APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
DAG.getConstant(Mask, DL, VT));
}
}
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
// bit, which is unmodified by sra.
if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
if (N0.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
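// Illustrative example (assuming i32 values): if only bit 3 of x can be
// nonzero, then (ctlz x) is 28 when that bit is set and 32 when it is clear,
// so (srl (ctlz x), 5) is equivalent to (xor (srl x, 3), 1).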
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
// If all of the bits input to the ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
APInt UnknownBits = ~Known.Zero;
if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
if (UnknownBits.isPowerOf2()) {
// Okay, we know that only the single bit specified by UnknownBits could
// be set on input to the CTLZ node. If this bit is set, the SRL will
// return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair to an
// SRL/XOR pair, which is likely to simplify more.
unsigned ShAmt = UnknownBits.countTrailingZeros();
SDValue Op = N0.getOperand(0);
if (ShAmt) {
SDLoc DL(N0);
Op = DAG.getNode(ISD::SRL, DL, VT, Op,
DAG.getConstant(ShAmt, DL,
getShiftAmountTy(Op.getValueType())));
AddToWorklist(Op.getNode());
}
SDLoc DL(N);
return DAG.getNode(ISD::XOR, DL, VT,
Op, DAG.getConstant(1, DL, VT));
}
}
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
}
// fold operands of srl based on knowledge that the low bits are not
// demanded.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
if (N1C && !N1C->isOpaque())
if (SDValue NewSRL = visitShiftByConstant(N))
return NewSRL;
// Attempt to convert a srl of a load into a narrower zero-extending load.
if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// Here is a common situation. We want to optimize:
//
// %a = ...
// %b = and i32 %a, 2
// %c = srl i32 %b, 1
// brcond i32 %c ...
//
// into
//
// %a = ...
// %b = and %a, 2
// %c = setcc eq %b, 0
// brcond %c ...
//
// However, after the source operand of the SRL has been optimized into an AND,
// the SRL itself may not be optimized further. Look for the BRCOND user and
// add it to the worklist.
if (N->hasOneUse()) {
SDNode *Use = *N->use_begin();
if (Use->getOpcode() == ISD::BRCOND)
AddToWorklist(Use);
else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
// Also look past the truncate.
Use = *Use->use_begin();
if (Use->getOpcode() == ISD::BRCOND)
AddToWorklist(Use);
}
}
// Try to transform this shift into a multiply-high if
// it matches the appropriate pattern detected in combineShiftToMULH.
if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
return MULH;
return SDValue();
}
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
bool IsFSHL = N->getOpcode() == ISD::FSHL;
unsigned BitWidth = VT.getScalarSizeInBits();
// fold (fshl N0, N1, 0) -> N0
// fold (fshr N0, N1, 0) -> N1
if (isPowerOf2_32(BitWidth))
if (DAG.MaskedValueIsZero(
N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
return IsFSHL ? N0 : N1;
auto IsUndefOrZero = [](SDValue V) {
return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
};
// TODO - support non-uniform vector shift amounts.
if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
EVT ShAmtTy = N2.getValueType();
// fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
if (Cst->getAPIntValue().uge(BitWidth)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
}
unsigned ShAmt = Cst->getZExtValue();
if (ShAmt == 0)
return IsFSHL ? N0 : N1;
// fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
// fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
// fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
// fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
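// Illustrative example (assuming i32 values): fshl(0, N1, 8) --> srl(N1, 24)
// and fshr(N0, 0, 8) --> shl(N0, 24).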
if (IsUndefOrZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
SDLoc(N), ShAmtTy));
if (IsUndefOrZero(N1))
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
SDLoc(N), ShAmtTy));
// fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
// fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
// TODO - bigendian support once we have test coverage.
// TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
// TODO - permit LHS EXTLOAD if extensions are shifted out.
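// Illustrative example (assuming little-endian i64 loads, ld0 at address A
// and ld1 at A + 8): (fshl ld1, ld0, 16) selects bytes A+6 .. A+13, so it
// can be replaced by a single i64 load from A + 6 (PtrOff = (64 - 16) / 8).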
if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
!DAG.getDataLayout().isBigEndian()) {
auto *LHS = dyn_cast<LoadSDNode>(N0);
auto *RHS = dyn_cast<LoadSDNode>(N1);
if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
LHS->getAddressSpace() == RHS->getAddressSpace() &&
(LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
ISD::isNON_EXTLoad(LHS)) {
if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
SDLoc DL(RHS);
uint64_t PtrOff =
IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
bool Fast = false;
if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
RHS->getAddressSpace(), NewAlign,
RHS->getMemOperand()->getFlags(), &Fast) &&
Fast) {
SDValue NewPtr = DAG.getMemBasePlusOffset(
RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
AddToWorklist(NewPtr.getNode());
SDValue Load = DAG.getLoad(
VT, DL, RHS->getChain(), NewPtr,
RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
// Replace the old load's chain with the new load's chain.
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
return Load;
}
}
}
}
}
// fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
// fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
// iff we know the shift amount is in range.
// TODO: when is it worth doing SUB(BW, N2) as well?
if (isPowerOf2_32(BitWidth)) {
APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
}
// fold (fshl N0, N0, N2) -> (rotl N0, N2)
// fold (fshr N0, N0, N2) -> (rotr N0, N2)
// TODO: Investigate flipping this rotate if only one is legal; if funnel shift
// is legal as well, we might be better off avoiding the non-constant (BW - N2).
unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
if (N0 == N1 && hasOperation(RotOpc, VT))
return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
// Simplify, based on bits shifted out of N0/N1.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
// Given an ABS node, detect the following pattern:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// Generate a UABD/SABD instruction.
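// Illustrative example (assuming a, b : i8 zero-extended to i32):
// (abs (sub (zext a), (zext b))) --> (zext (abdu a, b) to i32).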
static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
SDValue AbsOp1 = N->getOperand(0);
SDValue Op0, Op1;
if (AbsOp1.getOpcode() != ISD::SUB)
return SDValue();
Op0 = AbsOp1.getOperand(0);
Op1 = AbsOp1.getOperand(1);
unsigned Opc0 = Op0.getOpcode();
// Check if the operands of the sub are (zero|sign)-extended.
if (Opc0 != Op1.getOpcode() ||
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
return SDValue();
EVT VT1 = Op0.getOperand(0).getValueType();
EVT VT2 = Op1.getOperand(0).getValueType();
// Check if the operands are of same type and valid size.
unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
return SDValue();
Op0 = Op0.getOperand(0);
Op1 = Op1.getOperand(0);
SDValue ABD =
DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
}
SDValue DAGCombiner::visitABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (abs c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
// fold (abs (abs x)) -> (abs x)
if (N0.getOpcode() == ISD::ABS)
return N0;
// fold (abs x) -> x iff not-negative
if (DAG.SignBitIsZero(N0))
return N0;
if (SDValue ABD = combineABSToABD(N, DAG, TLI))
return ABD;
return SDValue();
}
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (bswap c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
// fold (bswap (bswap x)) -> x
if (N0.getOpcode() == ISD::BSWAP)
return N0->getOperand(0);
return SDValue();
}
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (bitreverse c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
// fold (bitreverse (bitreverse x)) -> x
if (N0.getOpcode() == ISD::BITREVERSE)
return N0.getOperand(0);
return SDValue();
}
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctlz c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
// If the value is known never to be zero, switch to the undef version.
if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
if (DAG.isKnownNeverZero(N0))
return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
}
return SDValue();
}
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctlz_zero_undef c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (cttz c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
// If the value is known never to be zero, switch to the undef version.
if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
if (DAG.isKnownNeverZero(N0))
return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
}
return SDValue();
}
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (cttz_zero_undef c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctpop c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
return SDValue();
}
// FIXME: This should be checking for no signed zeros on individual operands, as
// well as no nans.
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
SDValue RHS,
const TargetLowering &TLI) {
const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = LHS.getValueType();
return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
TLI.isProfitableToCombineMinNumMaxNum(VT) &&
DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
}
/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
ISD::CondCode CC, const TargetLowering &TLI,
SelectionDAG &DAG) {
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
switch (CC) {
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETLT:
case ISD::SETLE:
case ISD::SETULT:
case ISD::SETULE: {
// Since the operands are already known never to be NaN at this point,
// either fminnum or fminnum_ieee is OK. Try the IEEE version first, since
// fminnum is expanded in terms of it.
unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
return SDValue();
}
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETUGT:
case ISD::SETUGE: {
unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
return SDValue();
}
default:
return SDValue();
}
}
/// If a (v)select has a condition value that is a sign-bit test, try to smear
/// the condition operand sign-bit across the value width and use it as a mask.
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue C1 = N->getOperand(1);
SDValue C2 = N->getOperand(2);
if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
return SDValue();
EVT VT = N->getValueType(0);
if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
VT != Cond.getOperand(0).getValueType())
return SDValue();
// The inverted-condition + commuted-select variants of these patterns are
// canonicalized to these forms in IR.
SDValue X = Cond.getOperand(0);
SDValue CondC = Cond.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
isAllOnesOrAllOnesSplat(C2)) {
// i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
SDLoc DL(N);
SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
}
if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
// i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
SDLoc DL(N);
SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
}
return SDValue();
}
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
EVT CondVT = Cond.getValueType();
SDLoc DL(N);
if (!VT.isInteger())
return SDValue();
auto *C1 = dyn_cast<ConstantSDNode>(N1);
auto *C2 = dyn_cast<ConstantSDNode>(N2);
if (!C1 || !C2)
return SDValue();
// Only do this before legalization to avoid conflicting with target-specific
// transforms in the other direction (create a select from a zext/sext). There
// is also a target-independent combine here in DAGCombiner in the other
// direction for (select Cond, -1, 0) when the condition is not i1.
if (CondVT == MVT::i1 && !LegalOperations) {
if (C1->isNullValue() && C2->isOne()) {
// select Cond, 0, 1 --> zext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
return NotCond;
}
if (C1->isNullValue() && C2->isAllOnesValue()) {
// select Cond, 0, -1 --> sext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
return NotCond;
}
if (C1->isOne() && C2->isNullValue()) {
// select Cond, 1, 0 --> zext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return Cond;
}
if (C1->isAllOnesValue() && C2->isNullValue()) {
// select Cond, -1, 0 --> sext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
return Cond;
}
// Use a target hook because some targets may prefer to transform in the
// other direction.
if (TLI.convertSelectOfConstantsToMath(VT)) {
// For any constants that differ by 1, we can transform the select into an
// extend and add.
const APInt &C1Val = C1->getAPIntValue();
const APInt &C2Val = C2->getAPIntValue();
if (C1Val - 1 == C2Val) {
// select Cond, C1, C1-1 --> add (zext Cond), C1-1
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
if (C1Val + 1 == C2Val) {
// select Cond, C1, C1+1 --> add (sext Cond), C1+1
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
// select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
}
if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
return V;
}
return SDValue();
}
// fold (select Cond, 0, 1) -> (xor Cond, 1)
// We can't do this reliably if integer-based booleans have different contents
// from floating-point-based booleans. This is because we can't tell whether we
// have an integer-based boolean or a floating-point-based boolean unless we
// can find the SETCC that produced it and inspect its operands. This is
// fairly easy if Cond is the SETCC node itself, but it can potentially be
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
if (CondVT.isInteger() &&
TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
TargetLowering::ZeroOrOneBooleanContent &&
TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
TargetLowering::ZeroOrOneBooleanContent &&
C1->isNullValue() && C2->isOne()) {
SDValue NotCond =
DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
return NotCond;
return DAG.getZExtOrTrunc(NotCond, DL, VT);
}
return SDValue();
}
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
"Expected a (v)select");
SDValue Cond = N->getOperand(0);
SDValue T = N->getOperand(1), F = N->getOperand(2);
EVT VT = N->getValueType(0);
if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
return SDValue();
// select Cond, Cond, F --> or Cond, F
// select Cond, 1, F --> or Cond, F
if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
// select Cond, T, Cond --> and Cond, T
// select Cond, T, 0 --> and Cond, T
if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
// select Cond, T, 1 --> or (not Cond), T
if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
}
// select Cond, 0, F --> and (not Cond), F
if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
}
return SDValue();
}
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
SDLoc DL(N);
SDNodeFlags Flags = N->getFlags();
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
if (SDValue V = foldSelectOfConstants(N))
return V;
if (SDValue V = foldBoolSelectToLogic(N, DAG))
return V;
// If we can fold this based on the true/false value, do so.
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
if (VT0 == MVT::i1) {
// The code in this block deals with the following 2 equivalences:
// select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
// select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
// The target can specify its preferred form with the
// shouldNormalizeToSelectSequence() callback. However, we always transform
// to the right-hand form if the inner select already exists in the DAG,
// and we always transform to the left-hand form if we know that we can
// further optimize the combination of the conditions.
bool normalizeToSequence =
TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
// select (and Cond0, Cond1), X, Y
// -> select Cond0, (select Cond1, X, Y), Y
if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
SDValue InnerSelect =
DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
InnerSelect, N2, Flags);
// Cleanup on failure.
if (InnerSelect.use_empty())
recursivelyDeleteUnusedNodes(InnerSelect.getNode());
}
// select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
Cond1, N1, N2, Flags);
if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
InnerSelect, Flags);
// Cleanup on failure.
if (InnerSelect.use_empty())
recursivelyDeleteUnusedNodes(InnerSelect.getNode());
}
// select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
SDValue N1_0 = N1->getOperand(0);
SDValue N1_1 = N1->getOperand(1);
SDValue N1_2 = N1->getOperand(2);
if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
// Create the actual and node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
N2, Flags);
}
// Otherwise see if we can optimize the "and" to a better pattern.
if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
N2, Flags);
}
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
SDValue N2_0 = N2->getOperand(0);
SDValue N2_1 = N2->getOperand(1);
SDValue N2_2 = N2->getOperand(2);
if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
// Create the actual or node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
N2_2, Flags);
}
// Otherwise see if we can optimize to a better pattern.
if (SDValue Combined = visitORLike(N0, N2_0, N))
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
N2_2, Flags);
}
}
}
// select (not Cond), N1, N2 -> select Cond, N2, N1
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
SelectOp->setFlags(Flags);
return SelectOp;
}
// Fold selects based on a setcc into other things, such as min/max/abs.
if (N0.getOpcode() == ISD::SETCC) {
SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
// select (fcmp lt x, y), x, y -> fminnum x, y
// select (fcmp gt x, y), x, y -> fmaxnum x, y
//
// This is OK if we don't care what happens if either operand is a NaN.
if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
CC, TLI, DAG))
return FMinMax;
// Use 'unsigned add with overflow' to optimize an unsigned saturating add.
// This is conservatively limited to pre-legal-operations to give targets
// a chance to reverse the transform if they want to do that. Also, it is
// unlikely that the pattern would be formed late, so it's probably not
// worth going through the other checks.
if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
// select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
// uaddo Cond0, C; select uaddo.1, -1, uaddo.0
//
// The IR equivalent of this transform would have this form:
// %a = add %x, C
// %c = icmp ugt %x, ~C
// %r = select %c, -1, %a
// =>
// %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
// %u0 = extractvalue %u, 0
// %u1 = extractvalue %u, 1
// %r = select %u1, -1, %u0
SDVTList VTs = DAG.getVTList(VT, VT0);
SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
}
}
if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
(!LegalOperations &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
// Any flags available in a select/setcc fold will be on the setcc as they
// migrated from fcmp
Flags = N0.getNode()->getFlags();
SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
N2, N0.getOperand(2));
SelectNode->setFlags(Flags);
return SelectNode;
}
if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
return NewSel;
}
if (!VT.isVector())
if (SDValue BinOp = foldSelectOfBinops(N))
return BinOp;
return SDValue();
}
// This function assumes all the vselect's arguments are CONCAT_VECTORS
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = N->getValueType(0);
int NumElems = VT.getVectorNumElements();
assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
RHS.getOpcode() == ISD::CONCAT_VECTORS &&
Cond.getOpcode() == ISD::BUILD_VECTOR);
// CONCAT_VECTORS can take an arbitrary number of arguments. We only care
// about binary ones here.
if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
return SDValue();
// We're sure we have an even number of elements due to the
// concat_vectors we have as arguments to vselect.
// Skip BV elements until we find one that's not an UNDEF. After we find a
// non-UNDEF element, keep looping until we get to half the length of the
// BV and check that all the non-undef nodes are the same.
ConstantSDNode *BottomHalf = nullptr;
for (int i = 0; i < NumElems / 2; ++i) {
if (Cond->getOperand(i)->isUndef())
continue;
if (BottomHalf == nullptr)
BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
else if (Cond->getOperand(i).getNode() != BottomHalf)
return SDValue();
}
// Do the same for the second half of the BuildVector
ConstantSDNode *TopHalf = nullptr;
for (int i = NumElems / 2; i < NumElems; ++i) {
if (Cond->getOperand(i)->isUndef())
continue;
if (TopHalf == nullptr)
TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
else if (Cond->getOperand(i).getNode() != TopHalf)
return SDValue();
}
assert(TopHalf && BottomHalf &&
"One half of the selector was all UNDEFs and the other was all the "
"same value. This should have been addressed before this function.");
return DAG.getNode(
ISD::CONCAT_VECTORS, DL, VT,
BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
return false;
// For now we check only the LHS of the add.
SDValue LHS = Index.getOperand(0);
SDValue SplatVal = DAG.getSplatValue(LHS);
if (!SplatVal)
return false;
BasePtr = SplatVal;
Index = Index.getOperand(1);
return true;
}
// Fold sext/zext of index into index type.
bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
bool Scaled, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (Index.getOpcode() == ISD::ZERO_EXTEND) {
SDValue Op = Index.getOperand(0);
MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
Index = Op;
return true;
}
}
if (Index.getOpcode() == ISD::SIGN_EXTEND) {
SDValue Op = Index.getOperand(0);
MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
Index = Op;
return true;
}
}
return false;
}
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
SDValue Chain = MSC->getChain();
SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
SDValue StoreVal = MSC->getValue();
SDValue BasePtr = MSC->getBasePtr();
SDLoc DL(N);
// Zap scatters with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
if (refineUniformBase(BasePtr, Index, DAG)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(
DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
}
if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(
DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
}
return SDValue();
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
SDValue Chain = MST->getChain();
SDLoc DL(N);
// Zap masked stores with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
// If this is a masked store with an all-ones mask, we can use an unmasked store.
// FIXME: Can we do this for indexed, compressing, or truncating stores?
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
MST->isUnindexed() && !MST->isCompressingStore() &&
!MST->isTruncatingStore())
return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
MST->getBasePtr(), MST->getMemOperand());
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
return SDValue();
}
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
SDValue Chain = MGT->getChain();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
SDValue PassThru = MGT->getPassThru();
SDValue BasePtr = MGT->getBasePtr();
SDLoc DL(N);
// Zap gathers with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return CombineTo(N, PassThru, MGT->getChain());
if (refineUniformBase(BasePtr, Index, DAG)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
MGT->getMemoryVT(), DL, Ops,
MGT->getMemOperand(), MGT->getIndexType(),
MGT->getExtensionType());
}
if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
MGT->getMemoryVT(), DL, Ops,
MGT->getMemOperand(), MGT->getIndexType(),
MGT->getExtensionType());
}
return SDValue();
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
SDValue Mask = MLD->getMask();
SDLoc DL(N);
// Zap masked loads with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
// If this is a masked load with an all-ones mask, we can use an unmasked load.
// FIXME: Can we do this for indexed, expanding, or extending loads?
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
MLD->isUnindexed() && !MLD->isExpandingLoad() &&
MLD->getExtensionType() == ISD::NON_EXTLOAD) {
SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
MLD->getBasePtr(), MLD->getMemOperand());
return CombineTo(N, NewLd, NewLd.getValue(1));
}
// Try transforming N to an indexed load.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
return SDValue();
}
/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
!TLI.convertSelectOfConstantsToMath(VT) ||
!ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
!ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
return SDValue();
// Check if we can use the condition value to increment/decrement a single
// constant value. This simplifies a select to an add and removes a constant
// load/materialization from the general case.
bool AllAddOne = true;
bool AllSubOne = true;
unsigned Elts = VT.getVectorNumElements();
for (unsigned i = 0; i != Elts; ++i) {
SDValue N1Elt = N1.getOperand(i);
SDValue N2Elt = N2.getOperand(i);
if (N1Elt.isUndef() || N2Elt.isUndef())
continue;
if (N1Elt.getValueType() != N2Elt.getValueType())
continue;
const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
if (C1 != C2 + 1)
AllAddOne = false;
if (C1 != C2 - 1)
AllSubOne = false;
}
// Further simplifications for the extra-special cases where the constants are
// all 0 or all -1 should be implemented as folds of these patterns.
SDLoc DL(N);
if (AllAddOne || AllSubOne) {
// vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
// vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
}
// select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
APInt Pow2C;
if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
isNullOrNullSplat(N2)) {
SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
}
if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
return V;
// The general case for select-of-constants:
// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
// ...but that only makes sense if a vselect is slower than 2 logic ops, so
// leave that to a machine-specific pass.
return SDValue();
}
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
if (SDValue V = foldBoolSelectToLogic(N, DAG))
return V;
// vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
return DAG.getSelect(DL, VT, F, N2, N1);
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
// vselect (setl[te] X, 0), -X, X ->
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
if (N0.getOpcode() == ISD::SETCC) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
bool isAbs = false;
bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
(ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
if (isAbs) {
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
return DAG.getNode(ISD::ABS, DL, VT, LHS);
SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
DAG.getConstant(VT.getScalarSizeInBits() - 1,
DL, getShiftAmountTy(VT)));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
}
// vselect x, y (fcmp lt x, y) -> fminnum x, y
// vselect x, y (fcmp gt x, y) -> fmaxnum x, y
//
// This is OK if we don't care about what happens if either operand is a
// NaN.
//
if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
if (SDValue FMinMax =
combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
return FMinMax;
}
// If this select has a condition (setcc) with narrower operands than the
// select, try to widen the compare to match the select width.
// TODO: This should be extended to handle any constant.
// TODO: This could be extended to handle non-loading patterns, but that
// requires thorough testing to avoid regressions.
if (isNullOrNullSplat(RHS)) {
EVT NarrowVT = LHS.getValueType();
EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
EVT SetCCVT = getSetCCResultType(LHS.getValueType());
unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
unsigned WideWidth = WideVT.getScalarSizeInBits();
bool IsSigned = isSignedIntSetCC(CC);
auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
SetCCWidth != 1 && SetCCWidth < WideWidth &&
TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
// Both compare operands can be widened for free. The LHS can use an
// extended load, and the RHS is a constant:
// vselect (ext (setcc load(X), C)), N1, N2 -->
// vselect (setcc extload(X), C'), N1, N2
auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
EVT WideSetCCVT = getSetCCResultType(WideVT);
SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
}
}
// Match VSELECTs into add with unsigned saturation.
if (hasOperation(ISD::UADDSAT, VT)) {
// Check if one of the arms of the VSELECT is a vector with all bits set.
// If it's on the left side, invert the predicate to simplify the logic below.
SDValue Other;
ISD::CondCode SatCC = CC;
if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
Other = N2;
SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
} else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
Other = N1;
}
if (Other && Other.getOpcode() == ISD::ADD) {
SDValue CondLHS = LHS, CondRHS = RHS;
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
// Canonicalize condition operands.
if (SatCC == ISD::SETUGE) {
std::swap(CondLHS, CondRHS);
SatCC = ISD::SETULE;
}
// We can test against either of the addition operands.
// x <= x+y ? x+y : ~0 --> uaddsat x, y
// x+y >= x ? x+y : ~0 --> uaddsat x, y
if (SatCC == ISD::SETULE && Other == CondRHS &&
(OpLHS == CondLHS || OpRHS == CondLHS))
return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
(OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
CondLHS == OpLHS) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x >= ~C ? x+C : ~0 --> uaddsat x, C
auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
return Cond->getAPIntValue() == ~Op->getAPIntValue();
};
if (SatCC == ISD::SETULE &&
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
}
}
}
// Match VSELECTs into sub with unsigned saturation.
if (hasOperation(ISD::USUBSAT, VT)) {
// Check if one of the arms of the VSELECT is a zero vector. If it's on
// the left side, invert the predicate to simplify the logic below.
SDValue Other;
ISD::CondCode SatCC = CC;
if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
Other = N2;
SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
} else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
Other = N1;
}
if (Other && Other.getNumOperands() == 2) {
SDValue CondRHS = RHS;
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
if (Other.getOpcode() == ISD::SUB &&
LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
// Look for a general sub with unsigned saturation first.
// zext(x) >= y ? x - trunc(y) : 0
// --> usubsat(x,trunc(umin(y,SatLimit)))
// zext(x) > y ? x - trunc(y) : 0
// --> usubsat(x,trunc(umin(y,SatLimit)))
if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
DL);
}
if (OpLHS == LHS) {
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> usubsat x, y
// x > y ? x-y : 0 --> usubsat x, y
if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > C-1 ? x+-C : 0 --> usubsat x, C
auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
return (!Op && !Cond) ||
(Op && Cond &&
Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
};
if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
/*AllowUndefs*/ true)) {
OpRHS = DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), OpRHS);
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
// Another special case: If C was a sign bit, the sub has been
// canonicalized into a xor.
// FIXME: Would it be better to use computeKnownBits to determine
// whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> usubsat x, C
APInt SplatValue;
if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
SplatValue.isSignMask()) {
// Note that we have to rebuild the RHS constant here to
// ensure we don't rely on particular values of undef lanes.
OpRHS = DAG.getConstant(SplatValue, DL, VT);
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
}
}
}
}
}
}
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
// Fold (vselect all_ones, N1, N2) -> N1
if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
return N1;
// Fold (vselect all_zeros, N1, N2) -> N2
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N2;
// The ConvertSelectToConcatVector function assumes both of the above
// checks for (vselect (build_vector all{ones,zeros}) ...) have already
// been made and addressed.
if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
N2.getOpcode() == ISD::CONCAT_VECTORS &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
return CV;
}
if (SDValue V = foldVSelectOfConstants(N))
return V;
return SDValue();
}
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
SDValue N3 = N->getOperand(3);
SDValue N4 = N->getOperand(4);
ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
// fold select_cc lhs, rhs, x, x, cc -> x
if (N2 == N3)
return N2;
// Determine if the condition we're dealing with is constant
if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
CC, SDLoc(N), false)) {
AddToWorklist(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
if (!SCCC->isNullValue())
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
} else if (SCC->isUndef()) {
// When the condition is UNDEF, just return the first operand. This is
// coherent with DAG creation: no setcc node is created in this case.
return N2;
} else if (SCC.getOpcode() == ISD::SETCC) {
// Fold to a simpler select_cc
SDValue SelectOp = DAG.getNode(
ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
SCC.getOperand(1), N2, N3, SCC.getOperand(2));
SelectOp->setFlags(SCC->getFlags());
return SelectOp;
}
}
// If we can fold this based on the true/false value, do so.
if (SimplifySelectOps(N, N2, N3))
return SDValue(N, 0); // Don't revisit N.
// fold select_cc into other things, such as min/max/abs
return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}
SDValue DAGCombiner::visitSETCC(SDNode *N) {
// setcc is very commonly used as an argument to brcond. This pattern
// also lends itself to numerous combines and, as a result, it is desirable
// to keep the argument to a brcond as a setcc as much as possible.
bool PreferSetCC =
N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT VT = N->getValueType(0);
// SETCC(FREEZE(X), CONST, Cond)
// =>
// FREEZE(SETCC(X, CONST, Cond))
// This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
// isn't equivalent to true or false.
// For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
// FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
//
// This transformation is beneficial because visitBRCOND can fold
// BRCOND(FREEZE(X)) to BRCOND(X).
// Conservatively optimize integer comparisons only.
if (PreferSetCC) {
// Do this only when SETCC is going to be used by BRCOND.
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
bool Updated = false;
// Is 'X Cond C' always true or false?
auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
(Cond == ISD::SETLT && C->isMinSignedValue()) ||
(Cond == ISD::SETUGT && C->isAllOnesValue()) ||
(Cond == ISD::SETGT && C->isMaxSignedValue());
bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
(Cond == ISD::SETLE && C->isMaxSignedValue()) ||
(Cond == ISD::SETUGE && C->isNullValue()) ||
(Cond == ISD::SETGE && C->isMinSignedValue());
return True || False;
};
if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
N0 = N0->getOperand(0);
Updated = true;
}
}
if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
N0C)) {
N1 = N1->getOperand(0);
Updated = true;
}
}
if (Updated)
return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
}
SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
SDLoc(N), !PreferSetCC);
if (!Combined)
return SDValue();
// If we prefer to have a setcc and we don't have one, we'll try our best to
// recreate one using rebuildSetCC.
if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
SDValue NewSetCC = rebuildSetCC(Combined);
// We don't have anything interesting to combine to.
if (NewSetCC.getNode() == N)
return SDValue();
if (NewSetCC)
return NewSetCC;
}
return Combined;
}
SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Carry = N->getOperand(2);
SDValue Cond = N->getOperand(3);
// If Carry is false, fold to a regular SETCC.
if (isNullConstant(Carry))
return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
return SDValue();
}
/// Check if N satisfies:
/// N is used once.
/// N is a Load.
/// The load is compatible with ExtOpcode. This means that if the load has
/// an explicit zero/sign extension, ExtOpcode must have the same kind of
/// extension; otherwise any ExtOpcode is compatible.
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
if (!N.hasOneUse())
return false;
if (!isa<LoadSDNode>(N))
return false;
LoadSDNode *Load = cast<LoadSDNode>(N);
ISD::LoadExtType LoadExt = Load->getExtensionType();
if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
return true;
// Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
// extension.
if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
(LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
return false;
return true;
}
/// Fold
/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
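///
/// For example, assuming the target can legally sign-extend an i16 load to
/// i32, (i32 (sext (select cond, (i16 (load x)), (i16 (load y))))) becomes
/// (i32 (select cond, (i32 (sextload x)), (i32 (sextload y)))).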
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc DL(N);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
Opcode == ISD::ANY_EXTEND) &&
"Expected EXTEND dag node in input!");
if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
!N0.hasOneUse())
return SDValue();
SDValue Op1 = N0->getOperand(1);
SDValue Op2 = N0->getOperand(2);
if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
return SDValue();
auto ExtLoadOpcode = ISD::EXTLOAD;
if (Opcode == ISD::SIGN_EXTEND)
ExtLoadOpcode = ISD::SEXTLOAD;
else if (Opcode == ISD::ZERO_EXTEND)
ExtLoadOpcode = ISD::ZEXTLOAD;
LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
return SDValue();
SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
}
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
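///
/// For example, (i64 (zext (i32 Constant<7>))) is folded to the constant
/// (i64 7), and (v4i32 (sext (v4i16 build_vector of constants))) is folded
/// to a v4i32 build_vector of the sign-extended constants (assuming the i32
/// scalar type is legal when LegalTypes is set).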
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
SelectionDAG &DAG, bool LegalTypes) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc DL(N);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
&& "Expected EXTEND dag node in input!");
// fold (sext c1) -> c1
// fold (zext c1) -> c1
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
return DAG.getNode(Opcode, DL, VT, N0);
// fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
// fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
// fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
if (N0->getOpcode() == ISD::SELECT) {
SDValue Op1 = N0->getOperand(1);
SDValue Op2 = N0->getOperand(2);
if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
(Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
// For any_extend, choose sign extension of the constants to allow a
// possible further transform to sign_extend_inreg, i.e.
//
// t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
// t2: i64 = any_extend t1
// -->
// t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
// -->
// t4: i64 = sign_extend_inreg t3
unsigned FoldOpc = Opcode;
if (FoldOpc == ISD::ANY_EXTEND)
FoldOpc = ISD::SIGN_EXTEND;
return DAG.getSelect(DL, VT, N0->getOperand(0),
DAG.getNode(FoldOpc, DL, VT, Op1),
DAG.getNode(FoldOpc, DL, VT, Op2));
}
}
// fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
// fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
// fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
EVT SVT = VT.getScalarType();
if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
return SDValue();
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
SmallVector<SDValue, 8> Elts;
unsigned NumElts = VT.getVectorNumElements();
// For zero-extensions, UNDEF elements are still guaranteed to have their
// upper bits set to zero.
bool IsZext =
Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
for (unsigned i = 0; i != NumElts; ++i) {
SDValue Op = N0.getOperand(i);
if (Op.isUndef()) {
Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
continue;
}
SDLoc DL(Op);
// Get the constant value and if needed trunc it to the size of the type.
// Nodes like build_vector might have constants wider than the scalar type.
APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
else
Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
}
return DAG.getBuildVector(VT, DL, Elts);
}
// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
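// For example, if (load x) is also used by (setcc (load x), 42, seteq), that
// setcc can itself be rewritten to compare the extended load against the
// extended constant, so extending the load does not force the narrow value
// to stay live.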
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
unsigned ExtOpc,
SmallVectorImpl<SDNode *> &ExtendNodes,
const TargetLowering &TLI) {
bool HasCopyToRegUses = false;
bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
UE = N0.getNode()->use_end();
UI != UE; ++UI) {
SDNode *User = *UI;
if (User == N)
continue;
if (UI.getUse().getResNo() != N0.getResNo())
continue;
// FIXME: Only extend SETCC N, N and SETCC N, c for now.
if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
// Sign bits will be lost after a zext.
return false;
bool Add = false;
for (unsigned i = 0; i != 2; ++i) {
SDValue UseOp = User->getOperand(i);
if (UseOp == N0)
continue;
if (!isa<ConstantSDNode>(UseOp))
return false;
Add = true;
}
if (Add)
ExtendNodes.push_back(User);
continue;
}
// If truncates aren't free and there are users we can't
// extend, it isn't worthwhile.
if (!isTruncFree)
return false;
// Remember if this value is live-out.
if (User->getOpcode() == ISD::CopyToReg)
HasCopyToRegUses = true;
}
if (HasCopyToRegUses) {
bool BothLiveOut = false;
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI) {
SDUse &Use = UI.getUse();
if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
BothLiveOut = true;
break;
}
}
if (BothLiveOut)
// Both unextended and extended values are live out. There had better be
// a good reason for the transformation.
return ExtendNodes.size();
}
return true;
}
void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
SDValue OrigLoad, SDValue ExtLoad,
ISD::NodeType ExtType) {
// Extend SetCC uses if necessary.
SDLoc DL(ExtLoad);
for (SDNode *SetCC : SetCCs) {
SmallVector<SDValue, 4> Ops;
for (unsigned j = 0; j != 2; ++j) {
SDValue SOp = SetCC->getOperand(j);
if (SOp == OrigLoad)
Ops.push_back(ExtLoad);
else
Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
}
Ops.push_back(SetCC->getOperand(2));
CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
}
}
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT DstVT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
assert((N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND) &&
"Unexpected node type (not an extend)!");
// fold (sext (load x)) to multiple smaller sextloads; same for zext.
// For example, on a target with legal v4i32, but illegal v8i32, turn:
// (v8i32 (sext (v8i16 (load x))))
// into:
// (v8i32 (concat_vectors (v4i32 (sextload x)),
// (v4i32 (sextload (x + 16)))))
// Where uses of the original load, i.e.:
// (v8i16 (load x))
// are replaced with:
// (v8i16 (truncate
// (v8i32 (concat_vectors (v4i32 (sextload x)),
// (v4i32 (sextload (x + 16)))))))
//
// This combine is only applicable to illegal, but splittable, vectors.
// All legal types, and illegal non-vector types, are handled elsewhere.
// This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
//
if (N0->getOpcode() != ISD::LOAD)
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
!N0.hasOneUse() || !LN0->isSimple() ||
!DstVT.isVector() || !DstVT.isPow2VectorType() ||
!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
return SDValue();
SmallVector<SDNode *, 4> SetCCs;
if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
return SDValue();
ISD::LoadExtType ExtType =
N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
// Try to split the vector types to get down to legal types.
EVT SplitSrcVT = SrcVT;
EVT SplitDstVT = DstVT;
while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
SplitSrcVT.getVectorNumElements() > 1) {
SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
}
if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
return SDValue();
assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
SDLoc DL(N);
const unsigned NumSplits =
DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
const unsigned Stride = SplitSrcVT.getStoreSize();
SmallVector<SDValue, 4> Loads;
SmallVector<SDValue, 4> Chains;
SDValue BasePtr = LN0->getBasePtr();
for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
const unsigned Offset = Idx * Stride;
const Align Align = commonAlignment(LN0->getAlign(), Offset);
SDValue SplitLoad = DAG.getExtLoad(
ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
// Simplify TF.
AddToWorklist(NewChain.getNode());
CombineTo(N, NewValue);
// Replace uses of the original load (before extension)
// with a truncate of the concatenated sextloaded vectors.
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
CombineTo(N0.getNode(), Trunc, NewChain);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
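// For example, assuming an i8 -> i32 zextload is legal on the target,
//   (i32 (zext (and (srl (i8 (load x)), 1), 0x7f)))
// becomes
//   (i32 (and (srl (i32 (zextload x)), 1), 0x7f))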
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
assert(N->getOpcode() == ISD::ZERO_EXTEND);
EVT VT = N->getValueType(0);
EVT OrigVT = N->getOperand(0).getValueType();
if (TLI.isZExtFree(OrigVT, VT))
return SDValue();
// and/or/xor
SDValue N0 = N->getOperand(0);
if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) ||
N0.getOperand(1).getOpcode() != ISD::Constant ||
(LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
return SDValue();
// shl/shr
SDValue N1 = N0->getOperand(0);
if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
N1.getOperand(1).getOpcode() != ISD::Constant ||
(LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
return SDValue();
// load
if (!isa<LoadSDNode>(N1.getOperand(0)))
return SDValue();
LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
EVT MemVT = Load->getMemoryVT();
if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
return SDValue();
// If the shift op is SHL, the logic op must be AND, otherwise the result
// will be wrong.
if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
return SDValue();
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SmallVector<SDNode*, 4> SetCCs;
if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
ISD::ZERO_EXTEND, SetCCs, TLI))
return SDValue();
// Actually do the transformation.
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
Load->getChain(), Load->getBasePtr(),
Load->getMemoryVT(), Load->getMemOperand());
SDLoc DL1(N1);
SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
N1.getOperand(1));
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL0(N0);
SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
DAG.getConstant(Mask, DL0, VT));
ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
CombineTo(N, And);
if (SDValue(Load, 0).hasOneUse()) {
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
} else {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
Load->getValueType(0), ExtLoad);
CombineTo(Load, Trunc, ExtLoad.getValue(1));
}
// N0 is dead at this point.
recursivelyDeleteUnusedNodes(N0.getNode());
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
/// sizes for a select condition and other operands should be more efficient.
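///
/// For example, on a target where comparing v4i32 operands produces a v4i32
/// mask: (v4i32 (trunc (vselect (setcc v4i32 a, b, cc), v4i64 x, v4i64 y)))
/// becomes (v4i32 (vselect (setcc v4i32 a, b, cc), (trunc x), (trunc y))),
/// so the select condition and its other operands end up with matching sizes.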
SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
unsigned CastOpcode = Cast->getOpcode();
assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
CastOpcode == ISD::FP_ROUND) &&
"Unexpected opcode for vector select narrowing/widening");
// We only do this transform before legal ops because the pattern may be
// obfuscated by target-specific operations after legalization. Do not create
// an illegal select op, however, because that may be difficult to lower.
EVT VT = Cast->getValueType(0);
if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
return SDValue();
SDValue VSel = Cast->getOperand(0);
if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
VSel.getOperand(0).getOpcode() != ISD::SETCC)
return SDValue();
// Does the setcc have the same vector size as the casted select?
SDValue SetCC = VSel.getOperand(0);
EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
return SDValue();
// cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
SDValue A = VSel.getOperand(1);
SDValue B = VSel.getOperand(2);
SDValue CastA, CastB;
SDLoc DL(Cast);
if (CastOpcode == ISD::FP_ROUND) {
// FP_ROUND (fptrunc) has an extra flag operand to pass along.
CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
} else {
CastA = DAG.getNode(CastOpcode, DL, VT, A);
CastB = DAG.getNode(CastOpcode, DL, VT, B);
}
return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
}
// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
const TargetLowering &TLI, EVT VT,
bool LegalOperations, SDNode *N,
SDValue N0, ISD::LoadExtType ExtLoadType) {
SDNode *N0Node = N0.getNode();
bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
: ISD::isZEXTLoad(N0Node);
if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
!ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((LegalOperations || !LN0->isSimple() ||
VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
return SDValue();
SDValue ExtLoad =
DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
LN0->getBasePtr(), MemVT, LN0->getMemOperand());
Combiner.CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
if (LN0->use_empty())
Combiner.recursivelyDeleteUnusedNodes(LN0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target.
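// For example, (i32 (sext (i16 (load x)))) is rewritten to use
// (i32 (sextload x)) directly; any remaining users of the original i16 load
// are then fed by a truncate of the new extending load.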
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
const TargetLowering &TLI, EVT VT,
bool LegalOperations, SDNode *N, SDValue N0,
ISD::LoadExtType ExtLoadType,
ISD::NodeType ExtOpc) {
if (!ISD::isNON_EXTLoad(N0.getNode()) ||
!ISD::isUNINDEXEDLoad(N0.getNode()) ||
((LegalOperations || VT.isVector() ||
!cast<LoadSDNode>(N0)->isSimple()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
return {};
bool DoXform = true;
SmallVector<SDNode *, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
if (VT.isVector())
DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (!DoXform)
return {};
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
Combiner.CombineTo(N, ExtLoad);
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
Combiner.recursivelyDeleteUnusedNodes(LN0);
} else {
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
const TargetLowering &TLI, EVT VT,
SDNode *N, SDValue N0,
ISD::LoadExtType ExtLoadType,
ISD::NodeType ExtOpc) {
if (!N0.hasOneUse())
return SDValue();
MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
return SDValue();
if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
return SDValue();
if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
return SDValue();
SDLoc dl(Ld);
SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
SDValue NewLoad = DAG.getMaskedLoad(
VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
ExtLoadType, Ld->isExpandingLoad());
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
return NewLoad;
}
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
bool LegalOperations) {
assert((N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
SDValue SetCC = N->getOperand(0);
if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
!SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
return SDValue();
SDValue X = SetCC.getOperand(0);
SDValue Ones = SetCC.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
EVT VT = N->getValueType(0);
EVT XVT = X.getValueType();
// setge X, C is canonicalized to setgt, so we do not need to match that
// pattern. The setlt sibling is folded in SimplifySelectCC() because it does
// not require the 'not' op.
if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
// Invert and smear/shift the sign bit:
// sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
// zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
SDLoc DL(N);
unsigned ShCt = VT.getSizeInBits() - 1;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
SDValue NotX = DAG.getNOT(DL, X, VT);
SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
auto ShiftOpcode =
N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
}
}
return SDValue();
}
SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::SETCC)
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
EVT VT = N->getValueType(0);
EVT N00VT = N00.getValueType();
SDLoc DL(N);
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// the same size as the compared operands. Try to optimize sext(setcc())
// if this is the case.
if (VT.isVector() && !LegalOperations &&
TLI.getBooleanContents(N00VT) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
EVT SVT = getSetCCResultType(N00VT);
// If we already have the desired type, don't change it.
if (SVT != N0.getValueType()) {
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == SVT.getSizeInBits())
return DAG.getSetCC(DL, VT, N00, N01, CC);
// If the desired elements are smaller or larger than the source
// elements, we can use a matching integer vector type and then
// truncate/sign extend.
EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
if (SVT == MatchingVecType) {
SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
return DAG.getSExtOrTrunc(VsetCC, DL, VT);
}
}
// Try to eliminate the sext of a setcc by zexting the compare operands.
if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
!TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// We have an unsupported narrow vector compare op that would be legal
// if extended to the destination type. See if the compare operands
// can be freely extended to the destination type.
auto IsFreeToExtend = [&](SDValue V) {
if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
return true;
// Match a simple, non-extended load that can be converted to a
// legal {z/s}ext-load.
// TODO: Allow widening of an existing {z/s}ext-load?
if (!(ISD::isNON_EXTLoad(V.getNode()) &&
ISD::isUNINDEXEDLoad(V.getNode()) &&
cast<LoadSDNode>(V)->isSimple() &&
TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
return false;
// Non-chain users of this value must either be the setcc in this
// sequence or extends that can be folded into the new {z/s}ext-load.
for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
// Skip uses of the chain and the setcc.
SDNode *User = *UI;
if (UI.getUse().getResNo() != 0 || User == N0.getNode())
continue;
// Extra users must have exactly the same cast we are about to create.
// TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
// is enhanced similarly.
if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
return false;
}
return true;
};
if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
}
}
}
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
// Here, T can be 1 or -1, depending on the type of the setcc and
// getBooleanContents().
unsigned SetCCWidth = N0.getScalarValueSizeInBits();
// To determine the "true" side of the select, we need to know the high bit
// of the value returned by the setcc if it evaluates to true.
// If the type of the setcc is i1, then the true case of the select is just
// sext(i1 1), that is, -1.
// If the type of the setcc is larger (say, i8) then the value of the high
// bit depends on getBooleanContents(), so ask TLI for a real "true" value
// of the appropriate width.
SDValue ExtTrueVal = (SetCCWidth == 1)
? DAG.getAllOnesConstant(DL, VT)
: DAG.getBoolConstant(true, DL, VT, N00VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
return SCC;
if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
EVT SetCCVT = getSetCCResultType(N00VT);
// Don't do this transform for i1 because there's a select transform
// that would reverse it.
// TODO: We should not do this transform at all without a target hook
// because a sext is likely cheaper than a select?
if (SetCCVT.getScalarSizeInBits() != 1 &&
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
}
}
return SDValue();
}
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// See if the value being truncated is already sign extended. If so, just
// eliminate the trunc/sext pair.
SDValue Op = N0.getOperand(0);
unsigned OpBits = Op.getScalarValueSizeInBits();
unsigned MidBits = N0.getScalarValueSizeInBits();
unsigned DestBits = VT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
if (OpBits == DestBits) {
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
// bits, it is already fully sign-extended and can be returned directly.
if (NumSignBits > DestBits-MidBits)
return Op;
} else if (OpBits < DestBits) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
if (NumSignBits > OpBits-MidBits)
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
} else {
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
if (NumSignBits > OpBits-MidBits)
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
}
// fold (sext (truncate x)) -> (sextinreg x).
if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
N0.getValueType())) {
if (OpBits < DestBits)
Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
else if (OpBits > DestBits)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
DAG.getValueType(N0.getValueType()));
}
}
// Try to simplify (sext (load x)).
if (SDValue foldedExt =
tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
ISD::SEXTLOAD, ISD::SIGN_EXTEND))
return foldedExt;
if (SDValue foldedExt =
tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
ISD::SIGN_EXTEND))
return foldedExt;
// fold (sext (load x)) to multiple smaller sextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
return ExtLoad;
// Try to simplify (sext (sextload x)).
if (SDValue foldedExt = tryToFoldExtOfExtload(
DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
return foldedExt;
// fold (sext (and/or/xor (load x), cst)) ->
// (and/or/xor (sextload x), (sext cst))
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
EVT MemVT = LN00->getMemoryVT();
if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
SmallVector<SDNode*, 4> SetCCs;
bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
LN00->getChain(), LN00->getBasePtr(),
LN00->getMemoryVT(),
LN00->getMemOperand());
APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
bool NoReplaceTruncAnd = !N0.hasOneUse();
bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
CombineTo(N, And);
// If N0 has multiple uses, change other uses as well.
if (NoReplaceTruncAnd) {
SDValue TruncAnd =
DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
CombineTo(N0.getNode(), TruncAnd);
}
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
} else {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
LN00->getValueType(0), ExtLoad);
CombineTo(LN00, Trunc, ExtLoad.getValue(1));
}
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
}
}
if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
return V;
if (SDValue V = foldSextSetcc(N))
return V;
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
// Eliminate this sign extend by doing a negation in the destination type:
// sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
isNullOrNullSplat(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
}
// Eliminate this sign extend by doing a decrement in the destination type:
// sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
// fold sext (not i1 X) -> add (zext i1 X), -1
// TODO: This could be extended to handle bool vectors.
if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
(!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
TLI.isOperationLegal(ISD::ADD, VT)))) {
// If we can eliminate the 'not', the sext form should be better
if (SDValue NewXor = visitXOR(N0.getNode())) {
// Returning N0 is a form of in-visit replacement that may have
// invalidated N0.
if (NewXor.getNode() == N0.getNode()) {
// Return SDValue here as the xor should have already been replaced in
// this sext.
return SDValue();
} else {
// Return a new sext with the new xor.
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
}
}
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
return Res;
return SDValue();
}
// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero/one in Known.
// This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
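// Besides a plain TRUNCATE, this also recognizes (setcc X, 0, setne) where
// every bit of X other than bit 0 is known to be zero, which is equivalent
// to truncating X to i1.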
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
KnownBits &Known) {
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
Known = DAG.computeKnownBits(Op);
return true;
}
if (N.getOpcode() != ISD::SETCC ||
N.getValueType().getScalarType() != MVT::i1 ||
cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
return false;
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
assert(Op0.getValueType() == Op1.getValueType());
if (isNullOrNullSplat(Op0))
Op = Op1;
else if (isNullOrNullSplat(Op1))
Op = Op0;
else
return false;
Known = DAG.computeKnownBits(Op);
return (Known.Zero | 1).isAllOnesValue();
}
/// Given an extending node with a pop-count operand, if the target does not
/// support a pop-count in the narrow source type but does support it in the
/// destination type, widen the pop-count to the destination type.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
SDValue CtPop = Extend->getOperand(0);
if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
return SDValue();
EVT VT = Extend->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
return SDValue();
// zext (ctpop X) --> ctpop (zext X)
SDLoc DL(Extend);
SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
}
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
N0.getOperand(0));
// fold (zext (truncate x)) -> (zext x) or
// (zext (truncate x)) -> (truncate x)
// This is valid when the truncated bits of x are already zero.
SDValue Op;
KnownBits Known;
if (isTruncateOf(DAG, N0, Op, Known)) {
APInt TruncatedBits =
(Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
APInt(Op.getScalarValueSizeInBits(), 0) :
APInt::getBitsSet(Op.getScalarValueSizeInBits(),
N0.getScalarValueSizeInBits(),
std::min(Op.getScalarValueSizeInBits(),
VT.getScalarSizeInBits()));
if (TruncatedBits.isSubsetOf(Known.Zero))
return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
}
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
EVT SrcVT = N0.getOperand(0).getValueType();
EVT MinVT = N0.getValueType();
// Try to mask before the extension to avoid having to generate a larger mask,
// possibly over several sub-vectors.
if (SrcVT.bitsLT(VT) && VT.isVector()) {
if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
SDValue Op = N0.getOperand(0);
Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
AddToWorklist(Op.getNode());
SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
// Transfer the debug info; the new node is equivalent to N0.
DAG.transferDbgValues(N0, ZExtOrTrunc);
return ZExtOrTrunc;
}
}
if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
AddToWorklist(Op.getNode());
SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
// We may safely transfer the debug info describing the truncate node over
// to the equivalent and operation.
DAG.transferDbgValues(N0, And);
return And;
}
}
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
// if either of the casts is not free.
if (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType()) ||
!TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
// Try to simplify (zext (load x)).
if (SDValue foldedExt =
tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
return foldedExt;
if (SDValue foldedExt =
tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
ISD::ZERO_EXTEND))
return foldedExt;
// fold (zext (load x)) to multiple smaller zextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
return ExtLoad;
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
// Unless (and (load x) cst) will match as a zextload already and has
// additional users.
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
EVT MemVT = LN00->getMemoryVT();
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse()) {
if (N0.getOpcode() == ISD::AND) {
auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
EVT LoadResultTy = AndC->getValueType(0);
EVT ExtVT;
if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
DoXform = false;
}
}
if (DoXform)
DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
LN00->getChain(), LN00->getBasePtr(),
LN00->getMemoryVT(),
LN00->getMemOperand());
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
bool NoReplaceTruncAnd = !N0.hasOneUse();
bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
CombineTo(N, And);
// If N0 has multiple uses, change other uses as well.
if (NoReplaceTruncAnd) {
SDValue TruncAnd =
DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
CombineTo(N0.getNode(), TruncAnd);
}
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
} else {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
LN00->getValueType(0), ExtLoad);
CombineTo(LN00, Trunc, ExtLoad.getValue(1));
}
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
}
}
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
return ZExtLoad;
// Try to simplify (zext (zextload x)).
if (SDValue foldedExt = tryToFoldExtOfExtload(
DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
return foldedExt;
if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
return V;
if (N0.getOpcode() == ISD::SETCC) {
// Only do this before legalize for now.
if (!LegalOperations && VT.isVector() &&
N0.getValueType().getVectorElementType() == MVT::i1) {
EVT N00VT = N0.getOperand(0).getValueType();
if (getSetCCResultType(N00VT) == N0.getValueType())
return SDValue();
// We know that the # elements of the results is the same as the #
// elements of the compare (and the # elements of the compare result for
// that matter). Check to see that they are the same size. If so, we know
// that the element size of the sext'd result matches the element size of
// the compare operands.
SDLoc DL(N);
if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
// zext(setcc) -> zext_in_reg(vsetcc) for vectors.
SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
N0.getOperand(1), N0.getOperand(2));
return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
}
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/any extend followed by zext_in_reg.
EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
N0.getOperand(1), N0.getOperand(2));
return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
N0.getValueType());
}
// zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
SDLoc DL(N);
EVT N0VT = N0.getValueType();
EVT N00VT = N0.getOperand(0).getValueType();
if (SDValue SCC = SimplifySelectCC(
DL, N0.getOperand(0), N0.getOperand(1),
DAG.getBoolConstant(true, DL, N0VT, N00VT),
DAG.getBoolConstant(false, DL, N0VT, N00VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
}
// (zext (shl (zext x), cst)) -> (shl (zext x), cst)
if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.hasOneUse()) {
SDValue ShAmt = N0.getOperand(1);
if (N0.getOpcode() == ISD::SHL) {
SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
InnerZExt.getOperand(0).getValueSizeInBits();
if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
return SDValue();
}
SDLoc DL(N);
// Ensure that the shift amount is wide enough for the shifted value.
if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
return DAG.getNode(N0.getOpcode(), DL, VT,
DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
ShAmt);
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
if (SDValue NewCtPop = widenCtPop(N, DAG))
return NewCtPop;
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
return Res;
return SDValue();
}
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
if (N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND)
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
// fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (aext (truncate x))
if (N0.getOpcode() == ISD::TRUNCATE)
return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
// Fold (aext (and (trunc x), cst)) -> (and x, cst)
// if the trunc is not free.
if (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType())) {
SDLoc DL(N);
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, DL, VT);
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
// None of the supported targets knows how to perform load and any_ext
// on vectors in one instruction, so attempt to fold to zext instead.
if (VT.isVector()) {
// Try to simplify (zext (load x)).
if (SDValue foldedExt =
tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
return foldedExt;
} else if (ISD::isNON_EXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
bool DoXform = true;
SmallVector<SDNode *, 4> SetCCs;
if (!N0.hasOneUse())
DoXform =
ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(), LN0->getBasePtr(),
N0.getValueType(), LN0->getMemOperand());
ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = N0.hasOneUse();
CombineTo(N, ExtLoad);
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
recursivelyDeleteUnusedNodes(LN0);
} else {
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
CombineTo(LN0, Trunc, ExtLoad.getValue(1));
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (aext (zextload x)) -> (aext (truncate (zextload x)))
// fold (aext (sextload x)) -> (aext (truncate (sextload x)))
// fold (aext ( extload x)) -> (aext (truncate (extload x)))
if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
ISD::LoadExtType ExtType = LN0->getExtensionType();
EVT MemVT = LN0->getMemoryVT();
if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
VT, LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
recursivelyDeleteUnusedNodes(LN0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
if (N0.getOpcode() == ISD::SETCC) {
// For vectors:
// aext(setcc) -> vsetcc
// aext(setcc) -> truncate(vsetcc)
// aext(setcc) -> aext(vsetcc)
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
EVT N00VT = N0.getOperand(0).getValueType();
if (getSetCCResultType(N00VT) == N0.getValueType())
return SDValue();
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N00VT.getSizeInBits())
return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/any extend
EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
SDLoc DL(N);
if (SDValue SCC = SimplifySelectCC(
DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
return SCC;
}
if (SDValue NewCtPop = widenCtPop(N, DAG))
return NewCtPop;
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
return Res;
return SDValue();
}
SDValue DAGCombiner::visitAssertExt(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT AssertVT = cast<VTSDNode>(N1)->getVT();
// fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
if (N0.getOpcode() == Opcode &&
AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
return N0;
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == Opcode) {
// We have an assert, truncate, assert sandwich. Make one stronger assert
// by applying the smaller of the two asserted types to the larger source value.
// This eliminates the later assert:
// assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
// assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
SDValue BigA = N0.getOperand(0);
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
"Asserting zero/sign-extended bits to a type larger than the "
"truncated destination does not provide information");
SDLoc DL(N);
EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
BigA.getOperand(0), MinAssertVTVal);
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
}
// If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
// than X, just move the AssertZext in front of the truncate and drop the
// AssertSext.
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::AssertSext &&
Opcode == ISD::AssertZext) {
SDValue BigA = N0.getOperand(0);
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
"Asserting zero/sign-extended bits to a type larger than the "
"truncated destination does not provide information");
if (AssertVT.bitsLT(BigA_AssertVT)) {
SDLoc DL(N);
SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
BigA.getOperand(0), N1);
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
}
}
return SDValue();
}
SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
SDLoc DL(N);
Align AL = cast<AssertAlignSDNode>(N)->getAlign();
SDValue N0 = N->getOperand(0);
// Fold (assertalign (assertalign x, AL0), AL1) ->
// (assertalign x, max(AL0, AL1))
if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
return DAG.getAssertAlign(DL, N0.getOperand(0),
std::max(AL, AAN->getAlign()));
// In rare cases, there are trivial arithmetic ops in source operands. Sink
// this assert down to the source operands so that those arithmetic ops can
// be exposed to DAG combining.
switch (N0.getOpcode()) {
default:
break;
case ISD::ADD:
case ISD::SUB: {
unsigned AlignShift = Log2(AL);
SDValue LHS = N0.getOperand(0);
SDValue RHS = N0.getOperand(1);
unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
if (LHSAlignShift < AlignShift)
LHS = DAG.getAssertAlign(DL, LHS, AL);
if (RHSAlignShift < AlignShift)
RHS = DAG.getAssertAlign(DL, RHS, AL);
return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
}
break;
}
}
return SDValue();
}
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// of the narrower type, transform it into a narrower load from the original
/// address plus an offset of N bits. Also narrow the load if the result is
/// masked with an AND that effectively produces a smaller type. If the result
/// is to be extended, also fold the extension to form an extending load.
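///
/// For example, on a little-endian target,
///   (i8 (trunc (srl (i32 (load x)), 16)))
/// can become a direct (i8 (load (x + 2))), and
///   (and (i32 (load x)), 0xffff)
/// can become an i16 -> i32 zero-extending load when that extload is legal.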
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT ExtVT = VT;
// This transformation isn't valid for vector loads.
if (VT.isVector())
return SDValue();
unsigned ShAmt = 0;
bool HasShiftedOffset = false;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT and then
// sign-extending back to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
} else if (Opc == ISD::SRL) {
// Another special case: SRL is basically zero-extending a narrower value,
// or it may be shifting a higher subword, half or byte into the lowest
// bits.
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!N01 || !LN0)
return SDValue();
uint64_t ShiftAmt = N01->getZExtValue();
uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
else
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getScalarSizeInBits() - ShiftAmt);
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!AndC)
return SDValue();
const APInt &Mask = AndC->getAPIntValue();
unsigned ActiveBits = 0;
if (Mask.isMask()) {
ActiveBits = Mask.countTrailingOnes();
} else if (Mask.isShiftedMask()) {
ShAmt = Mask.countTrailingZeros();
APInt ShiftedMask = Mask.lshr(ShAmt);
ActiveBits = ShiftedMask.countTrailingOnes();
HasShiftedOffset = true;
} else
return SDValue();
ExtType = ISD::ZEXTLOAD;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
}
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
SDValue SRL = N0;
if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
ShAmt = ConstShift->getZExtValue();
unsigned EVTBits = ExtVT.getScalarSizeInBits();
// Is the shift amount a multiple of the size of ExtVT?
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
// Is the load width a multiple of the size of ExtVT?
if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
return SDValue();
}
// At this point, we must have a load or else we can't do the transform.
auto *LN0 = dyn_cast<LoadSDNode>(N0);
if (!LN0) return SDValue();
// Because a SRL must be assumed to *need* to zero-extend the high bits
// (as opposed to anyext the high bits), we can't combine the zextload
// lowering of SRL and an sextload.
if (LN0->getExtensionType() == ISD::SEXTLOAD)
return SDValue();
// If the shift amount is larger than the input type then we're not
// accessing any of the loaded bytes. If the load was a zextload/extload
// then the result of the shift+trunc is zero/undef (handled elsewhere).
if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
return SDValue();
// If the SRL is only used by a masking AND, we may be able to adjust
// the ExtVT to make the AND redundant.
SDNode *Mask = *(SRL->use_begin());
if (Mask->getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Mask->getOperand(1))) {
const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
if (ShiftMask.isMask()) {
EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
ShiftMask.countTrailingOnes());
// If the mask is smaller, recompute the type.
if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
ExtVT = MaskedVT;
}
}
}
}
// If the load is shifted left (and the result isn't shifted back right),
// we can fold the truncate through the shift.
unsigned ShLeftAmt = 0;
if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShLeftAmt = N01->getZExtValue();
N0 = N0.getOperand(0);
}
}
// If we haven't found a load, we can't narrow it.
if (!isa<LoadSDNode>(N0))
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
// Reducing the width of a volatile load is illegal. For atomics, we may be
// able to reduce the width provided we never widen again. (see D66309)
if (!LN0->isSimple() ||
!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
unsigned LVTStoreBits =
LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
return LVTStoreBits - EVTStoreBits - ShAmt;
};
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (DAG.getDataLayout().isBigEndian())
ShAmt = AdjustBigEndianShift(ShAmt);
uint64_t PtrOff = ShAmt / 8;
Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
TypeSize::Fixed(PtrOff), DL, Flags);
AddToWorklist(NewPtr.getNode());
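// Re-issue the load with the narrower (possibly extending) value type at the
// adjusted pointer, reusing the original chain, flags and AA info.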
SDValue Load;
if (ExtType == ISD::NON_EXTLOAD)
Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
else
Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
NewAlign, LN0->getMemOperand()->getFlags(),
LN0->getAAInfo());
// Replace the old load's chain with the new load's chain.
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
// Shift the result left, if we've swallowed a left shift.
SDValue Result = Load;
if (ShLeftAmt != 0) {
EVT ShImmTy = getShiftAmountTy(Result.getValueType());
if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
ShImmTy = VT;
// If the shift amount is as large as the result size (but, presumably,
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
if (ShLeftAmt >= VT.getScalarSizeInBits())
Result = DAG.getConstant(0, DL, VT);
else
Result = DAG.getNode(ISD::SHL, DL, VT,
Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}
if (HasShiftedOffset) {
// Recalculate the shift amount after it has been altered to calculate
// the offset.
if (DAG.getDataLayout().isBigEndian())
ShAmt = AdjustBigEndianShift(ShAmt);
// We're using a shifted mask, so the load now has an offset. This means
// that data has been loaded into lower bytes than it would have been
// before, so we need to shl the loaded data into the correct position in
// the register.
SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
}
// Return the new loaded value.
return Result;
}
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT ExtVT = cast<VTSDNode>(N1)->getVT();
unsigned VTBits = VT.getScalarSizeInBits();
unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
// sext_in_reg(undef) = 0 because the top bits will all be the same.
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
// fold (sext_in_reg c1) -> c1
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
N1);
// fold (sext_in_reg (sext x)) -> (sext x)
// fold (sext_in_reg (aext x)) -> (sext x)
// if x is small enough or if we know that x has more than 1 sign bit and the
// sign_extend_inreg is extending from one of them.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
if ((N00Bits <= ExtVTBits ||
(N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
// fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
// if x is small enough or if we know that x has more than 1 sign bit and the
// sign_extend_inreg is extending from one of them.
if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
unsigned DstElts = N0.getValueType().getVectorMinNumElements();
unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
if ((N00Bits == ExtVTBits ||
(!IsZext && (N00Bits < ExtVTBits ||
(N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
ExtVTBits))) &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
}
// fold (sext_in_reg (zext x)) -> (sext x)
// iff we are extending the source sign bit.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getScalarValueSizeInBits() == ExtVTBits &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
// fold operands of sext_in_reg based on knowledge that the top bits are not
// demanded.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (sext_in_reg (load x)) -> (smaller sextload x)
// fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
// fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1));
}
}
// fold (sext_inreg (extload x)) -> (sextload x)
// If sextload is not supported by the target, we can only do the combine
// when the load has one use. Doing otherwise can block folding the extload
// with other extends that the target does support.
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
N0.hasOneUse()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
AddToWorklist(ExtLoad.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() &&
ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
// ignore it if the masked load is already sign extended
if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
SDValue ExtMaskedLoad = DAG.getMaskedLoad(
VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
CombineTo(N, ExtMaskedLoad);
CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
if (SDValue(GN0, 0).hasOneUse() &&
ExtVT == GN0->getMemoryVT() &&
TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
SDValue ExtLoad = DAG.getMaskedGather(
DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
AddToWorklist(ExtLoad.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
N0.getOperand(1), false))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
}
return SDValue();
}
SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
bool isLE = DAG.getDataLayout().isLittleEndian();
// noop truncate
if (SrcVT == VT)
return N0;
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
// fold (truncate c1) -> c1
if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
if (C.getNode() != N)
return C;
}
// fold (truncate (ext x)) -> (ext x) or (truncate x) or x
if (N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND) {
// if the source is smaller than the dest, we still need an extend.
if (N0.getOperand(0).getValueType().bitsLT(VT))
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
// if the source is larger than the dest, then we just need the truncate.
if (N0.getOperand(0).getValueType().bitsGT(VT))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
// if the source and dest are the same type, we can drop both the extend
// and the truncate.
return N0.getOperand(0);
}
// If this is anyext(trunc), don't fold it, allow ourselves to be folded.
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
return SDValue();
// Fold extract-and-trunc into a narrow extract. For example:
// i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
// i32 y = TRUNCATE(i64 x)
// -- becomes --
// v16i8 b = BITCAST (v2i64 val)
// i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
//
// Note: We only run this optimization after type legalization (which often
// creates this pattern) and before operation legalization, after which we
// need to be more careful about the vector instructions that we generate.
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
EVT VecTy = N0.getOperand(0).getValueType();
EVT ExTy = N0.getValueType();
EVT TrTy = N->getValueType(0);
auto EltCnt = VecTy.getVectorElementCount();
unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
auto NewEltCnt = EltCnt * SizeRatio;
EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDLoc DL(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
DAG.getBitcast(NVT, N0.getOperand(0)),
DAG.getVectorIdxConstant(Index, DL));
}
}
// trunc (select c, a, b) -> select c, (trunc a), (trunc b)
if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
TLI.isTruncateFree(SrcVT, VT)) {
SDLoc SL(N0);
SDValue Cond = N0.getOperand(0);
SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
}
}
// trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
(!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
SDValue Amt = N0.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
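// Only safe when the shift amount is known to fit in Log2(Size) bits, which
// guarantees it is smaller than the narrow type's bit width.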
if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
if (AmtVT != Amt.getValueType()) {
Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
AddToWorklist(Amt.getNode());
}
return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
}
}
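// See if the value feeding this truncate can instead be expressed as an
// unsigned saturating subtract of the narrow type.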
if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
return V;
// Attempt to pre-truncate BUILD_VECTOR sources.
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
// Avoid creating illegal types if running after type legalizer.
(!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
SDLoc DL(N);
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 8> TruncOps;
for (const SDValue &Op : N0->op_values()) {
SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
TruncOps.push_back(TruncOp);
}
return DAG.getBuildVector(VT, DL, TruncOps);
}
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
// (2xi32 (buildvector x, y)).
if (Level == AfterLegalizeVectorOps && VT.isVector() &&
N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
N0.getOperand(0).hasOneUse()) {
SDValue BuildVect = N0.getOperand(0);
EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
EVT TruncVecEltTy = VT.getVectorElementType();
// Check that the element types match.
if (BuildVectEltTy == TruncVecEltTy) {
// Now we only need to compute the offset of the truncated elements.
unsigned BuildVecNumElts = BuildVect.getNumOperands();
unsigned TruncVecNumElts = VT.getVectorNumElements();
unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
"Invalid number of elements");
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
Opnds.push_back(BuildVect.getOperand(i));
return DAG.getBuildVector(VT, SDLoc(N), Opnds);
}
}
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
// Currently we only perform this optimization on scalars because vectors
// may have different active low bits.
if (!VT.isVector()) {
APInt Mask =
APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
}
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
if (SDValue Reduced = ReduceLoadWidth(N))
return Reduced;
// Handle the case where the load remains an extending load even
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
VT, LN0->getChain(), LN0->getBasePtr(),
LN0->getMemoryVT(),
LN0->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
return NewLoad;
}
}
}
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
// where ... are all 'undef'.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
SmallVector<EVT, 8> VTs;
SDValue V;
unsigned Idx = 0;
unsigned NumDefs = 0;
for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
SDValue X = N0.getOperand(i);
if (!X.isUndef()) {
V = X;
Idx = i;
NumDefs++;
}
// Stop if more than one member is non-undef.
if (NumDefs > 1)
break;
VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
VT.getVectorElementType(),
X.getValueType().getVectorElementCount()));
}
if (NumDefs == 0)
return DAG.getUNDEF(VT);
if (NumDefs == 1) {
assert(V.getNode() && "The single defined operand is empty!");
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
if (i != Idx) {
Opnds.push_back(DAG.getUNDEF(VTs[i]));
continue;
}
SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
AddToWorklist(NV.getNode());
Opnds.push_back(NV);
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
}
}
// Fold truncate of a bitcast of a vector to an extract of the low vector
// element.
//
// e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
SDValue VecSrc = N0.getOperand(0);
EVT VecSrcVT = VecSrc.getValueType();
if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
SDLoc SL(N);
unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
DAG.getVectorIdxConstant(Idx, SL));
}
}
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
// (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
// When the adde's carry is not used.
if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
// We only do this for addcarry before operation legalization.
((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
TLI.isOperationLegal(N0.getOpcode(), VT))) {
SDLoc SL(N);
auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
auto VTs = DAG.getVTList(VT, N0->getValueType(1));
return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
}
// fold (truncate (extract_subvector(ext x))) ->
// (extract_subvector x)
// TODO: This can be generalized to cover cases where the truncate and extract
// do not fully cancel each other out.
if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::SIGN_EXTEND ||
N00.getOpcode() == ISD::ZERO_EXTEND ||
N00.getOpcode() == ISD::ANY_EXTEND) {
if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
VT.getVectorElementType())
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
N00.getOperand(0), N0.getOperand(1));
}
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
// Narrow a suitable binary operation with a non-opaque constant operand by
// moving it ahead of the truncate. This is limited to pre-legalization
// because targets may prefer a wider type during later combines and invert
// this transform.
switch (N0.getOpcode()) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
if (!LegalOperations && N0.hasOneUse() &&
(isConstantOrConstantVector(N0.getOperand(0), true) ||
isConstantOrConstantVector(N0.getOperand(1), true))) {
// TODO: We already restricted this to pre-legalization, but for vectors
// we are extra cautious to not create an unsupported operation.
// Target-specific changes are likely needed to avoid regressions here.
if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
SDLoc DL(N);
SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
}
}
break;
case ISD::USUBSAT:
// Truncate the USUBSAT only if LHS is a known zero-extension; it's not
// enough to know that the upper bits are zero, we must ensure that we don't
// introduce an extra truncate.
if (!LegalOperations && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
VT.getScalarSizeInBits() &&
hasOperation(N0.getOpcode(), VT)) {
return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
DAG, SDLoc(N));
}
break;
}
return SDValue();
}
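/// Return the node that produces operand \p i of the BUILD_PAIR \p N,
/// looking through any intervening MERGE_VALUES node.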
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
SDValue Elt = N->getOperand(i);
if (Elt.getOpcode() != ISD::MERGE_VALUES)
return Elt.getNode();
return Elt.getOperand(Elt.getResNo()).getNode();
}
/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
// A BUILD_PAIR always has the least significant part in elt 0 and the most
// significant part in elt 1, so when combining into one large load we need
// to consider the endianness.
if (DAG.getDataLayout().isBigEndian())
std::swap(LD1, LD2);
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
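// LD2 must start exactly LD1Bytes after LD1 so the pair forms one
// contiguous, non-volatile region of memory.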
if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
Align Alignment = LD1->getAlign();
Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign <= Alignment &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
LD1->getPointerInfo(), Alignment);
}
return SDValue();
}
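/// Return the EXTRACT_ELEMENT index of the i64 half that holds the Hi part
/// of a ppcf128 value after it has been bitcast to an integer pair; the
/// index depends on the target's endianness.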
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
// On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
// and Lo parts; on big-endian machines it doesn't.
return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}
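/// If this bitcast wraps integer sign-bit logic (and/xor/or with a sign-mask
/// constant) applied to a bitcast FP value, rewrite it as FABS, FNEG or
/// FNEG(FABS) when the target has bit-preserving FP logic.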
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
// If this is not a bitcast to an FP type or if the target doesn't have
// IEEE754-compliant FP logic, we're done.
EVT VT = N->getValueType(0);
if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
return SDValue();
// TODO: Handle cases where the integer constant is a different scalar
// bitwidth to the FP.
SDValue N0 = N->getOperand(0);
EVT SourceVT = N0.getValueType();
if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
return SDValue();
unsigned FPOpcode;
APInt SignMask;
switch (N0.getOpcode()) {
case ISD::AND:
FPOpcode = ISD::FABS;
SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
case ISD::XOR:
FPOpcode = ISD::FNEG;
SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
case ISD::OR:
FPOpcode = ISD::FABS;
SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
default:
return SDValue();
}
// Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
// Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
// Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
// fneg (fabs X)
SDValue LogicOp0 = N0.getOperand(0);
ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
LogicOp0.getOpcode() == ISD::BITCAST &&
LogicOp0.getOperand(0).getValueType() == VT) {
SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
NumFPLogicOpsConv++;
if (N0.getOpcode() == ISD::OR)
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
return FPOp;
}
return SDValue();
}
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (N0.isUndef())
return DAG.getUNDEF(VT);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize types, unless both types are integer and the
// scalar type is legal. Only do this before legalize ops, since the target
// may be depending on the bitcast.
// First check to see if this is all constant.
// TODO: Support FP bitcasts after legalize types.
if (VT.isVector() &&
(!LegalTypes ||
(!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
TLI.isTypeLegal(VT.getVectorElementType()))) &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
cast<BuildVectorSDNode>(N0)->isConstant())
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
VT.getVectorElementType());
// If the input is a constant, let getNode fold it.
if (isIntOrFPConstant(N0)) {
// If we can't allow illegal operations, we need to check that this is just
// a fp -> int or int -> fp conversion and that the resulting operation will
// be legal.
if (!LegalOperations ||
(isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
(isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
TLI.isOperationLegal(ISD::Constant, VT))) {
SDValue C = DAG.getBitcast(VT, N0);
if (C.getNode() != N)
return C;
}
}
// (conv (conv x, t1), t2) -> (conv x, t2)
if (N0.getOpcode() == ISD::BITCAST)
return DAG.getBitcast(VT, N0.getOperand(0));
// fold (conv (load x)) -> (load (conv*)x)
// if the resultant load doesn't need a higher alignment than the original.
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not remove the cast if the types differ in endian layout.
TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
// If the load is volatile, we only want to change the load type if the
// resulting load is legal. Otherwise we might increase the number of
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
*LN0->getMemOperand())) {
SDValue Load =
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
LN0->getPointerInfo(), LN0->getAlign(),
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
}
}
if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
return V;
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
//
// For ppc_fp128:
// fold (bitcast (fneg x)) ->
// flipbit = signbit
// (xor (bitcast x) (build_pair flipbit, flipbit))
//
// fold (bitcast (fabs x)) ->
// flipbit = (and (extract_element (bitcast x), 0), signbit)
// (xor (bitcast x) (build_pair flipbit, flipbit))
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
N0.getNode()->hasOneUse() && VT.isInteger() &&
!VT.isVector() && !N0.getValueType().isVector()) {
SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
AddToWorklist(NewConv.getNode());
SDLoc DL(N);
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
assert(VT.getSizeInBits() == 128);
SDValue SignBit = DAG.getConstant(
APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
SDValue FlipBit;
if (N0.getOpcode() == ISD::FNEG) {
FlipBit = SignBit;
AddToWorklist(FlipBit.getNode());
} else {
assert(N0.getOpcode() == ISD::FABS);
SDValue Hi =
DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
SDLoc(NewConv)));
AddToWorklist(Hi.getNode());
FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
AddToWorklist(FlipBit.getNode());
}
SDValue FlipBits =
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
AddToWorklist(FlipBits.getNode());
return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
}
APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT,
NewConv, DAG.getConstant(SignBit, DL, VT));
assert(N0.getOpcode() == ISD::FABS);
return DAG.getNode(ISD::AND, DL, VT,
NewConv, DAG.getConstant(~SignBit, DL, VT));
}
// fold (bitconvert (fcopysign cst, x)) ->
// (or (and (bitconvert x), sign), (and cst, (not sign)))
// Note that we don't handle (copysign x, cst) because this can always be
// folded to an fneg or fabs.
//
// For ppc_fp128:
// fold (bitcast (fcopysign cst, x)) ->
// flipbit = (and (extract_element
// (xor (bitcast cst), (bitcast x)), 0),
// signbit)
// (xor (bitcast cst) (build_pair flipbit, flipbit))
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
AddToWorklist(X.getNode());
// If X has a different width than the result/lhs, sext it or truncate it.
unsigned VTWidth = VT.getSizeInBits();
if (OrigXWidth < VTWidth) {
X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
AddToWorklist(X.getNode());
} else if (OrigXWidth > VTWidth) {
// To get the sign bit in the right place, we have to shift it right
// before truncating.
SDLoc DL(X);
X = DAG.getNode(ISD::SRL, DL,
X.getValueType(), X,
DAG.getConstant(OrigXWidth-VTWidth, DL,
X.getValueType()));
AddToWorklist(X.getNode());
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
AddToWorklist(X.getNode());
}
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
AddToWorklist(Cst.getNode());
SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
AddToWorklist(X.getNode());
SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
AddToWorklist(XorResult.getNode());
SDValue XorResult64 = DAG.getNode(
ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
SDLoc(XorResult)));
AddToWorklist(XorResult64.getNode());
SDValue FlipBit =
DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
AddToWorklist(FlipBit.getNode());
SDValue FlipBits =
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
AddToWorklist(FlipBits.getNode());
return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
}
APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, SDLoc(X), VT));
AddToWorklist(X.getNode());
SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
AddToWorklist(Cst.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
}
}
// bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
if (N0.getOpcode() == ISD::BUILD_PAIR)
if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
return CombineLD;
// Remove double bitcasts from shuffles - this is often a legacy of
// XformToShuffleWithZero being used to combine bitmaskings (of
// float vectors bitcast to integer vectors) into shuffles.
// bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
!(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
// If operands are a bitcast, peek through if it casts the original VT.
// If operands are a constant, just bitcast back to original VT.
auto PeekThroughBitcast = [&](SDValue Op) {
if (Op.getOpcode() == ISD::BITCAST &&
Op.getOperand(0).getValueType() == VT)
return SDValue(Op.getOperand(0));
if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
return DAG.getBitcast(VT, Op);
return SDValue();
};
// FIXME: If either input vector is bitcast, try to convert the shuffle to
// the result type of this bitcast. This would eliminate at least one
// bitcast. See the transform in InstCombine.
SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
if (!(SV0 && SV1))
return SDValue();
int MaskScale =
VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
SmallVector<int, 8> NewMask;
for (int M : SVN->getMask())
for (int i = 0; i != MaskScale; ++i)
NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
SDValue LegalShuffle =
TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
if (LegalShuffle)
return LegalShuffle;
}
return SDValue();
}
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
EVT VT = N->getValueType(0);
return CombineConsecutiveLoads(N, VT);
}
SDValue DAGCombiner::visitFREEZE(SDNode *N) {
SDValue N0 = N->getOperand(0);
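// Fold freeze(x) -> x when x is known to be neither undef nor poison.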
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;
return SDValue();
}
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
// If this is already the right type, we're done.
if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
unsigned SrcBitSize = SrcEltVT.getSizeInBits();
unsigned DstBitSize = DstEltVT.getSizeInBits();
// If this is a conversion of N elements of one type to N elements of another
// type, convert each element. This handles FP<->INT cases.
if (SrcBitSize == DstBitSize) {
SmallVector<SDValue, 8> Ops;
for (SDValue Op : BV->op_values()) {
// If the vector element type is not legal, the BUILD_VECTOR operands
// are promoted and implicitly truncated. Make that explicit here.
if (Op.getValueType() != SrcEltVT)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
Ops.push_back(DAG.getBitcast(DstEltVT, Op));
AddToWorklist(Ops.back().getNode());
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
BV->getValueType(0).getVectorNumElements());
return DAG.getBuildVector(VT, SDLoc(BV), Ops);
}
// Otherwise, we're growing or shrinking the elements. To avoid having to
// handle annoying details of growing/shrinking FP values, we convert them to
// int first.
if (SrcEltVT.isFloatingPoint()) {
// Convert the input float vector to an int vector where the elements are
// the same size.
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
}
// Now we know the input is an integer vector. If the output is an FP type,
// convert to integer first, then to FP of the right size.
if (DstEltVT.isFloatingPoint()) {
EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
// Next, convert to FP elements of the same size.
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
SDLoc DL(BV);
// Okay, we know the src/dst types are both integers of differing types.
// Handle growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
if (SrcBitSize < DstBitSize) {
unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e;
i += NumInputsPerOutput) {
bool isLE = DAG.getDataLayout().isLittleEndian();
APInt NewBits = APInt(DstBitSize, 0);
bool EltIsUndef = true;
for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
// Shift the previously computed bits over.
NewBits <<= SrcBitSize;
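// Pick the operand that supplies the next most significant chunk of the
// wide element; on little-endian targets the operands are walked in
// reverse.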
SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
if (Op.isUndef()) continue;
EltIsUndef = false;
NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
zextOrTrunc(SrcBitSize).zext(DstBitSize);
}
if (EltIsUndef)
Ops.push_back(DAG.getUNDEF(DstEltVT));
else
Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
return DAG.getBuildVector(VT, DL, Ops);
}
// Finally, this must be the case where we are shrinking elements: each input
// turns into multiple outputs.
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
NumOutputsPerInput*BV->getNumOperands());
SmallVector<SDValue, 8> Ops;
for (const SDValue &Op : BV->op_values()) {
if (Op.isUndef()) {
Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
continue;
}
APInt OpVal = cast<ConstantSDNode>(Op)->
getAPIntValue().zextOrTrunc(SrcBitSize);
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
APInt ThisVal = OpVal.trunc(DstBitSize);
Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
OpVal.lshrInPlace(DstBitSize);
}
// For big endian targets, swap the order of the pieces of each element.
if (DAG.getDataLayout().isBigEndian())
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
return DAG.getBuildVector(VT, DL, Ops);
}
/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
bool CanReassociate =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath || HasFMAD);
// If the addition is not contractable, do not combine.
if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
return SDValue();
if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
if (N.getOpcode() != ISD::FMUL)
return false;
return AllowFusionGlobally || N->getFlags().hasAllowContract();
};
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
if (N0.getNode()->use_size() > N1.getNode()->use_size())
std::swap(N0, N1);
}
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
N1.getOperand(1), N0);
}
// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
// This requires reassociation because it changes the order of operations.
SDValue FMA, E;
if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
N0.getOperand(2).hasOneUse()) {
FMA = N0;
E = N1;
} else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
N1.getOperand(2).hasOneUse()) {
FMA = N1;
E = N0;
}
if (FMA && E) {
SDValue A = FMA.getOperand(0);
SDValue B = FMA.getOperand(1);
SDValue C = FMA.getOperand(2).getOperand(0);
SDValue D = FMA.getOperand(2).getOperand(1);
SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
}
// Look through FP_EXTEND nodes to do more combining.
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
N1);
}
}
// fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
N0);
}
}
// More folding opportunities when target permits.
if (Aggressive) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
SDValue Z) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
Z));
};
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
N1);
}
}
}
// fold (fadd (fpext (fma x, y, (fmul u, v))), z)
// -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
SDValue Z) {
return DAG.getNode(
PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
N1);
}
}
}
// fold (fadd x, (fma y, z, (fpext (fmul u, v))))
// -> (fma y, z, (fma (fpext u), (fpext v), x))
if (N1.getOpcode() == PreferredFusedOpcode) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
if (isContractableFMUL(N120) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
N0);
}
}
}
// fold (fadd x, (fpext (fma y, z, (fmul u, v))))
// -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == PreferredFusedOpcode) {
SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
N0);
}
}
}
}
return SDValue();
}
/// Try to perform FMA combining on a given FSUB node.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
const SDNodeFlags Flags = N->getFlags();
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath || HasFMAD);
// If the subtraction is not contractable, do not combine.
if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
return SDValue();
if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
if (N.getOpcode() != ISD::FMUL)
return false;
return AllowFusionGlobally || N->getFlags().hasAllowContract();
};
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
}
return SDValue();
};
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
YZ.getOperand(1), X);
}
return SDValue();
};
// If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
(N0.getNode()->use_size() > N1.getNode()->use_size())) {
// fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
if (SDValue V = tryToFoldXSubYZ(N0, N1))
return V;
// fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
if (SDValue V = tryToFoldXYSubZ(N0, N1))
return V;
} else {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (SDValue V = tryToFoldXYSubZ(N0, N1))
return V;
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
if (SDValue V = tryToFoldXSubYZ(N0, N1))
return V;
}
// fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
DAG.getNode(ISD::FNEG, SL, VT, N1));
}
// Look through FP_EXTEND nodes to do more combining.
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT, N1));
}
}
// fold (fsub x, (fpext (fmul y, z)))
// -> (fma (fneg (fpext y)), (fpext z), x)
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
return DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
}
}
// fold (fsub (fpext (fneg (fmul x, y))), z)
// -> (fneg (fma (fpext x), (fpext y), z))
// Note: This could be removed with appropriate canonicalization of the
// input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
// us from implementing the canonicalization in visitFSUB.
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(
ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
N1));
}
}
}
// fold (fsub (fneg (fpext (fmul x, y))), z)
// -> (fneg (fma (fpext x), (fpext y), z))
// Note: This could be removed with appropriate canonicalization of the
// input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
// us from implementing the canonicalization in visitFSUB.
if (N0.getOpcode() == ISD::FNEG) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N000.getValueType())) {
return DAG.getNode(
ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
N1));
}
}
}
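// The wider FMA chains below change the order of operations, so they are
// only formed when reassociation is allowed globally or on the node.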
auto isReassociable = [Options](SDNode *N) {
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
};
auto isContractableAndReassociableFMUL = [isContractableFMUL,
isReassociable](SDValue N) {
return isContractableFMUL(N) && isReassociable(N.getNode());
};
// More folding opportunities when target permits.
if (Aggressive && isReassociable(N)) {
bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y, (fma u, v, (fneg z)))
if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
isContractableAndReassociableFMUL(N0.getOperand(2)) &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
}
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode &&
N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
if (isContractableAndReassociableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
return DAG.getNode(
PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
// fold (fsub (fpext (fma x, y, (fmul u, v))), z)
// -> (fma (fpext x), (fpext y),
// (fma (fpext u), (fpext v), (fneg z)))
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
if (N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableAndReassociableFMUL(N120) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
return DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
}
}
// fold (fsub x, (fpext (fma y, z, (fmul u, v))))
// -> (fma (fneg (fpext y)), (fpext z),
// (fma (fneg (fpext u)), (fpext v), x))
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
SDValue CvtSrc = N1.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
SDValue N102 = CvtSrc.getOperand(2);
if (isContractableAndReassociableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
return DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
}
}
}
return SDValue();
}
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
const TargetOptions &Options = DAG.getTarget().Options;
// The transforms below are incorrect when x == 0 and y == inf, because the
// intermediate multiplication produces a nan.
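// e.g. (x + 1.0) * y with x == 0.0 and y == +inf evaluates to +inf, but the
// fused fma(x, y, y) first computes 0.0 * +inf == NaN.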
if (!Options.NoInfsFPMath)
return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// Floating-point multiply-add with intermediate rounding. This can result
// in a less precise result due to the changed rounding order.
bool HasFMAD = Options.UnsafeFPMath &&
(LegalOperations && TLI.isFMADLegal(DAG, N));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
// Always prefer FMAD to FMA for precision.
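// (ISD::FMAD returns the same result as the separately rounded multiply and
// add, so it preserves the numerical behavior of the unfused code, whereas
// ISD::FMA skips the intermediate rounding step.)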
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
// fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
// fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
auto FuseFADD = [&](SDValue X, SDValue Y) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
if (C->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
Y);
if (C->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
DAG.getNode(ISD::FNEG, SL, VT, Y));
}
}
return SDValue();
};
if (SDValue FMA = FuseFADD(N0, N1))
return FMA;
if (SDValue FMA = FuseFADD(N1, N0))
return FMA;
// fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
// fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
// fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
// fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
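// e.g. (1.0 - x1) * y == y - x1*y == fma(-x1, y, y), and
// (x0 - 1.0) * y == x0*y - y == fma(x0, y, -y).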
auto FuseFSUB = [&](SDValue X, SDValue Y) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
if (C0->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
Y);
if (C0->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
DAG.getNode(ISD::FNEG, SL, VT, Y));
}
if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
if (C1->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
DAG.getNode(ISD::FNEG, SL, VT, Y));
if (C1->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
Y);
}
}
return SDValue();
};
if (SDValue FMA = FuseFSUB(N0, N1))
return FMA;
if (SDValue FMA = FuseFSUB(N1, N0))
return FMA;
return SDValue();
}
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (fadd A, (fneg B)) -> (fsub A, B)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
N1, DAG, LegalOperations, ForCodeSize))
return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
// fold (fadd (fneg A), B) -> (fsub B, A)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
N0, DAG, LegalOperations, ForCodeSize))
return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
return false;
auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
return C && C->isExactlyValue(-2.0);
};
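// In the two folds below, A + B*(-2.0) == A - 2*B == A - (B + B), so the
// multiply by a constant becomes a single add of B to itself.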
// fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
if (isFMulNegTwo(N0)) {
SDValue B = N0.getOperand(0);
SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
}
// fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
if (isFMulNegTwo(N1)) {
SDValue B = N1.getOperand(0);
SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
}
// No FP constant should be created after legalization as the Instruction
// Selection pass has a hard time dealing with FP constants.
bool AllowNewConst = (Level < AfterLegalizeDAG);
// If nnan is enabled, fold lots of things.
if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);
// If allowed, fold (fadd x, (fneg x)) -> 0.0
if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
return DAG.getConstantFP(0.0, DL, VT);
}
// If 'unsafe math' or reassoc and nsz, fold lots of things.
// TODO: break out portions of the transformations below for which Unsafe is
// considered and which do not require both nsz and reassoc
if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
if (N1CFP && N0.getOpcode() == ISD::FADD &&
DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
}
// We can fold chains of FADD's of the same value into multiplications.
// This transform is not safe in general because we are reducing the number
// of rounding steps.
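// e.g. (fadd (fmul x, c), x) rounds once after the multiply and once after
// the add, while the folded (fmul x, c+1.0) rounds only once, so the results
// can differ.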
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(1.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(2.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
}
}
if (N1.getOpcode() == ISD::FMUL) {
bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(1.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(2.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
}
}
if (N0.getOpcode() == ISD::FADD) {
bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
return DAG.getNode(ISD::FMUL, DL, VT, N1,
DAG.getConstantFP(3.0, DL, VT));
}
}
if (N1.getOpcode() == ISD::FADD) {
bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getConstantFP(3.0, DL, VT));
}
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
DAG.getConstantFP(4.0, DL, VT));
}
}
} // enable-unsafe-fp-math
// FADD -> FMA combines:
if (SDValue Fused = visitFADDForFMACombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
return SDValue();
}
SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue N0 = N->getOperand(1);
SDValue N1 = N->getOperand(2);
EVT VT = N->getValueType(0);
EVT ChainVT = N->getValueType(1);
SDLoc DL(N);
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
N1, DAG, LegalOperations, ForCodeSize)) {
return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
{Chain, N0, NegN1});
}
// fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
N0, DAG, LegalOperations, ForCodeSize)) {
return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
{Chain, N1, NegN0});
}
return SDValue();
}
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros()) {
return N0;
}
}
if (N0 == N1) {
// (fsub x, x) -> 0.0
if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
return DAG.getConstantFP(0.0f, DL, VT);
}
// (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
// We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
// flushed to zero, unless all users treat denorms as zero (DAZ).
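// e.g. when output denormals are flushed to zero, (fsub 0.0, d) with a
// denormal d yields a zero, whereas (fneg d) only flips the sign bit and
// keeps the denormal value.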
// FIXME: This transform will change the sign of a NaN and the behavior
// of a signaling NaN. It is only valid when a NoNaN flag is present.
DenormalMode DenormMode = DAG.getDenormalMode(VT);
if (DenormMode == DenormalMode::getIEEE()) {
if (SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
return NegN1;
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1);
}
}
}
if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
if (N0 == N1->getOperand(0))
return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
// X - (Y + X) -> -Y
if (N0 == N1->getOperand(1))
return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
return SDValue();
}
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
}
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
// canonicalize constant to RHS
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
N0.getOpcode() == ISD::FMUL) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
// Avoid an infinite loop by making sure that N00 is not a constant
// (the inner multiply has not been constant folded yet).
if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
}
}
// Match a special-case: we convert X * 2.0 into fadd.
// fmul (fadd X, X), C -> fmul X, 2.0 * C
if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
N0.getOperand(0) == N0.getOperand(1)) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N0);
// -N0 * -N1 --> N0 * N1
TargetLowering::NegatibleCost CostN0 =
TargetLowering::NegatibleCost::Expensive;
TargetLowering::NegatibleCost CostN1 =
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
// fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
(N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
TLI.isOperationLegal(ISD::FABS, VT)) {
SDValue Select = N0, X = N1;
if (Select.getOpcode() != ISD::SELECT)
std::swap(Select, X);
SDValue Cond = Select.getOperand(0);
auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
if (TrueOpnd && FalseOpnd &&
Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
switch (CC) {
default: break;
case ISD::SETOLT:
case ISD::SETULT:
case ISD::SETOLE:
case ISD::SETULE:
case ISD::SETLT:
case ISD::SETLE:
std::swap(TrueOpnd, FalseOpnd);
LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETUGT:
case ISD::SETOGE:
case ISD::SETUGE:
case ISD::SETGT:
case ISD::SETGE:
if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT,
DAG.getNode(ISD::FABS, DL, VT, X));
if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
return DAG.getNode(ISD::FABS, DL, VT, X);
break;
}
}
}
// FMUL -> FMA combines:
if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
return SDValue();
}
SDValue DAGCombiner::visitFMA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// FMA nodes have flags that propagate to the created nodes.
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
bool UnsafeFPMath =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
isa<ConstantFPSDNode>(N2)) {
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
// (-N0 * -N1) + N2 --> (N0 * N1) + N2
TargetLowering::NegatibleCost CostN0 =
TargetLowering::NegatibleCost::Expensive;
TargetLowering::NegatibleCost CostN1 =
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
if (N1CFP && N1CFP->isZero())
return N2;
}
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
if (UnsafeFPMath) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
}
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
if (N0.getOpcode() == ISD::FMUL &&
DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
N2);
}
}
// (fma x, 1.0, y) -> (fadd x, y)
// (fma x, -1.0, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
AddToWorklist(RHSNeg.getNode());
return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
// fma (fneg x), K, y -> fma x, -K, y
if (N0.getOpcode() == ISD::FNEG &&
(TLI.isOperationLegal(ISD::ConstantFP, VT) ||
(N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
ForCodeSize)))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
}
}
if (UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
return DAG.getNode(
ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
return DAG.getNode(
ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
}
}
// fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
// fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
if (!TLI.isFNegFree(VT))
if (SDValue Neg = TLI.getCheaperNegatedExpression(
SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
return DAG.getNode(ISD::FNEG, DL, VT, Neg);
return SDValue();
}
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// TODO: Limit this transform based on optsize/minsize - it always creates at
// least 1 extra instruction. But the perf win may be substantial enough
// that only minsize should restrict this.
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
const SDNodeFlags Flags = N->getFlags();
if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
return SDValue();
// Skip if current node is a reciprocal/fneg-reciprocal.
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
return SDValue();
// Exit early if the target does not want this transform or if there can't
// possibly be enough uses of the divisor to make the transform worthwhile.
unsigned MinUses = TLI.combineRepeatedFPDivisors();
// For splat vectors, scale the number of uses by the splat factor. If we can
// convert the division into a scalar op, that will likely be much faster.
unsigned NumElts = 1;
EVT VT = N->getValueType(0);
if (VT.isVector() && DAG.isSplatValue(N1))
NumElts = VT.getVectorNumElements();
if (!MinUses || (N1->use_size() * NumElts) < MinUses)
return SDValue();
// Find all FDIV users of the same divisor.
// Use a set because duplicates may be present in the user list.
SetVector<SDNode *> Users;
for (auto *U : N1->uses()) {
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
// Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
U->getOperand(0) == U->getOperand(1).getOperand(0) &&
U->getFlags().hasAllowReassociation() &&
U->getFlags().hasNoSignedZeros())
continue;
// This division is eligible for optimization only if global unsafe math
// is enabled or if this division allows reciprocal formation.
if (UnsafeMath || U->getFlags().hasAllowReciprocal())
Users.insert(U);
}
}
// Now that we have the actual number of divisor uses, make sure it meets
// the minimum threshold specified by the target.
if ((Users.size() * NumElts) < MinUses)
return SDValue();
SDLoc DL(N);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
// Dividend / Divisor -> Dividend * Reciprocal
for (auto *U : Users) {
SDValue Dividend = U->getOperand(0);
if (Dividend != FPOne) {
SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
Reciprocal, Flags);
CombineTo(U, NewNode);
} else if (U != Reciprocal.getNode()) {
// In the absence of fast-math-flags, this user node is always the
// same node as Reciprocal, but with FMF they may be different nodes.
CombineTo(U, Reciprocal);
}
}
return SDValue(N, 0); // N was replaced.
}
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
if (SDValue V = combineRepeatedFPDivisors(N))
return V;
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
// Compute the reciprocal 1.0 / c2.
const APFloat &N1APF = N1CFP->getValueAPF();
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
// Only do the transform if the reciprocal is a legal fp immediate that
// isn't too nasty (e.g. NaN, denormal, ...).
if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
(!LegalOperations ||
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
// backend)... we should handle this gracefully after Legalize.
// TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getConstantFP(Recip, DL, VT));
}
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV =
buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV =
buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
// it's still worthwhile to get rid of the FSQRT if possible.
SDValue Sqrt, Y;
if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
Sqrt = N1.getOperand(0);
Y = N1.getOperand(1);
} else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
Sqrt = N1.getOperand(1);
Y = N1.getOperand(0);
}
if (Sqrt.getNode()) {
// If the other multiply operand is known positive, pull it into the
// sqrt. That will eliminate the division if we convert to an estimate.
if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
SDValue A;
if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
A = Y.getOperand(0);
else if (Y == Sqrt.getOperand(0))
A = Y;
if (A) {
// X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
// X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
SDValue AAZ =
DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
// Estimate creation failed. Clean up speculatively created nodes.
recursivelyDeleteUnusedNodes(AAZ.getNode());
}
}
// We found an FSQRT, so try to make this fold:
// X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
AddToWorklist(Div.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
}
}
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
if (Options.NoInfsFPMath || Flags.hasNoInfs())
if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
return RV;
}
// Fold X/Sqrt(X) -> Sqrt(X)
if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
(Options.UnsafeFPMath || Flags.hasAllowReassociation()))
if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
return N1;
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
TargetLowering::NegatibleCost CostN0 =
TargetLowering::NegatibleCost::Expensive;
TargetLowering::NegatibleCost CostN1 =
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
return SDValue();
}
SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
return SDValue();
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
SDNodeFlags Flags = N->getFlags();
const TargetOptions &Options = DAG.getTarget().Options;
// Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
// sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
if (!Flags.hasApproximateFuncs() ||
(!Options.NoInfsFPMath && !Flags.hasNoInfs()))
return SDValue();
SDValue N0 = N->getOperand(0);
if (TLI.isFsqrtCheap(N0, DAG))
return SDValue();
// FSQRT nodes have flags that propagate to the created nodes.
// TODO: If this is N0/sqrt(N0), and we reach this node before trying to
// transform the fdiv, we may produce a sub-optimal estimate sequence
// because the reciprocal calculation may not have to filter out a
// 0.0 input.
return buildSqrtEstimate(N0, Flags);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
/// copysign(x, fp_round(y)) -> copysign(x, y)
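/// These folds are valid because only the sign bit of the second operand is
/// consumed, and fp_extend / fp_round do not change that sign.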
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue N1 = N->getOperand(1);
if ((N1.getOpcode() == ISD::FP_EXTEND ||
N1.getOpcode() == ISD::FP_ROUND)) {
EVT N1VT = N1->getValueType(0);
EVT N1Op0VT = N1->getOperand(0).getValueType();
// Always fold no-op FP casts.
if (N1VT == N1Op0VT)
return true;
// Do not optimize out type conversion of f128 type yet.
// For some targets like x86_64, configuration is changed to keep one f128
// value in one SSE register, but instruction selection cannot handle
// FCOPYSIGN on SSE registers yet.
if (N1Op0VT == MVT::f128)
return false;
// Avoid mismatched vector operand types, for better instruction selection.
if (N1Op0VT.isVector())
return false;
return true;
}
return false;
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
const APFloat &V = N1C->getValueAPF();
// copysign(x, c1) -> fabs(x) iff ispos(c1)
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
if (!V.isNegative()) {
if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
} else {
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
}
}
// copysign(fabs(x), y) -> copysign(x, y)
// copysign(fneg(x), y) -> copysign(x, y)
// copysign(copysign(x,z), y) -> copysign(x, y)
if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
// copysign(x, abs(y)) -> abs(x)
if (N1.getOpcode() == ISD::FABS)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// copysign(x, copysign(y,z)) -> copysign(x, z)
if (N1.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
return SDValue();
}
SDValue DAGCombiner::visitFPOW(SDNode *N) {
ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
if (!ExponentC)
return SDValue();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Try to convert x ** (1/3) into cube root.
// TODO: Handle the various flavors of long double.
// TODO: Since we're approximating, we don't need an exact 1/3 exponent.
// Some range near 1/3 should be fine.
EVT VT = N->getValueType(0);
if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
(VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
// pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
// pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
// pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
// For regular numbers, rounding may cause the results to differ.
// Therefore, we require { nsz ninf nnan afn } for this transform.
// TODO: We could select out the special cases if we don't have nsz/ninf.
SDNodeFlags Flags = N->getFlags();
if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
!Flags.hasApproximateFuncs())
return SDValue();
// Do not create a cbrt() libcall if the target does not have it, and do not
// turn a pow that has lowering support into a cbrt() libcall.
if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
(!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
return SDValue();
return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
}
// Try to convert x ** (1/4) and x ** (3/4) into square roots.
// x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
// TODO: This could be extended (using a target hook) to handle smaller
// power-of-2 fractional exponents.
bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
if (ExponentIs025 || ExponentIs075) {
// pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
// pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
// pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
// pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
// For regular numbers, rounding may cause the results to differ.
// Therefore, we require { nsz ninf afn } for this transform.
// TODO: We could select out the special cases if we don't have nsz/ninf.
SDNodeFlags Flags = N->getFlags();
// We only need no signed zeros for the 0.25 case.
if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
!Flags.hasApproximateFuncs())
return SDValue();
// Don't double the number of libcalls. We are trying to inline fast code.
if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
return SDValue();
// Assume that libcalls are the smallest code.
// TODO: This restriction should probably be lifted for vectors.
if (ForCodeSize)
return SDValue();
// pow(X, 0.25) --> sqrt(sqrt(X))
SDLoc DL(N);
SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
if (ExponentIs025)
return SqrtSqrt;
// pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
}
return SDValue();
}
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
// This optimization is guarded by a function attribute because it may produce
// unexpected results. I.e., programs may be relying on the platform-specific
// undefined behavior when the float-to-int conversion overflows.
const Function &F = DAG.getMachineFunction().getFunction();
Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
if (StrictOverflow.getValueAsString().equals("false"))
return SDValue();
// We only do this if the target has legal ftrunc. Otherwise, we'd likely be
// replacing casts with a libcall. We also must be allowed to ignore -0.0
// because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
// conversions would return +0.0.
// FIXME: We should be able to use node-level FMF here.
// TODO: If strict math, should we use FABS (+ range check for signed cast)?
EVT VT = N->getValueType(0);
if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
!DAG.getTarget().Options.NoSignedZerosFPMath)
return SDValue();
// fptosi/fptoui round towards zero, so converting from FP to integer and
// back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
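// e.g. sitofp(fptosi(2.75)) == 2.0 == ftrunc(2.75).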
SDValue N0 = N->getOperand(0);
if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
N0.getOperand(0).getValueType() == VT)
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
N0.getOperand(0).getValueType() == VT)
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
return SDValue();
}
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// [us]itofp(undef) = 0, because the result value is bounded.
if (N0.isUndef())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
// fold (sint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
// but UINT_TO_FP is legal on this target, try to convert.
if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
hasOperation(ISD::UINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to UINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
// fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
DAG.getConstantFP(0.0, DL, VT));
}
// fold (sint_to_fp (zext (setcc x, y, cc))) ->
// (select (setcc x, y, cc), 1.0, 0.0)
if (N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
return DAG.getSelect(DL, VT, N0.getOperand(0),
DAG.getConstantFP(1.0, DL, VT),
DAG.getConstantFP(0.0, DL, VT));
}
if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
return FTrunc;
return SDValue();
}
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// [us]itofp(undef) = 0, because the result value is bounded.
if (N0.isUndef())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
// fold (uint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
// but SINT_TO_FP is legal on this target, try to convert.
if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
hasOperation(ISD::SINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to SINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
}
// fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
DAG.getConstantFP(0.0, DL, VT));
}
if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
return FTrunc;
return SDValue();
}
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
return SDValue();
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
// We can safely assume the conversion won't overflow the output range,
// because (for example) (uint8_t)18293.f is undefined behavior.
// Since we can assume the conversion won't overflow, our decision as to
// whether the input will fit in the float should depend on the minimum
// of the input range and output range.
// This means this is also safe for a signed input and unsigned output, since
// a negative input would lead to undefined behavior.
unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
unsigned ActualSize = std::min(InputSize, OutputSize);
const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
// We can only fold away the float conversion if the input range can be
// represented exactly in the float range.
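// e.g. every i16 value is exactly representable in f32 (24-bit significand),
// so (fp_to_sint (sint_to_fp x:i16)) with an i32 result folds to a sign
// extension of x.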
if (APFloat::semanticsPrecision(sem) >= ActualSize) {
if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND;
return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
}
if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
return DAG.getBitcast(VT, Src);
}
return SDValue();
}
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (fp_to_sint undef) -> undef
if (N0.isUndef())
return DAG.getUNDEF(VT);
// fold (fp_to_sint c1fp) -> c1
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (fp_to_uint undef) -> undef
if (N0.isUndef())
return DAG.getUNDEF(VT);
// fold (fp_to_uint c1fp) -> c1
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_round c1fp) -> c1fp
if (N0CFP)
return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
// fold (fp_round (fp_extend x)) -> x
if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
return N0.getOperand(0);
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
// Skip this folding if it results in an fp_round from f80 to f16.
//
// f80 to f16 always generates an expensive (and as yet, unimplemented)
// libcall to __truncxfhf2 instead of selecting native f16 conversion
// instructions from f32 or f64. Moreover, the first (value-preserving)
// fp_round from f80 to either f32 or f64 may become a NOP in platforms like
// x86.
if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
return SDValue();
// If the first fp_round isn't a value preserving truncation, it might
// introduce a tie in the second fp_round that wouldn't occur in the
// single-step fp_round we want to fold to.
// In other words, double rounding isn't the same as rounding.
// Also, this is a value preserving truncation iff both fp_round's are.
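// e.g. an f80 value rounded to f64 and then to f32 can differ from the same
// value rounded directly to f32 when the first rounding lands exactly halfway
// between two f32 values.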
if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
SDLoc DL(N);
return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
}
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
AddToWorklist(Tmp.getNode());
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
Tmp, N0.getOperand(1));
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
return SDValue();
}
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// If this fp_extend is only used by an fp_round, don't fold it; let the
// fp_round fold the pair instead.
if (N->hasOneUse() &&
N->use_begin()->getOpcode() == ISD::FP_ROUND)
return SDValue();
// fold (fp_extend c1fp) -> c1fp
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
// fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
if (N0.getOpcode() == ISD::FP16_TO_FP &&
TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// value of X.
if (N0.getOpcode() == ISD::FP_ROUND
&& N0.getConstantOperandVal(1) == 1) {
SDValue In = N0.getOperand(0);
if (In.getValueType() == VT) return In;
if (VT.bitsLT(In.getValueType()))
return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
In, N0.getOperand(1));
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
}
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
N0.getValueType(), ExtLoad,
DAG.getIntPtrConstant(1, SDLoc(N0))),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
return SDValue();
}
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
// fold ftrunc (known rounded int x) -> x
// ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
// likely to be generated when extracting an integer from an already-rounded
// floating-point value.
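// e.g. ftrunc(fceil(x)) == fceil(x), since fceil(x) is already an
// integer-valued float.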
switch (N0.getOpcode()) {
default: break;
case ISD::FRINT:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
case ISD::FCEIL:
return N0;
}
return SDValue();
}
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Constant fold FNEG.
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
return NegN0;
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
// FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
// know it was called from a context with a nsz flag if the input fsub does
// not.
if (N0.getOpcode() == ISD::FSUB &&
(DAG.getTarget().Options.NoSignedZerosFPMath ||
N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
N0.getOperand(0));
}
if (SDValue Cast = foldSignChangeInBitcast(N))
return Cast;
return SDValue();
}
static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
APFloat (*Op)(const APFloat &, const APFloat &)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
const SDNodeFlags Flags = N->getFlags();
unsigned Opc = N->getOpcode();
bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
}
// Canonicalize to constant on RHS.
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
if (N1CFP) {
const APFloat &AF = N1CFP->getValueAPF();
// minnum(X, nan) -> X
// maxnum(X, nan) -> X
// minimum(X, nan) -> nan
// maximum(X, nan) -> nan
if (AF.isNaN())
return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
// In the following folds, inf can be replaced with the largest finite
// float, if the ninf flag is set.
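// e.g. with ninf set, minnum(X, -largest_finite) == -largest_finite, since X
// cannot be any smaller.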
if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
// minnum(X, -inf) -> -inf
// maxnum(X, +inf) -> +inf
// minimum(X, -inf) -> -inf if nnan
// maximum(X, +inf) -> +inf if nnan
if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
return N->getOperand(1);
// minnum(X, +inf) -> X if nnan
// maxnum(X, -inf) -> X if nnan
// minimum(X, +inf) -> X
// maximum(X, -inf) -> X
if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
return N->getOperand(0);
}
}
return SDValue();
}
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
return visitFMinMax(DAG, N, minnum);
}
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
return visitFMinMax(DAG, N, maxnum);
}
SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
return visitFMinMax(DAG, N, minimum);
}
SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
return visitFMinMax(DAG, N, maximum);
}
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (fabs c1) -> fabs(c1)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
if (N0.getOpcode() == ISD::FABS)
return N->getOperand(0);
// fold (fabs (fneg x)) -> (fabs x)
// fold (fabs (fcopysign x, y)) -> (fabs x)
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
if (SDValue Cast = foldSignChangeInBitcast(N))
return Cast;
return SDValue();
}
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
// BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
// nondeterministic jumps).
if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
N1->getOperand(0), N2);
}
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
// If we did this folding here, it would be necessary to update the
// MachineBasicBlock CFG, which is awkward.
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
TLI.isOperationLegalOrCustom(ISD::BR_CC,
N1.getOperand(0).getValueType())) {
return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
Chain, N1.getOperand(2),
N1.getOperand(0), N1.getOperand(1), N2);
}
if (N1.hasOneUse()) {
// rebuildSetCC calls visitXor which may change the Chain when there is a
// STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
HandleSDNode ChainHandle(Chain);
if (SDValue NewN1 = rebuildSetCC(N1))
return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
ChainHandle.getValue(), NewN1, N2);
}
return SDValue();
}
SDValue DAGCombiner::rebuildSetCC(SDValue N) {
if (N.getOpcode() == ISD::SRL ||
(N.getOpcode() == ISD::TRUNCATE &&
(N.getOperand(0).hasOneUse() &&
N.getOperand(0).getOpcode() == ISD::SRL))) {
// Look past the truncate.
if (N.getOpcode() == ISD::TRUNCATE)
N = N.getOperand(0);
// Match this pattern so that we can generate simpler code:
//
// %a = ...
// %b = and i32 %a, 2
// %c = srl i32 %b, 1
// brcond i32 %c ...
//
// into
//
// %a = ...
// %b = and i32 %a, 2
// %c = setcc eq %b, 0
// brcond %c ...
//
// This applies only when the AND constant value has one bit set and the
// SRL constant is equal to the log2 of the AND constant. The back-end is
// smart enough to convert the result into a TEST/JMP sequence.
SDValue Op0 = N.getOperand(0);
SDValue Op1 = N.getOperand(1);
if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
SDValue AndOp1 = Op0.getOperand(1);
if (AndOp1.getOpcode() == ISD::Constant) {
const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
if (AndConst.isPowerOf2() &&
cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
SDLoc DL(N);
return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
Op0, DAG.getConstant(0, DL, Op0.getValueType()),
ISD::SETNE);
}
}
}
}
// Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
// Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
if (N.getOpcode() == ISD::XOR) {
// Because we may call this on a speculatively constructed
// SimplifiedSetCC Node, we need to simplify this node first.
// Ideally this should be folded into SimplifySetCC and not
// here. For now, grab a handle to N so we don't lose it from
// replacements internal to the visit.
HandleSDNode XORHandle(N);
while (N.getOpcode() == ISD::XOR) {
SDValue Tmp = visitXOR(N.getNode());
// No simplification done.
if (!Tmp.getNode())
break;
// Returning N is a form of in-visit replacement that may invalidate
// N. Grab the value from the handle.
if (Tmp.getNode() == N.getNode())
N = XORHandle.getValue();
else // Node simplified. Try simplifying again.
N = Tmp;
}
if (N.getOpcode() != ISD::XOR)
return N;
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
// (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
Op0.getValueType() == MVT::i1) {
N = Op0;
Op0 = N->getOperand(0);
Op1 = N->getOperand(1);
Equal = true;
}
EVT SetCCVT = N.getValueType();
if (LegalTypes)
SetCCVT = getSetCCResultType(SetCCVT);
// Replace the uses of XOR with SETCC
return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
Equal ? ISD::SETEQ : ISD::SETNE);
}
}
return SDValue();
}
// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
// If we did this folding here, it would be necessary to update the
// MachineBasicBlock CFG, which is awkward.
// Use SimplifySetCC to simplify SETCC's.
SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
CondLHS, CondRHS, CC->get(), SDLoc(N),
false);
if (Simp.getNode()) AddToWorklist(Simp.getNode());
// fold to a simpler setcc
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
N->getOperand(0), Simp.getOperand(2),
Simp.getOperand(0), Simp.getOperand(1),
N->getOperand(4));
return SDValue();
}
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
bool &IsLoad, bool &IsMasked, SDValue &Ptr,
const TargetLowering &TLI) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
EVT VT = LD->getMemoryVT();
if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
return false;
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
if (ST->isIndexed())
return false;
EVT VT = ST->getMemoryVT();
if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
return false;
Ptr = ST->getBasePtr();
IsLoad = false;
} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
if (LD->isIndexed())
return false;
EVT VT = LD->getMemoryVT();
if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
!TLI.isIndexedMaskedLoadLegal(Dec, VT))
return false;
Ptr = LD->getBasePtr();
IsMasked = true;
} else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
if (ST->isIndexed())
return false;
EVT VT = ST->getMemoryVT();
if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
!TLI.isIndexedMaskedStoreLegal(Dec, VT))
return false;
Ptr = ST->getBasePtr();
IsLoad = false;
IsMasked = true;
} else {
return false;
}
return true;
}
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
bool IsLoad = true;
bool IsMasked = false;
SDValue Ptr;
if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
Ptr, TLI))
return false;
// If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
// out. There is no reason to make this a preinc/predec.
if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
Ptr.getNode()->hasOneUse())
return false;
// Ask the target to do addressing mode selection.
SDValue BasePtr;
SDValue Offset;
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
return false;
// Backends without true r+i pre-indexed forms may need to pass a
// constant base with a variable offset so that constant coercion
// will work with the patterns in canonical form.
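// For example (hypothetical values), if the target reports BasePtr = 0x1000
// (a constant) and Offset = %reg, the swap just below lets the analysis that
// follows see the canonical (variable base, constant offset) shape; the
// operands are swapped back before the indexed node is actually built.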
bool Swapped = false;
if (isa<ConstantSDNode>(BasePtr)) {
std::swap(BasePtr, Offset);
Swapped = true;
}
// Don't create an indexed load / store with zero offset.
if (isNullConstant(Offset))
return false;
// Try turning it into a pre-indexed load / store except when:
// 1) The new base ptr is a frame index.
// 2) If N is a store and the new base ptr is either the same as or is a
// predecessor of the value being stored.
// 3) Another use of old base ptr is a predecessor of N. If ptr is folded
// that would create a cycle.
// 4) All uses are load / store ops that use it as old base ptr.
// Check #1. Preinc'ing a frame index would require copying the stack pointer
// (plus the implicit offset) to a register to preinc anyway.
if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
return false;
// Check #2.
if (!IsLoad) {
SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
: cast<StoreSDNode>(N)->getValue();
// Would require a copy.
if (Val == BasePtr)
return false;
// Would create a cycle.
if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
return false;
}
// Caches for hasPredecessorHelper.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Worklist.push_back(N);
// If the offset is a constant, there may be other adds of constants that
// can be folded with this one. We should do this to avoid having to keep
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
if (isa<ConstantSDNode>(Offset))
for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
UE = BasePtr.getNode()->use_end();
UI != UE; ++UI) {
SDUse &Use = UI.getUse();
// Skip the use that is Ptr and uses of other results from BasePtr's
// node (important for nodes that return multiple results).
if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
continue;
if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
continue;
if (Use.getUser()->getOpcode() != ISD::ADD &&
Use.getUser()->getOpcode() != ISD::SUB) {
OtherUses.clear();
break;
}
SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
if (!isa<ConstantSDNode>(Op1)) {
OtherUses.clear();
break;
}
// FIXME: In some cases, we can be smarter about this.
if (Op1.getValueType() != Offset.getValueType()) {
OtherUses.clear();
break;
}
OtherUses.push_back(Use.getUser());
}
if (Swapped)
std::swap(BasePtr, Offset);
// Now check for #3 and #4.
bool RealUse = false;
for (SDNode *Use : Ptr.getNode()->uses()) {
if (Use == N)
continue;
if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
return false;
// If Ptr may be folded in addressing mode of other use, then it's
// not profitable to do this transformation.
if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
RealUse = true;
}
if (!RealUse)
return false;
SDValue Result;
if (!IsMasked) {
if (IsLoad)
Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
else
Result =
DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
} else {
if (IsLoad)
Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
Offset, AM);
else
Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
Offset, AM);
}
++PreIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
deleteAndRecombine(N);
if (Swapped)
std::swap(BasePtr, Offset);
// Replace other uses of BasePtr that can be updated to use Ptr
for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
unsigned OffsetIdx = 1;
if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
OffsetIdx = 0;
assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
BasePtr.getNode() && "Expected BasePtr operand");
// We need to replace ptr0 in the following expression:
// x0 * offset0 + y0 * ptr0 = t0
// knowing that
// x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
//
// where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
// indexed load/store and the expression that needs to be re-written.
//
// Therefore, we have:
// t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
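//
// As a worked example with hypothetical constants: for a PRE_INC form
// t1 = ptr0 + 8 (x1 = y1 = 1, offset1 = 8) and another use
// t0 = ptr0 + 12 (x0 = y0 = 1, offset0 = 12), the formula gives
// t0 = (12 - 8) + t1, i.e. the other add is rewritten as t1 + 4.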
auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
const APInt &Offset0 = CN->getAPIntValue();
const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
APInt CNV = Offset0;
if (X0 < 0) CNV = -CNV;
if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
else CNV = CNV - Offset1;
SDLoc DL(OtherUses[i]);
// We can now generate the new expression.
SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
SDValue NewUse = DAG.getNode(Opcode,
DL,
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
deleteAndRecombine(OtherUses[i]);
}
// Replace the uses of Ptr with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
deleteAndRecombine(Ptr.getNode());
AddToWorklist(Result.getNode());
return true;
}
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
SDValue &BasePtr, SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG,
const TargetLowering &TLI) {
if (PtrUse == N ||
(PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
return false;
if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
return false;
// Don't create an indexed load / store with zero offset.
if (isNullConstant(Offset))
return false;
if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
return false;
SmallPtrSet<const SDNode *, 32> Visited;
for (SDNode *Use : BasePtr.getNode()->uses()) {
if (Use == Ptr.getNode())
continue;
// Don't do this if there's a later user which could perform the index instead.
if (isa<MemSDNode>(Use)) {
bool IsLoad = true;
bool IsMasked = false;
SDValue OtherPtr;
if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
IsMasked, OtherPtr, TLI)) {
SmallVector<const SDNode *, 2> Worklist;
Worklist.push_back(Use);
if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
return false;
}
}
// If all the uses are load / store addresses, then don't do the
// transformation.
if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
for (SDNode *UseUse : Use->uses())
if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
return false;
}
}
return true;
}
static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
bool &IsMasked, SDValue &Ptr,
SDValue &BasePtr, SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG,
const TargetLowering &TLI) {
if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
IsMasked, Ptr, TLI) ||
Ptr.getNode()->hasOneUse())
return nullptr;
// Try turning it into a post-indexed load / store except when
// 1) All uses are load / store ops that use it as base ptr (and
// it may be folded as an addressing mode).
// 2) Op must be independent of N, i.e. Op is neither a predecessor
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
for (SDNode *Op : Ptr->uses()) {
// Check for #1.
if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
continue;
// Check for #2.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 8> Worklist;
// Ptr is predecessor to both N and Op.
Visited.insert(Ptr.getNode());
Worklist.push_back(N);
Worklist.push_back(Op);
if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
!SDNode::hasPredecessorHelper(Op, Visited, Worklist))
return Op;
}
return nullptr;
}
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all uses of the add/sub
/// are redirected to the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
bool IsLoad = true;
bool IsMasked = false;
SDValue Ptr;
SDValue BasePtr;
SDValue Offset;
ISD::MemIndexedMode AM = ISD::UNINDEXED;
SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
Offset, AM, DAG, TLI);
if (!Op)
return false;
SDValue Result;
if (!IsMasked)
Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
Offset, AM)
: DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
BasePtr, Offset, AM);
else
Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
BasePtr, Offset, AM)
: DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
deleteAndRecombine(N);
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
Result.getValue(IsLoad ? 1 : 0));
deleteAndRecombine(Op);
return true;
}
/// Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
ISD::MemIndexedMode AM = LD->getAddressingMode();
assert(AM != ISD::UNINDEXED);
SDValue BP = LD->getOperand(1);
SDValue Inc = LD->getOperand(2);
// Some backends use TargetConstants for load offsets, but don't expect
// TargetConstants in general ADD nodes. We can convert these constants into
// regular Constants (if the constant is not opaque).
assert((Inc.getOpcode() != ISD::TargetConstant ||
!cast<ConstantSDNode>(Inc)->isOpaque()) &&
"Cannot split out indexing using opaque target constants");
if (Inc.getOpcode() == ISD::TargetConstant) {
ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
ConstInc->getValueType(0));
}
unsigned Opc =
(AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
static inline ElementCount numVectorEltsOrZero(EVT T) {
return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
}
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
Val = ST->getValue();
EVT STType = Val.getValueType();
EVT STMemType = ST->getMemoryVT();
if (STType == STMemType)
return true;
if (isTypeLegal(STMemType))
return false; // fail.
if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
return true;
}
if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
STType.isInteger() && STMemType.isInteger()) {
Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
return true;
}
if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
Val = DAG.getBitcast(STMemType, Val);
return true;
}
return false; // fail.
}
bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
EVT LDMemType = LD->getMemoryVT();
EVT LDType = LD->getValueType(0);
assert(Val.getValueType() == LDMemType &&
"Attempting to extend value of non-matching type");
if (LDType == LDMemType)
return true;
if (LDMemType.isInteger() && LDType.isInteger()) {
switch (LD->getExtensionType()) {
case ISD::NON_EXTLOAD:
Val = DAG.getBitcast(LDType, Val);
return true;
case ISD::EXTLOAD:
Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
return true;
case ISD::SEXTLOAD:
Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
return true;
case ISD::ZEXTLOAD:
Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
return true;
}
}
return false;
}
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (OptLevel == CodeGenOpt::None || !LD->isSimple())
return SDValue();
SDValue Chain = LD->getOperand(0);
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
// TODO: Relax this restriction for unordered atomics (see D66309)
if (!ST || !ST->isSimple())
return SDValue();
EVT LDType = LD->getValueType(0);
EVT LDMemType = LD->getMemoryVT();
EVT STMemType = ST->getMemoryVT();
EVT STType = ST->getValue().getValueType();
// There are two cases to consider here:
// 1. The store is fixed width and the load is scalable. In this case we
// don't know at compile time if the store completely envelops the load
// so we abandon the optimisation.
// 2. The store is scalable and the load is fixed width. We could
// potentially support a limited number of cases here, but there has been
// no cost-benefit analysis to prove it's worth it.
bool LdStScalable = LDMemType.isScalableVector();
if (LdStScalable != STMemType.isScalableVector())
return SDValue();
// If we are dealing with scalable vectors on a big endian platform the
// calculation of offsets below becomes trickier, since we do not know at
// compile time the absolute size of the vector. Until we've done more
// analysis on big-endian platforms it seems better to bail out for now.
if (LdStScalable && DAG.getDataLayout().isBigEndian())
return SDValue();
BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
int64_t Offset;
if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
return SDValue();
// Normalize for Endianness. After this Offset=0 will denote that the least
// significant bit in the loaded value maps to the least significant bit in
// the stored value. With Offset=n (for n > 0) the loaded value starts at the
// n:th least significant byte of the stored value.
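// For instance (hypothetical sizes): for a 4-byte store at address A and a
// 2-byte load at address A+2, the raw Offset is 2. On a big-endian target
// bytes A+2..A+3 hold the two least significant bytes of the stored value,
// so the normalization below turns Offset into (4 - 2) - 2 = 0.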
if (DAG.getDataLayout().isBigEndian())
Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
(int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
8 -
Offset;
// Check that the stored value covers all bits that are loaded.
bool STCoversLD;
TypeSize LdMemSize = LDMemType.getSizeInBits();
TypeSize StMemSize = STMemType.getSizeInBits();
if (LdStScalable)
STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
else
STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
StMemSize.getFixedSize());
auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
if (LD->isIndexed()) {
// Cannot handle opaque target constants and we must respect the user's
// request not to split indexes from loads.
if (!canSplitIdx(LD))
return SDValue();
SDValue Idx = SplitIndexingFromLoad(LD);
SDValue Ops[] = {Val, Idx, Chain};
return CombineTo(LD, Ops, 3);
}
return CombineTo(LD, Val, Chain);
};
if (!STCoversLD)
return SDValue();
// Memory as copy space (potentially masked).
if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
// Simple case: Direct non-truncating forwarding
if (LDType.getSizeInBits() == LdMemSize)
return ReplaceLd(LD, ST->getValue(), Chain);
// Can we model the truncate and extension with an and mask?
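// For example (hypothetical types): an i8 truncating store of an i32 value
// followed by a zero- or any-extending i8 load of the same address can be
// forwarded as (stored i32 value & 0xFF); sign-extending loads are excluded
// below because a mask cannot reproduce the sign bits.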
if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
!LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
// Mask to size of LDMemType
auto Mask =
DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
StMemSize.getFixedSize()),
SDLoc(ST), STType);
auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
return ReplaceLd(LD, Val, Chain);
}
}
// TODO: Deal with nonzero offset.
if (LD->getBasePtr().isUndef() || Offset != 0)
return SDValue();
// Model necessary truncations / extensions.
SDValue Val;
// Truncate Value To Stored Memory Size.
do {
if (!getTruncatedStoreValue(ST, Val))
continue;
if (!isTypeLegal(LDMemType))
continue;
if (STMemType != LDMemType) {
// TODO: Support vectors? This requires extract_subvector/bitcast.
if (!STMemType.isVector() && !LDMemType.isVector() &&
STMemType.isInteger() && LDMemType.isInteger())
Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
else
continue;
}
if (!extendLoadedValueToExtension(LD, Val))
continue;
return ReplaceLd(LD, Val, Chain);
} while (false);
// On failure, cleanup dead nodes we may have created.
if (Val->use_empty())
deleteAndRecombine(Val.getNode());
return SDValue();
}
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
// If load is not volatile and there are no uses of the loaded value (and
// the updated indexed value in case of indexed loads), change uses of the
// chain value into uses of the chain input (i.e. delete the dead load).
// TODO: Allow this for unordered atomics (see D66309)
if (LD->isSimple()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
if (!N->hasAnyUseOfValue(0)) {
// It's not safe to use the two value CombineTo variant here. e.g.
// v1, chain2 = load chain1, loc
// v2, chain3 = load chain2, loc
// v3 = add v2, c
// Now we replace use of chain2 with chain1. This makes the second load
// isomorphic to the one we are deleting, and thus makes this load live.
LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
dbgs() << "\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
AddUsersToWorklist(Chain.getNode());
if (N->use_empty())
deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
// If this load has an opaque TargetConstant offset, then we cannot split
// the indexing into an add/sub directly (that TargetConstant may not be
// valid for a different type of node, and we cannot convert an opaque
// target constant into a regular constant).
bool CanSplitIdx = canSplitIdx(LD);
if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
SDValue Index;
if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
Index = SplitIndexingFromLoad(LD);
// Try to fold the base pointer arithmetic into subsequent loads and
// stores.
AddUsersToWorklist(N);
} else
Index = DAG.getUNDEF(N->getValueType(1));
LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
dbgs() << " and 2 other values\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
// If this load is directly stored, replace the load value with the stored
// value.
if (auto V = ForwardStoreValueToDirectLoad(LD))
return V;
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
if (*Alignment > LD->getAlign() &&
isAligned(*Alignment, LD->getSrcValueOffset())) {
SDValue NewLoad = DAG.getExtLoad(
LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
LD->getMemOperand()->getFlags(), LD->getAAInfo());
// NewLoad will always be N as we are only refining the alignment
assert(NewLoad.getNode() == N);
(void)NewLoad;
}
}
}
if (LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(LD, Chain);
// If there is a better chain.
if (Chain != BetterChain) {
SDValue ReplLoad;
// Replace the chain to avoid the dependency.
if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
BetterChain, Ptr, LD->getMemOperand());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
LD->getValueType(0),
BetterChain, Ptr, LD->getMemoryVT(),
LD->getMemOperand());
}
// Create token factor to keep old chain connected.
SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
MVT::Other, Chain, ReplLoad.getValue(1));
// Replace uses with load result and token factor
return CombineTo(N, ReplLoad.getValue(0), Token);
}
}
// Try transforming N to an indexed load.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
// Try to slice up N to more direct loads if the slices are mapped to
// different register banks or pairing can take place.
if (SliceUpLoad(N))
return SDValue(N, 0);
return SDValue();
}
namespace {
/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
/// Helper structure used to compute the cost of a slice.
struct Cost {
/// Are we optimizing for code size.
bool ForCodeSize = false;
/// Various cost.
unsigned Loads = 0;
unsigned Truncates = 0;
unsigned CrossRegisterBanksCopies = 0;
unsigned ZExts = 0;
unsigned Shift = 0;
explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
/// Get the cost of one isolated slice.
Cost(const LoadedSlice &LS, bool ForCodeSize)
: ForCodeSize(ForCodeSize), Loads(1) {
EVT TruncType = LS.Inst->getValueType(0);
EVT LoadedType = LS.getLoadedType();
if (TruncType != LoadedType &&
!LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
ZExts = 1;
}
/// Account for slicing gain in the current cost.
/// Slicing provides a few gains, like removing a shift or a
/// truncate. This method allows growing the cost of the original
/// load with the gain from this slice.
void addSliceGain(const LoadedSlice &LS) {
// Each slice saves a truncate.
const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
LS.Inst->getValueType(0)))
++Truncates;
// If there is a shift amount, this slice gets rid of it.
if (LS.Shift)
++Shift;
// If this slice can merge a cross register bank copy, account for it.
if (LS.canMergeExpensiveCrossRegisterBankCopy())
++CrossRegisterBanksCopies;
}
Cost &operator+=(const Cost &RHS) {
Loads += RHS.Loads;
Truncates += RHS.Truncates;
CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
ZExts += RHS.ZExts;
Shift += RHS.Shift;
return *this;
}
bool operator==(const Cost &RHS) const {
return Loads == RHS.Loads && Truncates == RHS.Truncates &&
CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
ZExts == RHS.ZExts && Shift == RHS.Shift;
}
bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
bool operator<(const Cost &RHS) const {
// Assume cross register banks copies are as expensive as loads.
// FIXME: Do we want some more target hooks?
unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
// Unless we are optimizing for code size, consider the
// expensive operation first.
if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
return ExpensiveOpsLHS < ExpensiveOpsRHS;
return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
(RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
}
bool operator>(const Cost &RHS) const { return RHS < *this; }
bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
};
// The last instruction that represents the slice. This should be a
// truncate instruction.
SDNode *Inst;
// The original load instruction.
LoadSDNode *Origin;
// The right shift amount in bits from the original load.
unsigned Shift;
// The DAG from which Origin came.
// This is used to get some contextual information about legal types, etc.
SelectionDAG *DAG;
LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
unsigned Shift = 0, SelectionDAG *DAG = nullptr)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
/// Get the bits used in a chunk of bits \p BitWidth large.
/// \return Result is \p BitWidth bits wide and has used bits set to 1 and
/// unused bits set to 0.
APInt getUsedBits() const {
// Reproduce the trunc(lshr) sequence:
// - Start from the truncated value.
// - Zero extend to the desired bit width.
// - Shift left.
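//
// For example (hypothetical slice): truncating a 32-bit load to i8 after a
// shift of 16 gives UsedBits = 0x00FF0000, i.e. only the third least
// significant byte of the original value is used.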
assert(Origin && "No original load to compare against.");
unsigned BitWidth = Origin->getValueSizeInBits(0);
assert(Inst && "This slice is not bound to an instruction");
assert(Inst->getValueSizeInBits(0) <= BitWidth &&
"Extracted slice is bigger than the whole type!");
APInt UsedBits(Inst->getValueSizeInBits(0), 0);
UsedBits.setAllBits();
UsedBits = UsedBits.zext(BitWidth);
UsedBits <<= Shift;
return UsedBits;
}
/// Get the size of the slice to be loaded in bytes.
unsigned getLoadedSize() const {
unsigned SliceSize = getUsedBits().countPopulation();
assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
return SliceSize / 8;
}
/// Get the type that will be loaded for this slice.
/// Note: This may not be the final type for the slice.
EVT getLoadedType() const {
assert(DAG && "Missing context");
LLVMContext &Ctxt = *DAG->getContext();
return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
}
/// Get the alignment of the load used for this slice.
Align getAlign() const {
Align Alignment = Origin->getAlign();
uint64_t Offset = getOffsetFromBase();
if (Offset != 0)
Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
return Alignment;
}
/// Check if this slice can be rewritten with legal operations.
bool isLegal() const {
// An invalid slice is not legal.
if (!Origin || !Inst || !DAG)
return false;
// Offsets are only used for indexed loads; we do not handle that.
if (!Origin->getOffset().isUndef())
return false;
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
// Check that the type is legal.
EVT SliceType = getLoadedType();
if (!TLI.isTypeLegal(SliceType))
return false;
// Check that the load is legal for this type.
if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
return false;
// Check that the offset can be computed.
// 1. Check its type.
EVT PtrType = Origin->getBasePtr().getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
return false;
// 2. Check that it fits in the immediate.
if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
return false;
// 3. Check that the computation is legal.
if (!TLI.isOperationLegal(ISD::ADD, PtrType))
return false;
// Check that the zext is legal if it needs one.
EVT TruncateType = Inst->getValueType(0);
if (TruncateType != SliceType &&
!TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
return false;
return true;
}
/// Get the offset in bytes of this slice in the original chunk of
/// bits.
/// \pre DAG != nullptr.
uint64_t getOffsetFromBase() const {
assert(DAG && "Missing context.");
bool IsBigEndian = DAG->getDataLayout().isBigEndian();
assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
uint64_t Offset = Shift / 8;
unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
"The size of the original loaded type is not a multiple of a"
" byte.");
// If Offset is bigger than TySizeInBytes, it means we are loading all
// zeros. This should have been optimized before in the process.
assert(TySizeInBytes > Offset &&
"Invalid shift amount for given loaded size");
if (IsBigEndian)
Offset = TySizeInBytes - Offset - getLoadedSize();
return Offset;
}
/// Generate the sequence of instructions to load the slice
/// represented by this object and redirect the uses of this slice to
/// this new sequence of instructions.
/// \pre this->Inst && this->Origin are valid Instructions and this
/// object passed the legal check: LoadedSlice::isLegal returned true.
/// \return The last instruction of the sequence used to load the slice.
SDValue loadSlice() const {
assert(Inst && Origin && "Unable to replace a non-existing slice.");
const SDValue &OldBaseAddr = Origin->getBasePtr();
SDValue BaseAddr = OldBaseAddr;
// Get the offset in that chunk of bytes w.r.t. the endianness.
int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
assert(Offset >= 0 && "Offset too big to fit in int64_t!");
if (Offset) {
// BaseAddr = BaseAddr + Offset.
EVT ArithType = BaseAddr.getValueType();
SDLoc DL(Origin);
BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
DAG->getConstant(Offset, DL, ArithType));
}
// Create the type of the loaded slice according to its size.
EVT SliceType = getLoadedType();
// Create the load for the slice.
SDValue LastInst =
DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
Origin->getMemOperand()->getFlags());
// If the final type is not the same as the loaded type, this means that
// we have to pad with zero. Create a zero extend for that.
EVT FinalType = Inst->getValueType(0);
if (SliceType != FinalType)
LastInst =
DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
return LastInst;
}
/// Check if this slice can be merged with an expensive cross register
/// bank copy. E.g.,
/// i = load i32
/// f = bitcast i32 i to float
bool canMergeExpensiveCrossRegisterBankCopy() const {
if (!Inst || !Inst->hasOneUse())
return false;
SDNode *Use = *Inst->use_begin();
if (Use->getOpcode() != ISD::BITCAST)
return false;
assert(DAG && "Missing context");
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
EVT ResVT = Use->getValueType(0);
const TargetRegisterClass *ResRC =
TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
const TargetRegisterClass *ArgRC =
TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
Use->getOperand(0)->isDivergent());
if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
return false;
// At this point, we know that we perform a cross-register-bank copy.
// Check if it is expensive.
const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
// Assume bitcasts are cheap, unless both register classes do not
// explicitly share a common sub class.
if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
return false;
// Check if it will be merged with the load.
// 1. Check the alignment constraint.
Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
ResVT.getTypeForEVT(*DAG->getContext()));
if (RequiredAlignment > getAlign())
return false;
// 2. Check that the load is a legal operation for that type.
if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
return false;
// 3. Check that we do not have a zext in the way.
if (Inst->getValueType(0) != getLoadedType())
return false;
return true;
}
};
} // end anonymous namespace
/// Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// If all the bits are one, this is dense!
if (UsedBits.isAllOnesValue())
return true;
// Get rid of the unused bits on the right.
APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
// Get rid of the unused bits on the left.
if (NarrowedUsedBits.countLeadingZeros())
NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
// Check that the chunk of bits is completely used.
return NarrowedUsedBits.isAllOnesValue();
}
/// Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
const LoadedSlice &Second) {
assert(First.Origin == Second.Origin && First.Origin &&
"Unable to match different memory origins.");
APInt UsedBits = First.getUsedBits();
assert((UsedBits & Second.getUsedBits()) == 0 &&
"Slices are not supposed to overlap.");
UsedBits |= Second.getUsedBits();
return areUsedBitsDense(UsedBits);
}
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
LoadedSlice::Cost &GlobalLSCost) {
unsigned NumberOfSlices = LoadedSlices.size();
// If there are fewer than 2 elements, no pairing is possible.
if (NumberOfSlices < 2)
return;
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
});
const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
// First (resp. Second) is the first (resp. second) potential candidate
// to be placed in a paired load.
const LoadedSlice *First = nullptr;
const LoadedSlice *Second = nullptr;
for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
// Set the beginning of the pair.
First = Second) {
Second = &LoadedSlices[CurrSlice];
// If First is NULL, it means we start a new pair.
// Get to the next slice.
if (!First)
continue;
EVT LoadedType = First->getLoadedType();
// If the types of the slices are different, we cannot pair them.
if (LoadedType != Second->getLoadedType())
continue;
// Check if the target supplies paired loads for this type.
Align RequiredAlignment;
if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
// move to the next pair, this type is hopeless.
Second = nullptr;
continue;
}
// Check if we meet the alignment requirement.
if (First->getAlign() < RequiredAlignment)
continue;
// Check that both loads are next to each other in memory.
if (!areSlicesNextToEachOther(*First, *Second))
continue;
assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
--GlobalLSCost.Loads;
// Move to the next pair.
Second = nullptr;
}
}
/// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model will be mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
const APInt &UsedBits, bool ForCodeSize) {
unsigned NumberOfSlices = LoadedSlices.size();
if (StressLoadSlicing)
return NumberOfSlices > 1;
// Check (1).
if (NumberOfSlices != 2)
return false;
// Check (2).
if (!areUsedBitsDense(UsedBits))
return false;
// Check (3).
LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
// The original code has one big load.
OrigCost.Loads = 1;
for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
const LoadedSlice &LS = LoadedSlices[CurrSlice];
// Accumulate the cost of all the slices.
LoadedSlice::Cost SliceCost(LS, ForCodeSize);
GlobalSlicingCost += SliceCost;
// Account, as a cost of the original configuration, for the gain obtained
// with the current slice.
OrigCost.addSliceGain(LS);
}
// If the target supports paired load, adjust the cost accordingly.
adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
return OrigCost > GlobalSlicingCost;
}
/// If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
bool DAGCombiner::SliceUpLoad(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
LoadSDNode *LD = cast<LoadSDNode>(N);
if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
!LD->getValueType(0).isInteger())
return false;
// The algorithm to split up a load of a scalable vector into individual
// elements currently requires knowing the length of the loaded type,
// so will need adjusting to work on scalable vectors.
if (LD->getValueType(0).isScalableVector())
return false;
// Keep track of already used bits to detect overlapping values.
// In that case, we will just abort the transformation.
APInt UsedBits(LD->getValueSizeInBits(0), 0);
SmallVector<LoadedSlice, 4> LoadedSlices;
// Check if this load is used as several smaller chunks of bits.
// Basically, look for uses in trunc or trunc(lshr) and record a new chain
// of computation for each trunc.
for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
UI != UIEnd; ++UI) {
// Skip the uses of the chain.
if (UI.getUse().getResNo() != 0)
continue;
SDNode *User = *UI;
unsigned Shift = 0;
// Check if this is a trunc(lshr).
if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
isa<ConstantSDNode>(User->getOperand(1))) {
Shift = User->getConstantOperandVal(1);
User = *User->use_begin();
}
// At this point, User is a truncate if we encountered either trunc or
// trunc(lshr).
if (User->getOpcode() != ISD::TRUNCATE)
return false;
// The width of the type must be a power of 2 and at least 8 bits.
// Otherwise the load cannot be represented in LLVM IR.
// Moreover, if the shift amount is not a multiple of 8 bits, the slice
// would straddle byte boundaries. We do not support that.
unsigned Width = User->getValueSizeInBits(0);
if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
return false;
// Build the slice for this chain of computations.
LoadedSlice LS(User, LD, Shift, &DAG);
APInt CurrentUsedBits = LS.getUsedBits();
// Check if this slice overlaps with another.
if ((CurrentUsedBits & UsedBits) != 0)
return false;
// Update the bits used globally.
UsedBits |= CurrentUsedBits;
// Check if the new slice would be legal.
if (!LS.isLegal())
return false;
// Record the slice.
LoadedSlices.push_back(LS);
}
// Abort slicing if it does not seem to be profitable.
if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
return false;
++SlicedLoads;
// Rewrite each chain to use an independent load.
// By construction, each chain can be represented by a unique load.
// Prepare the argument for the new token factor for all the slices.
SmallVector<SDValue, 8> ArgChains;
for (const LoadedSlice &LS : LoadedSlices) {
SDValue SliceInst = LS.loadSlice();
CombineTo(LS.Inst, SliceInst, true);
if (SliceInst.getOpcode() != ISD::LOAD)
SliceInst = SliceInst.getOperand(0);
assert(SliceInst->getOpcode() == ISD::LOAD &&
"It takes more than a zext to get to the loaded slice!!");
ArgChains.push_back(SliceInst.getValue(1));
}
SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
ArgChains);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
AddToWorklist(Chain.getNode());
return true;
}
/// Check to see if V is (and load (ptr), imm), where the load has
/// specific bytes cleared out. If so, return the byte size being masked out
/// and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
std::pair<unsigned, unsigned> Result(0, 0);
// Check for the structure we're looking for.
if (V->getOpcode() != ISD::AND ||
!isa<ConstantSDNode>(V->getOperand(1)) ||
!ISD::isNormalLoad(V->getOperand(0).getNode()))
return Result;
// Check the chain and pointer.
LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
// This only handles simple types.
if (V.getValueType() != MVT::i16 &&
V.getValueType() != MVT::i32 &&
V.getValueType() != MVT::i64)
return Result;
// Check the constant mask. Invert it so that the bits being masked out are
// 0 and the bits being kept are 1. Use getSExtValue so that leading bits
// follow the sign bit for uniformity.
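// For example (hypothetical mask): for an i32 load masked with 0xFFFF00FF,
// NotMask (sign-extended, then inverted) is 0x000000000000FF00, which is a
// single byte-aligned run of ones, so the result is (1, 1): one byte being
// masked out, at byte offset 1.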
uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
unsigned NotMaskLZ = countLeadingZeros(NotMask);
if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
unsigned NotMaskTZ = countTrailingZeros(NotMask);
if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
if (NotMaskLZ == 64) return Result; // All zero mask.
// See if we have a continuous run of bits. If so, we have 0*1+0*
if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
return Result;
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
if (V.getValueType() != MVT::i64 && NotMaskLZ)
NotMaskLZ -= 64-V.getValueSizeInBits();
unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
switch (MaskedBytes) {
case 1:
case 2:
case 4: break;
default: return Result; // All one mask, or 5-byte mask.
}
// Verify that the first bit starts at a multiple of mask so that the access
// is aligned the same as the access width.
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
// For narrowing to be valid, it must be the case that the load is the
// memory operation immediately preceding the store.
if (LD == Chain.getNode())
; // ok.
else if (Chain->getOpcode() == ISD::TokenFactor &&
SDValue(LD, 1).hasOneUse()) {
// LD has only 1 chain use, so there are no indirect dependencies.
if (!LD->isOperandOf(Chain.getNode()))
return Result;
} else
return Result; // Fail.
Result.first = MaskedBytes;
Result.second = NotMaskTZ/8;
return Result;
}
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
static SDValue
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue IVal, StoreSDNode *St,
DAGCombiner *DC) {
unsigned NumBytes = MaskInfo.first;
unsigned ByteShift = MaskInfo.second;
SelectionDAG &DAG = DC->getDAG();
// Check to see if IVal is all zeros in the part being masked in by the 'or'
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
// legalization (and the target doesn't explicitly think this is a bad idea).
MVT VT = MVT::getIntegerVT(NumBytes * 8);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DC->isTypeLegal(VT))
return SDValue();
if (St->getMemOperand() &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*St->getMemOperand()))
return SDValue();
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
if (ByteShift) {
SDLoc DL(IVal);
IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
DAG.getConstant(ByteShift*8, DL,
DC->getShiftAmountTy(IVal.getValueType())));
}
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
if (DAG.getDataLayout().isLittleEndian())
StOffset = ByteShift;
else
StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
}
// Truncate down to the new size.
IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
++OpsNarrowed;
return DAG
.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
St->getPointerInfo().getWithOffset(StOffset),
St->getOriginalAlign());
}
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
if (!ST->isSimple())
return SDValue();
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
EVT VT = Value.getValueType();
if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
return SDValue();
unsigned Opc = Value.getOpcode();
// If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
// is a byte mask indicating a consecutive number of bytes, check to see if
// Y is known to provide just those bytes. If so, we try to replace the
// load + 'or' + store sequence with a single (narrower) store, which makes
// the load dead.
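// For example (hypothetical pattern): for
//   store (or (and (load P), 0xFFFF00FF), Y), P
// where Y is known to be zero outside bits [8, 16), the whole sequence can be
// replaced by a single i8 store of (Y >> 8) at byte offset 1 from P (on a
// little-endian target), assuming an i8 store is legal there.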
if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
std::pair<unsigned, unsigned> MaskedLoad;
MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
if (MaskedLoad.first)
if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(1), ST,this))
return NewST;
// Or is commutative, so try swapping X and Y.
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
if (MaskedLoad.first)
if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(0), ST,this))
return NewST;
}
if (!EnableReduceLoadOpStoreWidth)
return SDValue();
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
SDValue N0 = Value.getOperand(0);
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
Chain == SDValue(N0.getNode(), 1)) {
LoadSDNode *LD = cast<LoadSDNode>(N0);
if (LD->getBasePtr() != Ptr ||
LD->getPointerInfo().getAddrSpace() !=
ST->getPointerInfo().getAddrSpace())
return SDValue();
// Find the type to narrow the load / op / store to.
SDValue N1 = Value.getOperand(1);
unsigned BitWidth = N1.getValueSizeInBits();
APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
if (Opc == ISD::AND)
Imm ^= APInt::getAllOnesValue(BitWidth);
if (Imm == 0 || Imm.isAllOnesValue())
return SDValue();
unsigned ShAmt = Imm.countTrailingZeros();
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
// The narrowing should be profitable, the load/store operation should be
// legal (or custom) and the store size should be equal to the NewVT width.
while (NewBW < BitWidth &&
(NewVT.getStoreSizeInBits() != NewBW ||
!TLI.isOperationLegalOrCustom(Opc, NewVT) ||
!TLI.isNarrowingProfitable(VT, NewVT))) {
NewBW = NextPowerOf2(NewBW);
NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
}
if (NewBW >= BitWidth)
return SDValue();
// If the lsb that changed does not start at a NewBW-bit boundary,
// start at the previous one.
if (ShAmt % NewBW)
ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
std::min(BitWidth, ShAmt + NewBW));
if ((Imm & Mask) == Imm) {
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
if (Opc == ISD::AND)
NewImm ^= APInt::getAllOnesValue(NewBW);
uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
return SDValue();
SDValue NewPtr =
DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
LD->getMemOperand()->getFlags(), LD->getAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
DAG.getConstant(NewImm, SDLoc(Value),
NewVT));
SDValue NewST =
DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
AddToWorklist(NewPtr.getNode());
AddToWorklist(NewLD.getNode());
AddToWorklist(NewVal.getNode());
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
++OpsNarrowed;
return NewST;
}
}
return SDValue();
}
/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Value = ST->getValue();
if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
Value.hasOneUse()) {
LoadSDNode *LD = cast<LoadSDNode>(Value);
EVT VT = LD->getMemoryVT();
if (!VT.isFloatingPoint() ||
VT != ST->getMemoryVT() ||
LD->isNonTemporal() ||
ST->isNonTemporal() ||
LD->getPointerInfo().getAddrSpace() != 0 ||
ST->getPointerInfo().getAddrSpace() != 0)
return SDValue();
TypeSize VTSize = VT.getSizeInBits();
// We don't know the size of scalable types at compile time so we cannot
// create an integer of the equivalent size.
if (VTSize.isScalable())
return SDValue();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
return SDValue();
Align LDAlign = LD->getAlign();
Align STAlign = ST->getAlign();
Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
SDValue NewLD =
DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(), LDAlign);
SDValue NewST =
DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
ST->getPointerInfo(), STAlign);
AddToWorklist(NewLD.getNode());
AddToWorklist(NewST.getNode());
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
++LdStFP2Int;
return NewST;
}
return SDValue();
}
// This is a helper function for visitMUL to check the profitability
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
// MulNode is the original multiply, AddNode is (add x, c1),
// and ConstNode is c2.
//
// If the (add x, c1) has multiple uses, we could increase
// the number of adds if we make this transformation.
// It would only be worth doing this if we can remove a
// multiply in the process. Check for that here.
// To illustrate:
// (A + c1) * c3
// (A + c2) * c3
// We're checking for cases where we have common "c3 * A" expressions.
bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
SDValue &AddNode,
SDValue &ConstNode) {
APInt Val;
// If the add only has one use, this would be OK to do.
if (AddNode.getNode()->hasOneUse())
return true;
// Walk all the users of the constant with which we're multiplying.
for (SDNode *Use : ConstNode->uses()) {
if (Use == MulNode) // This use is the one we're on right now. Skip it.
continue;
if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
SDNode *OtherOp;
SDNode *MulVar = AddNode.getOperand(0).getNode();
// OtherOp is what we're multiplying against the constant.
if (Use->getOperand(0) == ConstNode)
OtherOp = Use->getOperand(1).getNode();
else
OtherOp = Use->getOperand(0).getNode();
// Check to see if multiply is with the same operand of our "add".
//
// ConstNode = CONST
// Use = ConstNode * A <-- visiting Use. OtherOp is A.
// ...
// AddNode = (A + c1) <-- MulVar is A.
// = AddNode * ConstNode <-- current visiting instruction.
//
// If we make this transformation, we will have a common
// multiply (ConstNode * A) that we can save.
if (OtherOp == MulVar)
return true;
// Now check to see if a future expansion will give us a common
// multiply.
//
// ConstNode = CONST
// AddNode = (A + c1)
// ... = AddNode * ConstNode <-- current visiting instruction.
// ...
// OtherOp = (A + c2)
// Use = OtherOp * ConstNode <-- visiting Use.
//
// If we make this transformation, we will have a common
// multiply (CONST * A) after we also do the same transformation
// to the "Use" instruction.
if (OtherOp->getOpcode() == ISD::ADD &&
DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
OtherOp->getOperand(0).getNode() == MulVar)
return true;
}
}
// Didn't find a case where this would be profitable.
return false;
}
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores) {
SmallVector<SDValue, 8> Chains;
SmallPtrSet<const SDNode *, 8> Visited;
SDLoc StoreDL(StoreNodes[0].MemNode);
for (unsigned i = 0; i < NumStores; ++i) {
Visited.insert(StoreNodes[i].MemNode);
}
// don't include nodes that are children or repeated nodes.
for (unsigned i = 0; i < NumStores; ++i) {
if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
Chains.push_back(StoreNodes[i].MemNode->getChain());
}
assert(Chains.size() > 0 && "Chain should have generated a chain");
return DAG.getTokenFactor(StoreDL, Chains);
}
bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector, bool UseTrunc) {
// Make sure we have something to merge.
if (NumStores < 2)
return false;
assert((!UseTrunc || !UseVector) &&
"This optimization cannot emit a vector truncating store");
// The latest Node in the DAG.
SDLoc DL(StoreNodes[0].MemNode);
TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
EVT StoreTy;
if (UseVector) {
unsigned Elts = NumStores * NumMemElts;
// Get the type for the merged vector store.
StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
} else
StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
SDValue StoredVal;
if (UseVector) {
if (IsConstantSrc) {
SmallVector<SDValue, 8> BuildVector;
for (unsigned I = 0; I != NumStores; ++I) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
SDValue Val = St->getValue();
// If constant is of the wrong type, convert it now.
if (MemVT != Val.getValueType()) {
Val = peekThroughBitcasts(Val);
// Deal with constants of wrong size.
if (ElementSizeBits != Val.getValueSizeInBits()) {
EVT IntMemVT =
EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
if (isa<ConstantFPSDNode>(Val)) {
// Not clear how to truncate FP values.
return false;
} else if (auto *C = dyn_cast<ConstantSDNode>(Val))
Val = DAG.getConstant(C->getAPIntValue()
.zextOrTrunc(Val.getValueSizeInBits())
.zextOrTrunc(ElementSizeBits),
SDLoc(C), IntMemVT);
}
// Bitcast the value so it has the correct memory type (MemVT).
Val = DAG.getBitcast(MemVT, Val);
}
BuildVector.push_back(Val);
}
StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
DL, StoreTy, BuildVector);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = peekThroughBitcasts(St->getValue());
// All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
// type MemVT. If the underlying value is not the correct
// type, but it is an extraction of an appropriate vector we
// can recast Val to be of the correct type. This may require
// converting between EXTRACT_VECTOR_ELT and
// EXTRACT_SUBVECTOR.
if ((MemVT != Val.getValueType()) &&
(Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
EVT MemVTScalarTy = MemVT.getScalarType();
// We may need to add a bitcast here to get types to line up.
if (MemVTScalarTy != Val.getValueType().getScalarType()) {
Val = DAG.getBitcast(MemVT, Val);
} else {
unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
: ISD::EXTRACT_VECTOR_ELT;
SDValue Vec = Val.getOperand(0);
SDValue Idx = Val.getOperand(1);
Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
}
}
Ops.push_back(Val);
}
// Build the extracted vector elements back into a vector.
StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
DL, StoreTy, Ops);
}
} else {
// We should always use a vector store when merging extracted vector
// elements, so this path implies a store of constants.
assert(IsConstantSrc && "Merged vector elements should use vector store");
APInt StoreInt(SizeInBits, 0);
// Construct a single integer constant which is made of the smaller
// constant inputs.
bool IsLE = DAG.getDataLayout().isLittleEndian();
for (unsigned i = 0; i < NumStores; ++i) {
unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
Val = peekThroughBitcasts(Val);
StoreInt <<= ElementSizeBits;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
StoreInt |= C->getAPIntValue()
.zextOrTrunc(ElementSizeBits)
.zextOrTrunc(SizeInBits);
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
StoreInt |= C->getValueAPF()
.bitcastToAPInt()
.zextOrTrunc(ElementSizeBits)
.zextOrTrunc(SizeInBits);
// If fp truncation is necessary give up for now.
if (MemVT.getSizeInBits() != ElementSizeBits)
return false;
} else {
llvm_unreachable("Invalid constant element type");
}
}
// Create the new Load and Store operations.
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
// Make sure we use a truncating store if that is necessary for legality.
SDValue NewStore;
if (!UseTrunc) {
NewStore =
DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstInChain->getAlign());
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
SDValue ExtendedStoreVal =
DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
LegalizedStoredValTy);
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
}
// Replace all merged stores with the new store.
for (unsigned i = 0; i < NumStores; ++i)
CombineTo(StoreNodes[i].MemNode, NewStore);
AddToWorklist(NewChain.getNode());
return true;
}
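// Collect into StoreNodes every store that writes the same kind of value
// (constant, load, or extracted vector element) as St and whose address
// shares St's base pointer, recording each candidate's byte offset from that
// base. RootNode is set to the chain ancestor from which the search was done.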
void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
SDNode *&RootNode) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer. We must have a base and an offset. Do not handle stores to undef
// base pointers.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
return;
SDValue Val = peekThroughBitcasts(St->getValue());
StoreSource StoreSrc = getStoreSource(Val);
assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
// Match on loadbaseptr if relevant.
EVT MemVT = St->getMemoryVT();
BaseIndexOffset LBasePtr;
EVT LoadVT;
if (StoreSrc == StoreSource::Load) {
auto *Ld = cast<LoadSDNode>(Val);
LBasePtr = BaseIndexOffset::match(Ld, DAG);
LoadVT = Ld->getMemoryVT();
// Load and store should be the same type.
if (MemVT != LoadVT)
return;
// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
return;
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
if (!Ld->isSimple() || Ld->isIndexed())
return;
}
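// Returns true if Other stores the same kind of value as St to an address
// that differs from St's base pointer only by a constant offset, which is
// returned in Offset.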
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
if (!Other->isSimple() || Other->isIndexed())
return false;
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
return false;
SDValue OtherBC = peekThroughBitcasts(Other->getValue());
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
switch (StoreSrc) {
case StoreSource::Load: {
if (NoTypeMatch)
return false;
// The Load's Base Ptr must also match.
auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
if (!OtherLd)
return false;
BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
if (LoadVT != OtherLd->getMemoryVT())
return false;
// Loads must only have one use.
if (!OtherLd->hasNUsesOfValue(1, 0))
return false;
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
if (!OtherLd->isSimple() || OtherLd->isIndexed())
return false;
// Don't mix temporal loads with non-temporal loads.
if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
break;
}
case StoreSource::Constant:
if (NoTypeMatch)
return false;
if (!isIntOrFPConstant(OtherBC))
return false;
break;
case StoreSource::Extract:
// Do not merge truncated stores here.
if (Other->isTruncatingStore())
return false;
if (!MemVT.bitsEq(OtherBC.getValueType()))
return false;
if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
break;
default:
llvm_unreachable("Unhandled store source for merging");
}
Ptr = BaseIndexOffset::match(Other, DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
// Check whether this StoreNode/RootNode pair has already bailed out of the
// dependence check more times than the allowed limit.
auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
SDNode *RootNode) -> bool {
auto RootCount = StoreRootCountMap.find(StoreNode);
return RootCount != StoreRootCountMap.end() &&
RootCount->second.first == RootNode &&
RootCount->second.second > StoreMergeDependenceLimit;
};
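// Add the user at UseIter to StoreNodes if it is a chain use that matches the
// candidate criteria above and has not repeatedly failed the dependence check.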
auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
// This must be a chain use.
if (UseIter.getOperandNo() != 0)
return;
if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
BaseIndexOffset Ptr;
int64_t PtrDiff;
if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
!OverLimitInDependenceCheck(OtherStore, RootNode))
StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
}
};
// We are looking for a root node which is an ancestor to all mergeable
// stores. We search up through a load, to our root and then down
// through all children. For instance we will find Store{1,2,3} if
// St is Store1, Store2, or Store3 where the root is not a load,
// which is always true for nonvolatile ops. TODO: Expand
// the search to find all valid candidates through multiple layers of loads.
//
// Root
// |-------|-------|
// Load Load Store3
// | |
// Store1 Store2
//
// FIXME: We should be able to climb and
// descend TokenFactors to find candidates as well.
RootNode = St->getChain().getNode();
unsigned NumNodesExplored = 0;
const unsigned MaxSearchNodes = 1024;
if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
RootNode = Ldn->getChain().getNode();
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
TryToAddCandidate(I2);
}
}
} else {
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
TryToAddCandidate(I);
}
}
// We need to check that merging these stores does not cause a loop in
// the DAG. Any store candidate may depend on another candidate
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
SDNode *RootNode) {
// FIXME: We should be able to truncate a full search of
// predecessors by doing a BFS and keeping tabs on the originating
// stores from which worklist nodes come, in a similar way to
// TokenFactor simplification.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 8> Worklist;
// RootNode is a predecessor to all candidates so we need not search
// past it. Add RootNode (peeking through TokenFactors). Do not count
// these towards the size check.
Worklist.push_back(RootNode);
while (!Worklist.empty()) {
auto N = Worklist.pop_back_val();
if (!Visited.insert(N).second)
continue; // Already present in Visited.
if (N->getOpcode() == ISD::TokenFactor) {
for (SDValue Op : N->ops())
Worklist.push_back(Op.getNode());
}
}
// Don't count pruning nodes towards max.
unsigned int Max = 1024 + Visited.size();
// Search Ops of store candidates.
for (unsigned i = 0; i < NumStores; ++i) {
SDNode *N = StoreNodes[i].MemNode;
// Of the 4 Store Operands:
// * Chain (Op 0) -> We have already considered these
// in candidate selection and can be
// safely ignored
// * Value (Op 1) -> Cycles may happen (e.g. through load chains)
// * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
// but aren't necessarily from the same base node, so
// cycles are possible (e.g. via indexed store).
// * (Op 3) -> Represents the pre- or post-indexing offset (or undef for
// non-indexed stores). Not constant on all targets (e.g. ARM)
// and so can participate in a cycle.
for (unsigned j = 1; j < N->getNumOperands(); ++j)
Worklist.push_back(N->getOperand(j).getNode());
}
// Search through DAG. We can stop early if we find a store node.
for (unsigned i = 0; i < NumStores; ++i)
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
Max)) {
// If the search bails out, record the StoreNode and RootNode in the
// StoreRootCountMap. If we have seen the pair many times over a limit,
// we won't add the StoreNode into the StoreNodes set again.
if (Visited.size() >= Max) {
auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
if (RootCount.first == RootNode)
RootCount.second++;
else
RootCount = {RootNode, 1};
}
return false;
}
return true;
}
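// Trim StoreNodes until its leading entries form a run of consecutive stores
// (each offset exactly ElementSizeBytes past the previous one) and return the
// length of that run, or 0 if no such run of at least two stores exists. For
// example, with ElementSizeBytes == 4 and offsets {0, 4, 8, 20}, the first
// three stores are consecutive and 3 is returned.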
unsigned
DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
int64_t ElementSizeBytes) const {
while (true) {
// Find a store past the width of the first store.
size_t StartIdx = 0;
while ((StartIdx + 1 < StoreNodes.size()) &&
StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
StoreNodes[StartIdx + 1].OffsetFromBase)
++StartIdx;
// Bail if we don't have enough candidates to merge.
if (StartIdx + 1 >= StoreNodes.size())
return 0;
// Trim stores that overlapped with the first store.
if (StartIdx)
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
// Scan the memory operations on the chain and find the first
// non-consecutive store memory address.
unsigned NumConsecutiveStores = 1;
int64_t StartAddress = StoreNodes[0].OffsetFromBase;
// Check that the addresses are consecutive starting from the second
// element in the list of stores.
for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
NumConsecutiveStores = i + 1;
}
if (NumConsecutiveStores > 1)
return NumConsecutiveStores;
// There are no consecutive stores at the start of the list.
// Remove the first store and try again.
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
}
}
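// Try to merge runs of consecutive constant stores from StoreNodes into a
// single wide integer store (possibly truncating) or, when profitable and
// allowed, a single vector store.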
bool DAGCombiner::tryStoreMergeOfConstants(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
EVT MemVT, SDNode *RootNode, bool AllowVectors) {
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
int64_t ElementSizeBytes = MemVT.getStoreSize();
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
bool MadeChange = false;
// Store the constants into memory as one consecutive store.
while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned LastLegalType = 1;
unsigned LastLegalVectorType = 1;
bool LastIntegerTrunc = false;
bool NonZero = false;
unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue StoredVal = ST->getValue();
bool IsElementZero = false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
IsElementZero = C->isNullValue();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
if (IsElementZero) {
if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
FirstZeroAfterNonZero = i;
}
NonZero |= !IsElementZero;
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
bool IsFast = false;
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
LastIntegerTrunc = false;
LastLegalType = i + 1;
// Or check whether a truncstore is legal.
} else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
LastIntegerTrunc = true;
LastLegalType = i + 1;
}
}
// We only use vectors if the constant is known to be zero or the
// target allows it and the function is not marked with the
// noimplicitfloat attribute.
if ((!NonZero ||
TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
AllowVectors) {
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
LastLegalVectorType = i + 1;
}
}
bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
bool UseTrunc = LastIntegerTrunc && !UseVector;
// Check if we found a legal integer type that creates a meaningful
// merge.
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning, one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have is if the alignment has
// improved or we've dropped a non-zero value. Drop as many
// candidates as we can here.
unsigned NumSkip = 1;
while ((NumSkip < NumConsecutiveStores) &&
(NumSkip < FirstZeroAfterNonZero) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
NumConsecutiveStores -= NumSkip;
continue;
}
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
RootNode)) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
continue;
}
MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
/*IsConstantSrc*/ true,
UseVector, UseTrunc);
// Remove merged stores for next iteration.
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
}
return MadeChange;
}
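// Try to merge runs of consecutive stores of extracted vector elements or
// subvectors from StoreNodes into a single wide vector store.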
bool DAGCombiner::tryStoreMergeOfExtracts(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
EVT MemVT, SDNode *RootNode) {
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
bool MadeChange = false;
// Loop on Consecutive Stores on success.
while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned NumStoresToMerge = 1;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
bool IsFast = false;
// Break early when size is too large to be legal.
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
break;
if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
NumStoresToMerge = i + 1;
}
// Check if we found a legal vector type that creates a meaningful
// merge.
if (NumStoresToMerge < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning, one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have is if the alignment has
// improved. Drop as many candidates as we can here.
unsigned NumSkip = 1;
while ((NumSkip < NumConsecutiveStores) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
NumConsecutiveStores -= NumSkip;
continue;
}
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
RootNode)) {
StoreNodes.erase(StoreNodes.begin(),
StoreNodes.begin() + NumStoresToMerge);
NumConsecutiveStores -= NumStoresToMerge;
continue;
}
MadeChange |= mergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
/*UseVector*/ true, /*UseTrunc*/ false);
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
NumConsecutiveStores -= NumStoresToMerge;
}
return MadeChange;
}
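// Try to replace runs of consecutive stores whose values are loads from
// consecutive addresses with a single wide load feeding a single wide store
// (using either a vector or an integer type, depending on which covers more
// stores).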
bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumConsecutiveStores, EVT MemVT,
SDNode *RootNode, bool AllowVectors,
bool IsNonTemporalStore,
bool IsNonTemporalLoad) {
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
int64_t ElementSizeBytes = MemVT.getStoreSize();
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
bool MadeChange = false;
// Look for load nodes which are used by the stored values.
SmallVector<MemOpLink, 8> LoadNodes;
// Find acceptable loads. Loads need to have the same chain (token factor),
// must not be zext, volatile, indexed, and they must be consecutive.
BaseIndexOffset LdBasePtr;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = peekThroughBitcasts(St->getValue());
LoadSDNode *Ld = cast<LoadSDNode>(Val);
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
// If this is not the first ptr that we check.
int64_t LdOffset = 0;
if (LdBasePtr.getBase().getNode()) {
// The base ptr must be the same.
if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
break;
} else {
// Check that all other base pointers are the same as this one.
LdBasePtr = LdPtr;
}
// We found a potential memory operand to merge.
LoadNodes.push_back(MemOpLink(Ld, LdOffset));
}
while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
Align RequiredAlignment;
bool NeedRotate = false;
if (LoadNodes.size() == 2) {
// If we have load/store pair instructions and we only have two values,
// don't bother merging.
if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
break;
}
// If the loads are reversed, see if we can rotate the halves into place.
int64_t Offset0 = LoadNodes[0].OffsetFromBase;
int64_t Offset1 = LoadNodes[1].OffsetFromBase;
EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
if (Offset0 - Offset1 == ElementSizeBytes &&
(hasOperation(ISD::ROTL, PairVT) ||
hasOperation(ISD::ROTR, PairVT))) {
std::swap(LoadNodes[0], LoadNodes[1]);
NeedRotate = true;
}
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
Align FirstStoreAlign = FirstInChain->getAlign();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
// Scan the memory operations on the chain and find the first
// non-consecutive load memory address. These variables hold the index in
// the store node array.
unsigned LastConsecutiveLoad = 1;
// This variable refers to the size and not index in the array.
unsigned LastLegalVectorType = 1;
unsigned LastLegalIntegerType = 1;
bool isDereferenceable = true;
bool DoIntegerTruncate = false;
int64_t StartAddress = LoadNodes[0].OffsetFromBase;
SDValue LoadChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
// All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != LoadChain)
break;
int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
isDereferenceable = false;
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
bool IsFastSt = false;
bool IsFastLd = false;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalVectorType = i + 1;
}
// Find a legal type for the integer store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = false;
// Or check whether a truncstore and extload is legal.
} else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = true;
}
}
}
// Only use vector types if the vector type is larger than the integer
// type. If they are the same, use integers.
bool UseVectorTy =
LastLegalVectorType > LastLegalIntegerType && AllowVectors;
unsigned LastLegalType =
std::max(LastLegalVectorType, LastLegalIntegerType);
// We add +1 here because the LastXXX variables refer to an index in the
// array while NumElem refers to the number of elements.
unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
NumElem = std::min(LastLegalType, NumElem);
Align FirstLoadAlign = FirstLoad->getAlign();
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning, one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have is if the alignment of either
// the load or store has improved. Drop as many candidates as we
// can here.
unsigned NumSkip = 1;
while ((NumSkip < LoadNodes.size()) &&
(LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
(StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
NumConsecutiveStores -= NumSkip;
continue;
}
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
RootNode)) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
continue;
}
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
// Find a legal type for the vector store.
unsigned Elts = NumElem * NumMemElts;
JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
} else {
unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
}
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
// The merged loads are required to have the same incoming chain, so
// using the first's chain is acceptable.
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
MachineMemOperand::Flags LdMMOFlags =
isDereferenceable ? MachineMemOperand::MODereferenceable
: MachineMemOperand::MONone;
if (IsNonTemporalLoad)
LdMMOFlags |= MachineMemOperand::MONonTemporal;
MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
? MachineMemOperand::MONonTemporal
: MachineMemOperand::MONone;
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad = DAG.getLoad(
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
SDValue StoreOp = NewLoad;
if (NeedRotate) {
unsigned LoadWidth = ElementSizeBytes * 8 * 2;
assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
"Unexpected type for rotate-able load pair");
SDValue RotAmt =
DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
// Target can convert to the identical ROTR if it does not have ROTL.
StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
}
NewStore = DAG.getStore(
NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =
TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), JointMemOpVT,
FirstLoadAlign, LdMMOFlags);
NewStore = DAG.getTruncStore(
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), JointMemOpVT,
FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
}
// Transfer chain users from old loads to the new load.
for (unsigned i = 0; i < NumElem; ++i) {
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
SDValue(NewLoad.getNode(), 1));
}
// Replace all stores with the new store. Recursively remove corresponding
// values if they are no longer used.
for (unsigned i = 0; i < NumElem; ++i) {
SDValue Val = StoreNodes[i].MemNode->getOperand(1);
CombineTo(StoreNodes[i].MemNode, NewStore);
if (Val.getNode()->use_empty())
recursivelyDeleteUnusedNodes(Val.getNode());
}
MadeChange = true;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
}
return MadeChange;
}
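// Top-level driver for store merging: gather candidate stores that share St's
// base pointer, sort them by offset, and repeatedly try to merge the leading
// run of consecutive stores according to the kind of value being stored
// (constant, extracted vector element, or load).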
bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
return false;
// TODO: Extend this function to merge stores of scalable vectors.
// (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
// store since we know <vscale x 16 x i8> is exactly twice as large as
// <vscale x 8 x i8>). Until then, bail out for scalable vectors.
EVT MemVT = St->getMemoryVT();
if (MemVT.isScalableVector())
return false;
if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
return false;
// This function cannot currently deal with non-byte-sized memory sizes.
int64_t ElementSizeBytes = MemVT.getStoreSize();
if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
return false;
// Do not bother looking at stored values that are not constants, loads, or
// extracted vector elements.
SDValue StoredVal = peekThroughBitcasts(St->getValue());
const StoreSource StoreSrc = getStoreSource(StoredVal);
if (StoreSrc == StoreSource::Unknown)
return false;
SmallVector<MemOpLink, 8> StoreNodes;
SDNode *RootNode;
// Find potential store merge candidates by searching through the chain sub-DAG.
getStoreMergeCandidates(St, StoreNodes, RootNode);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
// Sort the memory operands according to their distance from the
// base pointer.
llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
return LHS.OffsetFromBase < RHS.OffsetFromBase;
});
bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
bool IsNonTemporalStore = St->isNonTemporal();
bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
cast<LoadSDNode>(StoredVal)->isNonTemporal();
// Store merging attempts to merge the lowest-addressed stores first. This
// generally works out, since when a merge succeeds the remaining stores are
// checked again after the first collection of stores has been merged.
// However, in the case that a non-mergeable store is found first, e.g.,
// {p[-2], p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
// mergeable cases. To prevent this, we prune such stores from the
// front of StoreNodes here.
bool MadeChange = false;
while (StoreNodes.size() > 1) {
unsigned NumConsecutiveStores =
getConsecutiveStores(StoreNodes, ElementSizeBytes);
// There are no more stores in the list to examine.
if (NumConsecutiveStores == 0)
return MadeChange;
// We have at least 2 consecutive stores. Try to merge them.
assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
switch (StoreSrc) {
case StoreSource::Constant:
MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
MemVT, RootNode, AllowVectors);
break;
case StoreSource::Extract:
MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
MemVT, RootNode);
break;
case StoreSource::Load:
MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
MemVT, RootNode, AllowVectors,
IsNonTemporalStore, IsNonTemporalLoad);
break;
default:
llvm_unreachable("Unhandled store source type");
}
}
return MadeChange;
}
SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
SDLoc SL(ST);
SDValue ReplStore;
// Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
ST->getBasePtr(), ST->getMemoryVT(),
ST->getMemOperand());
} else {
ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
ST->getMemOperand());
}
// Create token to keep both nodes around.
SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
MVT::Other, ST->getChain(), ReplStore);
// Make sure the new and old chains are cleaned up.
AddToWorklist(Token.getNode());
// Don't add users to work list.
return CombineTo(ST, Token, false);
}
SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
SDValue Value = ST->getValue();
if (Value.getOpcode() == ISD::TargetConstantFP)
return SDValue();
if (!ISD::isNormalStore(ST))
return SDValue();
SDLoc DL(ST);
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
// NOTE: If the original store is volatile, this transform must not increase
// the number of stores. For example, on x86-32 an f64 can be stored in one
// processor operation but an i64 (which is not legal) requires two. So the
// transform should not be done in this case.
SDValue Tmp;
switch (CFP->getSimpleValueType(0).SimpleTy) {
default:
llvm_unreachable("Unknown FP type");
case MVT::f16: // We don't do this for these yet.
case MVT::f80:
case MVT::f128:
case MVT::ppcf128:
return SDValue();
case MVT::f32:
if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), SDLoc(CFP),
MVT::i32);
return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
}
return SDValue();
case MVT::f64:
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, DL, Tmp,
Ptr, ST->getMemOperand());
}
if (ST->isSimple() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
ST->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
St0, St1);
}
return SDValue();
}
}
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
// If this is a store of a bit convert, store the input value if the
// resultant store does not need a higher alignment than the original.
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
EVT SVT = Value.getOperand(0).getValueType();
// If the store is volatile, we only want to change the store type if the
// resulting store is legal. Otherwise we might increase the number of
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
// TODO: May be able to relax for unordered atomics (see D66309)
if (((!LegalOperations && ST->isSimple()) ||
TLI.isOperationLegal(ISD::STORE, SVT)) &&
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
DAG, *ST->getMemOperand())) {
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
ST->getMemOperand());
}
}
// Turn 'store undef, Ptr' -> nothing.
if (Value.isUndef() && ST->isUnindexed())
return Chain;
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
if (*Alignment > ST->getAlign() &&
isAligned(*Alignment, ST->getSrcValueOffset())) {
SDValue NewStore =
DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
ST->getMemoryVT(), *Alignment,
ST->getMemOperand()->getFlags(), ST->getAAInfo());
// NewStore will always be N as we are only refining the alignment
assert(NewStore.getNode() == N);
(void)NewStore;
}
}
}
// Try transforming a pair floating point load / store ops to integer
// load / store ops.
if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
// Try transforming several stores into STORE (BSWAP).
if (SDValue Store = mergeTruncStores(ST))
return Store;
if (ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes, on this store and any
// adjacent stores.
if (findBetterNeighborChains(ST)) {
// replaceStoreChain uses CombineTo, which handled all of the worklist
// manipulation. Return the original node to not do anything else.
return SDValue(ST, 0);
}
Chain = ST->getChain();
}
// FIXME: is there such a thing as a truncating indexed store?
if (ST->isTruncatingStore() && ST->isUnindexed() &&
Value.getValueType().isInteger() &&
(!isa<ConstantSDNode>(Value) ||
!cast<ConstantSDNode>(Value)->isOpaque())) {
APInt TruncDemandedBits =
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
ST->getMemoryVT().getScalarSizeInBits());
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
AddToWorklist(Value.getNode());
if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
ST->getMemOperand());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
// Re-visit the store if anything changed and the store hasn't been merged
// with another node (N is deleted). SimplifyDemandedBits will add Value's
// node back to the worklist if necessary, but we also need to re-visit
// the Store node itself.
if (N->getOpcode() != ISD::DELETED_NODE)
AddToWorklist(N);
return SDValue(N, 0);
}
}
// If this is a load followed by a store to the same location, then the store
// is dead/noop.
// TODO: Can relax for unordered atomics (see D66309)
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
ST->isUnindexed() && ST->isSimple() &&
Ld->getAddressSpace() == ST->getAddressSpace() &&
// There can't be any side effects between the load and store, such as
// a call or store.
Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
// The store is dead, remove it.
return Chain;
}
}
// TODO: Can relax for unordered atomics (see D66309)
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && ST->isSimple() &&
ST1->isUnindexed() && ST1->isSimple()) {
if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
ST->getMemoryVT() == ST1->getMemoryVT() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
// If this is a store followed by a store with the same value to the
// same location, then the store is dead/noop.
return Chain;
}
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef() &&
// BaseIndexOffset and the code below require knowing the size
// of a vector, so bail out if MemoryVT is scalable.
!ST->getMemoryVT().isScalableVector() &&
!ST1->getMemoryVT().isScalableVector() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
// If the preceding store writes to a subset of the current store's
// location and no other node is chained to that preceding store, we can
// effectively drop it. Do not remove stores to undef as they may
// be used as data sinks.
if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
CombineTo(ST1, ST1->getChain());
return SDValue();
}
}
}
}
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
// truncating store. We can do this even if this is already a truncstore.
if ((Value.getOpcode() == ISD::FP_ROUND ||
Value.getOpcode() == ISD::TRUNCATE) &&
Value.getNode()->hasOneUse() && ST->isUnindexed() &&
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
ST->getMemoryVT(), LegalOperations)) {
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
Ptr, ST->getMemoryVT(), ST->getMemOperand());
}
// Always perform this optimization before types are legal. If the target
// prefers, also try this after legalization to catch stores that were created
// by intrinsics or other nodes.
if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
while (true) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
bool Changed = mergeConsecutiveStores(ST);
if (!Changed) break;
// Return N, as the merge only uses CombineTo and no worklist
// cleanup is necessary.
if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
return SDValue(N, 0);
}
}
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
//
// Make sure to do this only after attempting to merge stores in order to
// avoid changing the types of some subset of stores due to visit order,
// preventing their merging.
if (isa<ConstantFPSDNode>(ST->getValue())) {
if (SDValue NewSt = replaceStoreOfFPConstant(ST))
return NewSt;
}
if (SDValue NewSt = splitMergedValStore(ST))
return NewSt;
return ReduceLoadOpStoreWidth(N);
}
SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
if (!LifetimeEnd->hasOffset())
return SDValue();
const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
LifetimeEnd->getOffset(), false);
// We walk up the chains to find stores.
SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
while (!Chains.empty()) {
SDValue Chain = Chains.pop_back_val();
if (!Chain.hasOneUse())
continue;
switch (Chain.getOpcode()) {
case ISD::TokenFactor:
for (unsigned Nops = Chain.getNumOperands(); Nops;)
Chains.push_back(Chain.getOperand(--Nops));
break;
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
// We can forward past any lifetime start/end that can be proven not to
// alias the node.
if (!isAlias(Chain.getNode(), N))
Chains.push_back(Chain.getOperand(0));
break;
case ISD::STORE: {
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
// TODO: Can relax for unordered atomics (see D66309)
if (!ST->isSimple() || ST->isIndexed())
continue;
const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
// The bounds of a scalable store are not known until runtime, so this
// store cannot be elided.
if (StoreSize.isScalable())
continue;
const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
// If we store purely within object bounds just before its lifetime ends,
// we can remove the store.
if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
StoreSize.getFixedSize() * 8)) {
LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
dbgs() << "\nwithin LIFETIME_END of : ";
LifetimeEndBase.dump(); dbgs() << "\n");
CombineTo(ST, ST->getChain());
return SDValue(N, 0);
}
}
}
}
return SDValue();
}
/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
/// (store (or (zext (bitcast F to i32) to i64),
/// (shl (zext I to i64), 32)), addr) -->
/// (store F, addr) and (store I, addr+4)
///
/// Similarly, splitting for other merged store can also be beneficial, like:
/// For pair of {i32, i32}, i64 store --> two i32 stores.
/// For pair of {i32, i16}, i64 store --> two i32 stores.
/// For pair of {i16, i16}, i32 store --> two i16 stores.
/// For pair of {i16, i8}, i32 store --> two i16 stores.
/// For pair of {i8, i8}, i16 store --> two i8 stores.
///
/// We allow each target to determine specifically which kind of splitting is
/// supported.
///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
/// void goo(const std::pair<int, float> &);
/// hoo() {
/// ...
/// goo(std::make_pair(tmp, ftmp));
/// ...
/// }
///
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
// Can't change the number of memory accesses for a volatile store or break
// atomicity for an atomic one.
if (!ST->isSimple())
return SDValue();
SDValue Val = ST->getValue();
SDLoc DL(ST);
// Match OR operand.
if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
return SDValue();
// Match SHL operand and get Lower and Higher parts of Val.
SDValue Op1 = Val.getOperand(0);
SDValue Op2 = Val.getOperand(1);
SDValue Lo, Hi;
if (Op1.getOpcode() != ISD::SHL) {
std::swap(Op1, Op2);
if (Op1.getOpcode() != ISD::SHL)
return SDValue();
}
Lo = Op2;
Hi = Op1.getOperand(0);
if (!Op1.hasOneUse())
return SDValue();
// Match shift amount to HalfValBitSize.
unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
return SDValue();
// Lo and Hi are zero-extended from integers with size less than or equal to
// HalfValBitSize (e.g. i32 zero-extended to i64).
if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
!Lo.getOperand(0).getValueType().isScalarInteger() ||
Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
!Hi.getOperand(0).getValueType().isScalarInteger() ||
Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
return SDValue();
// Use the EVT of low and high parts before bitcast as the input
// of target query.
EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
? Lo.getOperand(0).getValueType()
: Lo.getValueType();
EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
? Hi.getOperand(0).getValueType()
: Hi.getValueType();
if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
return SDValue();
// Start to split store.
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
// Change the sizes of Lo and Hi's value types to HalfValBitSize.
EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
// Lower value store.
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
// Higher value store.
SDValue St1 = DAG.getStore(
St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
ST->getOriginalAlign(), MMOFlags, AAInfo);
return St1;
}
/// Convert a disguised subvector insertion into a shuffle:
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
"Expected insert_vector_elt");
SDValue InsertVal = N->getOperand(1);
SDValue Vec = N->getOperand(0);
// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
// InsIndex)
// --> (vector_shuffle X, Y) and variations where shuffle operands may be
// CONCAT_VECTORS.
if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(InsertVal.getOperand(1))) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
ArrayRef<int> Mask = SVN->getMask();
SDValue X = Vec.getOperand(0);
SDValue Y = Vec.getOperand(1);
// Vec's operand 0 is using indices from 0 to N-1 and
// operand 1 from N to 2N - 1, where N is the number of
// elements in the vectors.
SDValue InsertVal0 = InsertVal.getOperand(0);
int ElementOffset = -1;
// We explore the inputs of the shuffle in order to see if we find the
// source of the extract_vector_elt. If so, we can use it to modify the
// shuffle rather than perform an insert_vector_elt.
SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
ArgWorkList.emplace_back(Mask.size(), Y);
ArgWorkList.emplace_back(0, X);
while (!ArgWorkList.empty()) {
int ArgOffset;
SDValue ArgVal;
std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
if (ArgVal == InsertVal0) {
ElementOffset = ArgOffset;
break;
}
// Peek through concat_vector.
if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
int CurrentArgOffset =
ArgOffset + ArgVal.getValueType().getVectorNumElements();
int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
for (SDValue Op : reverse(ArgVal->ops())) {
CurrentArgOffset -= Step;
ArgWorkList.emplace_back(CurrentArgOffset, Op);
}
// Make sure we went through all the elements and did not screw up index
// computation.
assert(CurrentArgOffset == ArgOffset);
}
}
if (ElementOffset != -1) {
SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
assert(NewMask[InsIndex] <
(int)(2 * Vec.getValueType().getVectorNumElements()) &&
NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
SDValue LegalShuffle =
TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
Y, NewMask, DAG);
if (LegalShuffle)
return LegalShuffle;
}
}
// insert_vector_elt V, (bitcast X from vector type), IdxC -->
// bitcast(shuffle (bitcast V), (extended X), Mask)
// Note: We do not use an insert_subvector node because that requires a
// legal subvector type.
if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
!InsertVal.getOperand(0).getValueType().isVector())
return SDValue();
SDValue SubVec = InsertVal.getOperand(0);
SDValue DestVec = N->getOperand(0);
EVT SubVecVT = SubVec.getValueType();
EVT VT = DestVec.getValueType();
unsigned NumSrcElts = SubVecVT.getVectorNumElements();
// If the source only has a single vector element, the cost of creating and
// adding it to a vector is likely to exceed the cost of an insert_vector_elt.
if (NumSrcElts == 1)
return SDValue();
unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
unsigned NumMaskVals = ExtendRatio * NumSrcElts;
// Step 1: Create a shuffle mask that implements this insert operation. The
// vector that we are inserting into will be operand 0 of the shuffle, so
// those elements are just 'i'. The inserted subvector is in the first
// positions of operand 1 of the shuffle. Example:
// insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
SmallVector<int, 16> Mask(NumMaskVals);
for (unsigned i = 0; i != NumMaskVals; ++i) {
if (i / NumSrcElts == InsIndex)
Mask[i] = (i % NumSrcElts) + NumMaskVals;
else
Mask[i] = i;
}
// Bail out if the target can not handle the shuffle we want to create.
EVT SubVecEltVT = SubVecVT.getVectorElementType();
EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
return SDValue();
// Step 2: Create a wide vector from the inserted source vector by appending
// undefined elements. This is the same size as our destination vector.
SDLoc DL(N);
SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
ConcatOps[0] = SubVec;
SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
// Step 3: Shuffle in the padded subvector.
SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
AddToWorklist(PaddedSubV.getNode());
AddToWorklist(DestVecBC.getNode());
AddToWorklist(Shuf.getNode());
return DAG.getBitcast(VT, Shuf);
}
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
SDLoc DL(N);
EVT VT = InVec.getValueType();
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
// Insert into out-of-bounds element is undefined.
if (IndexC && VT.isFixedLengthVector() &&
IndexC->getZExtValue() >= VT.getVectorNumElements())
return DAG.getUNDEF(VT);
// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
return InVec;
if (!IndexC) {
// If this is a variable insert into an undef vector, it might be better to splat:
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
if (VT.isScalableVector())
return DAG.getSplatVector(VT, DL, InVal);
else {
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
return DAG.getBuildVector(VT, DL, Ops);
}
}
return SDValue();
}
if (VT.isScalableVector())
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
// We must know which element is being inserted for folds below here.
unsigned Elt = IndexC->getZExtValue();
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
return Shuf;
// Canonicalize insert_vector_elt dag nodes.
// Example:
// (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
// -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
//
// Do this only if the child insert_vector node has one use; also
// do this only if indices are both constants and Idx1 < Idx0.
if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
&& isa<ConstantSDNode>(InVec.getOperand(2))) {
unsigned OtherElt = InVec.getConstantOperandVal(2);
if (Elt < OtherElt) {
// Swap nodes.
SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
InVec.getOperand(0), InVal, EltNo);
AddToWorklist(NewOp.getNode());
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
}
}
// If we can't generate a legal BUILD_VECTOR, exit
if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
return SDValue();
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
SmallVector<SDValue, 8> Ops;
// Do not combine these two vectors if the output vector will not replace
// the input vector.
if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
Ops.append(InVec.getNode()->op_begin(),
InVec.getNode()->op_end());
} else if (InVec.isUndef()) {
Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
} else {
return SDValue();
}
assert(Ops.size() == NumElts && "Unexpected vector size");
// Insert the element
if (Elt < Ops.size()) {
// All the operands of BUILD_VECTOR must have the same type;
// we enforce that here.
EVT OpVT = Ops[0].getValueType();
Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
}
// Return the new vector
return DAG.getBuildVector(VT, DL, Ops);
}
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue EltNo,
LoadSDNode *OriginalLoad) {
assert(OriginalLoad->isSimple());
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
// If the vector element type is not a multiple of a byte then we are unable
// to correctly compute an address to load only the extracted element as a
// scalar.
if (!VecEltVT.isByteSized())
return SDValue();
Align Alignment = OriginalLoad->getAlign();
Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VecEltVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Alignment ||
!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
return SDValue();
ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
ISD::NON_EXTLOAD : ISD::EXTLOAD;
if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
return SDValue();
Alignment = NewAlign;
MachinePointerInfo MPI;
SDLoc DL(EVE);
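// With a constant index, the scalar lives at a fixed byte offset from the
// original base pointer. For example, extracting element 2 of a v4i32 load
// reads from base + (32 * 2) / 8 = base + 8 bytes.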
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
} else {
// Discard the pointer info except the address space because the memory
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
}
SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
InVecVT, EltNo);
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
// Use ReplaceAllUsesOfValuesWith to do the replacement.
// Note that this replacement assumes that the extract is the only
// use of the load; that's okay because we don't want to perform this
// transformation in other cases anyway.
SDValue Load;
SDValue Chain;
if (ResultVT.bitsGT(VecEltVT)) {
// If the result type of vextract is wider than the load, then issue an
// extending load instead.
ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
VecEltVT)
? ISD::ZEXTLOAD
: ISD::EXTLOAD;
Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
Alignment, OriginalLoad->getMemOperand()->getFlags(),
OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
} else {
Load = DAG.getLoad(
VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
if (ResultVT.bitsLT(VecEltVT))
Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
else
Load = DAG.getBitcast(ResultVT, Load);
}
WorklistRemover DeadNodes(*this);
SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
SDValue To[] = { Load, Chain };
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
// Make sure to revisit this node to clean it up; it will usually be dead.
AddToWorklist(EVE);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
AddToWorklistWithUsers(Load.getNode());
++OpsNarrowed;
return SDValue(EVE, 0);
}
/// Transform a vector binary operation into a scalar binary operation by moving
/// the math/logic after an extract element of a vector.
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
bool LegalOperations) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
Vec.getNode()->getNumValues() != 1)
return SDValue();
// Targets may want to avoid this to prevent an expensive register transfer.
if (!TLI.shouldScalarizeBinop(Vec))
return SDValue();
// Extracting an element of a vector constant is constant-folded, so this
// transform is just replacing a vector op with a scalar op while moving the
// extract.
SDValue Op0 = Vec.getOperand(0);
SDValue Op1 = Vec.getOperand(1);
if (isAnyConstantBuildVector(Op0, true) ||
isAnyConstantBuildVector(Op1, true)) {
// extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
// extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
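// For example:
//   extractelt (add X, <1,2,3,4>), 2 --> add (extractelt X, 2), 3
// because the extract of the constant operand constant-folds to 3.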
SDLoc DL(ExtElt);
EVT VT = ExtElt->getValueType(0);
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
}
return SDValue();
}
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue VecOp = N->getOperand(0);
SDValue Index = N->getOperand(1);
EVT ScalarVT = N->getValueType(0);
EVT VecVT = VecOp.getValueType();
if (VecOp.isUndef())
return DAG.getUNDEF(ScalarVT);
// extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
//
// This only really matters if the index is non-constant since other combines
// on the constant elements already work.
SDLoc DL(N);
if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
Index == VecOp.getOperand(2)) {
SDValue Elt = VecOp.getOperand(1);
return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
}
// (vextract (scalar_to_vector val, 0) -> val
if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Only 0'th element of SCALAR_TO_VECTOR is defined.
if (DAG.isKnownNeverZero(Index))
return DAG.getUNDEF(ScalarVT);
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
SDValue InOp = VecOp.getOperand(0);
if (InOp.getValueType() != ScalarVT) {
assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}
return InOp;
}
// extract_vector_elt of out-of-bounds element -> UNDEF
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (IndexC && VecVT.isFixedLengthVector() &&
IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);
// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
TLI.isTypeLegal(VecVT) &&
(VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
VecVT.isFixedLengthVector()) &&
"BUILD_VECTOR used for scalable vectors");
unsigned IndexVal =
VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
SDValue Elt = VecOp.getOperand(IndexVal);
EVT InEltVT = Elt.getValueType();
// Sometimes build_vector's scalar input types do not match result type.
if (ScalarVT == InEltVT)
return Elt;
// TODO: It may be useful to truncate if free if the build_vector implicitly
// converts.
}
if (VecVT.isScalableVector())
return SDValue();
// All the code from this point onwards assumes fixed width vectors, but it's
// possible that some of the combinations could be made to work for scalable
// vectors too.
unsigned NumElts = VecVT.getVectorNumElements();
unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
// TODO: These transforms should not require the 'hasOneUse' restriction, but
// there are regressions on multiple targets without it. We can end up with a
// mess of scalar and vector code if we reduce only part of the DAG to scalar.
if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
VecOp.hasOneUse()) {
// The vector index of the LSBs of the source depends on endianness.
bool IsLE = DAG.getDataLayout().isLittleEndian();
unsigned ExtractIndex = IndexC->getZExtValue();
// extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
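// On little-endian targets the low bits of the source sit in element 0; on
// big-endian targets they sit in the last element, so only that element can
// be rewritten as a plain truncate of the scalar source.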
SDValue BCSrc = VecOp.getOperand(0);
if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
if (LegalTypes && BCSrc.getValueType().isInteger() &&
BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
// trunc i64 X to i32
SDValue X = BCSrc.getOperand(0);
assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
"Extract element and scalar to vector can't change element type "
"from FP to integer.");
unsigned XBitWidth = X.getValueSizeInBits();
BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
// An extract element return value type can be wider than its vector
// operand element type. In that case, the high bits are undefined, so
// it's possible that we may need to extend rather than truncate.
if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
assert(XBitWidth % VecEltBitWidth == 0 &&
"Scalar bitwidth must be a multiple of vector element bitwidth");
return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
}
}
}
if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
return BO;
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
// we may introduce new vector instructions which are not backed by TD
// patterns. For example, on AVX, extracting an element from a wide vector
// without going through extract_subvector may not be matched by existing
// patterns. However, if we can find an underlying scalar value, then we can
// always use that.
if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
// Find the new index to extract from.
int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
// Extracting an undef index is undef.
if (OrigElt == -1)
return DAG.getUNDEF(ScalarVT);
// Select the right vector half to extract from.
SDValue SVInVec;
if (OrigElt < (int)NumElts) {
SVInVec = VecOp.getOperand(0);
} else {
SVInVec = VecOp.getOperand(1);
OrigElt -= NumElts;
}
if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
SDValue InOp = SVInVec.getOperand(OrigElt);
if (InOp.getValueType() != ScalarVT) {
assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}
return InOp;
}
// FIXME: We should handle recursing on other vector shuffles and
// scalar_to_vector here as well.
if (!LegalOperations ||
// FIXME: Should really be just isOperationLegalOrCustom.
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
DAG.getVectorIdxConstant(OrigElt, DL));
}
}
// If only EXTRACT_VECTOR_ELT nodes use the source vector we can
// simplify it based on the (valid) extraction indices.
if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Use->getOperand(0) == VecOp &&
isa<ConstantSDNode>(Use->getOperand(1));
})) {
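// Gather the union of all constant extract indices. For example, if a v4i32
// source is only used by extracts of elements 0 and 2, DemandedElts becomes
// 0b0101 and the unused lanes can be simplified away.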
APInt DemandedElts = APInt::getNullValue(NumElts);
for (SDNode *Use : VecOp->uses()) {
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
if (CstElt->getAPIntValue().ult(NumElts))
DemandedElts.setBit(CstElt->getZExtValue());
}
if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
// We simplified the vector operand of this extract element. If this
// extract is not dead, visit it again so it is folded properly.
if (N->getOpcode() != ISD::DELETED_NODE)
AddToWorklist(N);
return SDValue(N, 0);
}
APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
// We simplified the vector operand of this extract element. If this
// extract is not dead, visit it again so it is folded properly.
if (N->getOpcode() != ISD::DELETED_NODE)
AddToWorklist(N);
return SDValue(N, 0);
}
}
// Everything under here is trying to match an extract of a loaded value.
// If the result of the load has to be truncated, then it's not necessarily
// profitable.
bool BCNumEltsChanged = false;
EVT ExtVT = VecVT.getVectorElementType();
EVT LVT = ExtVT;
if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
return SDValue();
if (VecOp.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!VecOp.hasOneUse())
return SDValue();
EVT BCVT = VecOp.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
if (NumElts != BCVT.getVectorNumElements())
BCNumEltsChanged = true;
VecOp = VecOp.getOperand(0);
ExtVT = BCVT.getVectorElementType();
}
// extract (vector load $addr), i --> load $addr + i * size
if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
ISD::isNormalLoad(VecOp.getNode()) &&
!Index->hasPredecessor(VecOp.getNode())) {
auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
if (VecLoad && VecLoad->isSimple())
return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
}
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations || !IndexC)
return SDValue();
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
int Elt = IndexC->getZExtValue();
LoadSDNode *LN0 = nullptr;
if (ISD::isNormalLoad(VecOp.getNode())) {
LN0 = cast<LoadSDNode>(VecOp);
} else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
VecOp.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
// Don't duplicate a load with other uses.
if (!VecOp.hasOneUse())
return SDValue();
LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
}
if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
// =>
// (load $addr+1*size)
// Don't duplicate a load with other uses.
if (!VecOp.hasOneUse())
return SDValue();
// If the bit convert changed the number of elements, it is unsafe
// to examine the mask.
if (BCNumEltsChanged)
return SDValue();
// Select the input vector, guarding against out of range extract vector.
int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
if (VecOp.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!VecOp.hasOneUse())
return SDValue();
VecOp = VecOp.getOperand(0);
}
if (ISD::isNormalLoad(VecOp.getNode())) {
LN0 = cast<LoadSDNode>(VecOp);
Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
Index = DAG.getConstant(Elt, DL, Index.getValueType());
}
} else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
VecVT.getVectorElementType() == ScalarVT &&
(!LegalTypes ||
TLI.isTypeLegal(
VecOp.getOperand(0).getValueType().getVectorElementType()))) {
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
// -> extract_vector_elt a, 0
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
// -> extract_vector_elt a, 1
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
// -> extract_vector_elt b, 0
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
// -> extract_vector_elt b, 1
SDLoc SL(N);
EVT ConcatVT = VecOp.getOperand(0).getValueType();
unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
Index.getValueType());
SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
ConcatVT.getVectorElementType(),
ConcatOp, NewIdx);
return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
}
// Make sure we found a non-volatile load and the extractelement is
// the only use.
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
return SDValue();
// If Idx was -1 above, Elt is going to be -1, so just return undef.
if (Elt == -1)
return DAG.getUNDEF(LVT);
return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
}
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
// We perform this optimization post type-legalization because
// the type-legalizer often scalarizes integer-promoted vectors.
// Performing this optimization earlier may create bit-casts which
// will be type-legalized to complex code sequences.
// We perform this optimization only before the operation legalizer because we
// may introduce illegal operations.
if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
return SDValue();
unsigned NumInScalars = N->getNumOperands();
SDLoc DL(N);
EVT VT = N->getValueType(0);
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
// optimizations. We do not handle sign-extend because we can't fill the sign
// using shuffles.
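// For example, on a little-endian target:
//   (v2i32 build_vector (zext i16:a to i32), (zext i16:b to i32))
// can instead be built as
//   (v2i32 bitcast (v4i16 build_vector a, 0, b, 0))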
EVT SourceType = MVT::Other;
bool AllAnyExt = true;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
if (In.isUndef()) continue;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
// Abort if the element is not an extension.
if (!ZeroExt && !AnyExt) {
SourceType = MVT::Other;
break;
}
// The input is a ZeroExt or AnyExt. Check the original type.
EVT InTy = In.getOperand(0).getValueType();
// Check that all of the widened source types are the same.
if (SourceType == MVT::Other)
// First time.
SourceType = InTy;
else if (InTy != SourceType) {
// Multiple incoming types. Abort.
SourceType = MVT::Other;
break;
}
// Check if all of the extends are ANY_EXTENDs.
AllAnyExt &= AnyExt;
}
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
EVT OutScalarTy = VT.getScalarType();
bool ValidTypes = SourceType != MVT::Other &&
isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
isPowerOf2_32(SourceType.getSizeInBits());
// Create a new simpler BUILD_VECTOR sequence which other optimizations can
// turn into a single shuffle instruction.
if (!ValidTypes)
return SDValue();
// If we already have a splat buildvector, then don't fold it if it means
// introducing zeros.
if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
return SDValue();
bool isLE = DAG.getDataLayout().isLittleEndian();
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
DAG.getConstant(0, DL, SourceType);
unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
// Populate the new build_vector
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Cast = N->getOperand(i);
assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
Cast.getOpcode() == ISD::ZERO_EXTEND ||
Cast.isUndef()) && "Invalid cast opcode");
SDValue In;
if (Cast.isUndef())
In = DAG.getUNDEF(SourceType);
else
In = Cast->getOperand(0);
unsigned Index = isLE ? (i * ElemRatio) :
(i * ElemRatio + (ElemRatio - 1));
assert(Index < Ops.size() && "Invalid index");
Ops[Index] = In;
}
// The type of the new BUILD_VECTOR node.
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
"Invalid vector size");
// Check if the new vector type is legal.
if (!isTypeLegal(VecVT) ||
(!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
return SDValue();
// Make the new BUILD_VECTOR.
SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
// The new BUILD_VECTOR node has the potential to be further optimized.
AddToWorklist(BV.getNode());
// Bitcast to the desired type.
return DAG.getBitcast(VT, BV);
}
// Simplify (build_vec (trunc $1)
// (trunc (srl $1 half-width))
// (trunc (srl $1 (2 * half-width))) …)
// to (bitcast $1)
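// For example, on a little-endian target:
//   (v4i16 build_vector (trunc i64:x), (trunc (srl x, 16)),
//                       (trunc (srl x, 32)), (trunc (srl x, 48)))
// is equivalent to (v4i16 bitcast i64:x).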
SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
// Only for little-endian targets.
if (!DAG.getDataLayout().isLittleEndian())
return SDValue();
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT OutScalarTy = VT.getScalarType();
uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
// Only for power-of-two scalar types, to be sure that the bitcast maps cleanly.
if (!isPowerOf2_64(ScalarTypeBitsize))
return SDValue();
unsigned NumInScalars = N->getNumOperands();
// Look through bitcasts
auto PeekThroughBitcast = [](SDValue Op) {
if (Op.getOpcode() == ISD::BITCAST)
return Op.getOperand(0);
return Op;
};
// The source value where all the parts are extracted.
SDValue Src;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = PeekThroughBitcast(N->getOperand(i));
// Ignore undef inputs.
if (In.isUndef()) continue;
if (In.getOpcode() != ISD::TRUNCATE)
return SDValue();
In = PeekThroughBitcast(In.getOperand(0));
if (In.getOpcode() != ISD::SRL) {
// For now only handle build_vec without shuffling; handling shifts here is
// left for the future.
if (i != 0)
return SDValue();
Src = In;
} else {
// In is SRL
SDValue part = PeekThroughBitcast(In.getOperand(0));
if (!Src) {
Src = part;
} else if (Src != part) {
// Vector parts do not stem from the same variable
return SDValue();
}
SDValue ShiftAmtVal = In.getOperand(1);
if (!isa<ConstantSDNode>(ShiftAmtVal))
return SDValue();
uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
// The value is not extracted at the expected bit position.
if (ShiftAmt != i * ScalarTypeBitsize)
return SDValue();
}
}
// Only cast if the size is the same
if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
return SDValue();
return DAG.getBitcast(VT, Src);
}
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
unsigned LeftIdx, bool DidSplitVec) {
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
EVT VT = N->getValueType(0);
EVT InVT1 = VecIn1.getValueType();
EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
unsigned NumElems = VT.getVectorNumElements();
unsigned ShuffleNumElems = NumElems;
// If we artificially split a vector in two already, then the offsets in the
// operands will all be based off of VecIn1, even those in VecIn2.
unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVT1Size = InVT1.getFixedSizeInBits();
uint64_t InVT2Size = InVT2.getFixedSizeInBits();
assert(InVT2Size <= InVT1Size &&
"Inputs must be sorted to be in non-increasing vector size order.");
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
if (InVT1 != VT || InVT2 != VT) {
if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
// If the output vector length is a multiple of both input lengths,
// we can concatenate them and pad the rest with undefs.
unsigned NumConcats = VTSize / InVT1Size;
assert(NumConcats >= 2 && "Concat needs at least two inputs!");
SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
ConcatOps[0] = VecIn1;
ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
VecIn2 = SDValue();
} else if (InVT1Size == VTSize * 2) {
if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
return SDValue();
if (!VecIn2.getNode()) {
// If we only have one input vector, and it's twice the size of the
// output, split it in two.
VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
DAG.getVectorIdxConstant(NumElems, DL));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
// Since we now have shorter input vectors, adjust the offset of the
// second vector's start.
Vec2Offset = NumElems;
} else {
assert(InVT2Size <= InVT1Size &&
"Second input is not going to be larger than the first one.");
// VecIn1 is wider than the output, and we have another, possibly
// smaller input. Pad the smaller input with undefs, shuffle at the
// input vector width, and extract the output.
// The shuffle type is different than VT, so check legality again.
if (LegalOperations &&
!TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
return SDValue();
// Legalizing INSERT_SUBVECTOR is tricky - you basically have to
// lower it back into a BUILD_VECTOR. So if the inserted type is
// illegal, don't even try.
if (InVT1 != InVT2) {
if (!TLI.isTypeLegal(InVT2))
return SDValue();
VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
}
ShuffleNumElems = NumElems * 2;
}
} else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
ConcatOps[0] = VecIn2;
VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
} else {
// TODO: Support cases where the length mismatch isn't exactly by a
// factor of 2.
// TODO: Move this check upwards, so that if we have bad type
// mismatches, we don't create any DAG nodes.
return SDValue();
}
}
// Initialize mask to undef.
SmallVector<int, 8> Mask(ShuffleNumElems, -1);
// Only need to run up to the number of elements actually used, not the
// total number of elements in the shuffle - if we are shuffling a wider
// vector, the high lanes should be set to undef.
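// Elements taken from the first input keep their original extract index;
// elements taken from the second input are offset by Vec2Offset. For example,
// lane 1 of a second v4i32 input becomes mask value 4 + 1 = 5.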
for (unsigned i = 0; i != NumElems; ++i) {
if (VectorMask[i] <= 0)
continue;
unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
if (VectorMask[i] == (int)LeftIdx) {
Mask[i] = ExtIndex;
} else if (VectorMask[i] == (int)LeftIdx + 1) {
Mask[i] = Vec2Offset + ExtIndex;
}
}
// The types of the input vectors may have changed above.
InVT1 = VecIn1.getValueType();
// If we already have a VecIn2, it should have the same type as VecIn1.
// If we don't, get an undef/zero vector of the appropriate type.
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
if (ShuffleNumElems > NumElems)
Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
return Shuffle;
}
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
// First, determine where the build vector is not undef.
// TODO: We could extend this to handle zero elements as well as undefs.
int NumBVOps = BV->getNumOperands();
int ZextElt = -1;
for (int i = 0; i != NumBVOps; ++i) {
SDValue Op = BV->getOperand(i);
if (Op.isUndef())
continue;
if (ZextElt == -1)
ZextElt = i;
else
return SDValue();
}
// Bail out if there's no non-undef element.
if (ZextElt == -1)
return SDValue();
// The build vector contains some number of undef elements and exactly
// one other element. That other element must be a zero-extended scalar
// extracted from a vector at a constant index to turn this into a shuffle.
// Also, require that the build vector does not implicitly truncate/extend
// its elements.
// TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
EVT VT = BV->getValueType(0);
SDValue Zext = BV->getOperand(ZextElt);
if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
// The zero-extend must be a multiple of the source size, and we must be
// building a vector of the same size as the source of the extract element.
SDValue Extract = Zext.getOperand(0);
unsigned DestSize = Zext.getValueSizeInBits();
unsigned SrcSize = Extract.getValueSizeInBits();
if (DestSize % SrcSize != 0 ||
Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
return SDValue();
// Create a shuffle mask that will combine the extracted element with zeros
// and undefs.
int ZextRatio = DestSize / SrcSize;
int NumMaskElts = NumBVOps * ZextRatio;
SmallVector<int, 32> ShufMask(NumMaskElts, -1);
for (int i = 0; i != NumMaskElts; ++i) {
if (i / ZextRatio == ZextElt) {
// The low bits of the (potentially translated) extracted element map to
// the source vector. The high bits map to zero. We will use a zero vector
// as the 2nd source operand of the shuffle, so use the 1st element of
// that vector (mask value is number-of-elements) for the high bits.
if (i % ZextRatio == 0)
ShufMask[i] = Extract.getConstantOperandVal(1);
else
ShufMask[i] = NumMaskElts;
}
// Undef elements of the build vector remain undef because we initialize
// the shuffle mask with -1.
}
// buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
// bitcast (shuffle V, ZeroVec, VectorMask)
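// For example, given (v2i64 build_vector undef, (zext (extractelt v4i32:V, 3))):
//   ZextRatio = 2, NumMaskElts = 4, ShufMask = <-1, -1, 3, 4>,
// so one half of the non-undef element is V[3] and the other half is taken
// from the zero vector.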
SDLoc DL(BV);
EVT VecVT = Extract.getOperand(0).getValueType();
SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
ZeroVec, ShufMask, DAG);
if (!Shuf)
return SDValue();
return DAG.getBitcast(VT, Shuf);
}
// FIXME: promote to STLExtras.
template <typename R, typename T>
static auto getFirstIndexOf(R &&Range, const T &Val) {
auto I = find(Range, Val);
if (I == Range.end())
return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
return std::distance(Range.begin(), I);
}
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
// Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
return SDValue();
if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
return V;
// May only combine to shuffle after legalize if shuffle is legal.
if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
bool UsesZeroVector = false;
unsigned NumElems = N->getNumOperands();
// Record, for each element of the newly built vector, which input vector
// that element comes from. -1 stands for undef, 0 for the zero vector,
// and positive values for the input vectors.
// VectorMask maps each element to its vector number, and VecIn maps vector
// numbers to their initial SDValues.
SmallVector<int, 8> VectorMask(NumElems, -1);
SmallVector<SDValue, 8> VecIn;
VecIn.push_back(SDValue());
for (unsigned i = 0; i != NumElems; ++i) {
SDValue Op = N->getOperand(i);
if (Op.isUndef())
continue;
// See if we can use a blend with a zero vector.
// TODO: Should we generalize this to a blend with an arbitrary constant
// vector?
if (isNullConstant(Op) || isNullFPConstant(Op)) {
UsesZeroVector = true;
VectorMask[i] = 0;
continue;
}
// Not an undef or zero. If the input is something other than an
// EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
SDValue ExtractedFromVec = Op.getOperand(0);
if (ExtractedFromVec.getValueType().isScalableVector())
return SDValue();
const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
return SDValue();
// All inputs must have the same element type as the output.
if (VT.getVectorElementType() !=
ExtractedFromVec.getValueType().getVectorElementType())
return SDValue();
// Have we seen this input vector before?
// The vectors are expected to be tiny (usually 1 or 2 elements), so using
// a map back from SDValues to numbers isn't worth it.
int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
if (Idx == -1) { // A new source vector?
Idx = VecIn.size();
VecIn.push_back(ExtractedFromVec);
}
VectorMask[i] = Idx;
}
// If we didn't find at least one input vector, bail out.
if (VecIn.size() < 2)
return SDValue();
// If all the operands of the BUILD_VECTOR extract from the same
// vector, then split that vector efficiently based on the maximum
// vector access index and adjust the VectorMask and
// VecIn accordingly.
bool DidSplitVec = false;
if (VecIn.size() == 2) {
unsigned MaxIndex = 0;
unsigned NearestPow2 = 0;
SDValue Vec = VecIn.back();
EVT InVT = Vec.getValueType();
SmallVector<unsigned, 8> IndexVec(NumElems, 0);
for (unsigned i = 0; i < NumElems; i++) {
if (VectorMask[i] <= 0)
continue;
unsigned Index = N->getOperand(i).getConstantOperandVal(1);
IndexVec[i] = Index;
MaxIndex = std::max(MaxIndex, Index);
}
NearestPow2 = PowerOf2Ceil(MaxIndex);
if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
NumElems * 2 < NearestPow2) {
unsigned SplitSize = NearestPow2 / 2;
EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
InVT.getVectorElementType(), SplitSize);
if (TLI.isTypeLegal(SplitVT) &&
SplitSize + SplitVT.getVectorNumElements() <=
InVT.getVectorNumElements()) {
SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
DAG.getVectorIdxConstant(SplitSize, DL));
SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
DAG.getVectorIdxConstant(0, DL));
VecIn.pop_back();
VecIn.push_back(VecIn1);
VecIn.push_back(VecIn2);
DidSplitVec = true;
for (unsigned i = 0; i < NumElems; i++) {
if (VectorMask[i] <= 0)
continue;
VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
}
}
}
}
// Sort input vectors by decreasing vector element count,
// while preserving the relative order of equally-sized vectors.
// Note that we keep the first (implicit) zero vector as-is.
SmallVector<SDValue, 8> SortedVecIn(VecIn);
llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
[](const SDValue &a, const SDValue &b) {
return a.getValueType().getVectorNumElements() >
b.getValueType().getVectorNumElements();
});
// We now also need to rebuild the VectorMask, because it referenced element
// order in VecIn, and we just sorted them.
for (int &SourceVectorIndex : VectorMask) {
if (SourceVectorIndex <= 0)
continue;
unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
assert(Idx > 0 && Idx < SortedVecIn.size() &&
VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
SourceVectorIndex = Idx;
}
VecIn = std::move(SortedVecIn);
// TODO: Should this fire if some of the input vectors has illegal type (like
// it does now), or should we let legalization run its course first?
// Shuffle phase:
// Take pairs of vectors, and shuffle them so that the result has elements
// from these vectors in the correct places.
// For example, given:
// t10: i32 = extract_vector_elt t1, Constant:i64<0>
// t11: i32 = extract_vector_elt t2, Constant:i64<0>
// t12: i32 = extract_vector_elt t3, Constant:i64<0>
// t13: i32 = extract_vector_elt t1, Constant:i64<1>
// t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
// We will generate:
// t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
// t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
SmallVector<SDValue, 4> Shuffles;
for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
unsigned LeftIdx = 2 * In + 1;
SDValue VecLeft = VecIn[LeftIdx];
SDValue VecRight =
(LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
VecRight, LeftIdx, DidSplitVec))
Shuffles.push_back(Shuffle);
else
return SDValue();
}
// If we need the zero vector as an "ingredient" in the blend tree, add it
// to the list of shuffles.
if (UsesZeroVector)
Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT));
// If we only have one shuffle, we're done.
if (Shuffles.size() == 1)
return Shuffles[0];
// Update the vector mask to point to the post-shuffle vectors.
for (int &Vec : VectorMask)
if (Vec == 0)
Vec = Shuffles.size() - 1;
else
Vec = (Vec - 1) / 2;
// More than one shuffle. Generate a binary tree of blends, e.g. if from
// the previous step we got the set of shuffles t10, t11, t12, t13, we will
// generate:
// t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
// t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
// t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
// t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
// t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
// t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
// t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
// Make sure the initial size of the shuffle list is even.
if (Shuffles.size() % 2)
Shuffles.push_back(DAG.getUNDEF(VT));
for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
if (CurSize % 2) {
Shuffles[CurSize] = DAG.getUNDEF(VT);
CurSize++;
}
for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
int Left = 2 * In;
int Right = 2 * In + 1;
SmallVector<int, 8> Mask(NumElems, -1);
for (unsigned i = 0; i != NumElems; ++i) {
if (VectorMask[i] == Left) {
Mask[i] = i;
VectorMask[i] = In;
} else if (VectorMask[i] == Right) {
Mask[i] = i + NumElems;
VectorMask[i] = In;
}
}
Shuffles[In] =
DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
}
}
return Shuffles[0];
}
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
// TODO: Support sign extend?
// TODO: Allow undef elements?
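// For example:
//   (v4i32 build_vector (zext (extractelt v8i16:X, 4)),
//                       (zext (extractelt X, 5)),
//                       (zext (extractelt X, 6)),
//                       (zext (extractelt X, 7)))
// becomes (v4i32 zero_extend (v4i16 extract_subvector X, 4)).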
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
if (LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
bool FoundZeroExtend = false;
SDValue Op0 = N->getOperand(0);
auto checkElem = [&](SDValue Op) -> int64_t {
unsigned Opc = Op.getOpcode();
FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
return C->getZExtValue();
return -1;
};
// Make sure the first element matches
// (zext (extract_vector_elt X, C))
int64_t Offset = checkElem(Op0);
if (Offset < 0)
return SDValue();
unsigned NumElems = N->getNumOperands();
SDValue In = Op0.getOperand(0).getOperand(0);
EVT InSVT = In.getValueType().getScalarType();
EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
// Don't create an illegal input type after type legalization.
if (LegalTypes && !TLI.isTypeLegal(InVT))
return SDValue();
// Ensure all the elements come from the same vector and are adjacent.
for (unsigned i = 1; i != NumElems; ++i) {
if ((Offset + i) != checkElem(N->getOperand(i)))
return SDValue();
}
SDLoc DL(N);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
Op0.getOperand(0).getOperand(1));
return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
VT, In);
}
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
// A vector built entirely of undefs is undef.
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
// If this is a splat of a bitcast from another vector, change to a
// concat_vector.
// For example:
// (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
// (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
//
// If X is a build_vector itself, the concat can become a larger build_vector.
// TODO: Maybe this is useful for non-splat too?
if (!LegalOperations) {
if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
Splat = peekThroughBitcasts(Splat);
EVT SrcVT = Splat.getValueType();
if (SrcVT.isVector()) {
unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
SrcVT.getVectorElementType(), NumElts);
if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
NewVT, Ops);
return DAG.getBitcast(VT, Concat);
}
}
}
}
// Check if we can express the BUILD_VECTOR via a subvector extract.
if (!LegalTypes && (N->getNumOperands() > 1)) {
SDValue Op0 = N->getOperand(0);
auto checkElem = [&](SDValue Op) -> uint64_t {
if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
(Op0.getOperand(0) == Op.getOperand(0)))
if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return CNode->getZExtValue();
return -1;
};
int Offset = checkElem(Op0);
for (unsigned i = 0; i < N->getNumOperands(); ++i) {
if (Offset + i != checkElem(N->getOperand(i))) {
Offset = -1;
break;
}
}
if ((Offset == 0) &&
(Op0.getOperand(0).getValueType() == N->getValueType(0)))
return Op0.getOperand(0);
if ((Offset != -1) &&
((Offset % N->getValueType(0).getVectorNumElements()) ==
0)) // IDX must be multiple of output size.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
Op0.getOperand(0), Op0.getOperand(1));
}
if (SDValue V = convertBuildVecZextToZext(N))
return V;
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
if (SDValue V = reduceBuildVecTruncToBitCast(N))
return V;
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
// A splat of a single element is a SPLAT_VECTOR if supported on the target.
// Do this late as some of the above may replace the splat.
if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
assert(!V.isUndef() && "Splat of undef should have been handled earlier");
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
}
return SDValue();
}
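// Fold a CONCAT_VECTORS whose operands are all bitcasts from scalars (or
// undef) into a BUILD_VECTOR of those scalars followed by a bitcast. For
// example, when v2i32 is not a legal type:
//   (v4i32 concat_vectors (v2i32 bitcast i64:a), (v2i32 bitcast i64:b))
//     --> (v4i32 bitcast (v2i64 build_vector a, b))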
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT OpVT = N->getOperand(0).getValueType();
// If the operands are legal vectors, leave them alone.
if (TLI.isTypeLegal(OpVT))
return SDValue();
SDLoc DL(N);
EVT VT = N->getValueType(0);
SmallVector<SDValue, 8> Ops;
EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
// Keep track of what we encounter.
bool AnyInteger = false;
bool AnyFP = false;
for (const SDValue &Op : N->ops()) {
if (ISD::BITCAST == Op.getOpcode() &&
!Op.getOperand(0).getValueType().isVector())
Ops.push_back(Op.getOperand(0));
else if (ISD::UNDEF == Op.getOpcode())
Ops.push_back(ScalarUndef);
else
return SDValue();
// Note whether we encounter an integer or floating point scalar.
// If it's neither, bail out, it could be something weird like x86mmx.
EVT LastOpVT = Ops.back().getValueType();
if (LastOpVT.isFloatingPoint())
AnyFP = true;
else if (LastOpVT.isInteger())
AnyInteger = true;
else
return SDValue();
}
// If any of the operands is a floating point scalar bitcast to a vector,
// use floating point types throughout, and bitcast everything.
// Replace UNDEFs by another scalar UNDEF node, of the final desired type.
if (AnyFP) {
SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
if (AnyInteger) {
for (SDValue &Op : Ops) {
if (Op.getValueType() == SVT)
continue;
if (Op.isUndef())
Op = ScalarUndef;
else
Op = DAG.getBitcast(SVT, Op);
}
}
}
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
VT.getSizeInBits() / SVT.getSizeInBits());
return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
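// For example:
//   (v4i32 concat_vectors (v2i32 extract_subvector v4i32:A, 2),
//                         (v2i32 extract_subvector v4i32:B, 0))
//     --> (v4i32 vector_shuffle<2,3,4,5> A, B)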
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
EVT OpVT = N->getOperand(0).getValueType();
// We currently can't generate an appropriate shuffle for a scalable vector.
if (VT.isScalableVector())
return SDValue();
int NumElts = VT.getVectorNumElements();
int NumOpElts = OpVT.getVectorNumElements();
SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
SmallVector<int, 8> Mask;
for (SDValue Op : N->ops()) {
Op = peekThroughBitcasts(Op);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (Op.isUndef()) {
Mask.append((unsigned)NumOpElts, -1);
continue;
}
if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return SDValue();
// What vector are we extracting the subvector from and at what index?
SDValue ExtVec = Op.getOperand(0);
int ExtIdx = Op.getConstantOperandVal(1);
// We want the EVT of the original extraction to correctly scale the
// extraction index.
EVT ExtVT = ExtVec.getValueType();
ExtVec = peekThroughBitcasts(ExtVec);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (ExtVec.isUndef()) {
Mask.append((unsigned)NumOpElts, -1);
continue;
}
// Ensure that we are extracting a subvector from a vector the same
// size as the result.
if (ExtVT.getSizeInBits() != VT.getSizeInBits())
return SDValue();
// Scale the subvector index to account for any bitcast.
int NumExtElts = ExtVT.getVectorNumElements();
if (0 == (NumExtElts % NumElts))
ExtIdx /= (NumExtElts / NumElts);
else if (0 == (NumElts % NumExtElts))
ExtIdx *= (NumElts / NumExtElts);
else
return SDValue();
// At most we can reference 2 inputs in the final shuffle.
if (SV0.isUndef() || SV0 == ExtVec) {
SV0 = ExtVec;
for (int i = 0; i != NumOpElts; ++i)
Mask.push_back(i + ExtIdx);
} else if (SV1.isUndef() || SV1 == ExtVec) {
SV1 = ExtVec;
for (int i = 0; i != NumOpElts; ++i)
Mask.push_back(i + ExtIdx + NumElts);
} else {
return SDValue();
}
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
DAG.getBitcast(VT, SV1), Mask, DAG);
}
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
unsigned CastOpcode = N->getOperand(0).getOpcode();
switch (CastOpcode) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
// TODO: Allow more opcodes?
// case ISD::BITCAST:
// case ISD::TRUNCATE:
// case ISD::ZERO_EXTEND:
// case ISD::SIGN_EXTEND:
// case ISD::FP_EXTEND:
break;
default:
return SDValue();
}
EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
if (!SrcVT.isVector())
return SDValue();
// All operands of the concat must be the same kind of cast from the same
// source type.
SmallVector<SDValue, 4> SrcOps;
for (SDValue Op : N->ops()) {
if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
Op.getOperand(0).getValueType() != SrcVT)
return SDValue();
SrcOps.push_back(Op.getOperand(0));
}
// The wider cast must be supported by the target. This is unusual because
// the operation support type parameter depends on the opcode. In addition,
// check the other type in the cast to make sure this is really legal.
EVT VT = N->getValueType(0);
EVT SrcEltVT = SrcVT.getVectorElementType();
ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (CastOpcode) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
!TLI.isTypeLegal(VT))
return SDValue();
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
!TLI.isTypeLegal(ConcatSrcVT))
return SDValue();
break;
default:
llvm_unreachable("Unexpected cast opcode");
}
// concat (cast X), (cast Y)... -> cast (concat X, Y...)
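// For example:
//   (v4f32 concat (v2f32 sint_to_fp v2i32:X), (v2f32 sint_to_fp v2i32:Y))
//     --> (v4f32 sint_to_fp (v4i32 concat X, Y))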
SDLoc DL(N);
SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
return DAG.getNode(CastOpcode, DL, VT, NewConcat);
}
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
return N->getOperand(0);
// Check if all of the operands are undefs.
EVT VT = N->getValueType(0);
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
// Optimize concat_vectors where all but the first of the vectors are undef.
if (all_of(drop_begin(N->ops()),
[](const SDValue &Op) { return Op.isUndef(); })) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
// If the input is a concat_vectors, just make a larger concat by padding
// with smaller undefs.
if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
unsigned NumOps = N->getNumOperands() * In.getNumOperands();
SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
SDValue Scalar = peekThroughOneUseBitcasts(In);
// concat_vectors(scalar_to_vector(scalar), undef) ->
// scalar_to_vector(scalar)
if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
Scalar.hasOneUse()) {
EVT SVT = Scalar.getValueType().getVectorElementType();
if (SVT == Scalar.getOperand(0).getValueType())
Scalar = Scalar.getOperand(0);
}
// concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
if (!Scalar.getValueType().isVector()) {
// If the bitcast type isn't legal, it might be a trunc of a legal type;
// look through the trunc so we can still do the transform:
// concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
if (Scalar->getOpcode() == ISD::TRUNCATE &&
!TLI.isTypeLegal(Scalar.getValueType()) &&
TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
Scalar = Scalar->getOperand(0);
EVT SclTy = Scalar.getValueType();
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
// Bail out if the vector size is not a multiple of the scalar size.
if (VT.getSizeInBits() % SclTy.getSizeInBits())
return SDValue();
unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
if (VNTNumElms < 2)
return SDValue();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
return SDValue();
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
return DAG.getBitcast(VT, Res);
}
}
// Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
// We have already tested above for an UNDEF only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
auto IsBuildVectorOrUndef = [](const SDValue &Op) {
return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
};
if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
SmallVector<SDValue, 8> Opnds;
EVT SVT = VT.getScalarType();
EVT MinVT = SVT;
if (!SVT.isFloatingPoint()) {
// If the BUILD_VECTORs are built from integers, they may have different
// operand types. Get the smallest type and truncate all operands to it.
bool FoundMinVT = false;
for (const SDValue &Op : N->ops())
if (ISD::BUILD_VECTOR == Op.getOpcode()) {
EVT OpSVT = Op.getOperand(0).getValueType();
MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
FoundMinVT = true;
}
assert(FoundMinVT && "Concat vector type mismatch");
}
for (const SDValue &Op : N->ops()) {
EVT OpVT = Op.getValueType();
unsigned NumElts = OpVT.getVectorNumElements();
if (ISD::UNDEF == Op.getOpcode())
Opnds.append(NumElts, DAG.getUNDEF(MinVT));
if (ISD::BUILD_VECTOR == Op.getOpcode()) {
if (SVT.isFloatingPoint()) {
assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
} else {
for (unsigned i = 0; i != NumElts; ++i)
Opnds.push_back(
DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
}
}
}
assert(VT.getVectorNumElements() == Opnds.size() &&
"Concat vector type mismatch");
return DAG.getBuildVector(VT, SDLoc(N), Opnds);
}
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
return V;
// Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
return V;
if (SDValue V = combineConcatVectorOfCasts(N, DAG))
return V;
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
// operands and look for CONCAT operations that place the incoming vectors
// at the exact same location.
//
// For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
SDValue SingleSource = SDValue();
unsigned PartNumElem =
N->getOperand(0).getValueType().getVectorMinNumElements();
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Op = N->getOperand(i);
if (Op.isUndef())
continue;
// Check if this is the identity extract:
if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return SDValue();
// Find the single incoming vector for the extract_subvector.
if (SingleSource.getNode()) {
if (Op.getOperand(0) != SingleSource)
return SDValue();
} else {
SingleSource = Op.getOperand(0);
// Check the source type is the same as the type of the result.
// If not, this concat may extend the vector, so we cannot
// optimize it away.
if (SingleSource.getValueType() != N->getValueType(0))
return SDValue();
}
// Check that we are reading from the identity index.
unsigned IdentityIndex = i * PartNumElem;
if (Op.getConstantOperandAPInt(1) != IdentityIndex)
return SDValue();
}
if (SingleSource.getNode())
return SingleSource;
return SDValue();
}
// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
// if the subvector can be sourced for free.
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
return V.getOperand(1);
}
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
V.getOperand(0).getValueType() == SubVT &&
(IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
return V.getOperand(SubIdx);
}
return SDValue();
}
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
SelectionDAG &DAG,
bool LegalOperations) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = Extract->getOperand(0);
unsigned BinOpcode = BinOp.getOpcode();
if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
EVT VecVT = BinOp.getValueType();
SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
return SDValue();
SDValue Index = Extract->getOperand(1);
EVT SubVT = Extract->getValueType(0);
if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
return SDValue();
SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
// TODO: We could handle the case where only 1 operand is being inserted by
// creating an extract of the other operand, but that requires checking
// number of uses and/or costs.
if (!Sub0 || !Sub1)
return SDValue();
// We are inserting both operands of the wide binop only to extract back
// to the narrow vector size. Eliminate all of the insert/extract:
// ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
BinOp->getFlags());
}
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
bool LegalOperations) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
return V;
// The extract index must be a constant, so we can map it to a concat operand.
auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
if (!ExtractIndexC)
return SDValue();
// We are looking for an optionally bitcasted wide vector binary operator
// feeding an extract subvector.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
unsigned BOpcode = BinOp.getOpcode();
if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
// Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
// reduced to the unary fneg when it is visited, and we probably want to deal
// with fneg in a target-specific way.
if (BOpcode == ISD::FSUB) {
auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
if (C && C->getValueAPF().isNegZero())
return SDValue();
}
// The binop must be a vector type, so we can extract some fraction of it.
EVT WideBVT = BinOp.getValueType();
// The optimisations below currently assume we are dealing with fixed length
// vectors. It is possible to add support for scalable vectors, but at the
// moment we've done no analysis to prove whether they are profitable or not.
if (!WideBVT.isFixedLengthVector())
return SDValue();
EVT VT = Extract->getValueType(0);
unsigned ExtractIndex = ExtractIndexC->getZExtValue();
assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
"Extract index is not a multiple of the vector length.");
// Bail out if this is not a proper multiple width extraction.
unsigned WideWidth = WideBVT.getSizeInBits();
unsigned NarrowWidth = VT.getSizeInBits();
if (WideWidth % NarrowWidth != 0)
return SDValue();
// Bail out if we are extracting a fraction of a single operation. This can
// occur because we potentially looked through a bitcast of the binop.
unsigned NarrowingRatio = WideWidth / NarrowWidth;
unsigned WideNumElts = WideBVT.getVectorNumElements();
if (WideNumElts % NarrowingRatio != 0)
return SDValue();
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
WideNumElts / NarrowingRatio);
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
return SDValue();
// If extraction is cheap, we don't need to look at the binop operands
// for concat ops. The narrow binop alone makes this transform profitable.
// We can't just reuse the original extract index operand because we may have
// bitcasted.
unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
// extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
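// Illustrative example (hypothetical types, no intervening bitcast assumed):
// (v4i32 extract (v8i32 add B0, B1), 4)
//   --> v4i32 add (extract B0, 4), (extract B1, 4)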
SDLoc DL(Extract);
SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(0), NewExtIndex);
SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(1), NewExtIndex);
SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
BinOp.getNode()->getFlags());
return DAG.getBitcast(VT, NarrowBinOp);
}
// Only handle the case where we are doubling and then halving. A larger ratio
// may require more than two narrow binops to replace the wide binop.
if (NarrowingRatio != 2)
return SDValue();
// TODO: The motivating case for this transform is an x86 AVX1 target. That
// target has temptingly almost legal versions of bitwise logic ops in 256-bit
// flavors, but no other 256-bit integer support. This could be extended to
// handle any binop, but that may require fixing/adding other folds to avoid
// codegen regressions.
if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
return SDValue();
// We need at least one concatenation operation of a binop operand to make
// this transform worthwhile. The concat must double the input vector sizes.
auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
return V.getOperand(ConcatOpNum);
return SDValue();
};
SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
if (SubVecL || SubVecR) {
// If a binop operand was not the result of a concat, we must extract a
// half-sized operand for our new narrow binop:
// extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
// extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
// extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
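// Illustrative example (hypothetical v8i32/v4i32 types, extracting the upper
// half so that ExtBOIdx == 4):
// extract (and (concat X1, X2), Y), 4 --> and X2, (extract Y, 4)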
SDLoc DL(Extract);
SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
: DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(0), IndexC);
SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
: DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(1), IndexC);
SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
return DAG.getBitcast(VT, NarrowBinOp);
}
return SDValue();
}
/// If we are extracting a subvector from a wide vector load, convert to a
/// narrow load to eliminate the extraction:
/// (extract_subvector (load wide vector)) --> (load narrow vector)
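/// Illustrative example (hypothetical types, little-endian, added for
/// exposition):
///   (v4i32 extract_subvector (v8i32 load [p]), 4) --> (v4i32 load [p + 16])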
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Add support for big-endian. The offset calculation must be adjusted.
if (DAG.getDataLayout().isBigEndian())
return SDValue();
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
!ExtIdx)
return SDValue();
// Allow targets to opt-out.
EVT VT = Extract->getValueType(0);
// We can only create byte sized loads.
if (!VT.isByteSized())
return SDValue();
unsigned Index = ExtIdx->getZExtValue();
unsigned NumElts = VT.getVectorMinNumElements();
// The definition of EXTRACT_SUBVECTOR states that the index must be a
// multiple of the minimum number of elements in the result type.
assert(Index % NumElts == 0 && "The extract subvector index is not a "
"multiple of the result's element count");
// It's fine to use TypeSize here as we know the offset will not be negative.
TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
return SDValue();
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
SDLoc DL(Extract);
// TODO: Use "BaseIndexOffset" to make this more effective.
SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO;
if (Offset.isScalable()) {
MachinePointerInfo MPI =
MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
} else
MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
StoreSize);
SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
return NewLd;
}
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
uint64_t ExtIdx = N->getConstantOperandVal(1);
// Extract from UNDEF is UNDEF.
if (V.isUndef())
return DAG.getUNDEF(NVT);
if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
return NarrowLoad;
// Combine an extract of an extract into a single extract_subvector.
// ext (ext X, C), 0 --> ext X, C
if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
V.getConstantOperandVal(1)) &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
V.getOperand(1));
}
}
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
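// Illustrative example (hypothetical types, added for exposition):
// (v4i32 extract_subv (v8i32 bitcast (v4i64 X)), 4)
//   --> (v4i32 bitcast (v2i64 extract_subv X, 2))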
if (V.getOpcode() == ISD::BITCAST &&
V.getOperand(0).getValueType().isVector() &&
(!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
SDValue SrcOp = V.getOperand(0);
EVT SrcVT = SrcOp.getValueType();
unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
if ((SrcNumElts % DestNumElts) == 0) {
unsigned SrcDestRatio = SrcNumElts / DestNumElts;
ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
NewExtEC);
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
SDLoc DL(N);
SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
}
if ((DestNumElts % SrcNumElts) == 0) {
unsigned DestSrcRatio = DestNumElts / SrcNumElts;
if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
ElementCount NewExtEC =
NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
EVT ScalarVT = SrcVT.getScalarType();
if ((ExtIdx % DestSrcRatio) == 0) {
SDLoc DL(N);
unsigned IndexValScaled = ExtIdx / DestSrcRatio;
EVT NewExtVT =
EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
SDValue NewExtract =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
if (NewExtEC.isScalar() &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
SDValue NewExtract =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
}
}
}
}
if (V.getOpcode() == ISD::CONCAT_VECTORS) {
unsigned ExtNumElts = NVT.getVectorMinNumElements();
EVT ConcatSrcVT = V.getOperand(0).getValueType();
assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
"Concat and extract subvector do not change element type");
assert((ExtIdx % ExtNumElts) == 0 &&
"Extract index is not a multiple of the input vector length.");
unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
// If the concatenated source types match this extract, it's a direct
// simplification:
// extract_subvec (concat V1, V2, ...), i --> Vi
if (ConcatSrcNumElts == ExtNumElts)
return V.getOperand(ConcatOpIdx);
// If the concatenated source vectors are a multiple of the length of this
// extract, then extract a fraction of one of those source vectors directly
// from a concat operand. Example:
// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
// v2i8 extract_subvec v8i8 Y, 6
if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
SDLoc DL(N);
unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
"Trying to extract from >1 concat operand?");
assert(NewExtIdx % ExtNumElts == 0 &&
"Extract index is not a multiple of the input vector length.");
SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
V.getOperand(ConcatOpIdx), NewIndexC);
}
}
V = peekThroughBitcasts(V);
// If the input is a build vector, try to make a smaller build vector.
if (V.getOpcode() == ISD::BUILD_VECTOR) {
EVT InVT = V.getValueType();
unsigned ExtractSize = NVT.getSizeInBits();
unsigned EltSize = InVT.getScalarSizeInBits();
// Only do this if we won't split any elements.
if (ExtractSize % EltSize == 0) {
unsigned NumElems = ExtractSize / EltSize;
EVT EltVT = InVT.getVectorElementType();
EVT ExtractVT =
NumElems == 1 ? EltVT
: EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
if ((Level < AfterLegalizeDAG ||
(NumElems == 1 ||
TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
(!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
if (NumElems == 1) {
SDValue Src = V->getOperand(IdxVal);
if (EltVT != Src.getValueType())
Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
return DAG.getBitcast(NVT, Src);
}
// Extract the pieces from the original build_vector.
SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
V->ops().slice(IdxVal, NumElems));
return DAG.getBitcast(NVT, BuildVec);
}
}
}
if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only the simple case where the vector being inserted and the vector
// being extracted are of the same size.
EVT SmallVT = V.getOperand(1).getValueType();
if (!NVT.bitsEq(SmallVT))
return SDValue();
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
// indices are equal or bit offsets are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
uint64_t InsIdx = V.getConstantOperandVal(2);
if (InsIdx * SmallVT.getScalarSizeInBits() ==
ExtIdx * NVT.getScalarSizeInBits()) {
if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
return SDValue();
return DAG.getBitcast(NVT, V.getOperand(1));
}
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
N->getOperand(1));
}
if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
return NarrowBOp;
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
/// followed by concatenation. Narrow vector ops may have better performance
/// than wide ops, and this can unlock further narrowing of other vector ops.
/// Targets can invert this transform later if it is not profitable.
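/// Illustrative example (v8i32 with v4i32 halves; mask values chosen for
/// exposition):
///   shuffle (concat X, undef), (concat Y, undef), <0,9,2,11,1,8,3,10>
///     --> concat (shuffle X, Y, <0,5,2,7>), (shuffle X, Y, <1,4,3,6>)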
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
!N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
return SDValue();
// Split the wide shuffle mask into halves. Any mask element that is accessing
// operand 1 is offset down to account for narrowing of the vectors.
ArrayRef<int> Mask = Shuf->getMask();
EVT VT = Shuf->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
unsigned HalfNumElts = NumElts / 2;
SmallVector<int, 16> Mask0(HalfNumElts, -1);
SmallVector<int, 16> Mask1(HalfNumElts, -1);
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] == -1)
continue;
// If we reference the upper (undef) subvector then the element is undef.
if ((Mask[i] % NumElts) >= HalfNumElts)
continue;
int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
if (i < HalfNumElts)
Mask0[i] = M;
else
Mask1[i - HalfNumElts] = M;
}
// Ask the target if this is a valid transform.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
HalfNumElts);
if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
!TLI.isShuffleMaskLegal(Mask1, HalfVT))
return SDValue();
// shuffle (concat X, undef), (concat Y, undef), Mask -->
// concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
SDLoc DL(Shuf);
SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
}
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into a simpler shuffle followed by
// a concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
ArrayRef<int> Mask = SVN->getMask();
SmallVector<SDValue, 4> Ops;
EVT ConcatVT = N0.getOperand(0).getValueType();
unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
unsigned NumConcats = NumElts / NumElemsPerConcat;
auto IsUndefMaskElt = [](int i) { return i == -1; };
// Special case: shuffle(concat(A,B)) can be more efficiently represented
// as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
// half vector elements.
if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
IsUndefMaskElt)) {
N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
N0.getOperand(1),
Mask.slice(0, NumElemsPerConcat));
N1 = DAG.getUNDEF(ConcatVT);
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
}
// Look at each subvector-sized piece of the shuffle. We're looking for exact
// subvector-sized copies from one of the concatenated source vectors.
for (unsigned I = 0; I != NumConcats; ++I) {
unsigned Begin = I * NumElemsPerConcat;
ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
// Make sure we're dealing with a copy.
if (llvm::all_of(SubMask, IsUndefMaskElt)) {
Ops.push_back(DAG.getUNDEF(ConcatVT));
continue;
}
int OpIdx = -1;
for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
if (IsUndefMaskElt(SubMask[i]))
continue;
if ((SubMask[i] % (int)NumElemsPerConcat) != i)
return SDValue();
int EltOpIdx = SubMask[i] / NumElemsPerConcat;
if (0 <= OpIdx && EltOpIdx != OpIdx)
return SDValue();
OpIdx = EltOpIdx;
}
assert(0 <= OpIdx && "Unknown concat_vectors op");
if (OpIdx < (int)N0.getNumOperands())
Ops.push_back(N0.getOperand(OpIdx));
else
Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load. A BUILD_VECTOR where each
// element is identical is a splat. A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
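//
// Illustrative example (values chosen for exposition; assumes the one-use and
// duplicate-operand restrictions above are satisfied):
//   shuffle (build_vector a,b,c,d), (build_vector e,f,g,h), <0,5,2,7>
//     --> build_vector a,f,c,h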
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI) {
EVT VT = SVN->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = SVN->getOperand(0);
SDValue N1 = SVN->getOperand(1);
if (!N0->hasOneUse())
return SDValue();
// If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
// discussed above.
if (!N1.isUndef()) {
if (!N1->hasOneUse())
return SDValue();
bool N0AnyConst = isAnyConstantBuildVector(N0);
bool N1AnyConst = isAnyConstantBuildVector(N1);
if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
return SDValue();
if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
return SDValue();
}
// If both inputs are splats of the same value then we can safely merge this
// to a single BUILD_VECTOR with undef elements based on the shuffle mask.
bool IsSplat = false;
auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
if (BV0 && BV1)
if (SDValue Splat0 = BV0->getSplatValue())
IsSplat = (Splat0 == BV1->getSplatValue());
SmallVector<SDValue, 8> Ops;
SmallSet<SDValue, 16> DuplicateOps;
for (int M : SVN->getMask()) {
SDValue Op = DAG.getUNDEF(VT.getScalarType());
if (M >= 0) {
int Idx = M < (int)NumElts ? M : M - NumElts;
SDValue &S = (M < (int)NumElts ? N0 : N1);
if (S.getOpcode() == ISD::BUILD_VECTOR) {
Op = S.getOperand(Idx);
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
SDValue Op0 = S.getOperand(0);
Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
} else {
// Operand can't be combined - bail out.
return SDValue();
}
}
// Don't duplicate a non-constant BUILD_VECTOR operand unless we're
// generating a splat; semantically, this is fine, but it's likely to
// generate low-quality code if the target can't reconstruct an appropriate
// shuffle.
if (!Op.isUndef() && !isIntOrFPConstant(Op))
if (!IsSplat && !DuplicateOps.insert(Op).second)
return SDValue();
Ops.push_back(Op);
}
// BUILD_VECTOR requires all inputs to be of the same type; find the
// maximum type and extend them all.
EVT SVT = VT.getScalarType();
if (SVT.isInteger())
for (SDValue &Op : Ops)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT != VT.getScalarType())
for (SDValue &Op : Ops)
Op = TLI.isZExtFree(Op.getValueType(), SVT)
? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
: DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
// Match shuffles that can be converted to any_vector_extend_in_reg.
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI,
bool LegalOperations) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
// TODO Add support for big-endian when we have a test case.
if (!VT.isInteger() || IsBigEndian)
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
ArrayRef<int> Mask = SVN->getMask();
SDValue N0 = SVN->getOperand(0);
// shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] < 0)
continue;
if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
continue;
return false;
}
return true;
};
// Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
// power-of-2 extensions as they are the most likely.
for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
// Check for non power of 2 vector sizes
if (NumElts % Scale != 0)
continue;
if (!isAnyExtend(Scale))
continue;
EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
// Never create an illegal type. Only create unsupported operations if we
// are pre-legalization.
if (TLI.isTypeLegal(OutVT))
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
return DAG.getBitcast(VT,
DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
SDLoc(SVN), OutVT, N0));
}
return SDValue();
}
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// be able to remove it.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
// TODO Add support for big-endian when we have a test case.
if (!VT.isInteger() || IsBigEndian)
return SDValue();
SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
unsigned Opcode = N0.getOpcode();
if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
return SDValue();
SDValue N00 = N0.getOperand(0);
ArrayRef<int> Mask = SVN->getMask();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
return SDValue();
unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
// (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
// (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
// (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] < 0)
continue;
if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
continue;
return false;
}
return true;
};
// At the moment we just handle the case where we've truncated back to the
// same size as before the extension.
// TODO: handle more extension/truncation cases as cases arise.
if (EltSizeInBits != ExtSrcSizeInBits)
return SDValue();
// We can remove *extend_vector_inreg only if the truncation happens at
// the same scale as the extension.
if (isTruncate(ExtScale))
return DAG.getBitcast(VT, N00);
return SDValue();
}
// Combine shuffles of splat-shuffles of the form:
// shuffle (shuffle V, undef, splat-mask), undef, M
// If splat-mask contains undef elements, we need to be careful about
// introducing undefs in the folded mask which are not the result of composing
// the masks of the shuffles.
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
if (!Shuf->getOperand(1).isUndef())
return SDValue();
auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
if (!Splat || !Splat->isSplat())
return SDValue();
ArrayRef<int> ShufMask = Shuf->getMask();
ArrayRef<int> SplatMask = Splat->getMask();
assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
// Prefer simplifying to the splat-shuffle, if possible. This is legal if
// every undef mask element in the splat-shuffle has a corresponding undef
// element in the user-shuffle's mask or if the composition of mask elements
// would result in undef.
// Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
// * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
// In this case it is not legal to simplify to the splat-shuffle because we
// may be exposing the users of the shuffle to an undef element at index 1
// which was not there before the combine.
// * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
// In this case the composition of masks yields SplatMask, so it's ok to
// simplify to the splat-shuffle.
// * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
// In this case the composed mask includes all undef elements of SplatMask
// and in addition sets element zero to undef. It is safe to simplify to
// the splat-shuffle.
auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
ArrayRef<int> SplatMask) {
for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
if (UserMask[i] != -1 && SplatMask[i] == -1 &&
SplatMask[UserMask[i]] != -1)
return false;
return true;
};
if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
return Shuf->getOperand(0);
// Create a new shuffle with a mask that is composed of the two shuffles'
// masks.
SmallVector<int, 32> NewMask;
for (int Idx : ShufMask)
NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
Splat->getOperand(0), Splat->getOperand(1),
NewMask);
}
/// Combine shuffle of shuffle of the form:
/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
SelectionDAG &DAG) {
if (!OuterShuf->getOperand(1).isUndef())
return SDValue();
auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
return SDValue();
ArrayRef<int> OuterMask = OuterShuf->getMask();
ArrayRef<int> InnerMask = InnerShuf->getMask();
unsigned NumElts = OuterMask.size();
assert(NumElts == InnerMask.size() && "Mask length mismatch");
SmallVector<int, 32> CombinedMask(NumElts, -1);
int SplatIndex = -1;
for (unsigned i = 0; i != NumElts; ++i) {
// Undef lanes remain undef.
int OuterMaskElt = OuterMask[i];
if (OuterMaskElt == -1)
continue;
// Peek through the shuffle masks to get the underlying source element.
int InnerMaskElt = InnerMask[OuterMaskElt];
if (InnerMaskElt == -1)
continue;
// Initialize the splatted element.
if (SplatIndex == -1)
SplatIndex = InnerMaskElt;
// Non-matching index - this is not a splat.
if (SplatIndex != InnerMaskElt)
return SDValue();
CombinedMask[i] = InnerMaskElt;
}
assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
getSplatIndex(CombinedMask) != -1) &&
"Expected a splat mask");
// TODO: The transform may be a win even if the mask is not legal.
EVT VT = OuterShuf->getValueType(0);
assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
return SDValue();
return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
InnerShuf->getOperand(1), CombinedMask);
}
/// If the shuffle mask is taking exactly one element from the first vector
/// operand and passing through all other elements from the second vector
/// operand, return the index of the mask element that is choosing an element
/// from the first operand. Otherwise, return -1.
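/// Illustrative examples (4-element masks, added for exposition):
///   <4,5,2,7> --> 2  (only lane 2 takes an element from operand 0)
///   <0,5,2,7> --> -1 (two lanes take elements from operand 0)
///   <4,5,6,7> --> -1 (no lane takes an element from operand 0)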
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
int MaskSize = Mask.size();
int EltFromOp0 = -1;
// TODO: This does not match if there are undef elements in the shuffle mask.
// Should we ignore undefs in the shuffle mask instead? The trade-off is
// removing an instruction (a shuffle), but losing the knowledge that some
// vector lanes are not needed.
for (int i = 0; i != MaskSize; ++i) {
if (Mask[i] >= 0 && Mask[i] < MaskSize) {
// We're looking for a shuffle of exactly one element from operand 0.
if (EltFromOp0 != -1)
return -1;
EltFromOp0 = i;
} else if (Mask[i] != i + MaskSize) {
// Nothing from operand 1 can change lanes.
return -1;
}
}
return EltFromOp0;
}
/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
// First, check if we are taking one element of a vector and shuffling that
// element into another vector.
ArrayRef<int> Mask = Shuf->getMask();
SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
SDValue Op0 = Shuf->getOperand(0);
SDValue Op1 = Shuf->getOperand(1);
int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
if (ShufOp0Index == -1) {
// Commute mask and check again.
ShuffleVectorSDNode::commuteMask(CommutedMask);
ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
if (ShufOp0Index == -1)
return SDValue();
// Commute operands to match the commuted shuffle mask.
std::swap(Op0, Op1);
Mask = CommutedMask;
}
// The shuffle inserts exactly one element from operand 0 into operand 1.
// Now see if we can access that element as a scalar via a real insert element
// instruction.
// TODO: We can try harder to locate the element as a scalar. Examples: it
// could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
"Shuffle mask value must be from operand 0");
if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
return SDValue();
// There's an existing insertelement with constant insertion index, so we
// don't need to check the legality/profitability of a replacement operation
// that differs at most in the constant value. The target should be able to
// lower any of those in a similar way. If not, legalization will expand this
// to a scalar-to-vector plus shuffle.
//
// Note that the shuffle may move the scalar from the position that the insert
// element used. Therefore, our new insert element occurs at the shuffle's
// mask index value, not the insert's index value.
// shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
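// Illustrative example (v4i32, values chosen for exposition):
// shuffle (insertelt V1, x, 0), V2, <4,5,0,7> --> insertelt V2, x, 2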
SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
Op1, Op0.getOperand(1), NewInsIndex);
}
/// If we have a unary shuffle of a shuffle, see if it can be folded away
/// completely. This has the potential to lose undef knowledge because the first
/// shuffle may not have an undef mask element where the second one does. So
/// only call this after doing simplifications based on demanded elements.
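/// Illustrative example (values chosen for exposition):
///   shuf (shuf0 X, Y, <0,0,2,2>), undef, <1,1,3,3> --> shuf0 X, Y, <0,0,2,2>
/// because every lane of the outer shuffle selects the same underlying
/// element as the corresponding lane of the inner shuffle.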
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
// shuf (shuf0 X, Y, Mask0), undef, Mask
auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
if (!Shuf0 || !Shuf->getOperand(1).isUndef())
return SDValue();
ArrayRef<int> Mask = Shuf->getMask();
ArrayRef<int> Mask0 = Shuf0->getMask();
for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
// Ignore undef elements.
if (Mask[i] == -1)
continue;
assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
// Is the element of the shuffle operand chosen by this shuffle the same as
// the element chosen by the shuffle operand itself?
if (Mask0[Mask[i]] != Mask0[i])
return SDValue();
}
// Every element of this shuffle is identical to the result of the previous
// shuffle, so we can replace this value.
return Shuf->getOperand(0);
}
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
// Canonicalize shuffle undef, undef -> undef
if (N0.isUndef() && N1.isUndef())
return DAG.getUNDEF(VT);
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
// Canonicalize shuffle v, v -> v, undef
if (N0 == N1) {
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= (int)NumElts) Idx -= NumElts;
NewMask.push_back(Idx);
}
return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
}
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N0.isUndef())
return DAG.getCommutedVectorShuffle(*SVN);
// Remove references to rhs if it is undef
if (N1.isUndef()) {
bool Changed = false;
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= (int)NumElts) {
Idx = -1;
Changed = true;
}
NewMask.push_back(Idx);
}
if (Changed)
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
return InsElt;
// A shuffle of a single vector that is a splatted value can always be folded.
if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
return V;
if (SDValue V = formSplatFromShuffles(SVN, DAG))
return V;
// If it is a splat, check if the argument vector is another splat or a
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
int SplatIndex = SVN->getSplatIndex();
if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
// splat (vector_bo L, R), Index -->
// splat (scalar_bo (extelt L, Index), (extelt R, Index))
SDValue L = N0.getOperand(0), R = N0.getOperand(1);
SDLoc DL(N);
EVT EltVT = VT.getScalarType();
SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
N0.getNode()->getFlags());
SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
}
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
// look through conversions that change things like v4f32 to v2f64.
SDNode *V = N0.getNode();
if (V->getOpcode() == ISD::BITCAST) {
SDValue ConvInput = V->getOperand(0);
if (ConvInput.getValueType().isVector() &&
ConvInput.getValueType().getVectorNumElements() == NumElts)
V = ConvInput.getNode();
}
if (V->getOpcode() == ISD::BUILD_VECTOR) {
assert(V->getNumOperands() == NumElts &&
"BUILD_VECTOR has wrong number of operands");
SDValue Base;
bool AllSame = true;
for (unsigned i = 0; i != NumElts; ++i) {
if (!V->getOperand(i).isUndef()) {
Base = V->getOperand(i);
break;
}
}
// Splat of <u, u, u, u>, return <u, u, u, u>
if (!Base.getNode())
return N0;
for (unsigned i = 0; i != NumElts; ++i) {
if (V->getOperand(i) != Base) {
AllSame = false;
break;
}
}
// Splat of <x, x, x, x>, return <x, x, x, x>
if (AllSame)
return N0;
// Canonicalize any other splat as a build_vector.
SDValue Splatted = V->getOperand(SplatIndex);
SmallVector<SDValue, 8> Ops(NumElts, Splatted);
SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
// We may have jumped through bitcasts, so the type of the
// BUILD_VECTOR may not match the type of the shuffle.
if (V->getValueType(0) != VT)
NewBV = DAG.getBitcast(VT, NewBV);
return NewBV;
}
}
// Simplify source operands based on shuffle mask.
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
// This is intentionally placed after demanded elements simplification because
// it could eliminate knowledge of undef elements created by this shuffle.
if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
return ShufOp;
// Match shuffles that can be converted to any_vector_extend_in_reg.
if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
return V;
// Combine "truncate_vector_in_reg" style shuffles.
if (SDValue V = combineTruncationShuffle(SVN, DAG))
return V;
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
Level < AfterLegalizeVectorOps &&
(N1.isUndef() ||
(N1.getOpcode() == ISD::CONCAT_VECTORS &&
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
if (SDValue V = partitionShuffleOfConcats(N, DAG))
return V;
}
// A shuffle of a concat of the same narrow vector can be reduced to use
// only low-half elements of a concat with undef:
// shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
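// Illustrative example (v8i32 with v4i32 halves; mask values chosen for
// exposition):
//   shuf (concat X, X), undef, <5,6,0,1,7,4,2,3>
//     --> shuf (concat X, undef), undef, <1,2,0,1,3,0,2,3>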
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
N0.getNumOperands() == 2 &&
N0.getOperand(0) == N0.getOperand(1)) {
int HalfNumElts = (int)NumElts / 2;
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= HalfNumElts) {
assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
Idx -= HalfNumElts;
}
NewMask.push_back(Idx);
}
if (TLI.isShuffleMaskLegal(NewMask, VT)) {
SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
N0.getOperand(0), UndefVec);
return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
}
}
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
return Res;
// If this shuffle only has a single input that is a bitcasted shuffle,
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
// back to their original types.
if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N1.isUndef() && Level < AfterLegalizeVectorOps &&
TLI.isTypeLegal(VT)) {
SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
EVT SVT = VT.getScalarType();
EVT InnerVT = BC0->getValueType(0);
EVT InnerSVT = InnerVT.getScalarType();
// Determine which shuffle works with the smaller scalar type.
EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
EVT ScaleSVT = ScaleVT.getScalarType();
if (TLI.isTypeLegal(ScaleVT) &&
0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
// Scale the shuffle masks to the smaller scalar type.
ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
SmallVector<int, 8> InnerMask;
SmallVector<int, 8> OuterMask;
narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
// Merge the shuffle masks.
SmallVector<int, 8> NewMask;
for (int M : OuterMask)
NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
// Test for shuffle mask legality over both commutations.
SDValue SV0 = BC0->getOperand(0);
SDValue SV1 = BC0->getOperand(1);
bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
if (!LegalMask) {
std::swap(SV0, SV1);
ShuffleVectorSDNode::commuteMask(NewMask);
LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
}
if (LegalMask) {
SV0 = DAG.getBitcast(ScaleVT, SV0);
SV1 = DAG.getBitcast(ScaleVT, SV1);
return DAG.getBitcast(
VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
}
}
}
}
// Compute the combined shuffle mask for a shuffle with SV0 as the first
// operand, and SV1 as the second operand.
// i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
// Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
auto MergeInnerShuffle =
[NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
ShuffleVectorSDNode *OtherSVN, SDValue N1,
const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
SmallVectorImpl<int> &Mask) -> bool {
// Don't try to fold splats; they're likely to simplify somehow, or they
// might be free.
if (OtherSVN->isSplat())
return false;
SV0 = SV1 = SDValue();
Mask.clear();
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx < 0) {
// Propagate Undef.
Mask.push_back(Idx);
continue;
}
if (Commute)
Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
SDValue CurrentVec;
if (Idx < (int)NumElts) {
// This shuffle index refers to the inner shuffle N0. Lookup the inner
// shuffle mask to identify which vector is actually referenced.
Idx = OtherSVN->getMaskElt(Idx);
if (Idx < 0) {
// Propagate Undef.
Mask.push_back(Idx);
continue;
}
CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
: OtherSVN->getOperand(1);
} else {
// This shuffle index references an element within N1.
CurrentVec = N1;
}
// Simple case where 'CurrentVec' is UNDEF.
if (CurrentVec.isUndef()) {
Mask.push_back(-1);
continue;
}
// Canonicalize the shuffle index. We don't know yet if CurrentVec
// will be the first or second operand of the combined shuffle.
Idx = Idx % NumElts;
if (!SV0.getNode() || SV0 == CurrentVec) {
// Ok. CurrentVec is the left hand side.
// Update the mask accordingly.
SV0 = CurrentVec;
Mask.push_back(Idx);
continue;
}
if (!SV1.getNode() || SV1 == CurrentVec) {
// Ok. CurrentVec is the right hand side.
// Update the mask accordingly.
SV1 = CurrentVec;
Mask.push_back(Idx + NumElts);
continue;
}
// Last chance - see if the vector is another shuffle and if it
// uses one of the existing candidate shuffle ops.
if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
int InnerIdx = CurrentSVN->getMaskElt(Idx);
if (InnerIdx < 0) {
Mask.push_back(-1);
continue;
}
SDValue InnerVec = (InnerIdx < (int)NumElts)
? CurrentSVN->getOperand(0)
: CurrentSVN->getOperand(1);
if (InnerVec.isUndef()) {
Mask.push_back(-1);
continue;
}
InnerIdx %= NumElts;
if (InnerVec == SV0) {
Mask.push_back(InnerIdx);
continue;
}
if (InnerVec == SV1) {
Mask.push_back(InnerIdx + NumElts);
continue;
}
}
// Bail out if we cannot convert the shuffle pair into a single shuffle.
return false;
}
if (llvm::all_of(Mask, [](int M) { return M < 0; }))
return true;
// Avoid introducing shuffles with illegal mask.
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
if (TLI.isShuffleMaskLegal(Mask, VT))
return true;
std::swap(SV0, SV1);
ShuffleVectorSDNode::commuteMask(Mask);
return TLI.isShuffleMaskLegal(Mask, VT);
};
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
// Canonicalize shuffles according to rules:
// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
// shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(N1->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
SDValue SV0 = N1->getOperand(0);
SDValue SV1 = N1->getOperand(1);
bool HasSameOp0 = N0 == SV0;
bool IsSV1Undef = SV1.isUndef();
if (HasSameOp0 || IsSV1Undef || N0 == SV1)
// Commute the operands of this shuffle so merging below will trigger.
return DAG.getCommutedVectorShuffle(*SVN);
}
// Canonicalize splat shuffles to the RHS to improve merging below.
// shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
cast<ShuffleVectorSDNode>(N0)->isSplat() &&
!cast<ShuffleVectorSDNode>(N1)->isSplat()) {
return DAG.getCommutedVectorShuffle(*SVN);
}
// Try to fold according to rules:
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
// Don't try to fold shuffles with illegal type.
// Only fold if this shuffle is the only user of the other shuffle.
// Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
for (int i = 0; i != 2; ++i) {
if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
N->isOnlyUserOf(N->getOperand(i).getNode())) {
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
assert(OtherSV->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
SDValue SV0, SV1;
SmallVector<int, 4> Mask;
if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
SV0, SV1, Mask)) {
// Check if all indices in Mask are Undef. If so, propagate Undef.
if (llvm::all_of(Mask, [](int M) { return M < 0; }))
return DAG.getUNDEF(VT);
return DAG.getVectorShuffle(VT, SDLoc(N),
SV0 ? SV0 : DAG.getUNDEF(VT),
SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
}
}
}
// Merge shuffles through binops if we are able to merge the shuffle with at
// least one other shuffle.
// shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
// shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
unsigned SrcOpcode = N0.getOpcode();
if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
(N1.isUndef() ||
(SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
// Get binop source ops, or just pass on the undef.
SDValue Op00 = N0.getOperand(0);
SDValue Op01 = N0.getOperand(1);
SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
// TODO: We might be able to relax the VT check but we don't currently
// have any isBinOp() that has different result/ops VTs so play safe until
// we have test coverage.
if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
Op01.getValueType() == VT && Op11.getValueType() == VT &&
(Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
SmallVectorImpl<int> &Mask, bool LeftOp,
bool Commute) {
SDValue InnerN = Commute ? N1 : N0;
SDValue Op0 = LeftOp ? Op00 : Op01;
SDValue Op1 = LeftOp ? Op10 : Op11;
if (Commute)
std::swap(Op0, Op1);
// Only accept the merged shuffle if we don't introduce undef elements,
// or the inner shuffle already contained undef elements.
auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
Mask) &&
(llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
llvm::none_of(Mask, [](int M) { return M < 0; }));
};
// Ensure we don't increase the number of shuffles - we must merge a
// shuffle from at least one of the LHS and RHS ops.
bool MergedLeft = false;
SDValue LeftSV0, LeftSV1;
SmallVector<int, 4> LeftMask;
if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
MergedLeft = true;
} else {
LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
LeftSV0 = Op00, LeftSV1 = Op10;
}
bool MergedRight = false;
SDValue RightSV0, RightSV1;
SmallVector<int, 4> RightMask;
if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
MergedRight = true;
} else {
RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
RightSV0 = Op01, RightSV1 = Op11;
}
if (MergedLeft || MergedRight) {
SDLoc DL(N);
SDValue LHS = DAG.getVectorShuffle(
VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
SDValue RHS = DAG.getVectorShuffle(
VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
}
}
}
}
if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
return V;
return SDValue();
}
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
SDValue InVal = N->getOperand(0);
EVT VT = N->getValueType(0);
// Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
// with a VECTOR_SHUFFLE and possible truncate.
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
VT.isFixedLengthVector() &&
InVal->getOperand(0).getValueType().isFixedLengthVector()) {
SDValue InVec = InVal->getOperand(0);
SDValue EltNo = InVal->getOperand(1);
auto InVecT = InVec.getValueType();
if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
int Elt = C0->getZExtValue();
NewMask[0] = Elt;
// If we have an implicit truncate, do the truncation here as long as it's
// legal; if it's not legal, fall through to the handling below.
if (VT.getScalarType() != InVal.getValueType() &&
InVal.getValueType().isScalarInteger() &&
isTypeLegal(VT.getScalarType())) {
SDValue Val =
DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
}
if (VT.getScalarType() == InVecT.getScalarType() &&
VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
SDValue LegalShuffle =
TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
DAG.getUNDEF(InVecT), NewMask, DAG);
if (LegalShuffle) {
// If the initial vector is the correct size this shuffle is a
// valid result.
if (VT == InVecT)
return LegalShuffle;
// If not we must truncate the vector.
if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
InVecT.getVectorElementType(),
VT.getVectorNumElements());
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
LegalShuffle, ZeroIdx);
}
}
}
}
}
return SDValue();
}
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
uint64_t InsIdx = N->getConstantOperandVal(2);
// If inserting an UNDEF, just return the original vector.
if (N1.isUndef())
return N0;
// If this is an insert of an extracted vector into an undef vector, we can
// just use the input to the extract.
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
return N1.getOperand(0);
// If we are inserting a bitcast value into an undef, with the same
// number of elements, just use the bitcast input of the extract.
// i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
// BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(0).getOperand(1) == N2 &&
N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
VT.getVectorElementCount() &&
N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
VT.getSizeInBits()) {
return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
}
// If both N0 and N1 are bitcast values on which insert_subvector
// would make sense, pull the bitcast through.
// i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
// BITCAST (INSERT_SUBVECTOR N0 N1 N2)
if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
SDValue CN0 = N0.getOperand(0);
SDValue CN1 = N1.getOperand(0);
EVT CN0VT = CN0.getValueType();
EVT CN1VT = CN1.getValueType();
if (CN0VT.isVector() && CN1VT.isVector() &&
CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
CN0.getValueType(), CN0, CN1, N2);
return DAG.getBitcast(VT, NewINSERT);
}
}
// Combine INSERT_SUBVECTORs where we are inserting to the same index.
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
N0.getOperand(1).getValueType() == N1.getValueType() &&
N0.getOperand(2) == N2)
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
N1, N2);
// Eliminate an intermediate insert into an undef vector:
// insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
// insert_subvector undef, X, N2
if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
N1.getOperand(1), N2);
// Push subvector bitcasts to the output, adjusting the index as we go.
// insert_subvector(bitcast(v), bitcast(s), c1)
// -> bitcast(insert_subvector(v, s, c2))
if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
N1.getOpcode() == ISD::BITCAST) {
SDValue N0Src = peekThroughBitcasts(N0);
SDValue N1Src = peekThroughBitcasts(N1);
EVT N0SrcSVT = N0Src.getValueType().getScalarType();
EVT N1SrcSVT = N1Src.getValueType().getScalarType();
if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
EVT NewVT;
SDLoc DL(N);
SDValue NewIdx;
LLVMContext &Ctx = *DAG.getContext();
ElementCount NumElts = VT.getVectorElementCount();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
} else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
NumElts.divideCoefficientBy(Scale));
NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
}
}
if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
SDValue Res = DAG.getBitcast(NewVT, N0Src);
Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
return DAG.getBitcast(VT, Res);
}
}
}
// Canonicalize insert_subvector dag nodes.
// Example:
// (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
// -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0), if Idx1 < Idx0
if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
N1.getValueType() == N0.getOperand(1).getValueType()) {
unsigned OtherIdx = N0.getConstantOperandVal(2);
if (InsIdx < OtherIdx) {
// Swap nodes.
SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
N0.getOperand(0), N1, N2);
AddToWorklist(NewOp.getNode());
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
VT, NewOp, N0.getOperand(1), N0.getOperand(2));
}
}
// If the input vector is a concatenation, and the insert replaces
// one of the pieces, we can optimize into a single concat_vectors.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
N0.getOperand(0).getValueType() == N1.getValueType() &&
N0.getOperand(0).getValueType().isScalableVector() ==
N1.getValueType().isScalableVector()) {
unsigned Factor = N1.getValueType().getVectorMinNumElements();
SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
Ops[InsIdx / Factor] = N1;
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
// Simplify source operands based on insertion.
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
SDValue N0 = N->getOperand(0);
// fold (fp_to_fp16 (fp16_to_fp op)) -> op
if (N0->getOpcode() == ISD::FP16_TO_FP)
return N0->getOperand(0);
return SDValue();
}
SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
// fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
if (AndConst && AndConst->getAPIntValue() == 0xffff) {
return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
N0.getOperand(0));
}
}
return SDValue();
}
SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
unsigned Opcode = N->getOpcode();
// VECREDUCE over 1-element vector is just an extract.
if (VT.getVectorElementCount().isScalar()) {
SDLoc dl(N);
SDValue Res =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
DAG.getVectorIdxConstant(0, dl));
if (Res.getValueType() != N->getValueType(0))
Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
return Res;
}
// On a boolean vector an and/or reduction is the same as a umin/umax
// reduction. Convert them if the latter is legal while the former isn't.
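// For example, when every lane is known to be 0 or all-ones (guaranteed by
// the ComputeNumSignBits check below), umin yields 0 exactly when some lane
// is 0 (an AND reduction) and umax yields all-ones exactly when some lane is
// all-ones (an OR reduction), so the two forms compute the same value.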
if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
}
return SDValue();
}
/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = peekThroughBitcasts(N->getOperand(1));
SDLoc DL(N);
// Make sure we're not running after operation legalization where it
// may have custom lowered the vector shuffles.
if (LegalOperations)
return SDValue();
if (RHS.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
EVT RVT = RHS.getValueType();
unsigned NumElts = RHS.getNumOperands();
// Attempt to create a valid clear mask, splitting the mask into
// sub elements and checking to see if each is
// all zeros or all ones - suitable for shuffle masking.
auto BuildClearMask = [&](int Split) {
int NumSubElts = NumElts * Split;
int NumSubBits = RVT.getScalarSizeInBits() / Split;
SmallVector<int, 8> Indices;
for (int i = 0; i != NumSubElts; ++i) {
int EltIdx = i / Split;
int SubIdx = i % Split;
SDValue Elt = RHS.getOperand(EltIdx);
// X & undef --> 0 (not undef). So this lane must be converted to choose
// from the zero constant vector (same as if the element had all 0-bits).
if (Elt.isUndef()) {
Indices.push_back(i + NumSubElts);
continue;
}
APInt Bits;
if (isa<ConstantSDNode>(Elt))
Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
else if (isa<ConstantFPSDNode>(Elt))
Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
else
return SDValue();
// Extract the sub element from the constant bit mask.
if (DAG.getDataLayout().isBigEndian())
Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
else
Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
if (Bits.isAllOnesValue())
Indices.push_back(i);
else if (Bits == 0)
Indices.push_back(i + NumSubElts);
else
return SDValue();
}
// Let's see if the target supports this vector_shuffle.
EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
return SDValue();
SDValue Zero = DAG.getConstant(0, DL, ClearVT);
return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
DAG.getBitcast(ClearVT, LHS),
Zero, Indices));
};
// Determine maximum split level (byte level masking).
int MaxSplit = 1;
if (RVT.getScalarSizeInBits() % 8 == 0)
MaxSplit = RVT.getScalarSizeInBits() / 8;
for (int Split = 1; Split <= MaxSplit; ++Split)
if (RVT.getScalarSizeInBits() % Split == 0)
if (SDValue S = BuildClearMask(Split))
return S;
return SDValue();
}
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N->getOpcode();
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// TODO: Remove/replace the extract cost check? If the elements are available
// as scalars, then there may be no extract cost. Should we ask if
// inserting a scalar back into a vector is cheap instead?
int Index0, Index1;
SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
!TLI.isExtractVecEltCheap(VT, Index0) ||
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
SDLoc DL(N);
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
// If all lanes but 1 are undefined, no need to splat the scalar result.
// TODO: Keep track of undefs and use that info in the general case.
if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
// bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
// build_vec ..undef, (bo X, Y), undef...
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
Ops[Index0] = ScalarBO;
return DAG.getBuildVector(VT, DL, Ops);
}
// bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
return DAG.getBuildVector(VT, DL, Ops);
}
/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
"SimplifyVBinOp only works on vectors!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Ops[] = {LHS, RHS};
EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
SDNodeFlags Flags = N->getFlags();
// See if we can constant fold the vector operation.
if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
return Fold;
// Move unary shuffles with identical masks after a vector binop:
// VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
// --> shuffle (VBinOp A, B), Undef, Mask
// This does not require type legality checks because we are creating the
// same types of operations that are in the original sequence. We do have to
// restrict ops like integer div that have immediate UB (eg, div-by-zero)
// though. This code is adapted from the identical transform in instcombine.
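// For example (illustrative):
// add (shuffle A, undef, <1,0,3,2>), (shuffle B, undef, <1,0,3,2>)
// --> shuffle (add A, B), undef, <1,0,3,2>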
if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
Opcode != ISD::UREM && Opcode != ISD::SREM &&
Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
(LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
SDLoc DL(N);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
RHS.getOperand(0), Flags);
SDValue UndefV = LHS.getOperand(1);
return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
}
// Try to sink a splat shuffle after a binop with a uniform constant.
// This is limited to cases where neither the shuffle nor the constant have
// undefined elements because that could be poison-unsafe or inhibit
// demanded elements analysis. It is further limited to not change a splat
// of an inserted scalar because that may be optimized better by
// load-folding or other target-specific behaviors.
if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat X), (splat C) --> splat (binop X, C)
SDLoc DL(N);
SDValue X = Shuf0->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
Shuf0->getMask());
}
if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat C), (splat X) --> splat (binop C, X)
SDLoc DL(N);
SDValue X = Shuf1->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
Shuf1->getMask());
}
}
// The following pattern is likely to emerge with vector reduction ops. Moving
// the binary operation ahead of insertion may allow using a narrower vector
// instruction that has better performance than the wide version of the op:
// VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
LHS.getOperand(2) == RHS.getOperand(2) &&
(LHS.hasOneUse() || RHS.hasOneUse())) {
SDValue X = LHS.getOperand(1);
SDValue Y = RHS.getOperand(1);
SDValue Z = LHS.getOperand(2);
EVT NarrowVT = X.getValueType();
if (NarrowVT == Y.getValueType() &&
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
LegalOperations)) {
// (binop undef, undef) may not return undef, so compute that result.
SDLoc DL(N);
SDValue VecC =
DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
}
}
// Make sure all but the first op are undef or constant.
auto ConcatWithConstantOrUndef = [](SDValue Concat) {
return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
return Op.isUndef() ||
ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
});
};
// The following pattern is likely to emerge with vector reduction ops. Moving
// the binary operation ahead of the concat may allow using a narrower vector
// instruction that has better performance than the wide version of the op:
// VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
// concat (VBinOp X, Y), VecC
if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
(LHS.hasOneUse() || RHS.hasOneUse())) {
EVT NarrowVT = LHS.getOperand(0).getValueType();
if (NarrowVT == RHS.getOperand(0).getValueType() &&
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
SDLoc DL(N);
unsigned NumOperands = LHS.getNumOperands();
SmallVector<SDValue, 4> ConcatOps;
for (unsigned i = 0; i != NumOperands; ++i) {
// This constant folds for operands 1 and up.
ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
RHS.getOperand(i)));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
}
}
if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
return V;
return SDValue();
}
SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2) {
assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If we got a simplified select_cc node back from SimplifySelectCC, then
// break it down into a new SETCC node, and a new SELECT node, and then return
// the SELECT node, since we were called with a SELECT node.
if (SCC.getNode()) {
// Check to see if we got a select_cc back (to turn into setcc/select).
// Otherwise, just return whatever node we got back, like fabs.
if (SCC.getOpcode() == ISD::SELECT_CC) {
const SDNodeFlags Flags = N0.getNode()->getFlags();
SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
N0.getValueType(),
SCC.getOperand(0), SCC.getOperand(1),
SCC.getOperand(4), Flags);
AddToWorklist(SETCC.getNode());
SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
SCC.getOperand(2), SCC.getOperand(3));
SelectNode->setFlags(Flags);
return SelectNode;
}
return SCC;
}
return SDValue();
}
/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select. Callers of this
/// should assume that TheSelect is deleted if this returns true. As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
// fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
// The select + setcc is redundant, because fsqrt returns NaN for X < 0.
if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
// We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
SDValue Sqrt = RHS;
ISD::CondCode CC;
SDValue CmpLHS;
const ConstantFPSDNode *Zero = nullptr;
if (TheSelect->getOpcode() == ISD::SELECT_CC) {
CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
CmpLHS = TheSelect->getOperand(0);
Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
} else {
// SELECT or VSELECT
SDValue Cmp = TheSelect->getOperand(0);
if (Cmp.getOpcode() == ISD::SETCC) {
CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
CmpLHS = Cmp.getOperand(0);
Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
}
}
if (Zero && Zero->isZero() &&
Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
CC == ISD::SETULT || CC == ISD::SETLT)) {
// We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
CombineTo(TheSelect, Sqrt);
return true;
}
}
}
// Cannot simplify select with vector condition
if (TheSelect->getOperand(0).getValueType().isVector()) return false;
// If this is a select from two identical things, try to pull the operation
// through the select.
if (LHS.getOpcode() != RHS.getOpcode() ||
!LHS.hasOneUse() || !RHS.hasOneUse())
return false;
// If this is a load and the token chain is identical, replace the select
// of two loads with a load through a select of the address to load from.
// This triggers in things like "select bool X, 10.0, 123.0" after the FP
// constants have been dropped into the constant pool.
if (LHS.getOpcode() == ISD::LOAD) {
LoadSDNode *LLD = cast<LoadSDNode>(LHS);
LoadSDNode *RLD = cast<LoadSDNode>(RHS);
// Token chains must be identical.
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
// Be conservative for atomics for the moment
// TODO: This does appear to be legal for unordered atomics (see D66309)
!LLD->isSimple() || !RLD->isSimple() ||
// FIXME: If either is a pre/post inc/dec load,
// we'd need to split out the address adjustment.
LLD->isIndexed() || RLD->isIndexed() ||
// If this is an EXTLOAD, the VT's must match.
LLD->getMemoryVT() != RLD->getMemoryVT() ||
// If this is an EXTLOAD, the kind of extension must match.
(LLD->getExtensionType() != RLD->getExtensionType() &&
// The only exception is if one of the extensions is anyext.
LLD->getExtensionType() != ISD::EXTLOAD &&
RLD->getExtensionType() != ISD::EXTLOAD) ||
// FIXME: this discards src value information. This is
// over-conservative. It would be beneficial to be able to remember
// both potential memory locations. Since we are discarding
// src value info, don't do the transformation if the memory
// locations are not in the default address space.
LLD->getPointerInfo().getAddrSpace() != 0 ||
RLD->getPointerInfo().getAddrSpace() != 0 ||
// We can't produce a CMOV of a TargetFrameIndex since we won't
// generate the address generation required.
LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
!TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
LLD->getBasePtr().getValueType()))
return false;
// The loads must not depend on one another.
if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
return false;
// Check that the select condition doesn't reach either load. If so,
// folding this will induce a cycle into the DAG. If not, this is safe to
// xform, so create a select of the addresses.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
// Always fail if LLD and RLD are not independent. TheSelect is a
// predecessor to all Nodes in question so we need not search past it.
Visited.insert(TheSelect);
Worklist.push_back(LLD);
Worklist.push_back(RLD);
if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
return false;
SDValue Addr;
if (TheSelect->getOpcode() == ISD::SELECT) {
// We cannot do this optimization if any pair of {RLD, LLD} is a
// predecessor to {RLD, LLD, CondNode}. As we've already compared the
// Loads, we only need to check if CondNode is a successor to one of the
// loads. We can further avoid this if there's no use of their chain
// value.
SDNode *CondNode = TheSelect->getOperand(0).getNode();
Worklist.push_back(CondNode);
if ((LLD->hasAnyUseOfValue(1) &&
SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
(RLD->hasAnyUseOfValue(1) &&
SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
return false;
Addr = DAG.getSelect(SDLoc(TheSelect),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
RLD->getBasePtr());
} else { // Otherwise SELECT_CC
// We cannot do this optimization if any pair of {RLD, LLD} is a
// predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
// the Loads, we only need to check if CondLHS/CondRHS is a successor to
// one of the loads. We can further avoid this if there's no use of their
// chain value.
SDNode *CondLHS = TheSelect->getOperand(0).getNode();
SDNode *CondRHS = TheSelect->getOperand(1).getNode();
Worklist.push_back(CondLHS);
Worklist.push_back(CondRHS);
if ((LLD->hasAnyUseOfValue(1) &&
SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
(RLD->hasAnyUseOfValue(1) &&
SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
return false;
Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0),
TheSelect->getOperand(1),
LLD->getBasePtr(), RLD->getBasePtr(),
TheSelect->getOperand(4));
}
SDValue Load;
// It is safe to replace the two loads if they have different alignments,
// but the new load must be the minimum (most restrictive) alignment of the
// inputs.
Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
if (!RLD->isInvariant())
MMOFlags &= ~MachineMemOperand::MOInvariant;
if (!RLD->isDereferenceable())
MMOFlags &= ~MachineMemOperand::MODereferenceable;
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
// FIXME: Discards pointer and AA info.
Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
MMOFlags);
} else {
// FIXME: Discards pointer and AA info.
Load = DAG.getExtLoad(
LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
: LLD->getExtensionType(),
SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
}
// Users of the select now use the result of the load.
CombineTo(TheSelect, Load);
// Users of the old loads now use the new load's chain. We know the
// old-load value is dead now.
CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
return true;
}
return false;
}
/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
/// bitwise 'and'.
SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
// If this is a select where the false operand is zero and the compare is a
// check of the sign bit, see if we can perform the "gzip trick":
// select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
// select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
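// Worked example (illustrative, i32): for "select_cc setlt X, 0, A, 0",
// (sra X, 31) is all-ones when X < 0 and zero otherwise, so ANDing it with A
// yields A for negative X and 0 otherwise -- exactly the select result.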
EVT XType = N0.getValueType();
EVT AType = N2.getValueType();
if (!isNullConstant(N3) || !XType.bitsGE(AType))
return SDValue();
// If the comparison is testing for a positive value, we have to invert
// the sign bit mask, so only do that transform if the target has a bitwise
// 'and not' instruction (the invert is free).
if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
// (X > -1) ? A : 0
// (X > 0) ? X : 0 <-- This is canonical signed max.
if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
return SDValue();
} else if (CC == ISD::SETLT) {
// (X < 0) ? A : 0
// (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
return SDValue();
} else {
return SDValue();
}
// and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
// constant.
EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
if (XType.bitsGT(AType)) {
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
AddToWorklist(Shift.getNode());
}
if (CC == ISD::SETGT)
Shift = DAG.getNOT(DL, Shift, AType);
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
}
unsigned ShCt = XType.getSizeInBits() - 1;
if (TLI.shouldAvoidTransformToShift(XType, ShCt))
return SDValue();
SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
if (XType.bitsGT(AType)) {
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
AddToWorklist(Shift.getNode());
}
if (CC == ISD::SETGT)
Shift = DAG.getNOT(DL, Shift, AType);
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
SDLoc DL(N);
unsigned BinOpc = N1.getOpcode();
if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
return SDValue();
if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
return SDValue();
// Fold select(cond, binop(x, y), binop(z, y))
// --> binop(select(cond, x, z), y)
if (N1.getOperand(1) == N2.getOperand(1)) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
}
// Fold select(cond, binop(x, y), binop(x, z))
// --> binop(x, select(cond, y, z))
// Second op VT might be different (e.g. shift amount type)
if (N1.getOperand(0) == N2.getOperand(0) &&
VT == N1.getOperand(1).getValueType() &&
VT == N2.getOperand(1).getValueType()) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
}
// TODO: Handle isCommutativeBinOp patterns as well?
return SDValue();
}
// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
bool IsFabs = N->getOpcode() == ISD::FABS;
bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
return SDValue();
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
// The operand to cast should be integer.
if (!IntVT.isInteger() || IntVT.isVector())
return SDValue();
// (fneg (bitconvert x)) -> (bitconvert (xor x sign))
// (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
APInt SignMask;
if (N0.getValueType().isVector()) {
// For vector, create a sign mask (0x80...) or its inverse (for fabs,
// 0x7f...) per element and splat it.
SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
if (IsFabs)
SignMask = ~SignMask;
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
SignMask = APInt::getSignMask(IntVT.getSizeInBits());
if (IsFabs)
SignMask = ~SignMask;
}
SDLoc DL(N0);
Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
DAG.getConstant(SignMask, DL, IntVT));
AddToWorklist(Int.getNode());
return DAG.getBitcast(VT, Int);
}
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
/// in it. This may be a win when the constant is not otherwise available
/// because it replaces two constant pool loads with one.
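/// Illustrative sketch (assuming 4-byte floats): the pool entry built below
/// holds { FV, TV }, so the emitted sequence is roughly
///   offset = (a cond b) ? sizeof(float) : 0
///   result = load(tmp + offset)
/// i.e. a true condition selects the second array element, the "true" value.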
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
return SDValue();
// If we are before legalize types, we want the other legalization to happen
// first (for example, to avoid messing with soft float).
auto *TV = dyn_cast<ConstantFPSDNode>(N2);
auto *FV = dyn_cast<ConstantFPSDNode>(N3);
EVT VT = N2.getValueType();
if (!TV || !FV || !TLI.isTypeLegal(VT))
return SDValue();
// If a constant can be materialized without loads, this does not make sense.
if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
return SDValue();
// If both constants have multiple uses, then we won't need to do an extra
// load. The values are likely around in registers for other users.
if (!TV->hasOneUse() && !FV->hasOneUse())
return SDValue();
Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
const_cast<ConstantFP*>(TV->getConstantFPValue()) };
Type *FPTy = Elts[0]->getType();
const DataLayout &TD = DAG.getDataLayout();
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
TD.getPrefTypeAlign(FPTy));
Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
// Get offsets to the 0 and 1 elements of the array, so we can select between
// them.
SDValue Zero = DAG.getIntPtrConstant(0, DL);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
SDValue Cond =
DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
AddToWorklist(Cond.getNode());
SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
AddToWorklist(CstOffset.getNode());
CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
AddToWorklist(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(
DAG.getMachineFunction()), Alignment);
}
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare) {
// (x ? y : y) -> y.
if (N2 == N3) return N2;
EVT CmpOpVT = N0.getValueType();
EVT CmpResVT = getSetCCResultType(CmpOpVT);
EVT VT = N2.getValueType();
auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
// Determine if the condition we're dealing with is constant.
if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
AddToWorklist(SCC.getNode());
if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
// fold select_cc true, x, y -> x
// fold select_cc false, x, y -> y
return !(SCCC->isNullValue()) ? N2 : N3;
}
}
if (SDValue V =
convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
return V;
if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
return V;
// fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
// where y has a single bit set.
// In plain words, we can turn the SELECT_CC into an AND
// when the condition can be materialized as an all-ones register. Any
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
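// Worked example (illustrative, i32, y = 0x10): countLeadingZeros(y) == 27,
// so (shl x, 27) moves the tested bit 4 into the sign bit; (sra ..., 31) then
// yields all-ones when that bit was set and zero otherwise, and ANDing with A
// gives A or 0 -- the same result as the original select_cc.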
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
unsigned ShCt = AndMask.getBitWidth() - 1;
if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
SDValue ShlAmt =
DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
// Now arithmetic right shift it all the way over, so the result is
// either all-ones, or zero.
SDValue ShrAmt =
DAG.getConstant(ShCt, SDLoc(Shl),
getShiftAmountTy(Shl.getValueType()));
SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
}
}
}
// fold select C, 16, 0 -> shl C, 4
bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
if ((Fold || Swap) &&
TLI.getBooleanContents(CmpOpVT) ==
TargetLowering::ZeroOrOneBooleanContent &&
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
if (Swap) {
CC = ISD::getSetCCInverse(CC, CmpOpVT);
std::swap(N2C, N3C);
}
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
if (NotExtCompare && N2C->isOne())
return SDValue();
SDValue Temp, SCC;
// zext (setcc n0, n1)
if (LegalTypes) {
SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
if (VT.bitsLT(SCC.getValueType()))
Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
else
Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
} else {
SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
}
AddToWorklist(SCC.getNode());
AddToWorklist(Temp.getNode());
if (N2C->isOne())
return Temp;
unsigned ShCt = N2C->getAPIntValue().logBase2();
if (TLI.shouldAvoidTransformToShift(VT, ShCt))
return SDValue();
// shl setcc result by log2 n2c
return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
DAG.getConstant(ShCt, SDLoc(Temp),
getShiftAmountTy(Temp.getValueType())));
}
// select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
// select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
// select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
// select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
// select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
// select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
// select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
// select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue ValueOnZero = N2;
SDValue Count = N3;
// If the condition is NE instead of E, swap the operands.
if (CC == ISD::SETNE)
std::swap(ValueOnZero, Count);
// Check if the value on zero is a constant equal to the bits in the type.
if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
// If the other operand is cttz/cttz_zero_undef of N0, and cttz is
// legal, combine to just cttz.
if ((Count.getOpcode() == ISD::CTTZ ||
Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
N0 == Count.getOperand(0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
return DAG.getNode(ISD::CTTZ, DL, VT, N0);
// If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
// legal, combine to just ctlz.
if ((Count.getOpcode() == ISD::CTLZ ||
Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
N0 == Count.getOperand(0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
return DAG.getNode(ISD::CTLZ, DL, VT, N0);
}
}
}
return SDValue();
}
/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, const SDLoc &DL,
bool foldBooleans) {
TargetLowering::DAGCombinerInfo
DagCombineInfo(DAG, Level, false, this);
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
/// Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression to select that will generate the same value by multiplying
/// by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
SmallVector<SDNode *, 8> Built;
if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
for (SDNode *N : Built)
AddToWorklist(N);
return S;
}
return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
/// DAG expression that will generate the same value by right shifting.
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
if (!C)
return SDValue();
// Avoid division by zero.
if (C->isNullValue())
return SDValue();
SmallVector<SDNode *, 8> Built;
if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
for (SDNode *N : Built)
AddToWorklist(N);
return S;
}
return SDValue();
}
/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
SmallVector<SDNode *, 8> Built;
if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
for (SDNode *N : Built)
AddToWorklist(N);
return S;
}
return SDValue();
}
/// Determines the LogBase2 value for a non-zero input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
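/// For example (illustrative, i32): V = 16 gives ctlz(V) = 27, so
/// LogBase2(V) = 31 - 27 = 4.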
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
EVT VT = V.getValueType();
SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
return LogBase2;
}
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
/// F(X) = A X - 1 [which has a zero at X = 1/A]
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
/// For the last iteration, put numerator N into it to gain more precision:
/// Result = N X_i + X_i (N - N A X_i)
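/// Illustrative numeric example (A = 4, X_0 = 0.2):
///   X_1 = 0.2 * (2 - 4 * 0.2) = 0.24
///   X_2 = 0.24 * (2 - 4 * 0.24) = 0.2496
/// converging quadratically towards 1/A = 0.25.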
SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
SDNodeFlags Flags) {
if (LegalDAG)
return SDValue();
// TODO: Handle half and/or extended types?
EVT VT = Op.getValueType();
if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
MachineFunction &MF = DAG.getMachineFunction();
int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
if (Enabled == TLI.ReciprocalEstimate::Disabled)
return SDValue();
// Estimates may be explicitly enabled for this type with a custom number of
// refinement steps.
int Iterations = TLI.getDivRefinementSteps(VT, MF);
if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
AddToWorklist(Est.getNode());
SDLoc DL(Op);
if (Iterations) {
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
// Newton iterations: Est = Est + Est (N - Arg * Est)
// If this is the last iteration, also multiply by the numerator.
for (int i = 0; i < Iterations; ++i) {
SDValue MulEst = Est;
if (i == Iterations - 1) {
MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
AddToWorklist(MulEst.getNode());
}
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FSUB, DL, VT,
(i == Iterations - 1 ? N : FPOne), NewEst, Flags);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(NewEst.getNode());
Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
AddToWorklist(Est.getNode());
}
} else {
// If no iterations are available, multiply with N.
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
AddToWorklist(Est.getNode());
}
return Est;
}
return SDValue();
}
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
/// =>
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
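/// Illustrative numeric example (A = 4, so A/2 = 2; X_0 = 0.6):
///   X_1 = 0.6 * (1.5 - 2 * 0.6^2) = 0.6 * 0.78 = 0.468
///   X_2 = 0.468 * (1.5 - 2 * 0.468^2) ~= 0.468 * 1.062 ~= 0.497
/// converging towards 1/sqrt(4) = 0.5.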
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
}
// If non-reciprocal square root is requested, multiply the result by Arg.
if (!Reciprocal)
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
return Est;
}
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
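/// Illustrative numeric example (A = 4, X_0 = 0.6):
///   X_1 = (-0.5 * 0.6) * (4 * 0.6 * 0.6 - 3.0) = (-0.3) * (-1.56) = 0.468
/// which matches the algebraically equivalent one-constant form above.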
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
// This routine must enter the loop below to work correctly
// when (Reciprocal == false).
assert(Iterations > 0);
// Newton iterations for reciprocal square root:
// E = (E * -0.5) * ((A * E) * E + -3.0)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
// When calculating a square root at the last iteration build:
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
// (notice a common subexpression)
SDValue LHS;
if (Reciprocal || (i + 1) < Iterations) {
// RSQRT: LHS = (E * -0.5)
LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
} else {
// SQRT: LHS = (A * E) * -0.5
LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
}
Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
}
return Est;
}
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
bool Reciprocal) {
if (LegalDAG)
return SDValue();
// TODO: Handle half and/or extended types?
EVT VT = Op.getValueType();
if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
MachineFunction &MF = DAG.getMachineFunction();
int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
if (Enabled == TLI.ReciprocalEstimate::Disabled)
return SDValue();
// Estimates may be explicitly enabled for this type with a custom number of
// refinement steps.
int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
bool UseOneConstNR = false;
if (SDValue Est =
TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
Reciprocal)) {
AddToWorklist(Est.getNode());
if (Iterations)
Est = UseOneConstNR
? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
if (!Reciprocal) {
SDLoc DL(Op);
// Try the target specific test first.
SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
// The estimate is now completely wrong if the input was exactly 0.0 or
// possibly a denormal. Force the answer to 0.0 or value provided by
// target for those cases.
Est = DAG.getNode(
Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
}
return Est;
}
return SDValue();
}
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
return buildSqrtEstimateImpl(Op, Flags, true);
}
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
return buildSqrtEstimateImpl(Op, Flags, false);
}
/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
bool IsAtomic;
SDValue BasePtr;
int64_t Offset;
Optional<int64_t> NumBytes;
MachineMemOperand *MMO;
};
auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
int64_t Offset = 0;
if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
? C->getSExtValue()
: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()
: 0;
uint64_t Size =
MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
Offset /*base offset*/,
Optional<int64_t>(Size),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,
(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
: Optional<int64_t>(),
(MachineMemOperand *)nullptr};
// Default.
return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
(int64_t)0 /*offset*/,
Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
};
MemUseCharacteristics MUC0 = getCharacteristics(Op0),
MUC1 = getCharacteristics(Op1);
// If they are to the same address, then they must be aliases.
if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
MUC0.Offset == MUC1.Offset)
return true;
// If they are both volatile then they cannot be reordered.
if (MUC0.IsVolatile && MUC1.IsVolatile)
return true;
// Be conservative about atomics for the moment
// TODO: This is way overconservative for unordered atomics (see D66309)
if (MUC0.IsAtomic && MUC1.IsAtomic)
return true;
if (MUC0.MMO && MUC1.MMO) {
if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
return false;
}
// Try to prove that there is aliasing, or that there is no aliasing. Either
// way, we can return now. If nothing can be proved, proceed with more tests.
bool IsAlias;
if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
DAG, IsAlias))
return IsAlias;
// The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
// either are not known.
if (!MUC0.MMO || !MUC1.MMO)
return true;
// If one operation reads from invariant memory, and the other may store, they
// cannot alias. These should really be checking the equivalent of mayWrite,
// but it only matters for memory nodes other than load/store.
if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
return false;
// If we know required SrcValue1 and SrcValue2 have relatively large
// alignment compared to the size and offset of the access, we may be able
// to prove they do not alias. This check is conservative for now to catch
// cases created by splitting vector types, it only works when the offsets are
// multiples of the size of the data.
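// Illustrative example: two simple 4-byte accesses from the same 8-byte
// aligned base at source offsets 0 and 4 give OffAlign0 = 0 and
// OffAlign1 = 4, so (OffAlign0 + 4) <= OffAlign1 holds and the accesses
// provably do not overlap.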
int64_t SrcValOffset0 = MUC0.MMO->getOffset();
int64_t SrcValOffset1 = MUC1.MMO->getOffset();
Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
auto &Size0 = MUC0.NumBytes;
auto &Size1 = MUC1.NumBytes;
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
SrcValOffset1 % *Size1 == 0) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
return false;
}
bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
? CombinerGlobalAA
: DAG.getSubtarget().useAA();
#ifndef NDEBUG
if (CombinerAAOnlyFunc.getNumOccurrences() &&
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
UseAA = false;
#endif
if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
Size0.hasValue() && Size1.hasValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
if (AA->isNoAlias(
MemoryLocation(MUC0.MMO->getValue(), Overlap0,
UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
MemoryLocation(MUC1.MMO->getValue(), Overlap1,
UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
return false;
}
// Otherwise we have to assume they alias.
return true;
}
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases) {
SmallVector<SDValue, 8> Chains; // List of chains to visit.
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
// TODO: relax aliasing for unordered atomics (see D66309)
const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
// Starting off.
Chains.push_back(OriginalChain);
unsigned Depth = 0;
// Attempt to improve chain by a single step
std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
switch (C.getOpcode()) {
case ISD::EntryToken:
// No need to mark EntryToken.
C = SDValue();
return true;
case ISD::LOAD:
case ISD::STORE: {
// Get alias information for C.
// TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
cast<LSBaseSDNode>(C.getNode())->isSimple();
if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
}
// Alias, so stop here.
return false;
}
case ISD::CopyFromReg:
// Always forward past CopyFromReg.
C = C.getOperand(0);
return true;
case ISD::LIFETIME_START:
case ISD::LIFETIME_END: {
// We can forward past any lifetime start/end that can be proven not to
// alias the memory access.
if (!isAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
}
return false;
}
default:
return false;
}
};
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.pop_back_val();
// Don't bother if we've seen Chain before.
if (!Visited.insert(Chain.getNode()).second)
continue;
// For TokenFactor nodes, look at each operand and only continue up the
// chain until we reach the depth limit.
//
// FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
Aliases.clear();
Aliases.push_back(OriginalChain);
return;
}
if (Chain.getOpcode() == ISD::TokenFactor) {
// We have to check each of the operands of the token factor for "small"
// token factors, so we queue them up. Adding the operands to the queue
// (stack) in reverse order maintains the original order and increases the
// likelihood that getNode will find a matching token factor (CSE).
if (Chain.getNumOperands() > 16) {
Aliases.push_back(Chain);
continue;
}
for (unsigned n = Chain.getNumOperands(); n;)
Chains.push_back(Chain.getOperand(--n));
++Depth;
continue;
}
// Everything else
if (ImproveChain(Chain)) {
// Updated Chain Found, Consider new chain if one exists.
if (Chain.getNode())
Chains.push_back(Chain);
++Depth;
continue;
}
// No Improved Chain Possible, treat as Alias.
Aliases.push_back(Chain);
}
}
/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
if (OptLevel == CodeGenOpt::None)
return OldChain;
// Ops for replacing token factor.
SmallVector<SDValue, 8> Aliases;
// Accumulate all the aliases to this node.
GatherAllAliases(N, OldChain, Aliases);
// If no operands then chain to entry token.
if (Aliases.size() == 0)
return DAG.getEntryNode();
// If a single operand then chain to it. We don't need to revisit it.
if (Aliases.size() == 1)
return Aliases[0];
// Construct a custom tailored token factor.
return DAG.getTokenFactor(SDLoc(N), Aliases);
}
namespace {
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because mergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running mergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
SmallVector<StoreSDNode *, 8> ChainedStores;
StoreSDNode *STChain = St;
// Intervals records which offsets from BaseIndex have been covered. In
// the common case, every store writes to the address immediately after the
// previous one and is thus merged with the previous interval at insertion
// time.
using IMap =
llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
IMap::Allocator A;
IMap Intervals(A);
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
return false;
// Do not handle stores to undef base pointers.
if (BasePtr.getBase().isUndef())
return false;
// Do not handle stores to opaque types
if (St->getMemoryVT().isZeroSized())
return false;
// BaseIndexOffset assumes that offsets are fixed-size, which
// is not valid for scalable vectors where the offsets are
// scaled by `vscale`, so bail out early.
if (St->getMemoryVT().isScalableVector())
return false;
// Add ST's interval.
Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
if (Chain->getMemoryVT().isScalableVector())
return false;
// If the chain has more than one use, then we can't reorder the mem ops.
if (!SDValue(Chain, 0)->hasOneUse())
break;
// TODO: Relax for unordered atomics (see D66309)
if (!Chain->isSimple() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
// Check that the base pointer is the same as the original one.
int64_t Offset;
if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
break;
int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
// Make sure we don't overlap with other intervals by checking the ones to
// the left or right before inserting.
auto I = Intervals.find(Offset);
// If there's a next interval, we should end before it.
if (I != Intervals.end() && I.start() < (Offset + Length))
break;
// If there's a previous interval, we should start after it.
if (I != Intervals.begin() && (--I).stop() <= Offset)
break;
Intervals.insert(Offset, Offset + Length, Unit);
ChainedStores.push_back(Chain);
STChain = Chain;
}
// If we didn't find a chained store, exit.
if (ChainedStores.size() == 0)
return false;
// Improve all chained stores (St and ChainedStores members) starting from
// where the store chain ended and return single TokenFactor.
SDValue NewChain = STChain->getChain();
SmallVector<SDValue, 8> TFOps;
for (unsigned I = ChainedStores.size(); I;) {
StoreSDNode *S = ChainedStores[--I];
SDValue BetterChain = FindBetterChain(S, NewChain);
S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
TFOps.push_back(SDValue(S, 0));
ChainedStores[I] = S;
}
// Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
SDValue BetterChain = FindBetterChain(St, NewChain);
SDValue NewST;
if (St->isTruncatingStore())
NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
St->getBasePtr(), St->getMemoryVT(),
St->getMemOperand());
else
NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
St->getBasePtr(), St->getMemOperand());
TFOps.push_back(NewST);
// If we improved every element of TFOps, then we've lost the dependence on
// NewChain to successors of St and we need to add it back to TFOps. Do so at
// the beginning to keep relative order consistent with FindBetterChains.
auto hasImprovedChain = [&](SDValue ST) -> bool {
return ST->getOperand(0) != NewChain;
};
bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
if (AddNewChain)
TFOps.insert(TFOps.begin(), NewChain);
SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
CombineTo(St, TF);
// Add TF and its operands to the worklist.
AddToWorklist(TF.getNode());
for (const SDValue &Op : TF->ops())
AddToWorklist(Op.getNode());
AddToWorklist(STChain);
return true;
}
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None)
return false;
const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
return false;
// Do not handle stores to undef base pointers.
if (BasePtr.getBase().isUndef())
return false;
// Directly improve a chain of disjoint stores starting at St.
if (parallelizeChainedStores(St))
return true;
// Improve St's Chain.
SDValue BetterChain = FindBetterChain(St, St->getChain());
if (St->getChain() != BetterChain) {
replaceStoreChain(St, BetterChain);
return true;
}
return false;
}
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
CodeGenOpt::Level OptLevel) {
/// This is the main entry point to this class.
DAGCombiner(*this, AA, OptLevel).Run(Level);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b8a3dd014901..328e9430d635 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1,5027 +1,5030 @@
//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements integer type expansion and promotion for LegalizeTypes.
// Promotion is the act of changing a computation in an illegal type into a
// computation in a larger type. For example, implementing i8 arithmetic in an
// i32 register (often needed on powerpc).
// Expansion is the act of changing a computation in an illegal type into a
// computation in two identical registers of a smaller type. For example,
// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
// targets).
//
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
//===----------------------------------------------------------------------===//
// Integer Result Promotion
//===----------------------------------------------------------------------===//
/// PromoteIntegerResult - This method is called when a result of a node is
/// found to be in need of promotion to a larger type. At this point, the node
/// may also have invalid operands or may have other results that need
/// expansion, we just know that (at least) one result needs promotion.
void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
return;
}
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to promote this operator!");
case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::PARITY:
case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break;
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
break;
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UMIN:
case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;
case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
case ISD::VSCALE: Res = PromoteIntRes_VSCALE(N); break;
case ISD::EXTRACT_SUBVECTOR:
Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_SUBVECTOR:
Res = PromoteIntRes_INSERT_SUBVECTOR(N); break;
case ISD::VECTOR_REVERSE:
Res = PromoteIntRes_VECTOR_REVERSE(N); break;
case ISD::VECTOR_SHUFFLE:
Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
case ISD::VECTOR_SPLICE:
Res = PromoteIntRes_VECTOR_SPLICE(N); break;
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
case ISD::BUILD_VECTOR:
Res = PromoteIntRes_BUILD_VECTOR(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
case ISD::SPLAT_VECTOR:
Res = PromoteIntRes_SPLAT_VECTOR(N); break;
case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
Res = PromoteIntRes_FP_TO_XINT_SAT(N); break;
case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break;
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
case ISD::SDIV:
case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UDIV:
case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
case ISD::SADDO:
case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
case ISD::UADDO:
case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
case ISD::SMULO:
case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
case ISD::ADDE:
case ISD::SUBE:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break;
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
case ISD::SSHLSAT:
case ISD::USHLSAT: Res = PromoteIntRes_ADDSUBSHLSAT(N); break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
case ISD::SDIVFIX:
case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
case ISD::UDIVFIXSAT: Res = PromoteIntRes_DIVFIX(N); break;
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
case ISD::ATOMIC_LOAD:
Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_SWAP:
Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
Res = PromoteIntRes_VECREDUCE(N);
break;
case ISD::FREEZE:
Res = PromoteIntRes_FREEZE(N);
break;
case ISD::ROTL:
case ISD::ROTR:
Res = PromoteIntRes_Rotate(N);
break;
case ISD::FSHL:
case ISD::FSHR:
Res = PromoteIntRes_FunnelShift(N);
break;
}
// If the result is null then the sub-method took care of registering it.
if (Res.getNode())
SetPromotedInteger(SDValue(N, ResNo), Res);
}
SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
return GetPromotedInteger(Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
// Sign-extend the new bits, and continue the assertion.
SDValue Op = SExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::AssertSext, SDLoc(N),
Op.getValueType(), Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
// Zero the new bits, and continue the assertion.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::AssertZext, SDLoc(N),
Op.getValueType(), Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(), ResVT,
N->getChain(), N->getBasePtr(),
N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
Op2, N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
unsigned ResNo) {
if (ResNo == 1) {
assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
EVT SVT = getSetCCResultType(N->getOperand(2).getValueType());
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
// Only use the result of getSetCCResultType if it is legal,
// otherwise just use the promoted result type (NVT).
if (!TLI.isTypeLegal(SVT))
SVT = NVT;
SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other);
SDValue Res = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs,
N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3),
N->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
return Res.getValue(1);
}
// Op2 is used for the comparison and thus must be extended according to the
// target's atomic operations. Op3 is merely stored and so can be left alone.
SDValue Op2 = N->getOperand(2);
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
switch (TLI.getExtendForAtomicCmpSwapArg()) {
case ISD::SIGN_EXTEND:
Op2 = SExtPromotedInteger(Op2);
break;
case ISD::ZERO_EXTEND:
Op2 = ZExtPromotedInteger(Op2);
break;
case ISD::ANY_EXTEND:
Op2 = GetPromotedInteger(Op2);
break;
default:
llvm_unreachable("Invalid atomic op extension");
}
SDVTList VTs =
DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
SDValue Res = DAG.getAtomicCmpSwap(
N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
N->getBasePtr(), Op2, Op3, N->getMemOperand());
// Update the use to N with the newly created Res.
for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i)
ReplaceValueWith(SDValue(N, i), Res.getValue(i));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
SDLoc dl(N);
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
// The input promotes to the same size. Convert the promoted value.
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
case TargetLowering::TypeSoftenFloat:
// Promote the integer operand by hand.
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
case TargetLowering::TypeSoftPromoteHalf:
// Promote the integer operand by hand.
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftPromotedHalf(InOp));
case TargetLowering::TypePromoteFloat: {
// Convert the promoted float by hand.
if (!NOutVT.isVector())
return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, GetPromotedFloat(InOp));
break;
}
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
break;
case TargetLowering::TypeScalarizeVector:
// Convert the element to an integer and promote it by hand.
if (!NOutVT.isVector())
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
BitConvertToInteger(GetScalarizedVector(InOp)));
break;
case TargetLowering::TypeScalarizeScalableVector:
report_fatal_error("Scalarization of scalable vectors is not supported.");
case TargetLowering::TypeSplitVector: {
if (!NOutVT.isVector()) {
// For example, i32 = BITCAST v2i16 on alpha. Convert the split
// pieces of the input into integers and reassemble in the final type.
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
Lo = BitConvertToInteger(Lo);
Hi = BitConvertToInteger(Hi);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
EVT::getIntegerVT(*DAG.getContext(),
NOutVT.getSizeInBits()),
JoinIntegers(Lo, Hi));
return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
break;
}
case TargetLowering::TypeWidenVector:
// The input is widened to the same size. Convert to the widened value.
// Make sure that the outgoing value is not a vector, because this would
// make us bitcast between two vectors which are legalized in different ways.
if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) {
SDValue Res =
DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
// For big endian targets we need to shift the casted value or the
// interesting bits will end up at the wrong place.
if (DAG.getDataLayout().isBigEndian()) {
unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout());
assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!");
Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res,
DAG.getConstant(ShiftAmt, dl, ShiftAmtTy));
}
return Res;
}
// If the output type is also a vector and widening it to the same size
// as the widened input type would be a legal type, we can widen the bitcast
// and handle the promotion after.
if (NOutVT.isVector()) {
unsigned WidenInSize = NInVT.getSizeInBits();
unsigned OutSize = OutVT.getSizeInBits();
if (WidenInSize % OutSize == 0) {
unsigned Scale = WidenInSize / OutSize;
EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(),
OutVT.getVectorElementType(),
OutVT.getVectorNumElements() * Scale);
if (isTypeLegal(WideOutVT)) {
InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
DAG.getVectorIdxConstant(0, dl));
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp);
}
}
}
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
CreateStackStoreLoad(InOp, OutVT));
}
// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount
// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
SelectionDAG &DAG) {
EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
// If any possible shift value won't fit in the preferred type, just use
// something safe. It will be legalized when the shift is expanded.
if (!ShiftVT.isVector() &&
ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))
ShiftVT = MVT::i32;
return ShiftVT;
}
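// The check above compares the preferred shift type's width against
// Log2_32_Ceil(VT.getSizeInBits()), the smallest number of bits that can hold
// any in-range shift amount for VT. A small standalone sketch of that quantity
// with concrete widths; the helper name is illustrative only.
constexpr unsigned shiftAmountBits(unsigned Bits) {
  // Smallest N with 2^N >= Bits, i.e. enough bits to encode 0 .. Bits-1.
  unsigned N = 0;
  while ((1u << N) < Bits)
    ++N;
  return N;
}

static_assert(shiftAmountBits(256) == 8, "an i256 shift amount goes up to 255");
static_assert(shiftAmountBits(32) == 5, "an i32 shift amount goes up to 31");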
SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {
SDValue V = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::FREEZE, SDLoc(N),
V.getValueType(), V);
}
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
SDLoc dl(N);
// If the larger BSWAP isn't supported by the target, try to expand now.
// If we expand later we'll end up with more operations since we lost the
// original type. We only do this for scalars since we have a shuffle
// based lowering for vectors in LegalizeVectorOps.
if (!OVT.isVector() &&
!TLI.isOperationLegalOrCustomOrPromote(ISD::BSWAP, NVT)) {
if (SDValue Res = TLI.expandBSWAP(N, DAG))
return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res);
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
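// Worked example of the promotion above with concrete widths: an i16 BSWAP
// carried in an i32 register swaps all four bytes and then shifts the result
// right by DiffBits (here 16) to bring the interesting bytes back down. A
// minimal standalone sketch; the helper names are illustrative only.
#include <cstdint>

// Portable byte swap of a 32-bit value (ISD::BSWAP on the promoted type).
constexpr uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0x0000FF00u) |
         ((V << 8) & 0x00FF0000u) | (V << 24);
}

// The upper 16 bits of In are "don't care" (any-extend of the i16 operand).
constexpr uint32_t bswap16_via_i32(uint32_t In) {
  return bswap32(In) >> 16; // ISD::SRL by DiffBits = 32 - 16
}

static_assert(bswap16_via_i32(0xABCD1234u) == 0x3412u,
              "low half is bswap16(0x1234), whatever the upper bits were");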
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
SDLoc dl(N);
// If the larger BITREVERSE isn't supported by the target, try to expand now.
// If we expand later we'll end up with more operations since we lost the
// original type. We only do this for scalars since we have a shuffle
// based lowering for vectors in LegalizeVectorOps.
if (!OVT.isVector() && OVT.isSimple() &&
!TLI.isOperationLegalOrCustomOrPromote(ISD::BITREVERSE, NVT)) {
if (SDValue Res = TLI.expandBITREVERSE(N, DAG))
return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res);
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
// The pair element type may be legal, or may not promote to the same type as
// the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N),
TLI.getTypeToTransformTo(*DAG.getContext(),
N->getValueType(0)), JoinIntegers(N->getOperand(0),
N->getOperand(1)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
EVT VT = N->getValueType(0);
// FIXME there is no actual debug info here
SDLoc dl(N);
// Zero extend things like i1, sign extend everything else. It shouldn't
// matter in theory which one we pick, but this tends to give better code?
unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Result = DAG.getNode(Opc, dl,
TLI.getTypeToTransformTo(*DAG.getContext(), VT),
SDValue(N, 0));
assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
return Result;
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
SDLoc dl(N);
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(
ISD::SUB, dl, NVT, Op,
DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl,
NVT));
}
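// Worked example of the subtraction above with concrete widths: counting
// leading zeros of an i8 value in an i32 register adds 24 extra leading
// zeros, which have to be subtracted back off. A minimal standalone sketch;
// the helper names are illustrative only.
#include <cstdint>

// Leading-zero count of a 32-bit value (32 for zero).
constexpr unsigned ctlz32(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; Bit != 0 && (V & Bit) == 0; Bit >>= 1)
    ++N;
  return N;
}

constexpr unsigned ctlz8_via_i32(uint8_t V) {
  // Zero-extend, count in 32 bits, drop the 24 zeros added by the extension.
  return ctlz32(static_cast<uint32_t>(V)) - 24;
}

static_assert(ctlz8_via_i32(0x10) == 3, "0b00010000 has three leading zeros");
static_assert(ctlz8_via_i32(0x00) == 8, "all eight original bits are zero");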
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
// Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
SDLoc dl(N);
if (N->getOpcode() == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
OVT.getScalarSizeInBits());
Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
}
return DAG.getNode(N->getOpcode(), dl, NVT, Op);
}
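// Worked example of the CTTZ fix-up above with concrete widths: OR-ing in the
// bit just above the original width (bit 8 for an i8 operand) caps the count
// at 8 when the original value is zero and changes nothing otherwise. A
// minimal standalone sketch; the helper names are illustrative only.
#include <cstdint>

// Trailing-zero count of a 32-bit value (32 for zero).
constexpr unsigned cttz32(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Bit = 1; Bit != 0 && (V & Bit) == 0; Bit <<= 1)
    ++N;
  return N;
}

constexpr unsigned cttz8_via_i32(uint8_t V) {
  return cttz32(static_cast<uint32_t>(V) | 0x100u); // set the "top bit" guard
}

static_assert(cttz8_via_i32(0x10) == 4, "guard bit sits above the answer");
static_assert(cttz8_via_i32(0x00) == 8, "zero input stops at the guard bit");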
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// If the input also needs to be promoted, do that first so we can get a
// good idea for the output type.
if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType())
== TargetLowering::TypePromoteInteger) {
SDValue In = GetPromotedInteger(Op0);
// If the new type is larger than NVT, use it. We probably won't need to
// promote it again.
EVT SVT = In.getValueType().getScalarType();
if (SVT.bitsGE(NVT)) {
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1);
return DAG.getAnyExtOrTrunc(Ext, dl, NVT);
}
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1);
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NewOpc = N->getOpcode();
SDLoc dl(N);
// If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
// not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
// and SINT conversions are Custom, there is no way to tell which is
// preferable. We choose SINT because that's the right thing on PPC.)
if (N->getOpcode() == ISD::FP_TO_UINT &&
!TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
NewOpc = ISD::FP_TO_SINT;
if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
!TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
NewOpc = ISD::STRICT_FP_TO_SINT;
SDValue Res;
if (N->isStrictFPOpcode()) {
Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
{N->getOperand(0), N->getOperand(1)});
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
} else
Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
//
// NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example:
// before legalization: fp-to-uint16, 65534. -> 0xfffe
// after legalization: fp-to-sint32, 65534. -> 0x0000fffe
return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
N->getOpcode() == ISD::STRICT_FP_TO_UINT) ?
ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
DAG.getValueType(N->getValueType(0).getScalarType()));
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) {
// Promote the result type, while keeping the original width in Op1.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0),
N->getOperand(1));
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
SDValue Res =
DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
if (getTypeAction(N->getOperand(0).getValueType())
== TargetLowering::TypePromoteInteger) {
SDValue Res = GetPromotedInteger(N->getOperand(0));
assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
// If the result and operand types are the same after promotion, simplify
// to an in-register extension.
if (NVT == Res.getValueType()) {
// The high bits are not guaranteed to be anything. Insert an extend.
if (N->getOpcode() == ISD::SIGN_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
DAG.getValueType(N->getOperand(0).getValueType()));
if (N->getOpcode() == ISD::ZERO_EXTEND)
return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
return Res;
}
}
// Otherwise, just extend the original operand all the way to the larger type.
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
ISD::LoadExtType ExtType =
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
SDLoc dl(N);
SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
N->getMemoryVT(), N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
N->getOffset(), N->getMask(), ExtPassThru,
N->getMemoryVT(), N->getMemOperand(),
N->getAddressingMode(), ISD::EXTLOAD);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
assert(NVT == ExtPassThru.getValueType() &&
"Gather result type and the passThru argument type should be the same");
ISD::LoadExtType ExtType = N->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD)
ExtType = ISD::EXTLOAD;
SDLoc dl(N);
SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand(), N->getIndexType(),
ExtType);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Change the return type of the boolean result while obeying
// getSetCCResultType.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
EVT VT = N->getValueType(0);
EVT SVT = getSetCCResultType(VT);
SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) };
unsigned NumOps = N->getNumOperands();
assert(NumOps <= 3 && "Too many operands");
if (NumOps == 3)
Ops[2] = N->getOperand(2);
SDLoc dl(N);
SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
makeArrayRef(Ops, NumOps));
// Modified the sum result - switch anything that used the old sum to use
// the new one.
ReplaceValueWith(SDValue(N, 0), Res);
// Convert to the expected type.
return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
// If the promoted type is legal, we can convert this to:
// 1. ANY_EXTEND iN to iM
// 2. SHL by M-N
// 3. [US][ADD|SUB|SHL]SAT
// 4. L/ASHR by M-N
// Else it is more efficient to convert this to a min and a max
// operation in the higher precision arithmetic.
SDLoc dl(N);
SDValue Op1 = N->getOperand(0);
SDValue Op2 = N->getOperand(1);
unsigned OldBits = Op1.getScalarValueSizeInBits();
unsigned Opcode = N->getOpcode();
bool IsShift = Opcode == ISD::USHLSAT || Opcode == ISD::SSHLSAT;
SDValue Op1Promoted, Op2Promoted;
if (IsShift) {
Op1Promoted = GetPromotedInteger(Op1);
Op2Promoted = ZExtPromotedInteger(Op2);
} else if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
Op1Promoted = ZExtPromotedInteger(Op1);
Op2Promoted = ZExtPromotedInteger(Op2);
} else {
Op1Promoted = SExtPromotedInteger(Op1);
Op2Promoted = SExtPromotedInteger(Op2);
}
EVT PromotedType = Op1Promoted.getValueType();
unsigned NewBits = PromotedType.getScalarSizeInBits();
if (Opcode == ISD::UADDSAT) {
APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Add =
DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
}
// USUBSAT can always be promoted as long as we have zero-extended the args.
if (Opcode == ISD::USUBSAT)
return DAG.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted,
Op2Promoted);
// Shift cannot use a min/max expansion; we can't detect overflow if all of
// the bits have been shifted out.
if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
case ISD::SSUBSAT:
case ISD::SSHLSAT:
ShiftOp = ISD::SRA;
break;
case ISD::USHLSAT:
ShiftOp = ISD::SRL;
break;
default:
llvm_unreachable("Expected opcode to be signed or unsigned saturation "
"addition, subtraction or left shift");
}
unsigned SHLAmount = NewBits - OldBits;
EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
Op1Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
if (!IsShift)
Op2Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
SDValue Result =
DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
}
unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Result =
DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
return Result;
}
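// Worked example of the shift-based path above (the case where the wider
// saturating operation is available) with concrete widths: shift both i8
// operands into the top byte of an i32, saturate there, then shift back down.
// A minimal standalone sketch; the helper names are illustrative only.
#include <cstdint>

// Saturating signed 32-bit add, standing in for the promoted-type SADDSAT.
constexpr int32_t sadd_sat_i32(int32_t A, int32_t B) {
  int64_t S = static_cast<int64_t>(A) + B;
  return S > INT32_MAX ? INT32_MAX
         : S < INT32_MIN ? INT32_MIN : static_cast<int32_t>(S);
}

constexpr int8_t sadd_sat_i8_via_i32(int8_t A, int8_t B) {
  // Multiply by 2^24 (a well-defined spelling of "<< 24" for negatives):
  // after the shift, the i32 saturation bounds line up with the i8 bounds.
  int32_t Wide = sadd_sat_i32(A * (1 << 24), B * (1 << 24));
  return static_cast<int8_t>(Wide >> 24); // SRA back down by 24
}

static_assert(sadd_sat_i8_via_i32(100, 100) == 127, "clamped to INT8_MAX");
static_assert(sadd_sat_i8_via_i32(-100, -100) == -128, "clamped to INT8_MIN");
static_assert(sadd_sat_i8_via_i32(3, 4) == 7, "the non-saturating case");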
SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
// Can just promote the operands then continue with operation.
SDLoc dl(N);
SDValue Op1Promoted, Op2Promoted;
bool Signed =
N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
bool Saturating =
N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT;
if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
} else {
Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
}
EVT OldType = N->getOperand(0).getValueType();
EVT PromotedType = Op1Promoted.getValueType();
unsigned DiffSize =
PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
if (Saturating) {
// Promoting the operand and result values changes the saturation width,
// which extends the values that we clamp to on saturation. This could be
// resolved by shifting one of the operands the same amount, which would
// also shift the result we compare against, then shifting back.
EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
DAG.getConstant(DiffSize, dl, ShiftTy));
SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
Op2Promoted, N->getOperand(2));
unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL;
return DAG.getNode(ShiftOp, dl, PromotedType, Result,
DAG.getConstant(DiffSize, dl, ShiftTy));
}
return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
N->getOperand(2));
}
static SDValue SaturateWidenedDIVFIX(SDValue V, SDLoc &dl,
unsigned SatW, bool Signed,
const TargetLowering &TLI,
SelectionDAG &DAG) {
EVT VT = V.getValueType();
unsigned VTW = VT.getScalarSizeInBits();
if (!Signed) {
// Saturate to the unsigned maximum by getting the minimum of V and the
// maximum.
return DAG.getNode(ISD::UMIN, dl, VT, V,
DAG.getConstant(APInt::getLowBitsSet(VTW, SatW),
dl, VT));
}
// Saturate to the signed maximum (the low SatW - 1 bits) by taking the
// signed minimum of it and V.
V = DAG.getNode(ISD::SMIN, dl, VT, V,
DAG.getConstant(APInt::getLowBitsSet(VTW, SatW - 1),
dl, VT));
// Saturate to the signed minimum (the high SatW + 1 bits) by taking the
// signed maximum of it and V.
V = DAG.getNode(ISD::SMAX, dl, VT, V,
DAG.getConstant(APInt::getHighBitsSet(VTW, VTW - SatW + 1),
dl, VT));
return V;
}
static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
unsigned Scale, const TargetLowering &TLI,
SelectionDAG &DAG, unsigned SatW = 0) {
EVT VT = LHS.getValueType();
unsigned VTSize = VT.getScalarSizeInBits();
bool Signed = N->getOpcode() == ISD::SDIVFIX ||
N->getOpcode() == ISD::SDIVFIXSAT;
bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
N->getOpcode() == ISD::UDIVFIXSAT;
SDLoc dl(N);
// Widen the types by a factor of two. This is guaranteed to expand, since it
// will always have enough high bits in the LHS to shift into.
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
if (VT.isVector())
WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
VT.getVectorElementCount());
if (Signed) {
LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT);
RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT);
} else {
LHS = DAG.getZExtOrTrunc(LHS, dl, WideVT);
RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT);
}
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
DAG);
assert(Res && "Expanding DIVFIX with wide type failed?");
if (Saturating) {
// If the caller has told us to saturate at something less, use that width
// instead of the type before doubling. However, it cannot be more than
// what we just widened!
assert(SatW <= VTSize &&
"Tried to saturate to more than the original type?");
Res = SaturateWidenedDIVFIX(Res, dl, SatW == 0 ? VTSize : SatW, Signed,
TLI, DAG);
}
return DAG.getZExtOrTrunc(Res, dl, VT);
}
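// Worked example of the widening idea above with concrete widths: a
// fixed-point divide with scale 4 computes (LHS << 4) / RHS, and doing that
// on i8 operands in an i16 register guarantees the shift cannot lose bits.
// A minimal standalone sketch of the unsigned, non-saturating case; the
// helper name is illustrative only.
#include <cstdint>

constexpr uint8_t udivfix8_scale4(uint8_t LHS, uint8_t RHS) {
  uint16_t WideLHS = static_cast<uint16_t>(LHS) << 4; // shift into the new high bits
  uint16_t WideRHS = RHS;
  return static_cast<uint8_t>(WideLHS / WideRHS);
}

// In Q4.4 fixed point, 0x18 is 1.5 and 0x08 is 0.5; 1.5 / 0.5 is 3.0 = 0x30.
static_assert(udivfix8_scale4(0x18, 0x08) == 0x30, "");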
SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
SDLoc dl(N);
SDValue Op1Promoted, Op2Promoted;
bool Signed = N->getOpcode() == ISD::SDIVFIX ||
N->getOpcode() == ISD::SDIVFIXSAT;
bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
N->getOpcode() == ISD::UDIVFIXSAT;
if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
} else {
Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
}
EVT PromotedType = Op1Promoted.getValueType();
unsigned Scale = N->getConstantOperandVal(2);
// If the type is already legal and the operation is legal in that type, we
// should not early expand.
if (TLI.isTypeLegal(PromotedType)) {
TargetLowering::LegalizeAction Action =
TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) {
EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
unsigned Diff = PromotedType.getScalarSizeInBits() -
N->getValueType(0).getScalarSizeInBits();
if (Saturating)
Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
DAG.getConstant(Diff, dl, ShiftTy));
SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
Op2Promoted, N->getOperand(2));
if (Saturating)
Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res,
DAG.getConstant(Diff, dl, ShiftTy));
return Res;
}
}
// See if we can perform the division in this type without expanding.
if (SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, Op1Promoted,
Op2Promoted, Scale, DAG)) {
if (Saturating)
Res = SaturateWidenedDIVFIX(Res, dl,
N->getValueType(0).getScalarSizeInBits(),
Signed, TLI, DAG);
return Res;
}
// If we cannot, expand it to twice the type width. If we are saturating, give
// it the original width as a saturating width so we don't need to emit
// two saturations.
return earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG,
N->getValueType(0).getScalarSizeInBits());
}
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
// The operation overflowed iff the result in the larger type is not the
// sign extension of its truncation to the original type.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
EVT OVT = N->getOperand(0).getValueType();
EVT NVT = LHS.getValueType();
SDLoc dl(N);
// Do the arithmetic in the larger type.
unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
// Calculate the overflow flag: sign extend the arithmetic result from
// the original type.
SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
DAG.getValueType(OVT));
// Overflowed if and only if this is not equal to Res.
Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
// Use the calculated overflow everywhere.
ReplaceValueWith(SDValue(N, 1), Ofl);
return Res;
}
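// Worked example of the overflow test above with concrete widths: the i8
// signed add is done in i32, and it overflowed exactly when the wide sum is
// not the sign extension of its own low 8 bits. A minimal standalone sketch;
// the helper names are illustrative only (two's complement assumed).
#include <cstdint>

// ISD::SIGN_EXTEND_INREG of the low 8 bits of a 32-bit value.
constexpr int32_t sext_inreg_i8(int32_t V) {
  return static_cast<int32_t>(static_cast<int8_t>(V & 0xFF));
}

constexpr bool saddo_i8_via_i32(int8_t A, int8_t B) {
  int32_t Res = static_cast<int32_t>(A) + static_cast<int32_t>(B);
  return sext_inreg_i8(Res) != Res; // the SETNE above
}

static_assert(saddo_i8_via_i32(100, 100), "200 does not fit in i8");
static_assert(!saddo_i8_via_i32(50, 20), "70 fits in i8");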
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), N->getOperand(0), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
SDValue Mask = N->getOperand(0);
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, SDLoc(N),
LHS.getValueType(), Mask, LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(2));
SDValue RHS = GetPromotedInteger(N->getOperand(3));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
LHS.getValueType(), N->getOperand(0),
N->getOperand(1), LHS, RHS, N->getOperand(4));
}
SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
EVT InVT = N->getOperand(OpNo).getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT SVT = getSetCCResultType(InVT);
// If we got back a type that needs to be promoted, this likely means the
// input type also needs to be promoted. So get the promoted type for
// the input and try the query again.
if (getTypeAction(SVT) == TargetLowering::TypePromoteInteger) {
if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
InVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
SVT = getSetCCResultType(InVT);
} else {
// Input type isn't promoted, just use the default promoted type.
SVT = NVT;
}
}
SDLoc dl(N);
assert(SVT.isVector() == N->getOperand(OpNo).getValueType().isVector() &&
"Vector compare must return a vector result!");
// Get the SETCC result using the canonical SETCC type.
SDValue SetCC;
if (N->isStrictFPOpcode()) {
EVT VTs[] = {SVT, MVT::Other};
SDValue Opers[] = {N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3)};
SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1));
} else
SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
N->getOperand(1), N->getOperand(2));
// Convert to the expected type.
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N),
Op.getValueType(), Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
// The input may have strange things in the top bits of the registers, but
// these operations don't care. They may have weird bits going out, but
// that too is okay if they are integer operations.
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = GetPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
// Sign extend the input.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
// It doesn't matter if we sign extend or zero extend in the inputs. So do
// whatever is best for the target.
SDValue LHS = SExtOrZExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtOrZExtPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
// The input value must be properly sign extended.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
// The input value must be properly zero extended.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
// Lower the rotate to shifts and ORs which can be promoted.
SDValue Res;
TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
ReplaceValueWith(SDValue(N, 0), Res);
return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
SDValue Hi = GetPromotedInteger(N->getOperand(0));
SDValue Lo = GetPromotedInteger(N->getOperand(1));
SDValue Amount = GetPromotedInteger(N->getOperand(2));
SDLoc DL(N);
EVT OldVT = N->getOperand(0).getValueType();
EVT VT = Lo.getValueType();
unsigned Opcode = N->getOpcode();
bool IsFSHR = Opcode == ISD::FSHR;
unsigned OldBits = OldVT.getScalarSizeInBits();
unsigned NewBits = VT.getScalarSizeInBits();
// Amount has to be interpreted modulo the old bit width.
Amount =
DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT));
// If the promoted type is twice the size (or more), then we use the
// traditional funnel 'double' shift codegen. This isn't necessary if the
// shift amount is constant.
// fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
// fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) &&
!TLI.isOperationLegalOrCustom(Opcode, VT)) {
SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift);
Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount);
if (!IsFSHR)
Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift);
return Res;
}
// Shift Lo up to occupy the upper bits of the promoted type.
SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT);
Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset);
// Increase Amount to shift the result into the lower bits of the promoted
// type.
if (IsFSHR)
Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset);
return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount);
}
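// Worked example of the "double shift" form above with concrete widths: for
// i8 operands in an i32 register, fshl concatenates hi:lo into 16 bits,
// shifts by the amount modulo 8, and takes the byte that lands just above
// the low half. A minimal standalone sketch; the helper name is illustrative.
#include <cstdint>

constexpr uint8_t fshl8_via_i32(uint8_t Hi, uint8_t Lo, uint32_t Amt) {
  uint32_t Concat = (static_cast<uint32_t>(Hi) << 8) | Lo; // (aext(x) << bw) | zext(y)
  return static_cast<uint8_t>((Concat << (Amt % 8)) >> 8);  // << (z % bw), then >> bw
}

// fshl(0xAB, 0xCD, 4): the 16-bit window 0xABCD shifted left by 4 is 0xBCD0,
// whose high byte is 0xBC.
static_assert(fshl8_via_i32(0xAB, 0xCD, 4) == 0xBC, "");
static_assert(fshl8_via_i32(0xAB, 0xCD, 0) == 0xAB, "shift by zero returns Hi");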
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res;
SDValue InOp = N->getOperand(0);
SDLoc dl(N);
switch (getTypeAction(InOp.getValueType())) {
default: llvm_unreachable("Unknown type action!");
case TargetLowering::TypeLegal:
case TargetLowering::TypeExpandInteger:
Res = InOp;
break;
case TargetLowering::TypePromoteInteger:
Res = GetPromotedInteger(InOp);
break;
case TargetLowering::TypeSplitVector: {
EVT InVT = InOp.getValueType();
assert(InVT.isVector() && "Cannot split scalar types");
ElementCount NumElts = InVT.getVectorElementCount();
assert(NumElts == NVT.getVectorElementCount() &&
"Dst and Src must have the same number of elements");
assert(isPowerOf2_32(NumElts.getKnownMinValue()) &&
"Promoted vector type must be a power of two");
SDValue EOp1, EOp2;
GetSplitVector(InOp, EOp1, EOp2);
EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
NumElts.divideCoefficientBy(2));
EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
}
case TargetLowering::TypeWidenVector: {
SDValue WideInOp = GetWidenedVector(InOp);
// Truncate widened InOp.
unsigned NumElem = WideInOp.getValueType().getVectorNumElements();
EVT TruncVT = EVT::getVectorVT(*DAG.getContext(),
N->getValueType(0).getScalarType(), NumElem);
SDValue WideTrunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, WideInOp);
// Zero extend so that the elements are of same type as those of NVT
EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), NVT.getVectorElementType(),
NumElem);
SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc);
// Extract the low NVT subvector.
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx);
}
}
// Truncate to NVT instead of VT
return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
}
SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
// The operation overflowed iff the result in the larger type is not the
// zero extension of its truncation to the original type.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
EVT OVT = N->getOperand(0).getValueType();
EVT NVT = LHS.getValueType();
SDLoc dl(N);
// Do the arithmetic in the larger type.
unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
// Calculate the overflow flag: zero extend the arithmetic result from
// the original type.
SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
// Overflowed if and only if this is not equal to Res.
Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
// Use the calculated overflow everywhere.
ReplaceValueWith(SDValue(N, 1), Ofl);
return Res;
}
// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
// the third operand of ADDE/SUBE nodes is a carry flag, which differs from
// the ADDCARRY/SUBCARRY nodes in that the third operand is a carry Boolean.
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
// We need to sign-extend the operands so the carry value computed by the
// wide operation will be equivalent to the carry value computed by the
// narrow operation.
// An ADDCARRY can generate carry only if any of the operands has its
// most significant bit set. Sign extension propagates the most significant
// bit into the higher bits which means the extra bit that the narrow
// addition would need (i.e. the carry) will be propagated through the higher
// bits of the wide addition.
// A SUBCARRY can generate borrow only if LHS < RHS and this property will be
// preserved by sign extension.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)};
// Do the arithmetic in the wide type.
SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs),
LHS, RHS, N->getOperand(2));
// Update the users of the original carry/borrow value.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return SDValue(Res.getNode(), 0);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
unsigned ResNo) {
assert(ResNo == 1 && "Don't know how to promote other results yet.");
return PromoteIntRes_Overflow(N);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
}
SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
// Promote the overflow bit trivially.
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
SDLoc DL(N);
EVT SmallVT = LHS.getValueType();
// To determine if the result overflowed in a larger type, we extend the
// input to the larger type, do the multiply (checking if it overflows),
// then also check the high bits of the result to see if overflow happened
// there.
if (N->getOpcode() == ISD::SMULO) {
LHS = SExtPromotedInteger(LHS);
RHS = SExtPromotedInteger(RHS);
} else {
LHS = ZExtPromotedInteger(LHS);
RHS = ZExtPromotedInteger(RHS);
}
SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1));
SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS);
// Overflow occurred if it occurred in the larger type, or if the high part
// of the result does not zero/sign-extend the low part. Check this second
// possibility first.
SDValue Overflow;
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
DAG.getConstant(0, DL, Hi.getValueType()),
ISD::SETNE);
} else {
// Signed overflow occurred if the high part does not sign extend the low.
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
Mul, DAG.getValueType(SmallVT));
Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
}
// The only other way for overflow to occur is if the multiplication in the
// larger type itself overflowed.
Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow,
SDValue(Mul.getNode(), 1));
// Use the calculated overflow everywhere.
ReplaceValueWith(SDValue(N, 1), Overflow);
return Mul;
}
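// Worked example of the unsigned case above with concrete widths: an i8
// UMULO done in i32 overflows exactly when the high bits of the 32-bit
// product are non-zero (the 32-bit multiply itself cannot overflow for
// 8-bit inputs). A minimal standalone sketch; the helper name is
// illustrative only.
#include <cstdint>

constexpr bool umulo_i8_via_i32(uint8_t A, uint8_t B) {
  uint32_t Wide = static_cast<uint32_t>(A) * static_cast<uint32_t>(B);
  return (Wide >> 8) != 0; // the SRL + SETNE pair above
}

static_assert(umulo_i8_via_i32(16, 16), "256 does not fit in i8");
static_assert(!umulo_i8_via_i32(15, 15), "225 fits in i8");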
SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
N->getValueType(0)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
return DAG.getVScale(SDLoc(N), VT, MulImm.sextOrSelf(VT.getSizeInBits()));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
SDValue Chain = N->getOperand(0); // Get the chain.
SDValue Ptr = N->getOperand(1); // Get the pointer.
EVT VT = N->getValueType(0);
SDLoc dl(N);
MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
// The argument is passed as NumRegs registers of type RegVT.
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned i = 0; i < NumRegs; ++i) {
Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
N->getConstantOperandVal(3));
Chain = Parts[i].getValue(1);
}
// Handle endianness of the load.
if (DAG.getDataLayout().isBigEndian())
std::reverse(Parts.begin(), Parts.end());
// Assemble the parts in the promoted type.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
for (unsigned i = 1; i < NumRegs; ++i) {
SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
// Shift it to the right position and "or" it in.
Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
DAG.getConstant(i * RegVT.getSizeInBits(), dl,
TLI.getPointerTy(DAG.getDataLayout())));
Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
}
// Modified the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Chain);
return Res;
}
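// Worked example of the assembly loop above with concrete widths: two i32
// pieces of an i64 are zero-extended and OR-ed in at their register offsets,
// low piece first (the big-endian case just reverses the pieces beforehand).
// A minimal standalone sketch; the helper name is illustrative only.
#include <cstdint>

constexpr uint64_t joinParts(uint32_t Lo, uint32_t Hi) {
  uint64_t Res = Lo;                      // Parts[0], zero-extended
  Res |= static_cast<uint64_t>(Hi) << 32; // Parts[1] shifted into position
  return Res;
}

static_assert(joinParts(0x89ABCDEFu, 0x01234567u) == 0x0123456789ABCDEFull, "");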
//===----------------------------------------------------------------------===//
// Integer Operand Promotion
//===----------------------------------------------------------------------===//
/// PromoteIntegerOperand - This method is called when the specified operand of
/// the specified node is found to need promotion. At this point, all of the
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
return false;
}
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to promote this operator's operand!");
case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
case ISD::ATOMIC_STORE:
Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
break;
case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
case ISD::SPLAT_VECTOR:
Res = PromoteIntOp_SPLAT_VECTOR(N); break;
case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
OpNo); break;
case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
OpNo); break;
case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
OpNo); break;
case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
case ISD::FRAMEADDR:
case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
// core about this.
if (Res.getNode() == N)
return true;
const bool IsStrictFp = N->isStrictFPOpcode();
assert(Res.getValueType() == N->getValueType(0) &&
N->getNumValues() == (IsStrictFp ? 2 : 1) &&
"Invalid operand expansion");
LLVM_DEBUG(dbgs() << "Replacing: "; N->dump(&DAG); dbgs() << " with: ";
Res.dump());
ReplaceValueWith(SDValue(N, 0), Res);
if (IsStrictFp)
ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
return false;
}
/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
/// shared among BR_CC, SELECT_CC, and SETCC handlers.
void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
ISD::CondCode CCCode) {
// We have to insert explicit sign or zero extends. Note that we could
// insert sign extends for ALL conditions. For those operations where either
// zero or sign extension would be valid, use SExtOrZExtPromotedInteger
// which will choose the cheapest for the target.
switch (CCCode) {
default: llvm_unreachable("Unknown integer comparison!");
case ISD::SETEQ:
case ISD::SETNE: {
SDValue OpL = GetPromotedInteger(NewLHS);
SDValue OpR = GetPromotedInteger(NewRHS);
// We would prefer to promote the comparison operand with sign extension.
// If the width of OpL/OpR excluding the duplicated sign bits is no greater
// than the width of NewLHS/NewRHS, we can avoid inserting a real truncate
// instruction, which would eventually be redundant.
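// For illustration: if NewLHS is i8 and OpL is its i32 promotion, then
// OpLEffectiveBits = 32 - ComputeNumSignBits(OpL) + 1. When OpL has at least
// 25 known sign bits this is <= 8, i.e. OpL already equals the sign-extension
// of the original i8 value, so the promoted values can be compared as-is for
// equality.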
unsigned OpLEffectiveBits =
OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
unsigned OpREffectiveBits =
OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
NewLHS = OpL;
NewRHS = OpR;
} else {
NewLHS = SExtOrZExtPromotedInteger(NewLHS);
NewRHS = SExtOrZExtPromotedInteger(NewRHS);
}
break;
}
case ISD::SETUGE:
case ISD::SETUGT:
case ISD::SETULE:
case ISD::SETULT:
NewLHS = SExtOrZExtPromotedInteger(NewLHS);
NewRHS = SExtOrZExtPromotedInteger(NewRHS);
break;
case ISD::SETGE:
case ISD::SETGT:
case ISD::SETLT:
case ISD::SETLE:
NewLHS = SExtPromotedInteger(NewLHS);
NewRHS = SExtPromotedInteger(NewRHS);
break;
}
}
SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op);
}
SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
N->getChain(), N->getBasePtr(), Op2, N->getMemOperand());
}
SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
// This should only occur in unusual situations like bitcasting to an
// x86_fp80, so just turn it into a store+load
return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
}
SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
assert(OpNo == 2 && "Don't know how to promote this operand!");
SDValue LHS = N->getOperand(2);
SDValue RHS = N->getOperand(3);
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
// The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
// legal types.
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
N->getOperand(1), LHS, RHS, N->getOperand(4)),
0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
assert(OpNo == 1 && "only know how to promote condition");
// Promote all the way up to the canonical SetCC type.
SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other);
// The chain (Op#0) and basic block destination (Op#2) are always legal types.
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
// Since the result type is legal, the operands must promote to it.
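// For illustration: a BUILD_PAIR of two i32 halves whose legal result type is
// i64 becomes zext(LoHalf) | (HiHalf << 32), built from the promoted operands
// below.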
EVT OVT = N->getOperand(0).getValueType();
SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
SDValue Hi = GetPromotedInteger(N->getOperand(1));
assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
SDLoc dl(N);
Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
DAG.getConstant(OVT.getSizeInBits(), dl,
TLI.getPointerTy(DAG.getDataLayout())));
return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
// The vector type is legal but the element type is not. This implies
// that the vector is a power-of-two in length and that the element
// type does not have a strange size (eg: it is not i1).
EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements();
assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
"Legal vector of one illegal element?");
// Promote the element values. Their type does not need to match the
// vector element type. Check that any extra bits introduced will be
// truncated away.
assert(N->getOperand(0).getValueSizeInBits() >=
N->getValueType(0).getScalarSizeInBits() &&
"Type of inserted value narrower than vector element type!");
SmallVector<SDValue, 16> NewOps;
for (unsigned i = 0; i < NumElts; ++i)
NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
unsigned OpNo) {
if (OpNo == 1) {
// Promote the inserted value. This is valid because the type does not
// have to match the vector element type.
// Check that any extra bits introduced will be truncated away.
assert(N->getOperand(1).getValueSizeInBits() >=
N->getValueType(0).getScalarSizeInBits() &&
"Type of inserted value narrower than vector element type!");
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
GetPromotedInteger(N->getOperand(1)),
N->getOperand(2)),
0);
}
assert(OpNo == 2 && "Different operand and result vector types?");
// Promote the index.
SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N),
TLI.getVectorIdxTy(DAG.getDataLayout()));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
N->getOperand(1), Idx), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
// Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
// the operand in place.
return SDValue(DAG.UpdateNodeOperands(N,
GetPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) {
// Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the
// operand in place.
return SDValue(
DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Only know how to promote the condition!");
SDValue Cond = N->getOperand(0);
EVT OpTy = N->getOperand(1).getValueType();
if (N->getOpcode() == ISD::VSELECT)
if (SDValue Res = WidenVSELECTMask(N))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
Res, N->getOperand(1), N->getOperand(2));
// Promote all the way up to the canonical SetCC type.
EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
Cond = PromoteTargetBoolean(Cond, OpVT);
return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Don't know how to promote this operand!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
// The CC (#4) and the possible return values (#2 and #3) have legal types.
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
N->getOperand(3), N->getOperand(4)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Don't know how to promote this operand!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
// The CC (#2) is always legal.
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
ZExtPromotedInteger(N->getOperand(1))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
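// The promoted operand may hold arbitrary bits above the original width, so
// any-extend it to the result type and then re-sign-extend in-register from
// the original (pre-promotion) value type.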
SDValue Op = GetPromotedInteger(N->getOperand(0));
SDLoc dl(N);
Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
Op, DAG.getValueType(N->getOperand(0).getValueType()));
}
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N,
SExtPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
SExtPromotedInteger(N->getOperand(1))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
SDLoc dl(N);
SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
// Truncate the value and store the result.
return DAG.getTruncStore(Ch, dl, Val, Ptr,
N->getMemoryVT(), N->getMemOperand());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
SDLoc dl(N);
bool TruncateStore = false;
if (OpNo == 4) {
Mask = PromoteTargetBoolean(Mask, DataVT);
// Update in place.
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[4] = Mask;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
} else { // Data operand
assert(OpNo == 1 && "Unexpected operand for promotion");
DataOp = GetPromotedInteger(DataOp);
TruncateStore = true;
}
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(),
N->getOffset(), Mask, N->getMemoryVT(),
N->getMemOperand(), N->getAddressingMode(),
TruncateStore, N->isCompressingStore());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
unsigned OpNo) {
assert(OpNo == 3 && "Only know how to promote the mask!");
EVT DataVT = N->getValueType(0);
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[OpNo] = Mask;
SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
if (Res == N)
return SDValue(Res, 0);
// Update triggered CSE, do our own replacement since caller can't.
ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
unsigned OpNo) {
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
if (OpNo == 2) {
// The Mask
EVT DataVT = N->getValueType(0);
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else if (OpNo == 4) {
// The Index
if (N->isIndexSigned())
// Need to sign extend the index since the bits will likely be used.
NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
else
NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
if (Res == N)
return SDValue(Res, 0);
// Update triggered CSE, do our own replacement since caller can't.
ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
unsigned OpNo) {
bool TruncateStore = N->isTruncatingStore();
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
if (OpNo == 2) {
// The Mask
EVT DataVT = N->getValue().getValueType();
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else if (OpNo == 4) {
// The Index
if (N->isIndexSigned())
// Need to sign extend the index since the bits will likely be used.
NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
else
NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
N->setIndexType(TLI.getCanonicalIndexType(N->getIndexType(),
N->getMemoryVT(), NewOps[OpNo]));
} else {
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
TruncateStore = true;
}
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(),
SDLoc(N), NewOps, N->getMemOperand(),
N->getIndexType(), TruncateStore);
}
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
}
SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N,
ZExtPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
ZExtPromotedInteger(N->getOperand(1))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
SDLoc dl(N);
SDValue Op = GetPromotedInteger(N->getOperand(0));
Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
}
SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
assert(OpNo == 2 && "Don't know how to promote this operand!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Carry = N->getOperand(2);
SDLoc DL(N);
Carry = PromoteTargetBoolean(Carry, LHS.getValueType());
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) {
SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
return SDValue(
DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
// Promote the RETURNADDR/FRAMEADDR argument to a supported integer width.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
assert(OpNo > 1 && "Don't know how to promote this operand!");
// Promote the rw, locality, and cache type arguments to a supported integer
// width.
SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
SDValue Op3 = ZExtPromotedInteger(N->getOperand(3));
SDValue Op4 = ZExtPromotedInteger(N->getOperand(4));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
Op2, Op3, Op4),
0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
// FIXME: Support for promotion of STRICT_FPOWI is not implemented yet.
assert(N->getOpcode() == ISD::FPOWI && "No STRICT_FPOWI support here yet.");
// The integer operand is the last operand in FPOWI (so the result and
// floating point operand are already type legalized).
// We can't just promote the exponent type in FPOWI, since we want to lower
// the node to a libcall, and if we promote to a type larger than
// sizeof(int) the libcall might not match the target's ABI. Instead
// we rewrite to a libcall here directly, letting makeLibCall handle promotion
// if the target accepts it according to shouldSignExtendTypeInLibCall.
RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
if (!TLI.getLibcallName(LC)) {
// Some targets don't have a powi libcall; use pow instead.
// FIXME: Implement this if some target needs it.
DAG.getContext()->emitError("Don't know how to promote fpowi to fpow");
return DAG.getUNDEF(N->getValueType(0));
}
// The exponent should fit in a sizeof(int) type for the libcall to be valid.
assert(DAG.getLibInfo().getIntSize() ==
N->getOperand(1).getValueType().getSizeInBits() &&
"POWI exponent should match with sizeof(int) when doing the libcall.");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
std::pair<SDValue, SDValue> Tmp =
TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops,
CallOptions, SDLoc(N), SDValue());
ReplaceValueWith(SDValue(N, 0), Tmp.first);
return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
SDLoc dl(N);
SDValue Op;
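// The choice of extension matters: SMAX/SMIN need the promoted elements to be
// proper sign extensions and UMAX/UMIN proper zero extensions, otherwise the
// wider compares could pick the wrong element. ADD/MUL and the bitwise
// reductions use a plain promotion, since their low result bits do not depend
// on the promoted high bits.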
switch (N->getOpcode()) {
default: llvm_unreachable("Expected integer vector reduction");
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
Op = GetPromotedInteger(N->getOperand(0));
break;
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
Op = SExtPromotedInteger(N->getOperand(0));
break;
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
Op = ZExtPromotedInteger(N->getOperand(0));
break;
}
EVT EltVT = Op.getValueType().getVectorElementType();
EVT VT = N->getValueType(0);
if (VT.bitsGE(EltVT))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
// Result size must be >= element size. If this is not the case after
// promotion, also promote the result type and then truncate.
SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
SDValue Op = ZExtPromotedInteger(N->getOperand(1));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
}
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
/// ExpandIntegerResult - This method is called when the specified result of the
/// specified node is found to need expansion. At this point, the node may also
/// have invalid operands or may have other results that need promotion; we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
Lo = Hi = SDValue();
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true))
return;
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
report_fatal_error("Do not know how to expand the result of this "
"operator!");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break;
case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break;
case ISD::STRICT_LLROUND:
case ISD::STRICT_LLRINT:
case ISD::LLROUND:
case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break;
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_CMP_SWAP: {
std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
SplitInteger(Tmp.first, Lo, Hi);
ReplaceValueWith(SDValue(N, 1), Tmp.second);
break;
}
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
AtomicSDNode *AN = cast<AtomicSDNode>(N);
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other);
SDValue Tmp = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs,
N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3),
AN->getMemOperand());
// Expanding to the strong ATOMIC_CMP_SWAP node means we can determine
// success simply by comparing the loaded value against the incoming
// comparison value.
SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp,
N->getOperand(2), ISD::SETEQ);
SplitInteger(Tmp, Lo, Hi);
ReplaceValueWith(SDValue(N, 1), Success);
ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1));
break;
}
case ISD::AND:
case ISD::OR:
case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
case ISD::UMAX:
case ISD::SMAX:
case ISD::UMIN:
case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break;
case ISD::ADD:
case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
case ISD::ADDC:
case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
case ISD::ADDE:
case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
case ISD::ADDCARRY:
case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
case ISD::SADDO:
case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
case ISD::UADDO:
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
case ISD::UMULO:
case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
case ISD::SSHLSAT:
case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
case ISD::SDIVFIX:
case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
case ISD::UDIVFIXSAT: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
case ISD::ROTL:
case ISD::ROTR:
ExpandIntRes_Rotate(N, Lo, Hi);
break;
case ISD::FSHL:
case ISD::FSHR:
ExpandIntRes_FunnelShift(N, Lo, Hi);
break;
case ISD::VSCALE:
ExpandIntRes_VSCALE(N, Lo, Hi);
break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
if (Lo.getNode())
SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
}
/// Lower an atomic node to the appropriate builtin call.
std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
AtomicOrdering order = cast<AtomicSDNode>(Node)->getMergedOrdering();
// Lower to an outline-atomic libcall if outline atomics are enabled,
// or to a sync libcall otherwise.
RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, order, VT);
EVT RetVT = Node->getValueType(0);
TargetLowering::MakeLibCallOptions CallOptions;
SmallVector<SDValue, 4> Ops;
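// Note the operand order: the outline-atomic libcalls take the value
// operand(s) first and the pointer last, while the __sync_* libcalls take the
// pointer first, so the node operands (chain, ptr, val...) are reordered
// accordingly below.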
if (TLI.getLibcallName(LC)) {
Ops.append(Node->op_begin() + 2, Node->op_end());
Ops.push_back(Node->getOperand(1));
} else {
LC = RTLIB::getSYNC(Opc, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected atomic op or value type!");
Ops.append(Node->op_begin() + 1, Node->op_end());
}
return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
Node->getOperand(0));
}
/// N is a shift whose shifted operand needs to be expanded and whose shift
/// amount is the constant 'Amt'. Expand the operation.
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi) {
SDLoc DL(N);
// Expand the incoming operand to be shifted, so that we have its parts
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
// Though Amt shouldn't usually be 0, it's possible. E.g. when legalization
// split a vector shift, like this: <op1, op2> SHL <0, 2>.
if (!Amt) {
Lo = InL;
Hi = InH;
return;
}
EVT NVT = InL.getValueType();
unsigned VTBits = N->getValueType(0).getSizeInBits();
unsigned NVTBits = NVT.getSizeInBits();
EVT ShTy = N->getOperand(1).getValueType();
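// For illustration, expanding an i64 SHL with a 32-bit NVT
// (VTBits = 64, NVTBits = 32):
//   Amt = 40: Lo = 0,        Hi = InL << 8
//   Amt = 32: Lo = 0,        Hi = InL
//   Amt = 5:  Lo = InL << 5, Hi = (InH << 5) | (InL >> 27)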
if (N->getOpcode() == ISD::SHL) {
if (Amt.ugt(VTBits)) {
Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getConstant(0, DL, NVT);
Hi = DAG.getNode(ISD::SHL, DL,
NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy));
} else if (Amt == NVTBits) {
Lo = DAG.getConstant(0, DL, NVT);
Hi = InL;
} else {
Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy));
Hi = DAG.getNode(ISD::OR, DL, NVT,
DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
}
return;
}
if (N->getOpcode() == ISD::SRL) {
if (Amt.ugt(VTBits)) {
Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getNode(ISD::SRL, DL,
NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy));
Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getConstant(0, DL, NVT);
} else {
Lo = DAG.getNode(ISD::OR, DL, NVT,
DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
}
return;
}
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
if (Amt.ugt(VTBits)) {
Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits - 1, DL, ShTy));
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(Amt - NVTBits, DL, ShTy));
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits - 1, DL, ShTy));
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits - 1, DL, ShTy));
} else {
Lo = DAG.getNode(ISD::OR, DL, NVT,
DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
}
}
/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
/// this shift based on knowledge of the high bits of the shift amount. If we
/// can tell this, we know that the amount is >= NVTBits or < NVTBits, without
/// knowing its exact value.
bool DAGTypeLegalizer::
ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Amt = N->getOperand(1);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ShTy = Amt.getValueType();
unsigned ShBits = ShTy.getScalarSizeInBits();
unsigned NVTBits = NVT.getScalarSizeInBits();
assert(isPowerOf2_32(NVTBits) &&
"Expanded integer type size not a power of two!");
SDLoc dl(N);
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
KnownBits Known = DAG.computeKnownBits(N->getOperand(1));
// If we don't know anything about the high bits, exit.
if (((Known.Zero|Known.One) & HighBitMask) == 0)
return false;
// Get the incoming operand to be shifted.
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
// If we know that any of the high bits of the shift amount are one, then we
// can do this as a couple of simple shifts.
if (Known.One.intersects(HighBitMask)) {
// Mask out the high bit, which we know is set.
Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
DAG.getConstant(~HighBitMask, dl, ShTy));
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
case ISD::SHL:
Lo = DAG.getConstant(0, dl, NVT); // Low part is zero.
Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
return true;
case ISD::SRL:
Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero.
Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
return true;
case ISD::SRA:
Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
DAG.getConstant(NVTBits - 1, dl, ShTy));
Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
return true;
}
}
// If we know that all of the high bits of the shift amount are zero, then we
// can do this as a couple of simple shifts.
if (HighBitMask.isSubsetOf(Known.Zero)) {
// Calculate NVTBits-1 - x (e.g. 31-x for a 32-bit NVT). NVTBits-1 is used
// instead of NVTBits to avoid creating an undefined shift if x is zero. We
// can use XOR here because x is known to be smaller than NVTBits.
SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
DAG.getConstant(NVTBits - 1, dl, ShTy));
unsigned Op1, Op2;
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
case ISD::SRL:
case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
}
// When shifting right the arithmetic for Lo and Hi is swapped.
if (N->getOpcode() != ISD::SHL)
std::swap(InL, InH);
// Use a little trick to get the bits that move from Lo to Hi. First
// shift by one bit.
SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy));
// Then compute the remaining shift with amount-1.
SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
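// E.g. for SHL with NVTBits == 32 and Amt known to be < 32, Amt2 == 31 - Amt,
// so Sh2 == InL >> (32 - Amt), computed as (InL >> 1) >> (31 - Amt) so that no
// individual shift amount ever reaches the full 32 bits (which would be
// undefined).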
Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
if (N->getOpcode() != ISD::SHL)
std::swap(Hi, Lo);
return true;
}
return false;
}
/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
/// of any size.
bool DAGTypeLegalizer::
ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Amt = N->getOperand(1);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ShTy = Amt.getValueType();
unsigned NVTBits = NVT.getSizeInBits();
assert(isPowerOf2_32(NVTBits) &&
"Expanded integer type size not a power of two!");
SDLoc dl(N);
// Get the incoming operand to be shifted.
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy);
SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy),
Amt, NVBitsNode, ISD::SETULT);
SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy),
Amt, DAG.getConstant(0, dl, ShTy),
ISD::SETEQ);
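// Compute both the "short" result (Amt < NVTBits) and the "long" result
// (Amt >= NVTBits) and select between them. The extra isZero select keeps the
// Amt == 0 case away from the NVTBits - Amt shift, which would otherwise shift
// by the full narrow register width.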
SDValue LoS, HiS, LoL, HiL;
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
case ISD::SHL:
// Short: ShAmt < NVTBits
LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
HiS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
// Long: ShAmt >= NVTBits
LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero.
HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
Hi = DAG.getSelect(dl, NVT, isZero, InH,
DAG.getSelect(dl, NVT, isShort, HiS, HiL));
return true;
case ISD::SRL:
// Short: ShAmt < NVTBits
HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
LoS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
// FIXME: If Amt is zero, the following shift generates an undefined result
// on some architectures.
DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
// Long: ShAmt >= NVTBits
HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero.
LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
Lo = DAG.getSelect(dl, NVT, isZero, InL,
DAG.getSelect(dl, NVT, isShort, LoS, LoL));
Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
return true;
case ISD::SRA:
// Short: ShAmt < NVTBits
HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
LoS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
// Long: ShAmt >= NVTBits
HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
DAG.getConstant(NVTBits - 1, dl, ShTy));
LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
Lo = DAG.getSelect(dl, NVT, isZero, InL,
DAG.getSelect(dl, NVT, isShort, LoS, LoL));
Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
return true;
}
}
static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) {
switch (Op) {
default: llvm_unreachable("invalid min/max opcode");
case ISD::SMAX:
return std::make_pair(ISD::SETGT, ISD::UMAX);
case ISD::UMAX:
return std::make_pair(ISD::SETUGT, ISD::UMAX);
case ISD::SMIN:
return std::make_pair(ISD::SETLT, ISD::UMIN);
case ISD::UMIN:
return std::make_pair(ISD::SETULT, ISD::UMIN);
}
}
void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc DL(N);
ISD::NodeType LoOpc;
ISD::CondCode CondC;
std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
// Value types
EVT NVT = LHSL.getValueType();
EVT CCT = getSetCCResultType(NVT);
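// For illustration, smax(A, B) on a value split into (Hi, Lo):
//   Hi(result) = smax(AHi, BHi)
//   Lo(result) = (AHi == BHi) ? umax(ALo, BLo)
//                             : (AHi > BHi ? ALo : BLo)   // signed compare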
// Hi part is always the same op
Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
// We need to know whether to select the Lo part that corresponds to the
// 'winning' Hi part, or whether the Hi parts are equal (in which case the
// Lo parts decide).
SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
// Lo part corresponding to the 'winning' Hi part
SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
// Lo part to use when the Hi parts are equal; this uses the unsigned version.
SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
EVT NVT = LHSL.getValueType();
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
bool HasOpCarry = TLI.isOperationLegalOrCustom(
N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasOpCarry) {
SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
}
return;
}
// Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
// them. TODO: Teach operation legalization how to expand unsupported
// ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
// a carry of type MVT::Glue, but there doesn't seem to be any way to
// generate a value of this type in the expanded code sequence.
bool hasCarry =
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::ADDC : ISD::SUBC,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (hasCarry) {
SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
}
return;
}
bool hasOVF =
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::UADDO : ISD::USUBO,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
if (hasOVF) {
EVT OvfVT = getSetCCResultType(NVT);
SDVTList VTList = DAG.getVTList(NVT, OvfVT);
int RevOpc;
if (N->getOpcode() == ISD::ADD) {
RevOpc = ISD::SUB;
Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
} else {
RevOpc = ISD::ADD;
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
}
SDValue OVF = Lo.getValue(1);
switch (BoolType) {
case TargetLoweringBase::UndefinedBooleanContent:
OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF);
LLVM_FALLTHROUGH;
case TargetLoweringBase::ZeroOrOneBooleanContent:
OVF = DAG.getZExtOrTrunc(OVF, dl, NVT);
Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
break;
case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
OVF = DAG.getSExtOrTrunc(OVF, dl, NVT);
Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF);
}
return;
}
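// Neither ADDCARRY/SUBCARRY, ADDC/SUBC, nor UADDO/USUBO is available: compute
// the low half with a plain ADD/SUB and derive the carry/borrow from an
// unsigned compare (with wraparound, a + b < a iff the add carried, and
// a < b iff the subtract borrowed).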
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
ISD::SETULT);
if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
return;
}
SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1],
ISD::SETULT);
SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2,
DAG.getConstant(1, dl, NVT), Carry1);
Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
} else {
Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
SDValue Borrow;
if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT);
else
Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
if (N->getOpcode() == ISD::ADDC) {
Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
}
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
SDValue HiOps[3] = { LHSH, RHSH };
Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDLoc dl(N);
SDValue Ovf;
unsigned CarryOp, NoCarryOp;
ISD::CondCode Cond;
switch(N->getOpcode()) {
case ISD::UADDO:
CarryOp = ISD::ADDCARRY;
NoCarryOp = ISD::ADD;
Cond = ISD::SETULT;
break;
case ISD::USUBO:
CarryOp = ISD::SUBCARRY;
NoCarryOp = ISD::SUB;
Cond = ISD::SETUGT;
break;
default:
llvm_unreachable("Node has unexpected Opcode");
}
bool HasCarryOp = TLI.isOperationLegalOrCustom(
CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
if (HasCarryOp) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
GetExpandedInteger(LHS, LHSL, LHSH);
GetExpandedInteger(RHS, RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(CarryOp, dl, VTList, HiOps);
Ovf = Hi.getValue(1);
} else {
// Expand the result by simply replacing it with the equivalent
// non-overflow-checking operation.
SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS);
SplitInteger(Sum, Lo, Hi);
// Calculate the overflow: addition overflows iff a + b < a, and subtraction
// overflows iff a - b > a.
Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
}
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ovf);
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
SDValue HiOps[3] = { LHSH, RHSH, SDValue() };
Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
// We need to use an unsigned carry op for the lo part.
unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? ISD::ADDCARRY
: ISD::SUBCARRY;
Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) });
Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
SDValue Op = N->getOperand(0);
if (Op.getValueType().bitsLE(NVT)) {
// The low part is any extension of the input (which degenerates to a copy).
Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
Hi = DAG.getUNDEF(NVT); // The high part is undefined.
} else {
// For example, extension of an i48 to an i64: the operand type necessarily
// promotes to the result type, so it will end up being expanded too.
assert(getTypeAction(Op.getValueType()) ==
TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
"Operand over promoted?");
// Split the promoted operand. This will simplify when it is expanded.
SplitInteger(Res, Lo, Hi);
}
}
void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
unsigned NVTBits = NVT.getSizeInBits();
unsigned EVTBits = EVT.getSizeInBits();
if (NVTBits < EVTBits) {
Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
EVTBits - NVTBits)));
} else {
Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part replicates the sign bit of Lo, make it explicit.
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(NVTBits - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
}
}
void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
unsigned NVTBits = NVT.getSizeInBits();
unsigned EVTBits = EVT.getSizeInBits();
if (NVTBits < EVTBits) {
Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
EVTBits - NVTBits)));
} else {
Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part must be zero, make it explicit.
Hi = DAG.getConstant(0, dl, NVT);
}
}
void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo);
Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi);
}
void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
}
void DAGTypeLegalizer::ExpandIntRes_PARITY(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
// parity(HiLo) -> parity(Lo^Hi)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
Lo =
DAG.getNode(ISD::PARITY, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, Lo, Hi));
Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
auto Constant = cast<ConstantSDNode>(N);
const APInt &Cst = Constant->getAPIntValue();
bool IsTarget = Constant->isTargetOpcode();
bool IsOpaque = Constant->isOpaque();
SDLoc dl(N);
Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque);
Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget,
IsOpaque);
}
void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
GetExpandedInteger(N0, Lo, Hi);
EVT NVT = Lo.getValueType();
// If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we
// use in LegalizeDAG. The ADD part of the expansion is based on
// ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that
// ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded
// if needed. Shift expansion has a special case for filling with sign bits
// so that we will only end up with one SRA.
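// With Sign = Hi >> (NVTBits - 1) (all zeros or all ones), this is the usual
// abs(X) == (X + Sign) ^ Sign identity applied to the double-wide value: the
// UADDO/ADDCARRY pair performs the wide add and the XORs finish the expansion.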
bool HasAddCarry = TLI.isOperationLegalOrCustom(
ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasAddCarry) {
EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
SDValue Sign =
DAG.getNode(ISD::SRA, dl, NVT, Hi,
DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign);
Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
return;
}
// abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
EVT VT = N->getValueType(0);
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, dl, VT), N0);
SDValue NegLo, NegHi;
SplitInteger(Neg, NegLo, NegHi);
SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
// ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi,
DAG.getConstant(0, dl, NVT), ISD::SETNE);
SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ,
DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
DAG.getConstant(NVT.getSizeInBits(), dl,
NVT)));
Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
// ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
// cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo,
DAG.getConstant(0, dl, NVT), ISD::SETNE);
SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ,
DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
DAG.getConstant(NVT.getSizeInBits(), dl,
NVT)));
Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
SDValue Chain = Lo.getValue(1);
// The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Chain);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
}
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
CallOptions, dl, Chain);
SplitInteger(Tmp.first, Lo, Hi);
if (IsStrict)
ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
}
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
CallOptions, dl, Chain);
SplitInteger(Tmp.first, Lo, Hi);
if (IsStrict)
ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Res = TLI.expandFP_TO_INT_SAT(N, DAG);
SplitInteger(Res, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat &&
"Input type needs to be promoted!");
EVT VT = Op.getValueType();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (N->getOpcode() == ISD::LLROUND ||
N->getOpcode() == ISD::STRICT_LLROUND) {
if (VT == MVT::f32)
LC = RTLIB::LLROUND_F32;
else if (VT == MVT::f64)
LC = RTLIB::LLROUND_F64;
else if (VT == MVT::f80)
LC = RTLIB::LLROUND_F80;
else if (VT == MVT::f128)
LC = RTLIB::LLROUND_F128;
else if (VT == MVT::ppcf128)
LC = RTLIB::LLROUND_PPCF128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
} else if (N->getOpcode() == ISD::LLRINT ||
N->getOpcode() == ISD::STRICT_LLRINT) {
if (VT == MVT::f32)
LC = RTLIB::LLRINT_F32;
else if (VT == MVT::f64)
LC = RTLIB::LLRINT_F64;
else if (VT == MVT::f80)
LC = RTLIB::LLRINT_F80;
else if (VT == MVT::f128)
LC = RTLIB::LLRINT_F128;
else if (VT == MVT::ppcf128)
LC = RTLIB::LLRINT_PPCF128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
} else
llvm_unreachable("Unexpected opcode!");
SDLoc dl(N);
EVT RetVT = N->getValueType(0);
SDValue Chain = N->isStrictFPOpcode() ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
Op, CallOptions, dl,
Chain);
SplitInteger(Tmp.first, Lo, Hi);
if (N->isStrictFPOpcode())
ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue &Lo, SDValue &Hi) {
if (N->isAtomic()) {
// It's typical for a target to support a wider compare-and-swap than its
// atomic load instructions, so expand the load via ATOMIC_CMP_SWAP_WITH_SUCCESS.
SDLoc dl(N);
EVT VT = N->getMemoryVT();
SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue Swap = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
VT, VTs, N->getOperand(0),
N->getOperand(1), Zero, Zero, N->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
return;
}
if (ISD::isNormalLoad(N)) {
ExpandRes_NormalLoad(N, Lo, Hi);
return;
}
assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
ISD::LoadExtType ExtType = N->getExtensionType();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
assert(NVT.isByteSized() && "Expanded type not byte sized!");
if (N->getMemoryVT().bitsLE(NVT)) {
EVT MemVT = N->getMemoryVT();
Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT,
N->getOriginalAlign(), MMOFlags, AAInfo);
// Remember the chain.
Ch = Lo.getValue(1);
if (ExtType == ISD::SEXTLOAD) {
// The high part is obtained by SRA'ing all but one of the bits of the
// lo part.
unsigned LoSize = Lo.getValueSizeInBits();
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(LoSize - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
} else if (ExtType == ISD::ZEXTLOAD) {
// The high part is just a zero.
Hi = DAG.getConstant(0, dl, NVT);
} else {
assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
// The high part is undefined.
Hi = DAG.getUNDEF(NVT);
}
} else if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
N->getOriginalAlign(), MMOFlags, AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
N->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
} else {
// Big-endian - high bits are at low addresses. Favor aligned loads at
// the cost of some bit-fiddling.
EVT MemVT = N->getMemoryVT();
unsigned EBytes = MemVT.getStoreSize();
unsigned IncrementSize = NVT.getSizeInBits()/8;
unsigned ExcessBits = (EBytes - IncrementSize)*8;
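// As an illustration, for MemVT = i48 with NVT = i32 this gives EBytes = 6,
// IncrementSize = 4 and ExcessBits = 16: Hi below loads the top 32 bits, Lo
// zero-extends the remaining 16 bits, and the fix-up at the end ORs
// (Hi << 16) into Lo and shifts Hi right by 16.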
// Load both the high bits and maybe some of the low bits.
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
N->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
if (ExcessBits < NVT.getSizeInBits()) {
// Transfer low bits from the bottom of Hi to the top of Lo.
Lo = DAG.getNode(
ISD::OR, dl, NVT, Lo,
DAG.getNode(ISD::SHL, dl, NVT, Hi,
DAG.getConstant(ExcessBits, dl,
TLI.getPointerTy(DAG.getDataLayout()))));
// Move high bits to the right position in Hi.
Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT,
Hi,
DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
TLI.getPointerTy(DAG.getDataLayout())));
}
}
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ch);
}
void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
SDValue LL, LH, RL, RH;
GetExpandedInteger(N->getOperand(0), LL, LH);
GetExpandedInteger(N->getOperand(1), RL, RH);
Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
}
void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDLoc dl(N);
SDValue LL, LH, RL, RH;
GetExpandedInteger(N->getOperand(0), LL, LH);
GetExpandedInteger(N->getOperand(1), RL, RH);
if (TLI.expandMUL(N, Lo, Hi, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
LL, LH, RL, RH))
return;
// If nothing else, we can make a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::MUL_I16;
else if (VT == MVT::i32)
LC = RTLIB::MUL_I32;
else if (VT == MVT::i64)
LC = RTLIB::MUL_I64;
else if (VT == MVT::i128)
LC = RTLIB::MUL_I128;
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
// We'll expand the multiplication by brute force because we have no other
// options. This is a trivially-generalized version of the code from
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
// 4.3.1).
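// Sketch of the decomposition used below: with H = HalfBits, write each
// expanded half as A = A1*2^H + A0 and B = B1*2^H + B0. Then
// LL*RL = A0*B0 + (A1*B0 + A0*B1)*2^H + A1*B1*2^(2H); the code accumulates
// this column by column (T, U, V, W) so that no intermediate product needs
// more than NVT bits, and finally adds the cross terms RH*LL and RL*LH,
// which only affect the upper half of the full product.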
unsigned Bits = NVT.getSizeInBits();
unsigned HalfBits = Bits >> 1;
SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
NVT);
SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {
// The type from TLI is too small to fit the shift amount we want.
// Override it with i32. The shift will have to be legalized.
ShiftAmtTy = MVT::i32;
}
SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
DAG.getNode(ISD::ADD, dl, NVT,
DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
return;
}
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first,
Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc DL(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0));
Lo = R.getValue(0);
Hi = R.getValue(1);
ReplaceValueWith(SDValue(N, 1), R.getValue(2));
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Result = TLI.expandAddSubSat(N, DAG);
SplitInteger(Result, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_SHLSAT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Result = TLI.expandShlSat(N, DAG);
SplitInteger(Result, Lo, Hi);
}
/// This performs an expansion of the integer result for a fixed point
/// multiplication. The default expansion performs rounding down towards
/// negative infinity, though targets that do care about rounding should specify
/// a target hook for rounding and provide their own expansion or lowering of
/// fixed point multiplication to be consistent with rounding.
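/// For example, with Scale = 16 the operation computes (LHS * RHS) >> 16 on
/// the double-width product: in Q16.16, 1.5 * 2.25 (0x18000 * 0x24000)
/// yields 0x36000, i.e. 3.375.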
void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
unsigned VTSize = VT.getScalarSizeInBits();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
uint64_t Scale = N->getConstantOperandVal(2);
bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT ||
N->getOpcode() == ISD::UMULFIXSAT);
bool Signed = (N->getOpcode() == ISD::SMULFIX ||
N->getOpcode() == ISD::SMULFIXSAT);
// Handle special case when scale is equal to zero.
if (!Scale) {
SDValue Result;
if (!Saturating) {
Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
} else {
EVT BoolVT = getSetCCResultType(VT);
unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO;
Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
if (Signed) {
APInt MinVal = APInt::getSignedMinValue(VTSize);
APInt MaxVal = APInt::getSignedMaxValue(VTSize);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ // Xor the inputs; if the resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
} else {
// For unsigned multiplication, we only need to check the max since we
// can't really overflow towards zero.
APInt MaxVal = APInt::getMaxValue(VTSize);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product);
}
}
SplitInteger(Result, Lo, Hi);
return;
}
// For SMULFIX[SAT] we only expect to find Scale<VTSize, but this assert will
// cover for unhandled cases below, while still being valid for UMULFIX[SAT].
assert(Scale <= VTSize && "Scale can't be larger than the value type size.");
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue LL, LH, RL, RH;
GetExpandedInteger(LHS, LL, LH);
GetExpandedInteger(RHS, RL, RH);
SmallVector<SDValue, 4> Result;
unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
LL, LH, RL, RH)) {
report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
return;
}
unsigned NVTSize = NVT.getScalarSizeInBits();
assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
"the size of the current value type");
EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
// After getting the multiplication result in 4 parts, we need to perform a
// shift right by the amount of the scale to get the result in that scale.
//
// Let's say we multiply 2 64 bit numbers. The resulting value can be held in
// 128 bits that are cut into 4 32-bit parts:
//
// HH HL LH LL
// |---32---|---32---|---32---|---32---|
// 128 96 64 32 0
//
// |------VTSize-----|
//
// |NVTSize-|
//
// The resulting Lo and Hi would normally be in LL and LH after the shift. But
// to avoid unnecessary shifting of all 4 parts, we can adjust the shift
// amount and get Lo and Hi using two funnel shifts. Or for the special case
// when Scale is a multiple of NVTSize we can just pick the result without
// shifting.
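// For example, with VTSize = 64, NVTSize = 32 and Scale = 20: Part0 = 0 and
// Scale % NVTSize = 20, so Lo = fshr(Result[1], Result[0], 20) and
// Hi = fshr(Result[2], Result[1], 20). With Scale = 32 instead, Part0 = 1 and
// the parts are picked directly: Lo = Result[1], Hi = Result[2].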
uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
if (Scale % NVTSize) {
SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
ShiftAmount);
Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
ShiftAmount);
} else {
Lo = Result[Part0];
Hi = Result[Part0 + 1];
}
// Unless saturation is requested we are done. The result is in <Hi,Lo>.
if (!Saturating)
return;
// Cannot overflow when there is no integer part.
if (Scale == VTSize)
return;
// To handle saturation we must check for overflow in the multiplication.
//
// Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result)
// aren't all zeroes.
//
// Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result)
// aren't all ones or all zeroes.
//
// We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
// highest bit of HH determines saturation direction in the event of signed
// saturation.
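// For example, with VTSize = 64, NVTSize = 32 and Scale = 16 (unsigned): the
// value fits iff the top VTSize - Scale = 48 bits of the 128-bit product are
// zero, which is exactly the (HH | (HL >> 16)) == 0 test taken below.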
SDValue ResultHL = Result[2];
SDValue ResultHH = Result[3];
SDValue SatMax, SatMin;
SDValue NVTZero = DAG.getConstant(0, dl, NVT);
SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
EVT BoolNVT = getSetCCResultType(NVT);
if (!Signed) {
if (Scale < NVTSize) {
// Overflow happened if ((HH | (HL >> Scale)) != 0).
SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
DAG.getConstant(Scale, dl, ShiftTy));
SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
} else if (Scale == NVTSize) {
// Overflow happened if (HH != 0).
SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
} else if (Scale < VTSize) {
// Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
DAG.getConstant(Scale - NVTSize, dl,
ShiftTy));
SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
} else
llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT"
"(and saturation can't happen with Scale==VTSize).");
Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi);
Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo);
return;
}
if (Scale < NVTSize) {
// The number of overflow bits we can check are VTSize - Scale + 1 (we
// include the sign bit). If these top bits are > 0, then we overflowed past
// the max value. If these top bits are < -1, then we overflowed past the
// min value. Otherwise, we did not overflow.
unsigned OverflowBits = VTSize - Scale + 1;
assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
"Extent of overflow bits must start within HL");
SDValue HLHiMask = DAG.getConstant(
APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
SDValue HLLoMask = DAG.getConstant(
APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
// We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask).
SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT));
// We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask).
SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT));
} else if (Scale == NVTSize) {
// We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1).
SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg));
// We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0).
SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos));
} else if (Scale < VTSize) {
// This is similar to the case when we saturate if Scale < NVTSize, but we
// only need to check HH.
unsigned OverflowBits = VTSize - Scale + 1;
SDValue HHHiMask = DAG.getConstant(
APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
SDValue HHLoMask = DAG.getConstant(
APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
} else
llvm_unreachable("Illegal scale for signed fixed point mul.");
// Saturate to signed maximum.
APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
APInt MaxLo = APInt::getAllOnesValue(NVTSize);
Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
// Saturate to signed minimum.
APInt MinHi = APInt::getSignedMinValue(NVTSize);
Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi);
Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
}
void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
// Try expanding in the existing type first.
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, N->getOperand(0),
N->getOperand(1),
N->getConstantOperandVal(2), DAG);
if (!Res)
Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
N->getConstantOperandVal(2), TLI, DAG);
SplitInteger(Res, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
assert((Node->getOpcode() == ISD::SADDO || Node->getOpcode() == ISD::SSUBO) &&
"Node has unexpected Opcode");
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDLoc dl(Node);
SDValue Ovf;
bool IsAdd = Node->getOpcode() == ISD::SADDO;
unsigned CarryOp = IsAdd ? ISD::SADDO_CARRY : ISD::SSUBO_CARRY;
bool HasCarryOp = TLI.isOperationLegalOrCustom(
CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
if (HasCarryOp) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
GetExpandedInteger(LHS, LHSL, LHSH);
GetExpandedInteger(RHS, RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1));
Lo = DAG.getNode(IsAdd ? ISD::UADDO : ISD::USUBO, dl, VTList, {LHSL, RHSL});
Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
Ovf = Hi.getValue(1);
} else {
// Expand the result by simply replacing it with the equivalent
// non-overflow-checking operation.
SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
LHS, RHS);
SplitInteger(Sum, Lo, Hi);
// Compute the overflow.
//
// LHSSign -> LHS < 0
// RHSSign -> RHS < 0
// SumSign -> Sum < 0
//
// Add:
// Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
// Sub:
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
//
// To get better codegen we can rewrite this by doing bitwise math on
// the integers and extract the final sign bit at the end. So the
// above becomes:
//
// Add:
// Overflow -> (~(LHS ^ RHS) & (LHS ^ Sum)) < 0
// Sub:
// Overflow -> ((LHS ^ RHS) & (LHS ^ Sum)) < 0
//
// NOTE: This is different than the expansion we do in expandSADDSUBO
// because it is more costly to determine the RHS is > 0 for SSUBO with the
// integers split.
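// Worked example for the add case on i8: 100 + 100 wraps to -56. Here
// LHS ^ RHS has a clear sign bit, so ~(LHS ^ RHS) has it set, and LHS ^ Sum
// also has it set, so the AND is negative and overflow is reported.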
EVT VT = LHS.getValueType();
SDValue SignsMatch = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
if (IsAdd)
SignsMatch = DAG.getNOT(dl, SignsMatch, VT);
SDValue SumSignNE = DAG.getNode(ISD::XOR, dl, VT, LHS, Sum);
Ovf = DAG.getNode(ISD::AND, dl, VT, SignsMatch, SumSignNE);
EVT OType = Node->getValueType(1);
Ovf = DAG.getSetCC(dl, OType, Ovf, DAG.getConstant(0, dl, VT), ISD::SETLT);
}
// Use the calculated overflow everywhere.
ReplaceValueWith(SDValue(Node, 1), Ovf);
}
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
SplitInteger(Res.getValue(0), Lo, Hi);
return;
}
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::SDIV_I16;
else if (VT == MVT::i32)
LC = RTLIB::SDIV_I32;
else if (VT == MVT::i64)
LC = RTLIB::SDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::SDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
// If we can emit an efficient shift operation, do so now. Check to see if
// the RHS is a constant.
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return ExpandShiftByConstant(N, CN->getAPIntValue(), Lo, Hi);
// If we can determine that the high bit of the shift is zero or one, even if
// the low bits are variable, emit this shift in an optimized form.
if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
return;
// If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
unsigned PartsOpc;
if (N->getOpcode() == ISD::SHL) {
PartsOpc = ISD::SHL_PARTS;
} else if (N->getOpcode() == ISD::SRL) {
PartsOpc = ISD::SRL_PARTS;
} else {
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
PartsOpc = ISD::SRA_PARTS;
}
// Next check to see if the target supports this SHL_PARTS operation or if it
// will custom expand it. Don't lower this to SHL_PARTS when we optimise for
// size, but create a libcall instead.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
const bool LegalOrCustom =
(Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
Action == TargetLowering::Custom;
if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) {
// Expand the subcomponents.
SDValue LHSL, LHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
EVT VT = LHSL.getValueType();
// If the shift amount operand is coming from a vector legalization it may
// have an illegal type. Fix that first by casting the operand, otherwise
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
assert(ShiftTy.getScalarSizeInBits() >=
Log2_32_Ceil(VT.getScalarSizeInBits()) &&
"ShiftAmountTy is too small to cover the range of this type!");
if (ShiftOp.getValueType() != ShiftTy)
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
SDValue Ops[] = { LHSL, LHSH, ShiftOp };
Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops);
Hi = Lo.getValue(1);
return;
}
// Otherwise, emit a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
bool isSigned;
if (N->getOpcode() == ISD::SHL) {
isSigned = false; /*sign irrelevant*/
if (VT == MVT::i16)
LC = RTLIB::SHL_I16;
else if (VT == MVT::i32)
LC = RTLIB::SHL_I32;
else if (VT == MVT::i64)
LC = RTLIB::SHL_I64;
else if (VT == MVT::i128)
LC = RTLIB::SHL_I128;
} else if (N->getOpcode() == ISD::SRL) {
isSigned = false;
if (VT == MVT::i16)
LC = RTLIB::SRL_I16;
else if (VT == MVT::i32)
LC = RTLIB::SRL_I32;
else if (VT == MVT::i64)
LC = RTLIB::SRL_I64;
else if (VT == MVT::i128)
LC = RTLIB::SRL_I128;
} else {
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
isSigned = true;
if (VT == MVT::i16)
LC = RTLIB::SRA_I16;
else if (VT == MVT::i32)
LC = RTLIB::SRA_I32;
else if (VT == MVT::i64)
LC = RTLIB::SRA_I64;
else if (VT == MVT::i128)
LC = RTLIB::SRA_I128;
}
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(isSigned);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
return;
}
if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
llvm_unreachable("Unsupported shift!");
}
void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
SDValue Op = N->getOperand(0);
if (Op.getValueType().bitsLE(NVT)) {
// The low part is sign extension of the input (degenerates to a copy).
Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
// The high part is obtained by SRA'ing all but one of the bits of low part.
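// E.g. sign-extending i16 to an expanded i64 with NVT = i32: Lo is the i32
// sign extension and Hi = Lo >> 31 (arithmetic), i.e. 0 or -1.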
unsigned LoSize = NVT.getSizeInBits();
Hi = DAG.getNode(
ISD::SRA, dl, NVT, Lo,
DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout())));
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
assert(getTypeAction(Op.getValueType()) ==
TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
"Operand over promoted?");
// Split the promoted operand. This will simplify when it is expanded.
SplitInteger(Res, Lo, Hi);
unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
ExcessBits)));
}
}
void DAGTypeLegalizer::
ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (EVT.bitsLE(Lo.getValueType())) {
// sext_inreg the low part if needed.
Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
N->getOperand(1));
// The high part gets the sign extension from the lo-part. This handles
// things like sextinreg V:i64 from i8.
Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
DAG.getConstant(Hi.getValueSizeInBits() - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
} else {
// For example, extension of an i48 to an i64. Leave the low part alone,
// sext_inreg the high part.
unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
ExcessBits)));
}
}
void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
SplitInteger(Res.getValue(1), Lo, Hi);
return;
}
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::SREM_I16;
else if (VT == MVT::i32)
LC = RTLIB::SREM_I32;
else if (VT == MVT::i64)
LC = RTLIB::SREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::SREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(),
N->getOperand(0),
DAG.getConstant(NVT.getSizeInBits(), dl,
TLI.getPointerTy(DAG.getDataLayout())));
Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
if (N->getOpcode() == ISD::UMULO) {
// This section expands the operation into the following sequence of
// instructions. `iNh` here refers to a type which has half the bit width of
// the type the original operation operated on.
//
// %0 = %LHS.HI != 0 && %RHS.HI != 0
// %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
// %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
// %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN)
// %4 = add iNh %1.0, %2.0 as iN
// %5 = { iNh, i1 } @uadd.with.overflow.iNh(iNh %4, iNh %3.HIGH)
//
// %lo = %3.LO
// %hi = %5.0
// %ovf = %0 || %1.1 || %2.1 || %5.1
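// Intuition: writing LHS = LH*2^h + LL and RHS = RH*2^h + RL, the full
// product is LH*RH*2^(2h) + (LH*RL + LL*RH)*2^h + LL*RL. Overflow occurs
// exactly when LH*RH is nonzero (%0), when a cross product does not fit in
// h bits (%1.1, %2.1), or when the final add carries out of the high half
// (%5.1); when %0 is false at most one cross product is nonzero, so the
// plain add for %4 cannot itself wrap.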
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
GetExpandedInteger(LHS, LHSLow, LHSHigh);
GetExpandedInteger(RHS, RHSLow, RHSHigh);
EVT HalfVT = LHSLow.getValueType();
EVT BitVT = N->getValueType(1);
SDVTList VTHalfWithO = DAG.getVTList(HalfVT, BitVT);
SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, LHSHigh, RHSLow);
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, RHSHigh, LHSLow);
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
SDValue HighSum = DAG.getNode(ISD::ADD, dl, HalfVT, One, Two);
// Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
// know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn't this
// operation recursively legalized?).
//
// Many backends understand this pattern and will convert into LOHI
// themselves, if applicable.
SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
SplitInteger(Three, Lo, Hi);
Hi = DAG.getNode(ISD::UADDO, dl, VTHalfWithO, Hi, HighSum);
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
}
Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
// Replace this with a libcall that will check overflow.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i32)
LC = RTLIB::MULO_I32;
else if (VT == MVT::i64)
LC = RTLIB::MULO_I64;
else if (VT == MVT::i128)
LC = RTLIB::MULO_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
// Temporary for the overflow value, default it to zero.
SDValue Chain =
DAG.getStore(DAG.getEntryNode(), dl, DAG.getConstant(0, dl, PtrVT), Temp,
MachinePointerInfo());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (const SDValue &Op : N->op_values()) {
EVT ArgVT = Op.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
Entry.IsSExt = true;
Entry.IsZExt = false;
Args.push_back(Entry);
}
// Also pass the address of the overflow check.
Entry.Node = Temp;
Entry.Ty = PtrTy->getPointerTo();
Entry.IsSExt = true;
Entry.IsZExt = false;
Args.push_back(Entry);
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
.setSExtResult();
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
SplitInteger(CallInfo.first, Lo, Hi);
SDValue Temp2 =
DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo());
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
DAG.getConstant(0, dl, PtrVT),
ISD::SETNE);
// Use the overflow from the libcall everywhere.
ReplaceValueWith(SDValue(N, 1), Ofl);
}
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
SplitInteger(Res.getValue(0), Lo, Hi);
return;
}
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::UDIV_I16;
else if (VT == MVT::i32)
LC = RTLIB::UDIV_I32;
else if (VT == MVT::i64)
LC = RTLIB::UDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
SplitInteger(Res.getValue(1), Lo, Hi);
return;
}
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::UREM_I16;
else if (VT == MVT::i32)
LC = RTLIB::UREM_I32;
else if (VT == MVT::i64)
LC = RTLIB::UREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
TargetLowering::MakeLibCallOptions CallOptions;
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
SDValue Op = N->getOperand(0);
if (Op.getValueType().bitsLE(NVT)) {
// The low part is zero extension of the input (degenerates to a copy).
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero.
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
assert(getTypeAction(Op.getValueType()) ==
TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
"Operand over promoted?");
// Split the promoted operand. This will simplify when it is expanded.
SplitInteger(Res, Lo, Hi);
unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getZeroExtendInReg(Hi, dl,
EVT::getIntegerVT(*DAG.getContext(),
ExcessBits));
}
}
void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue Swap = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
}
void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// TODO For VECREDUCE_(AND|OR|XOR) we could split the vector and calculate
// both halves independently.
SDValue Res = TLI.expandVecReduce(N, DAG);
SplitInteger(Res, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Lower the rotate to shifts and ORs which can be expanded.
SDValue Res;
TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
SplitInteger(Res, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Lower the funnel shift to shifts and ORs which can be expanded.
SDValue Res;
TLI.expandFunnelShift(N, Res, DAG);
SplitInteger(Res, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT VT = N->getValueType(0);
EVT HalfVT =
EVT::getIntegerVT(*DAG.getContext(), N->getValueSizeInBits(0) / 2);
SDLoc dl(N);
// We assume VSCALE(1) fits into a legal integer.
APInt One(HalfVT.getSizeInBits(), 1);
SDValue VScaleBase = DAG.getVScale(dl, HalfVT, One);
VScaleBase = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, VScaleBase);
SDValue Res = DAG.getNode(ISD::MUL, dl, VT, VScaleBase, N->getOperand(0));
SplitInteger(Res, Lo, Hi);
}
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
//===----------------------------------------------------------------------===//
/// ExpandIntegerOperand - This method is called when the specified operand of
/// the specified node is found to need expansion. At this point, all of the
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
return false;
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
report_fatal_error("Do not know how to expand this operator's operand!");
case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
case ISD::STRICT_UINT_TO_FP:
case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
case ISD::RETURNADDR:
case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
// core about this.
if (Res.getNode() == N)
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
SDValue &NewRHS,
ISD::CondCode &CCCode,
const SDLoc &dl) {
SDValue LHSLo, LHSHi, RHSLo, RHSHi;
GetExpandedInteger(NewLHS, LHSLo, LHSHi);
GetExpandedInteger(NewRHS, RHSLo, RHSHi);
if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
if (RHSLo == RHSHi) {
if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
if (RHSCST->isAllOnesValue()) {
// Equality comparison to -1.
NewLHS = DAG.getNode(ISD::AND, dl,
LHSLo.getValueType(), LHSLo, LHSHi);
NewRHS = RHSLo;
return;
}
}
}
NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType());
return;
}
// If this is a comparison of the sign bit, just look at the top part.
// X > -1, x < 0
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
(CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
NewLHS = LHSHi;
NewRHS = RHSHi;
return;
}
// FIXME: This generated code sucks.
ISD::CondCode LowCC;
switch (CCCode) {
default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETLT:
case ISD::SETULT: LowCC = ISD::SETULT; break;
case ISD::SETGT:
case ISD::SETUGT: LowCC = ISD::SETUGT; break;
case ISD::SETLE:
case ISD::SETULE: LowCC = ISD::SETULE; break;
case ISD::SETGE:
case ISD::SETUGE: LowCC = ISD::SETUGE; break;
}
// LoCmp = lo(op1) < lo(op2) // Always unsigned comparison
// HiCmp = hi(op1) < hi(op2) // Signedness depends on operands
// dest = hi(op1) == hi(op2) ? LoCmp : HiCmp;
// NOTE: on targets without efficient SELECT of bools, we can always use
// this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
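// For instance, a signed 64-bit "a < b" split into 32-bit halves becomes
// hi(a) == hi(b) ? (lo(a) <u lo(b)) : (hi(a) <s hi(b)).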
TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true,
nullptr);
SDValue LoCmp, HiCmp;
if (TLI.isTypeLegal(LHSLo.getValueType()) &&
TLI.isTypeLegal(RHSLo.getValueType()))
LoCmp = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo,
RHSLo, LowCC, false, DagCombineInfo, dl);
if (!LoCmp.getNode())
LoCmp = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo,
RHSLo, LowCC);
if (TLI.isTypeLegal(LHSHi.getValueType()) &&
TLI.isTypeLegal(RHSHi.getValueType()))
HiCmp = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi,
RHSHi, CCCode, false, DagCombineInfo, dl);
if (!HiCmp.getNode())
HiCmp =
DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()),
LHSHi, RHSHi, DAG.getCondCode(CCCode));
ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode());
ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode());
bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) ||
(!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) ||
(LoCmpC && LoCmpC->isNullValue())))) {
// For LE / GE, if high part is known false, ignore the low part.
// For LT / GT: if low part is known false, return the high part.
// if high part is known true, ignore the low part.
NewLHS = HiCmp;
NewRHS = SDValue();
return;
}
if (LHSHi == RHSHi) {
// Comparing the low bits is enough.
NewLHS = LoCmp;
NewRHS = SDValue();
return;
}
// Lower with SETCCCARRY if the target supports it.
EVT HiVT = LHSHi.getValueType();
EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT);
bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT);
// FIXME: Make all targets support this, then remove the other lowering.
if (HasSETCCCARRY) {
// SETCCCARRY can detect < and >= directly. For > and <=, flip
// operands and condition code.
bool FlipOperands = false;
switch (CCCode) {
case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break;
case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break;
case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break;
case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break;
default: break;
}
if (FlipOperands) {
std::swap(LHSLo, RHSLo);
std::swap(LHSHi, RHSHi);
}
// Perform a wide subtraction, feeding the carry from the low part into
// SETCCCARRY. The SETCCCARRY operation is essentially looking at the high
// part of the result of LHS - RHS. It is negative iff LHS < RHS. It is
// zero or positive iff LHS >= RHS.
EVT LoVT = LHSLo.getValueType();
SDVTList VTList = DAG.getVTList(LoVT, getSetCCResultType(LoVT));
SDValue LowCmp = DAG.getNode(ISD::USUBO, dl, VTList, LHSLo, RHSLo);
SDValue Res = DAG.getNode(ISD::SETCCCARRY, dl, getSetCCResultType(HiVT),
LHSHi, RHSHi, LowCmp.getValue(1),
DAG.getCondCode(CCCode));
NewLHS = Res;
NewRHS = SDValue();
return;
}
NewLHS = TLI.SimplifySetCC(getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ,
false, DagCombineInfo, dl);
if (!NewLHS.getNode())
NewLHS =
DAG.getSetCC(dl, getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ);
NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), NewLHS, LoCmp, HiCmp);
NewRHS = SDValue();
}
SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!NewRHS.getNode()) {
NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
CCCode = ISD::SETNE;
}
// Update N to have the operands specified.
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
DAG.getCondCode(CCCode), NewLHS, NewRHS,
N->getOperand(4)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!NewRHS.getNode()) {
NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
CCCode = ISD::SETNE;
}
// Update N to have the operands specified.
return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
N->getOperand(2), N->getOperand(3),
DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
// If ExpandSetCCOperands returned a scalar, use it.
if (!NewRHS.getNode()) {
assert(NewLHS.getValueType() == N->getValueType(0) &&
"Unexpected setcc expansion!");
return NewLHS;
}
// Otherwise, update N to have the operands specified.
return SDValue(
DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Carry = N->getOperand(2);
SDValue Cond = N->getOperand(3);
SDLoc dl = SDLoc(N);
SDValue LHSLo, LHSHi, RHSLo, RHSHi;
GetExpandedInteger(LHS, LHSLo, LHSHi);
GetExpandedInteger(RHS, RHSLo, RHSHi);
// Expand to a SUBE for the low part and a smaller SETCCCARRY for the high.
SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType());
SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry);
return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi,
LowCmp.getValue(1), Cond);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SPLAT_VECTOR(SDNode *N) {
// Split the operand and replace with SPLAT_VECTOR_PARTS.
SDValue Lo, Hi;
GetExpandedInteger(N->getOperand(0), Lo, Hi);
return DAG.getNode(ISD::SPLAT_VECTOR_PARTS, SDLoc(N), N->getValueType(0), Lo,
Hi);
}
SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
// The value being shifted is legal, but the shift amount is too big.
// It follows that either the result of the shift is undefined, or the
// upper half of the shift amount is zero. Just use the lower half.
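// E.g. shifting an i32 by an expanded i64 amount: a nonzero upper half means
// the amount is at least 2^32, far beyond the bit width, so the result is
// undefined anyway and the low half of the amount suffices.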
SDValue Lo, Hi;
GetExpandedInteger(N->getOperand(1), Lo, Hi);
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
// The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant,
// which causes problems on 8/16-bit targets. Just truncate the constant to a
// valid type.
SDValue Lo, Hi;
GetExpandedInteger(N->getOperand(0), Lo, Hi);
return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT DstVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp =
TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
if (!IsStrict)
return Tmp.first;
ReplaceValueWith(SDValue(N, 1), Tmp.second);
ReplaceValueWith(SDValue(N, 0), Tmp.first);
return SDValue();
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (N->isAtomic()) {
// It's typical to have larger CAS than atomic store instructions.
SDLoc dl(N);
SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
N->getMemoryVT(),
N->getOperand(0), N->getOperand(2),
N->getOperand(1),
N->getMemOperand());
return Swap.getValue(1);
}
if (ISD::isNormalStore(N))
return ExpandOp_NormalStore(N, OpNo);
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
assert(OpNo == 1 && "Can only expand the stored value so far");
EVT VT = N->getOperand(1).getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
SDValue Lo, Hi;
assert(NVT.isByteSized() && "Expanded type not byte sized!");
if (N->getMemoryVT().bitsLE(NVT)) {
GetExpandedInteger(N->getValue(), Lo, Hi);
return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getMemoryVT(), N->getOriginalAlign(), MMOFlags,
AAInfo);
}
if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getOriginalAlign(), MMOFlags, AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, N->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
// Big-endian - high bits are at low addresses. Favor aligned stores at
// the cost of some bit-fiddling.
GetExpandedInteger(N->getValue(), Lo, Hi);
EVT ExtVT = N->getMemoryVT();
unsigned EBytes = ExtVT.getStoreSize();
unsigned IncrementSize = NVT.getSizeInBits()/8;
unsigned ExcessBits = (EBytes - IncrementSize)*8;
EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
ExtVT.getSizeInBits() - ExcessBits);
if (ExcessBits < NVT.getSizeInBits()) {
// Transfer high bits from the top of Lo to the bottom of Hi.
Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
TLI.getPointerTy(DAG.getDataLayout())));
Hi = DAG.getNode(
ISD::OR, dl, NVT, Hi,
DAG.getNode(ISD::SRL, dl, NVT, Lo,
DAG.getConstant(ExcessBits, dl,
TLI.getPointerTy(DAG.getDataLayout()))));
}
// Store both the high bits and maybe some of the low bits.
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT,
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
N->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
// Just truncate the low part of the source.
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL);
}
SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT DstVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp =
TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
if (!IsStrict)
return Tmp.first;
ReplaceValueWith(SDValue(N, 1), Tmp.second);
ReplaceValueWith(SDValue(N, 0), Tmp.first);
return SDValue();
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
SDLoc dl(N);
SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
cast<AtomicSDNode>(N)->getMemoryVT(),
N->getOperand(0),
N->getOperand(1), N->getOperand(2),
cast<AtomicSDNode>(N)->getMemOperand());
return Swap.getValue(1);
}
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue V1 = GetPromotedInteger(N->getOperand(1));
EVT OutVT = V0.getValueType();
return DAG.getNode(ISD::VECTOR_SPLICE, dl, OutVT, V0, V1, N->getOperand(2));
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
EVT NOutVTElem = NOutVT.getVectorElementType();
SDLoc dl(N);
SDValue BaseIdx = N->getOperand(1);
// TODO: We may be able to use this for types other than scalable
// vectors and fix those tests that expect BUILD_VECTOR to be used
if (OutVT.isScalableVector()) {
SDValue InOp0 = N->getOperand(0);
EVT InVT = InOp0.getValueType();
// Promote operands and see if this is handled by target lowering,
// Otherwise, use the BUILD_VECTOR approach below
if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
// Collect the (promoted) operands
SDValue Ops[] = { GetPromotedInteger(InOp0), BaseIdx };
EVT PromEltVT = Ops[0].getValueType().getVectorElementType();
assert(PromEltVT.bitsLE(NOutVTElem) &&
"Promoted operand has an element type greater than result");
EVT ExtVT = NOutVT.changeVectorElementType(PromEltVT);
SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), ExtVT, Ops);
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);
}
}
if (OutVT.isScalableVector())
report_fatal_error("Unable to promote scalable types using BUILD_VECTOR");
SDValue InOp0 = N->getOperand(0);
if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger)
InOp0 = GetPromotedInteger(N->getOperand(0));
EVT InVT = InOp0.getValueType();
unsigned OutNumElems = OutVT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
Ops.reserve(OutNumElems);
for (unsigned i = 0; i != OutNumElems; ++i) {
// Extract the element from the original vector.
SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType()));
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
InVT.getVectorElementType(), N->getOperand(0), Index);
SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem);
// Insert the converted element to the new vector.
Ops.push_back(Op);
}
return DAG.getBuildVector(NOutVT, dl, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_SUBVECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
SDLoc dl(N);
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
SDValue Idx = N->getOperand(2);
EVT SubVecVT = SubVec.getValueType();
EVT NSubVT =
EVT::getVectorVT(*DAG.getContext(), NOutVT.getVectorElementType(),
SubVecVT.getVectorElementCount());
Vec = GetPromotedInteger(Vec);
SubVec = DAG.getNode(ISD::ANY_EXTEND, dl, NSubVT, SubVec);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NOutVT, Vec, SubVec, Idx);
}
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
EVT OutVT = V0.getValueType();
return DAG.getNode(ISD::VECTOR_REVERSE, dl, OutVT, V0);
}
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
EVT VT = N->getValueType(0);
SDLoc dl(N);
ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements());
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue V1 = GetPromotedInteger(N->getOperand(1));
EVT OutVT = V0.getValueType();
return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask);
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
unsigned NumElems = N->getNumOperands();
EVT NOutVTElem = NOutVT.getVectorElementType();
SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
Ops.reserve(NumElems);
for (unsigned i = 0; i != NumElems; ++i) {
SDValue Op;
// BUILD_VECTOR integer operand types are allowed to be larger than the
// result's element type. This may still be true after the promotion. For
// example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to
// (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>.
if (N->getOperand(i).getValueType().bitsLT(NOutVTElem))
Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
else
Op = N->getOperand(i);
Ops.push_back(Op);
}
return DAG.getBuildVector(NOutVT, dl, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
SDLoc dl(N);
assert(!N->getOperand(0).getValueType().isVector() &&
"Input must be a scalar");
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
EVT NOutVTElem = NOutVT.getVectorElementType();
SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
SDLoc dl(N);
SDValue SplatVal = N->getOperand(0);
assert(!SplatVal.getValueType().isVector() && "Input must be a scalar");
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "Type must be promoted to a vector type");
EVT NOutElemVT = NOutVT.getVectorElementType();
SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal);
return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
SDLoc dl(N);
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "Type must be promoted to a vector type");
APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
return DAG.getStepVector(dl, NOutVT,
StepVal.sext(NOutVT.getScalarSizeInBits()));
}
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
EVT OutElemTy = NOutVT.getVectorElementType();
unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
unsigned NumOutElem = NOutVT.getVectorNumElements();
unsigned NumOperands = N->getNumOperands();
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
// Gather the elements from each input vector, converting them to the
// promoted output element type.
SmallVector<SDValue, 8> Ops(NumOutElem);
for (unsigned i = 0; i < NumOperands; ++i) {
SDValue Op = N->getOperand(i);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger)
Op = GetPromotedInteger(Op);
EVT SclrTy = Op.getValueType().getVectorElementType();
assert(NumElem == Op.getValueType().getVectorNumElements() &&
"Unexpected number of elements");
for (unsigned j = 0; j < NumElem; ++j) {
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
DAG.getVectorIdxConstant(j, dl));
Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy);
}
}
return DAG.getBuildVector(NOutVT, dl, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) {
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
assert(NVT.isVector() && "This type must be promoted to a vector type");
SDLoc dl(N);
// For operands whose TypeAction is to promote, extend the promoted node
// appropriately (ZERO_EXTEND or SIGN_EXTEND) from the original pre-promotion
// type, and then construct a new *_EXTEND_VECTOR_INREG node to the promote-to
// type.
if (getTypeAction(N->getOperand(0).getValueType())
== TargetLowering::TypePromoteInteger) {
SDValue Promoted;
switch(N->getOpcode()) {
case ISD::SIGN_EXTEND_VECTOR_INREG:
Promoted = SExtPromotedInteger(N->getOperand(0));
break;
case ISD::ZERO_EXTEND_VECTOR_INREG:
Promoted = ZExtPromotedInteger(N->getOperand(0));
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
Promoted = GetPromotedInteger(N->getOperand(0));
break;
default:
llvm_unreachable("Node has unexpected Opcode");
}
return DAG.getNode(N->getOpcode(), dl, NVT, Promoted);
}
// Directly extend to the appropriate transform-to type.
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
EVT NOutVTElem = NOutVT.getVectorElementType();
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
NOutVTElem, N->getOperand(1));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
V0, ConvElem, N->getOperand(2));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
// The VECREDUCE result size may be larger than the element size, so
// we can simply change the result type.
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl,
TLI.getVectorIdxTy(DAG.getDataLayout()));
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
V0->getValueType(0).getScalarType(), V0, V1);
// EXTRACT_VECTOR_ELT can return types which are wider than the incoming
// element types. If this is the case then we need to expand the outgoing
// value and not truncate it.
return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
}
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
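// Promote the source vector, extract the subvector with the promoted element
// type, and truncate the result back to the original subvector type.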
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
MVT InVT = V0.getValueType().getSimpleVT();
MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(),
N->getValueType(0).getVectorNumElements());
SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0, N->getOperand(1));
return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
}
SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
EVT ResVT = N->getValueType(0);
unsigned NumElems = N->getNumOperands();
if (ResVT.isScalableVector()) {
SDValue ResVec = DAG.getUNDEF(ResVT);
for (unsigned OpIdx = 0; OpIdx < NumElems; ++OpIdx) {
SDValue Op = N->getOperand(OpIdx);
unsigned OpNumElts = Op.getValueType().getVectorMinNumElements();
ResVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ResVec, Op,
DAG.getIntPtrConstant(OpIdx * OpNumElts, dl));
}
return ResVec;
}
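// For fixed-length vectors, promote each operand, extract its elements,
// truncate them back to the original element type, and rebuild the result
// with BUILD_VECTOR.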
EVT RetSclrTy = N->getValueType(0).getVectorElementType();
SmallVector<SDValue, 8> NewOps;
NewOps.reserve(NumElems);
// For each incoming vector
for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) {
SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
for (unsigned i=0; i<NumElem; ++i) {
// Extract element from incoming vector
SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming,
DAG.getVectorIdxConstant(i, dl));
SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
NewOps.push_back(Tr);
}
}
return DAG.getBuildVector(N->getValueType(0), dl, NewOps);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a08548393979..bd2ebfd0bd3b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,11153 +1,11155 @@
//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//
#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
#include <cstring>
#include <iterator>
#include <limits>
#include <numeric>
#include <tuple>
using namespace llvm;
using namespace PatternMatch;
using namespace SwitchCG;
#define DEBUG_TYPE "isel"
/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;
static cl::opt<bool>
InsertAssertAlign("insert-assert-align", cl::init(true),
cl::desc("Insert the experimental `assertalign` node."),
cl::ReallyHidden);
static cl::opt<unsigned, true>
LimitFPPrecision("limit-float-precision",
cl::desc("Generate low-precision inline sequences "
"for some float libcalls"),
cl::location(LimitFloatPrecision), cl::Hidden,
cl::init(0));
static cl::opt<unsigned> SwitchPeelThreshold(
"switch-peel-threshold", cl::Hidden, cl::init(66),
cl::desc("Set the case probability threshold for peeling the case from a "
"switch statement. A value greater than 100 will void this "
"optimization"));
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
// %buffer = alloca [4096 x i8]
// %data = load [4096 x i8]* %argPtr
// store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC = None,
Optional<ISD::NodeType> AssertOp = None) {
// Let the target assemble the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
PartVT, ValueVT, CC))
return Val;
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
CC);
assert(NumParts > 0 && "No parts to assemble!");
SDValue Val = Parts[0];
if (NumParts > 1) {
// Assemble the value from multiple parts.
if (ValueVT.isInteger()) {
unsigned PartBits = PartVT.getSizeInBits();
unsigned ValueBits = ValueVT.getSizeInBits();
// Assemble the power of 2 part.
unsigned RoundParts =
(NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
unsigned RoundBits = PartBits * RoundParts;
EVT RoundVT = RoundBits == ValueBits ?
ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
SDValue Lo, Hi;
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
PartVT, HalfVT, V);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT, V);
} else {
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
}
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
if (RoundParts < NumParts) {
// Assemble the trailing non-power-of-2 part.
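// For example, when assembling an i96 value from three i32 parts, the two
// round parts were combined into an i64 above; the remaining i32 part is
// any-extended to i96, shifted left by 64 bits, and OR'd with the
// zero-extended low half below.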
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
OddVT, V, CC);
// Combine the round and odd parts.
Lo = Val;
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
Hi =
DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
DAG.getConstant(Lo.getValueSizeInBits(), DL,
TLI.getPointerTy(DAG.getDataLayout())));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
} else if (PartVT.isFloatingPoint()) {
// FP split into multiple FP parts (for ppcf128)
assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
"Unexpected split");
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
} else {
// FP split into integer parts (soft fp)
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
}
}
// There is now one part, held in Val. Correct it to match ValueVT.
// PartEVT is the type of the register class that holds the value.
// ValueVT is the type of the inline asm operation.
EVT PartEVT = Val.getValueType();
if (PartEVT == ValueVT)
return Val;
if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
ValueVT.bitsLT(PartEVT)) {
// For an FP value in an integer part, we need to truncate to the right
// width first.
PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
}
// Handle types that have the same size.
if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
// Handle types with different sizes.
if (PartEVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extended.
if (AssertOp.hasValue())
Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
DAG.getValueType(ValueVT));
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(
ISD::FP_ROUND, DL, ValueVT, Val,
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
// Handle MMX to a narrower integer type by bitcasting MMX to integer and
// then truncating.
if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
ValueVT.bitsLT(PartEVT)) {
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
report_fatal_error("Unknown mismatch in getCopyFromParts!");
}
static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
const Twine &ErrMsg) {
const Instruction *I = dyn_cast_or_null<Instruction>(V);
if (!V)
return Ctx.emitError(ErrMsg);
const char *AsmError = ", possible invalid constraint for vector type";
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (CI->isInlineAsm())
return Ctx.emitError(I, ErrMsg + AsmError);
return Ctx.emitError(I, ErrMsg);
}
/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const bool IsABIRegCopy = CallConv.hasValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
// Handle a multi-element vector.
if (NumParts > 1) {
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
*DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(RegisterVT.getSizeInBits() ==
Parts[0].getSimpleValueType().getSizeInBits() &&
"Part type sizes don't match!");
// Assemble the parts into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
if (NumIntermediates == NumParts) {
// If the register was not expanded, truncate or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
PartVT, IntermediateVT, V, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
// operands from the parts.
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
PartVT, IntermediateVT, V, CallConv);
}
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
EVT BuiltVectorTy =
IntermediateVT.isVector()
? EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(),
IntermediateVT.getVectorElementCount() * NumParts)
: EVT::getVectorVT(*DAG.getContext(),
IntermediateVT.getScalarType(),
NumIntermediates);
Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
DL, BuiltVectorTy, Ops);
}
// There is now one part, held in Val. Correct it to match ValueVT.
EVT PartEVT = Val.getValueType();
if (PartEVT == ValueVT)
return Val;
if (PartEVT.isVector()) {
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
ValueVT.getVectorElementCount().getKnownMinValue()) &&
(PartEVT.getVectorElementCount().isScalable() ==
ValueVT.getVectorElementCount().isScalable()) &&
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getVectorIdxConstant(0, DL));
}
// Vector/Vector bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
}
// Trivial bitcast if the types are the same size and the destination
// vector type is legal.
if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
TLI.isTypeLegal(ValueVT))
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
if (ValueVT.getVectorNumElements() != 1) {
// Certain ABIs require that vectors are passed as integers. If the vectors
// are the same size, this is an obvious bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
} else if (ValueVT.bitsLT(PartEVT)) {
const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
// Drop the extra bits.
Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
return DAG.getBitcast(ValueVT, Val);
}
diagnosePossiblyInvalidConstraint(
*DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
return DAG.getUNDEF(ValueVT);
}
// Handle cases such as i8 -> <1 x i1>
EVT ValueSVT = ValueVT.getVectorElementType();
if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits())
Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
else
Val = ValueVT.isFloatingPoint()
? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
: DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
}
return DAG.getBuildVector(ValueVT, DL, Val);
}
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
Optional<CallingConv::ID> CallConv);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
SDValue *Parts, unsigned NumParts, MVT PartVT,
const Value *V,
Optional<CallingConv::ID> CallConv = None,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
// Let the target split the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
CallConv))
return;
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
CallConv);
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
"Copying to an illegal type!");
if (NumParts == 0)
return;
assert(!ValueVT.isVector() && "Vector case handled elsewhere");
EVT PartEVT = PartVT;
if (PartEVT == ValueVT) {
assert(NumParts == 1 && "No-op copy with multiple parts!");
Parts[0] = Val;
return;
}
if (NumParts * PartBits > ValueVT.getSizeInBits()) {
// If the parts cover more bits than the value has, promote the value.
if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
if (ValueVT.isFloatingPoint()) {
// FP values need to be bitcast, then extended if they are being put
// into a larger container.
ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
if (PartVT == MVT::x86mmx)
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
assert(NumParts == 1 && PartEVT != ValueVT);
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover fewer bits than the value has, truncate the value.
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
if (PartVT == MVT::x86mmx)
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
// The value may have changed - recompute ValueVT.
ValueVT = Val.getValueType();
assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
if (PartEVT != ValueVT) {
diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
"scalar-to-vector conversion failed");
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
Parts[0] = Val;
return;
}
// Expand the value into multiple parts.
if (NumParts & (NumParts - 1)) {
// The number of parts is not a power of 2. Split off and copy the tail.
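// For example, splitting an i96 value into three i32 parts: the top 32 bits
// are shifted down and emitted as the single odd part here, then the value
// is truncated to i64 and handled by the power-of-2 bisection below.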
assert(PartVT.isInteger() && ValueVT.isInteger() &&
"Do not know what to expand to!");
unsigned RoundParts = 1 << Log2_32(NumParts);
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
CallConv);
if (DAG.getDataLayout().isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
std::reverse(Parts + RoundParts, Parts + NumParts);
NumParts = RoundParts;
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
// The number of parts is a power of 2. Repeatedly bisect the value using
// EXTRACT_ELEMENT.
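// For example, with four i32 parts of an i128 value, the first pass stores
// the two i64 halves at Parts[0] and Parts[2]; the second pass splits each
// half into its two i32 parts.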
Parts[0] = DAG.getNode(ISD::BITCAST, DL,
EVT::getIntegerVT(*DAG.getContext(),
ValueVT.getSizeInBits()),
Val);
for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
for (unsigned i = 0; i < NumParts; i += StepSize) {
unsigned ThisBits = StepSize * PartBits / 2;
EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
SDValue &Part0 = Parts[i];
SDValue &Part1 = Parts[i+StepSize/2];
Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
if (ThisBits == PartBits && ThisVT != PartVT) {
Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
}
}
}
if (DAG.getDataLayout().isBigEndian())
std::reverse(Parts, Parts + OrigNumParts);
}
static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
const SDLoc &DL, EVT PartVT) {
if (!PartVT.isVector())
return SDValue();
EVT ValueVT = Val.getValueType();
ElementCount PartNumElts = PartVT.getVectorElementCount();
ElementCount ValueNumElts = ValueVT.getVectorElementCount();
// We only support widening vectors with equivalent element types and
// fixed/scalable properties. If a target needs to widen a fixed-length type
// to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
PartNumElts.isScalable() != ValueNumElts.isScalable() ||
PartVT.getVectorElementType() != ValueVT.getVectorElementType())
return SDValue();
// Widening a scalable vector to another scalable vector is done by inserting
// the vector into a larger undef one.
if (PartNumElts.isScalable())
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
Val, DAG.getVectorIdxConstant(0, DL));
EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
// undef elements.
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(Val, Ops);
SDValue EltUndef = DAG.getUNDEF(ElementVT);
Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);
// FIXME: Use CONCAT for 2x -> 4x.
return DAG.getBuildVector(PartVT, DL, Ops);
}
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
Optional<CallingConv::ID> CallConv) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const bool IsABIRegCopy = CallConv.hasValue();
if (NumParts == 1) {
EVT PartEVT = PartVT;
if (PartEVT == ValueVT) {
// Nothing to do.
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
Val = Widened;
} else if (PartVT.isVector() &&
PartEVT.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
PartEVT.getVectorElementCount() ==
ValueVT.getVectorElementCount()) {
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else {
if (ValueVT.getVectorElementCount().isScalar()) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getVectorIdxConstant(0, DL));
} else {
uint64_t ValueSize = ValueVT.getFixedSizeInBits();
assert(PartVT.getFixedSizeInBits() > ValueSize &&
"lossy conversion of vector to scalar type");
EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
Val = DAG.getBitcast(IntermediateType, Val);
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
}
}
assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
Parts[0] = Val;
return;
}
// Handle a multi-element vector.
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
*DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
"Mixing scalable and fixed vectors when copying in parts");
Optional<ElementCount> DestEltCnt;
if (IntermediateVT.isVector())
DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
else
DestEltCnt = ElementCount::getFixed(NumIntermediates);
EVT BuiltVectorTy = EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue());
if (ValueVT == BuiltVectorTy) {
// Nothing to do.
} else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
} else if (SDValue Widened =
widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
Val = Widened;
} else if (BuiltVectorTy.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
BuiltVectorTy.getVectorElementCount() ==
ValueVT.getVectorElementCount()) {
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy);
}
assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
if (IntermediateVT.isVector()) {
// This does something sensible for scalable vectors - see the
// definition of EXTRACT_SUBVECTOR for further details.
unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
Ops[i] =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
DAG.getVectorIdxConstant(i * IntermediateNumElts, DL));
} else {
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
DAG.getVectorIdxConstant(i, DL));
}
}
// Split the intermediate operands into legal parts.
if (NumParts == NumIntermediates) {
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each value into
// legal parts.
assert(NumIntermediates != 0 && "division by zero");
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
CallConv);
}
}
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
EVT valuevt, Optional<CallingConv::ID> CC)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
RegCount(1, regs.size()), CallConv(CC) {}
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
Optional<CallingConv::ID> CC) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
CallConv = CC;
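// For each legal value type, record the register type, the number of
// registers needed, and the consecutive registers (starting at Reg) that
// will hold it.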
for (EVT ValueVT : ValueVTs) {
unsigned NumRegs =
isABIMangled()
? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
: TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT =
isABIMangled()
? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
: TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
RegCount.push_back(NumRegs);
Reg += NumRegs;
}
}
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain,
SDValue *Flag, const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Assemble the legal parts into the final values.
SmallVector<SDValue, 4> Values(ValueVTs.size());
SmallVector<SDValue, 8> Parts;
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
*DAG.getContext(),
CallConv.getValue(), RegVTs[Value])
: RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue P;
if (!Flag) {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
} else {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
*Flag = P.getValue(2);
}
Chain = P.getValue(1);
Parts[i] = P;
// If the source register was virtual and if we know something about it,
// add an assert node.
if (!Register::isVirtualRegister(Regs[Part + i]) ||
!RegisterVT.isInteger())
continue;
const FunctionLoweringInfo::LiveOutInfo *LOI =
FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
if (!LOI)
continue;
unsigned RegSize = RegisterVT.getScalarSizeInBits();
unsigned NumSignBits = LOI->NumSignBits;
unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
if (NumZeroBits == RegSize) {
// The current value is a zero.
// Explicitly express that as it would be easier for
// optimizations to kick in.
Parts[i] = DAG.getConstant(0, dl, RegisterVT);
continue;
}
// FIXME: We capture more information than the dag can represent. For
// now, just use the tightest assertzext/assertsext possible.
bool isSExt;
EVT FromVT(MVT::Other);
if (NumZeroBits) {
FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
isSExt = false;
} else if (NumSignBits > 1) {
FromVT =
EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
isSExt = true;
} else {
continue;
}
// Add an assertion node.
assert(FromVT != MVT::Other);
Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
RegisterVT, P, DAG.getValueType(FromVT));
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
RegisterVT, ValueVT, V, CallConv);
Part += NumRegs;
Parts.clear();
}
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V,
ISD::NodeType PreferredExtendType) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ISD::NodeType ExtendKind = PreferredExtendType;
// Get the list of the value's legal parts.
unsigned NumRegs = Regs.size();
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumParts = RegCount[Value];
MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
*DAG.getContext(),
CallConv.getValue(), RegVTs[Value])
: RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
NumParts, RegisterVT, V, CallConv, ExtendKind);
Part += NumParts;
}
// Copy the parts into the registers.
SmallVector<SDValue, 8> Chains(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue Part;
if (!Flag) {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
} else {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
*Flag = Part.getValue(1);
}
Chains[i] = Part.getValue(0);
}
if (NumRegs == 1 || Flag)
// If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
// flagged to it. That is, the CopyToReg nodes and the user are considered
// a single scheduling unit. If we create a TokenFactor and return it as
// chain, then the TokenFactor is both a predecessor (operand) of the
// user as well as a successor (the TF operands are flagged to the user).
// c1, f1 = CopyToReg
// c2, f2 = CopyToReg
// c3 = TokenFactor c1, c2
// ...
// = op c3, ..., f2
Chain = Chains[NumRegs-1];
else
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
if (HasMatching)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
// Put the register class of the virtual registers in the flag word. That
// way, later passes can recompute register class constraints for inline
// assembly as well as normal instructions.
// Don't do this for tied operands that can use the regclass information
// from the def.
const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
}
SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
Ops.push_back(Res);
if (Code == InlineAsm::Kind_Clobber) {
// Clobbers should always have a 1:1 mapping with registers, and may
// reference registers that have illegal (e.g. vector) types. Hence, we
// shouldn't try to apply any sort of splitting logic to them.
assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
"No 1:1 mapping from clobbers to regs?");
Register SP = TLI.getStackPointerRegisterToSaveRestore();
(void)SP;
for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
assert(
(Regs[I] != SP ||
DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
"If we clobbered the stack pointer, MFI should know about it.");
}
return;
}
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
MVT RegisterVT = RegVTs[Value];
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value],
RegisterVT);
for (unsigned i = 0; i != NumRegs; ++i) {
assert(Reg < Regs.size() && "Mismatch in # registers expected");
unsigned TheReg = Regs[Reg++];
Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
}
}
}
SmallVector<std::pair<unsigned, TypeSize>, 4>
RegsForValue::getRegsAndSizes() const {
SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
unsigned I = 0;
for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
unsigned RegCount = std::get<0>(CountAndVT);
MVT RegisterVT = std::get<1>(CountAndVT);
TypeSize RegisterSize = RegisterVT.getSizeInBits();
for (unsigned E = I + RegCount; I != E; ++I)
OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
}
return OutVec;
}
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
const TargetLibraryInfo *li) {
AA = aa;
GFI = gfi;
LibInfo = li;
DL = &DAG.getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
}
void SelectionDAGBuilder::clear() {
NodeMap.clear();
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
PendingConstrainedFP.clear();
PendingConstrainedFPStrict.clear();
CurInst = nullptr;
HasTailCall = false;
SDNodeOrder = LowestSDNodeOrder;
StatepointLowering.clear();
}
void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
// Update DAG root to include dependencies on Pending chains.
SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
SDValue Root = DAG.getRoot();
if (Pending.empty())
return Root;
// Add current root to PendingChains, unless we already indirectly
// depend on it.
if (Root.getOpcode() != ISD::EntryToken) {
unsigned i = 0, e = Pending.size();
for (; i != e; ++i) {
assert(Pending[i].getNode()->getNumOperands() > 1);
if (Pending[i].getNode()->getOperand(0) == Root)
break; // Don't add the root if we already indirectly depend on it.
}
if (i == e)
Pending.push_back(Root);
}
if (Pending.size() == 1)
Root = Pending[0];
else
Root = DAG.getTokenFactor(getCurSDLoc(), Pending);
DAG.setRoot(Root);
Pending.clear();
return Root;
}
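// The root accessors below differ in which pending chains they fold into the
// DAG root:
//  - getMemoryRoot flushes PendingLoads only.
//  - getRoot additionally folds in all pending constrained FP chains.
//  - getControlRoot flushes PendingExports together with the fpexcept.strict
//    constrained chains.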
SDValue SelectionDAGBuilder::getMemoryRoot() {
return updateRoot(PendingLoads);
}
SDValue SelectionDAGBuilder::getRoot() {
// Chain up all pending constrained intrinsics together with all
// pending loads, by simply appending them to PendingLoads and
// then calling getMemoryRoot().
PendingLoads.reserve(PendingLoads.size() +
PendingConstrainedFP.size() +
PendingConstrainedFPStrict.size());
PendingLoads.append(PendingConstrainedFP.begin(),
PendingConstrainedFP.end());
PendingLoads.append(PendingConstrainedFPStrict.begin(),
PendingConstrainedFPStrict.end());
PendingConstrainedFP.clear();
PendingConstrainedFPStrict.clear();
return getMemoryRoot();
}
SDValue SelectionDAGBuilder::getControlRoot() {
// We need to emit pending fpexcept.strict constrained intrinsics,
// so append them to the PendingExports list.
PendingExports.append(PendingConstrainedFPStrict.begin(),
PendingConstrainedFPStrict.end());
PendingConstrainedFPStrict.clear();
return updateRoot(PendingExports);
}
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
if (I.isTerminator()) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
// Increase the SDNodeOrder if dealing with a non-debug instruction.
if (!isa<DbgInfoIntrinsic>(I))
++SDNodeOrder;
CurInst = &I;
visit(I.getOpcode(), I);
if (!I.isTerminator() && !HasTailCall &&
!isa<GCStatepointInst>(I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
CurInst = nullptr;
}
void SelectionDAGBuilder::visitPHI(const PHINode &) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}
void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
// Note: this doesn't use InstVisitor, because it has to work with
// ConstantExpr's in addition to instructions.
switch (Opcode) {
default: llvm_unreachable("Unknown instruction type encountered!");
// Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
}
}
void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
DebugLoc DL, unsigned Order) {
// We treat variadic dbg_values differently at this stage.
if (DI->hasArgList()) {
// For variadic dbg_values we will now insert an undef.
// FIXME: We can potentially recover these!
SmallVector<SDDbgOperand, 2> Locs;
for (const Value *V : DI->getValues()) {
auto Undef = UndefValue::get(V->getType());
Locs.push_back(SDDbgOperand::fromConst(Undef));
}
SDDbgValue *SDV = DAG.getDbgValueList(
DI->getVariable(), DI->getExpression(), Locs, {},
/*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
} else {
// TODO: Dangling debug info will eventually either be resolved or produce
// an Undef DBG_VALUE. However in the resolution case, a gap may appear
// between the original dbg.value location and its resolved DBG_VALUE,
// which we should ideally fill with an extra Undef DBG_VALUE.
assert(DI->getNumVariableLocationOps() == 1 &&
"DbgValueInst without an ArgList should have a single location "
"operand.");
DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order);
}
}
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr) {
auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
const DbgValueInst *DI = DDI.getDI();
DIVariable *DanglingVariable = DI->getVariable();
DIExpression *DanglingExpr = DI->getExpression();
if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
return true;
}
return false;
};
for (auto &DDIMI : DanglingDebugInfoMap) {
DanglingDebugInfoVector &DDIV = DDIMI.second;
// If debug info is to be dropped, run it through final checks to see
// whether it can be salvaged.
for (auto &DDI : DDIV)
if (isMatchingDbgValue(DDI))
salvageUnresolvedDbgValue(DDI);
erase_if(DDIV, isMatchingDbgValue);
}
}
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
SDValue Val) {
auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
return;
DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
for (auto &DDI : DDIV) {
const DbgValueInst *DI = DDI.getDI();
assert(!DI->hasArgList() && "Not implemented for variadic dbg_values");
assert(DI && "Ill-formed DanglingDebugInfo");
DebugLoc dl = DDI.getdl();
unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
DILocalVariable *Variable = DI->getVariable();
DIExpression *Expr = DI->getExpression();
assert(Variable->isValidLocationForIntrinsic(dl) &&
"Expected inlined-at fields to agree");
SDDbgValue *SDV;
if (Val.getNode()) {
// FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
// FuncArgumentDbgValue (it would be hoisted to the function entry, and if
// we couldn't resolve it directly when examining the DbgValue intrinsic
// in the first place we should not be more successful here). Unless we
// have some test case that proves this to be correct, we should avoid
// calling EmitFuncArgumentDbgValue here.
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
<< DbgSDNodeOrder << "] for:\n " << *DI << "\n");
LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
// Increase the SDNodeOrder for the DbgValue here to make sure it is
// inserted after the definition of Val when emitting the instructions
// after ISel. An alternative could be to teach
// ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
<< "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
<< ValSDNodeOrder << "\n");
SDV = getDbgValue(Val, Variable, Expr, dl,
std::max(DbgSDNodeOrder, ValSDNodeOrder));
DAG.AddDbgValue(SDV, false);
} else
LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
<< "in EmitFuncArgumentDbgValue\n");
} else {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
auto SDV =
DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, false);
}
}
DDIV.clear();
}
void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// TODO: For the variadic implementation, instead of only checking the fail
// state of `handleDebugValue`, we need to know specifically which values were
// invalid, so that we attempt to salvage only those values when processing
// a DIArgList.
assert(!DDI.getDI()->hasArgList() &&
"Not implemented for variadic dbg_values");
Value *V = DDI.getDI()->getValue(0);
DILocalVariable *Var = DDI.getDI()->getVariable();
DIExpression *Expr = DDI.getDI()->getExpression();
DebugLoc DL = DDI.getdl();
DebugLoc InstDL = DDI.getDI()->getDebugLoc();
unsigned SDOrder = DDI.getSDNodeOrder();
// Currently we consider only dbg.value intrinsics -- we tell the salvager
// that DW_OP_stack_value is desired.
assert(isa<DbgValueInst>(DDI.getDI()));
bool StackValue = true;
// Can this Value be encoded without any further work?
if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false))
return;
// Attempt to salvage back through as many instructions as possible. Bail if
// a non-instruction is seen, such as a constant expression or global
// variable. FIXME: Further work could recover those too.
while (isa<Instruction>(V)) {
Instruction &VAsInst = *cast<Instruction>(V);
// Temporary "0", awaiting real implementation.
SmallVector<Value *, 4> AdditionalValues;
DIExpression *SalvagedExpr =
salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
// TODO: If AdditionalValues isn't empty, then the salvage can only be
// represented with a DBG_VALUE_LIST, so we give up. When we have support
// here for variadic dbg_values, remove that condition.
if (!SalvagedExpr || !AdditionalValues.empty())
break;
// New value and expr now represent this debuginfo.
V = VAsInst.getOperand(0);
Expr = SalvagedExpr;
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
/*IsVariadic=*/false)) {
LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
<< DDI.getDI() << "\nBy stripping back to:\n " << V);
return;
}
}
// This was the final opportunity to salvage this debug information, and it
// couldn't be done. Place an undef DBG_VALUE at this location to terminate
// any earlier variable location.
auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI()
<< "\n");
LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
<< "\n");
}
bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
DILocalVariable *Var,
DIExpression *Expr, DebugLoc dl,
DebugLoc InstDL, unsigned Order,
bool IsVariadic) {
if (Values.empty())
return true;
SmallVector<SDDbgOperand> LocationOps;
SmallVector<SDNode *> Dependencies;
for (const Value *V : Values) {
// Constant value.
if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
isa<ConstantPointerNull>(V)) {
LocationOps.emplace_back(SDDbgOperand::fromConst(V));
continue;
}
// If the Value is a frame index, we can create a FrameIndex debug value
// without relying on the DAG at all.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
auto SI = FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(SI->second));
continue;
}
}
// Do not use getValue() in here; we don't want to generate code at
// this point if it hasn't been done yet.
SDValue N = NodeMap[V];
if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
// Only emit func arg dbg value for non-variadic dbg.values for now.
if (!IsVariadic && EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
return true;
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
// describe stack slot locations.
//
// Consider "int x = 0; int *px = &x;". There are two kinds of
// interesting debug values here after optimization:
//
// dbg.value(i32* %px, !"int *px", !DIExpression()), and
// dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
//
// Both describe the direct values of their associated variables.
Dependencies.push_back(N.getNode());
LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(FISDN->getIndex()));
continue;
}
LocationOps.emplace_back(
SDDbgOperand::fromNode(N.getNode(), N.getResNo()));
continue;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Special rules apply for the first dbg.values of parameter variables in a
// function. Identify them by the fact that they reference Argument values,
// that they describe parameters, and that those parameters belong to the
// current function. We need to let them dangle until they get an SDNode.
bool IsParamOfFunc =
isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt();
if (IsParamOfFunc)
return false;
// The value is not used in this block yet (or it would have an SDNode).
// We still want the value to appear for the user if possible -- if it has
// an associated VReg, we can refer to that instead.
auto VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
unsigned Reg = VMI->second;
// If this is a PHI node, it may be split up into several MI PHI nodes
// (in FunctionLoweringInfo::set).
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
// FIXME: We could potentially support variadic dbg_values here.
if (IsVariadic)
return false;
unsigned Offset = 0;
unsigned BitsToDescribe = 0;
if (auto VarSize = Var->getSizeInBits())
BitsToDescribe = *VarSize;
if (auto Fragment = Expr->getFragmentInfo())
BitsToDescribe = Fragment->SizeInBits;
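// Illustrative example: a 64-bit variable held in two 32-bit registers is
// described by two fragments built in the loop below,
// DW_OP_LLVM_fragment(offset 0, size 32) on the first register and
// DW_OP_LLVM_fragment(offset 32, size 32) on the second.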
for (auto RegAndSize : RFV.getRegsAndSizes()) {
// Bail out if all bits are described already.
if (Offset >= BitsToDescribe)
break;
// TODO: handle scalable vectors.
unsigned RegisterSize = RegAndSize.second;
unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
? BitsToDescribe - Offset
: RegisterSize;
auto FragmentExpr = DIExpression::createFragmentExpression(
Expr, Offset, FragmentSize);
if (!FragmentExpr)
continue;
SDDbgValue *SDV = DAG.getVRegDbgValue(
Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
Offset += RegisterSize;
}
return true;
}
// We can use simple vreg locations for variadic dbg_values as well.
LocationOps.emplace_back(SDDbgOperand::fromVReg(Reg));
continue;
}
// We failed to create a SDDbgOperand for V.
return false;
}
// We have created a SDDbgOperand for each Value in Values.
// Should use Order instead of SDNodeOrder?
assert(!LocationOps.empty());
SDDbgValue *SDV =
DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
/*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
return true;
}
void SelectionDAGBuilder::resolveOrClearDbgInfo() {
// Try to fixup any remaining dangling debug info -- and drop it if we can't.
for (auto &Pair : DanglingDebugInfoMap)
for (auto &DDI : Pair.second)
salvageUnresolvedDbgValue(DDI);
clearDanglingDebugInfo();
}
/// getCopyFromRegs - If there was a virtual register allocated for the value V,
/// emit a CopyFromReg of the specified type Ty. Return an empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V);
SDValue Result;
if (It != FuncInfo.ValueMap.end()) {
Register InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), InReg, Ty,
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
resolveDanglingDebugInfo(V, Result);
}
return Result;
}
/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
// to do this first, so that we don't create a CopyFromReg if we already
// have a regular SDValue.
SDValue &N = NodeMap[V];
if (N.getNode()) return N;
// If there's a virtual register allocated and initialized for this
// value, use it.
if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
return copyFromReg;
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
resolveDanglingDebugInfo(V, Val);
return Val;
}
/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
// If we already have an SDValue for this value, use it.
SDValue &N = NodeMap[V];
if (N.getNode()) {
if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
// Remove the debug location from the node as the node is about to be used
// in a location which may differ from the original debug location. This
// is relevant to Constant and ConstantFP nodes because they can appear
// as constant expressions inside PHI nodes.
N->setDebugLoc(DebugLoc());
}
return N;
}
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
resolveDanglingDebugInfo(V, Val);
return Val;
}
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const Constant *C = dyn_cast<Constant>(V)) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
return DAG.getConstant(*CI, getCurSDLoc(), VT);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
return DAG.getConstant(0, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout(), AS));
}
if (match(C, m_VScale(DAG.getDataLayout())))
return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
return DAG.getUNDEF(VT);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
visit(CE->getOpcode(), *CE);
SDValue N1 = NodeMap[V];
assert(N1.getNode() && "visit didn't populate the NodeMap!");
return N1;
}
if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
SmallVector<SDValue, 4> Constants;
for (const Use &U : C->operands()) {
SDNode *Val = getValue(U).getNode();
// If the operand is an empty aggregate, there are no values.
if (!Val) continue;
// Add each leaf value from the operand to the Constants list
// to form a flattened list of all the values.
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
Constants.push_back(SDValue(Val, i));
}
return DAG.getMergeValues(Constants, getCurSDLoc());
}
if (const ConstantDataSequential *CDS =
dyn_cast<ConstantDataSequential>(C)) {
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
// Add each leaf value from the element to the Ops list to form a
// flattened list of all the values.
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
Ops.push_back(SDValue(Val, i));
}
if (isa<ArrayType>(CDS->getType()))
return DAG.getMergeValues(Ops, getCurSDLoc());
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
"Unknown struct or array constant!");
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
unsigned NumElts = ValueVTs.size();
if (NumElts == 0)
return SDValue(); // empty struct
SmallVector<SDValue, 4> Constants(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
EVT EltVT = ValueVTs[i];
if (isa<UndefValue>(C))
Constants[i] = DAG.getUNDEF(EltVT);
else if (EltVT.isFloatingPoint())
Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
}
return DAG.getMergeValues(Constants, getCurSDLoc());
}
if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
return DAG.getBlockAddress(BA, VT);
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
return getValue(Equiv->getGlobalValue());
VectorType *VecTy = cast<VectorType>(V->getType());
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
SmallVector<SDValue, 16> Ops;
unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
for (unsigned i = 0; i != NumElements; ++i)
Ops.push_back(getValue(CV->getOperand(i)));
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
} else if (isa<ConstantAggregateZero>(C)) {
EVT EltVT =
TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
SDValue Op;
if (EltVT.isFloatingPoint())
Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
if (isa<ScalableVectorType>(VecTy))
return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
else {
SmallVector<SDValue, 16> Ops;
Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
}
llvm_unreachable("Unknown vector constant");
}
// If this is a static alloca, generate it as the frame index instead of
// recomputing the address.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
return DAG.getFrameIndex(SI->second,
TLI.getFrameIndexTy(DAG.getDataLayout()));
}
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
Inst->getType(), None);
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
}
llvm_unreachable("Can't get register for value!");
}
void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
bool IsSEH = isAsynchronousEHPersonality(Pers);
MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
if (!IsSEH)
CatchPadMBB->setIsEHScopeEntry();
// In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
CatchPadMBB->setIsEHFuncletEntry();
}
void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
// Update machine-CFG edge.
MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
FuncInfo.MBB->addSuccessor(TargetMBB);
TargetMBB->setIsEHCatchretTarget(true);
DAG.getMachineFunction().setHasEHCatchret(true);
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsSEH = isAsynchronousEHPersonality(Pers);
if (IsSEH) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
if (TargetMBB != NextBlock(FuncInfo.MBB) ||
TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(TargetMBB)));
return;
}
// Figure out the funclet membership for the catchret's successor.
// This will be used by the FuncletLayout pass to determine how to order the
// BB's.
// A 'catchret' returns to the outer scope's color.
Value *ParentPad = I.getCatchSwitchParentPad();
const BasicBlock *SuccessorColor;
if (isa<ConstantTokenNone>(ParentPad))
SuccessorColor = &FuncInfo.Fn->getEntryBlock();
else
SuccessorColor = cast<Instruction>(ParentPad)->getParent();
assert(SuccessorColor && "No parent funclet for catchret!");
MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
// Create the terminator node.
SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(TargetMBB),
DAG.getBasicBlock(SuccessorColorMBB));
DAG.setRoot(Ret);
}
void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
// Don't emit any special code for the cleanuppad instruction. It just marks
// the start of an EH scope/funclet.
FuncInfo.MBB->setIsEHScopeEntry();
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
if (Pers != EHPersonality::Wasm_CXX) {
FuncInfo.MBB->setIsEHFuncletEntry();
FuncInfo.MBB->setIsCleanupFuncletEntry();
}
}
// In wasm EH, even though a catchpad may not catch an exception if a tag does
// not match, it is OK to add only the first unwind destination catchpad to the
// successors, because there will be at least one invoke instruction within the
// catch scope that points to the next unwind destination, if one exists, so
// CFGSort cannot mess up the BB sorting order.
// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
// call within them, and catchpads only consisting of 'catch (...)' have a
// '__cxa_end_catch' call within them, both of which generate invokes in case
// the next unwind destination exists, i.e., the next unwind destination is not
// the caller.)
//
// Having at most one EH pad successor is also simpler and helps later
// transformations.
//
// For example,
// current:
// invoke void @foo to ... unwind label %catch.dispatch
// catch.dispatch:
// %0 = catchswitch within ... [label %catch.start] unwind label %next
// catch.start:
// ...
// ... in this BB or some other child BB dominated by this BB there will be an
// invoke that points to 'next' BB as an unwind destination
//
// next: ; We don't need to add this to 'current' BB's successor
// ...
static void findWasmUnwindDestinations(
FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
BranchProbability Prob,
SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
&UnwindDests) {
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
if (isa<CleanupPadInst>(Pad)) {
// Stop on cleanup pads.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations. We don't
// continue to the unwind destination of the catchswitch for wasm.
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
}
break;
} else {
continue;
}
}
}
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
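/// For example (an illustrative IR sketch):
///   invoke void @f() to label %cont unwind label %dispatch
/// dispatch:
///   %cs = catchswitch within none [label %catch.a, label %catch.b]
///           unwind to caller
/// Both %catch.a and %catch.b are added to UnwindDests, each with the
/// incoming edge probability, since either handler may receive the exception.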
static void findUnwindDestinations(
FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
BranchProbability Prob,
SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
&UnwindDests) {
EHPersonality Personality =
classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
bool IsSEH = isAsynchronousEHPersonality(Personality);
if (IsWasmCXX) {
findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
assert(UnwindDests.size() <= 1 &&
"There should be at most one unwind destination for wasm");
return;
}
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
BasicBlock *NewEHPadBB = nullptr;
if (isa<LandingPadInst>(Pad)) {
// Stop on landingpads. They are not funclets.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
break;
} else if (isa<CleanupPadInst>(Pad)) {
// Stop on cleanup pads. Cleanups are always funclet entries for all known
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations.
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
// For MSVC++ and the CLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
UnwindDests.back().first->setIsEHFuncletEntry();
if (!IsSEH)
UnwindDests.back().first->setIsEHScopeEntry();
}
NewEHPadBB = CatchSwitch->getUnwindDest();
} else {
continue;
}
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (BPI && NewEHPadBB)
Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
EHPadBB = NewEHPadBB;
}
}
void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
// Update successor info.
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
auto UnwindDest = I.getUnwindDest();
BranchProbabilityInfo *BPI = FuncInfo.BPI;
BranchProbability UnwindDestProb =
(BPI && UnwindDest)
? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
: BranchProbability::getZero();
findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
for (auto &UnwindDest : UnwindDests) {
UnwindDest.first->setIsEHPad();
addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
}
FuncInfo.MBB->normalizeSuccProbs();
// Create the terminator node.
SDValue Ret =
DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
DAG.setRoot(Ret);
}
void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
report_fatal_error("visitCatchSwitch not yet implemented!");
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
SmallVector<SDValue, 8> OutVals;
// Calls to @llvm.experimental.deoptimize don't generate a return value, so
// lower
//
// %val = call <ty> @llvm.experimental.deoptimize()
// ret <ty> %val
//
// differently.
if (I.getParent()->getTerminatingDeoptimizeCall()) {
LowerDeoptimizingReturn();
return;
}
if (!FuncInfo.CanLowerReturn) {
unsigned DemoteReg = FuncInfo.DemoteRegister;
const Function *F = I.getParent()->getParent();
// Emit a store of the return value through the virtual register.
// Leave Outs empty so that LowerReturn won't try to load return
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
ComputeValueVTs(TLI, DL,
F->getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
&Offsets);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
for (unsigned i = 0; i != NumValues; ++i) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
TypeSize::Fixed(Offsets[i]));
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
Chains[i] = DAG.getStore(
Chain, getCurSDLoc(), Val,
// FIXME: better loc info would be nice.
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
commonAlignment(BaseAlign, Offsets[i]));
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
MVT::Other, Chains);
} else if (I.getNumOperands() != 0) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
const Function *F = I.getParent()->getParent();
bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
I.getOperand(0)->getType(), F->getCallingConv(),
/*IsVarArg*/ false, DL);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
bool RetInReg = F->getAttributes().hasAttribute(
AttributeList::ReturnIndex, Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
CallingConv::ID CC = F->getCallingConv();
unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
&Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
if (RetInReg)
Flags.setInReg();
if (I.getOperand(0)->getType()->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
}
if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
if (j == NumValues - 1)
Flags.setInConsecutiveRegsLast();
}
// Propagate extension type if any
if (ExtendKind == ISD::SIGN_EXTEND)
Flags.setSExt();
else if (ExtendKind == ISD::ZERO_EXTEND)
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
}
}
}
// Push the swifterror virtual register in as the last element of Outs. This
// makes sure the swifterror virtual register will be returned in the
// swifterror physical register.
const Function *F = I.getParent()->getParent();
if (TLI.supportSwiftError() &&
F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
assert(SwiftError.getFunctionArg() && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
EVT(TLI.getPointerTy(DL)) /*argvt*/,
true /*isfixed*/, 1 /*origidx*/,
0 /*partOffs*/));
// Create SDNode for the swifterror virtual register.
OutVals.push_back(
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
&I, FuncInfo.MBB, SwiftError.getFunctionArg()),
EVT(TLI.getPointerTy(DL))));
}
bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
CallingConv::ID CallConv =
DAG.getMachineFunction().getFunction().getCallingConv();
Chain = DAG.getTargetLoweringInfo().LowerReturn(
Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
// Verify that the target's LowerReturn behaved as expected.
assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
"LowerReturn didn't return a valid chain!");
// Update the DAG with the new chain value resulting from return lowering.
DAG.setRoot(Chain);
}
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
/// created for it, emit nodes to copy the value into the virtual
/// registers.
void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
// Skip empty types
if (V->getType()->isEmptyTy())
return;
DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
assert(!V->use_empty() && "Unused value assigned virtual registers!");
CopyValueToVirtualRegister(V, VMI->second);
}
}
/// ExportFromCurrentBlock - If this condition isn't known to be exported from
/// the current basic block, add it to ValueMap now so that we'll get a
/// CopyTo/FromReg.
void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
// No need to export constants.
if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
// Already exported?
if (FuncInfo.isExportedInst(V)) return;
unsigned Reg = FuncInfo.InitializeRegForValue(V);
CopyValueToVirtualRegister(V, Reg);
}
bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
const BasicBlock *FromBB) {
// The operands of the setcc have to be in this block. We don't know
// how to export them from some other block.
if (const Instruction *VI = dyn_cast<Instruction>(V)) {
// Can export from current BB.
if (VI->getParent() == FromBB)
return true;
// Is already exported, noop.
return FuncInfo.isExportedInst(V);
}
// If this is an argument, we can export it if the BB is the entry block or
// if it is already exported.
if (isa<Argument>(V)) {
if (FromBB->isEntryBlock())
return true;
// Otherwise, can only export this if it is already exported.
return FuncInfo.isExportedInst(V);
}
// Otherwise, constants can always be exported.
return true;
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
BranchProbability
SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
const BasicBlock *SrcBB = Src->getBasicBlock();
const BasicBlock *DstBB = Dst->getBasicBlock();
if (!BPI) {
// If BPI is not available, set the default probability to 1 / N, where N is
// the number of successors.
auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
return BranchProbability(1, SuccSize);
}
return BPI->getEdgeProbability(SrcBB, DstBB);
}
void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
MachineBasicBlock *Dst,
BranchProbability Prob) {
if (!FuncInfo.BPI)
Src->addSuccessorWithoutProb(Dst);
else {
if (Prob.isUnknown())
Prob = getEdgeProbability(Src, Dst);
Src->addSuccessor(Dst, Prob);
}
}
static bool InBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
return true;
}
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
void
SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
BranchProbability TProb,
BranchProbability FProb,
bool InvertCond) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
// the caseblock.
if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
// The operands of the cmp have to be in this block. We don't know
// how to export them from some other block. If this is the first block
// of the sequence, no exporting is needed.
if (CurBB == SwitchBB ||
(isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
ICmpInst::Predicate Pred =
InvertCond ? IC->getInversePredicate() : IC->getPredicate();
Condition = getICmpCondCode(Pred);
} else {
const FCmpInst *FC = cast<FCmpInst>(Cond);
FCmpInst::Predicate Pred =
InvertCond ? FC->getInversePredicate() : FC->getPredicate();
Condition = getFCmpCondCode(Pred);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
}
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SL->SwitchCases.push_back(CB);
return;
}
}
// Create a CaseBlock record representing this branch.
ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SL->SwitchCases.push_back(CB);
}
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc,
BranchProbability TProb,
BranchProbability FProb,
bool InvertCond) {
// Skip over a NOT that wraps the condition and remember to invert the op and
// operands at the next level.
Value *NotCond;
if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
InBlock(NotCond, CurBB->getBasicBlock())) {
FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
!InvertCond);
return;
}
const Instruction *BOp = dyn_cast<Instruction>(Cond);
const Value *BOpOp0, *BOpOp1;
// Compute the effective opcode for Cond, taking into account whether it needs
// to be inverted, e.g.
// and (not (or A, B)), C
// gets lowered as
// and (and (not A, not B), C)
Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
if (BOp) {
BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
? Instruction::And
: (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
? Instruction::Or
: (Instruction::BinaryOps)0);
if (InvertCond) {
if (BOpc == Instruction::And)
BOpc = Instruction::Or;
else if (BOpc == Instruction::Or)
BOpc = Instruction::And;
}
}
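// At this point BOpc is Instruction::And or Instruction::Or when Cond matched
// a logical and/or pattern (including the select-based forms matched by
// m_LogicalAnd / m_LogicalOr, with And and Or swapped if InvertCond is set),
// and (Instruction::BinaryOps)0 otherwise.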
// If this node is not part of the or/and tree, emit it as a branch.
// Note that all nodes in the tree should have same opcode.
bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOpOp0, CurBB->getBasicBlock()) ||
!InBlock(BOpOp1, CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
TProb, FProb, InvertCond);
return;
}
// Create TmpBB after CurBB.
MachineFunction::iterator BBI(CurBB);
MachineFunction &MF = DAG.getMachineFunction();
MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
CurBB->getParent()->insert(++BBI, TmpBB);
if (Opc == Instruction::Or) {
// Codegen X | Y as:
// BB1:
// jmp_if_X TBB
// jmp TmpBB
// TmpBB:
// jmp_if_Y TBB
// jmp FBB
//
// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
// The requirement is that
// TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
// = TrueProb for original BB.
// Assuming the original probabilities are A and B, one choice is to set
// BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
// A/(1+B) and 2B/(1+B). This choice assumes that
// TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
// Another choice is to assume TrueProb for BB1 equals to TrueProb for
// TmpBB, but the math is more complicated.
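// A worked illustration (numbers chosen for this comment): with A = 3/8 and
// B = 5/8, BB1 gets TrueProb = 3/16 and FalseProb = 13/16, and TmpBB gets the
// normalized pair {3/16, 5/8} = {3/13, 10/13}; indeed
// 3/16 + (13/16) * (3/13) = 3/8 = A.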
auto NewTrueProb = TProb / 2;
auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
NewFalseProb, InvertCond);
// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
Probs[1], InvertCond);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
// BB1:
// jmp_if_X TmpBB
// jmp FBB
// TmpBB:
// jmp_if_Y TBB
// jmp FBB
//
// This requires creation of TmpBB after CurBB.
// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
// The requirement is that
// FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
// = FalseProb for original BB.
// Assuming the original probabilities are A and B, one choice is to set
// BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
// 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
// TrueProb for BB1 * FalseProb for TmpBB.
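// A worked illustration (numbers chosen for this comment): with A = 3/8 and
// B = 5/8, BB1 gets TrueProb = 11/16 and FalseProb = 5/16, and TmpBB gets the
// normalized pair {3/8, 5/16} = {6/11, 5/11}; indeed
// 5/16 + (11/16) * (5/11) = 5/8 = B.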
auto NewTrueProb = TProb + FProb / 2;
auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
NewFalseProb, InvertCond);
// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
Probs[1], InvertCond);
}
}
/// If the set of cases should be emitted as a series of branches, return true.
/// If we should emit this as a bunch of and/or'd together conditions, return
/// false.
bool
SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
if (Cases.size() != 2) return true;
// If this is two comparisons of the same values or'd or and'd together, they
// will get folded into a single comparison, so don't emit two blocks.
if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
Cases[0].CmpRHS == Cases[1].CmpRHS) ||
(Cases[0].CmpRHS == Cases[1].CmpLHS &&
Cases[0].CmpLHS == Cases[1].CmpRHS)) {
return false;
}
// Handle: (X != null) | (Y != null) --> (X|Y) != 0
// Handle: (X == null) & (Y == null) --> (X|Y) == 0
if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
Cases[0].CC == Cases[1].CC &&
isa<Constant>(Cases[0].CmpRHS) &&
cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
return false;
if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
return false;
}
return true;
}
void SelectionDAGBuilder::visitBr(const BranchInst &I) {
MachineBasicBlock *BrMBB = FuncInfo.MBB;
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
if (I.isUnconditional()) {
// Update machine-CFG edges.
BrMBB->addSuccessor(Succ0MBB);
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Succ0MBB)));
return;
}
// If this condition is one of the special cases we handle, do special stuff
// now.
const Value *CondVal = I.getCondition();
MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
// As long as jumps are not expensive (exceptions for multi-use logic ops,
// unpredictable branches, and vector extracts because those jumps are likely
// expensive for any target), this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
// cmp D, E
// F = setle
// or C, F
// jnz foo
// Emit:
// cmp A, B
// je foo
// cmp D, E
// jle foo
const Instruction *BOp = dyn_cast<Instruction>(CondVal);
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) {
Value *Vec;
const Value *BOp0, *BOp1;
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::And;
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;
if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
/*InvertCond=*/false);
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
// Allow some cases to be rejected.
if (ShouldEmitAsBranches(SL->SwitchCases)) {
for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
}
// Emit the branch for this block.
visitSwitchCase(SL->SwitchCases[0], BrMBB);
SL->SwitchCases.erase(SL->SwitchCases.begin());
return;
}
// Okay, we decided not to do this, remove any inserted MBB's and clear
// SwitchCases.
for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
SL->SwitchCases.clear();
}
}
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
// Use visitSwitchCase to actually insert the fast branch sequence for this
// cond branch.
visitSwitchCase(CB, BrMBB);
}
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB) {
SDValue Cond;
SDValue CondLHS = getValue(CB.CmpLHS);
SDLoc dl = CB.DL;
if (CB.CC == ISD::SETTRUE) {
// Branch or fall through to TrueBB.
addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
SwitchBB->normalizeSuccProbs();
if (CB.TrueBB != NextBlock(SwitchBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
DAG.getBasicBlock(CB.TrueBB)));
}
return;
}
auto &TLI = DAG.getTargetLoweringInfo();
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
// Build the setcc now.
if (!CB.CmpMHS) {
// Fold "(X == true)" to X and "(X == false)" to !X to
// handle common cases produced by branch lowering.
if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
CB.CC == ISD::SETEQ)
Cond = CondLHS;
else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
CB.CC == ISD::SETEQ) {
SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
} else {
SDValue CondRHS = getValue(CB.CmpRHS);
// If a pointer's DAG type is larger than its memory type then the DAG
// values are zero-extended. This breaks signed comparisons so truncate
// back to the underlying type before doing the compare.
if (CondLHS.getValueType() != MemVT) {
CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
}
Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
}
} else {
assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
ISD::SETLE);
} else {
SDValue SUB = DAG.getNode(ISD::SUB, dl,
VT, CmpOp, DAG.getConstant(Low, dl, VT));
Cond = DAG.getSetCC(dl, MVT::i1, SUB,
DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
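// For instance (illustrative): a case range [5, 9] becomes the unsigned
// check (X - 5) u<= 4, covering the whole range with a single comparison.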
}
}
// Update successor info
addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
// TrueBB and FalseBB are always different unless the incoming IR is
// degenerate. This only happens when running llc on weird IR.
if (CB.TrueBB != CB.FalseBB)
addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
SwitchBB->normalizeSuccProbs();
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
if (CB.TrueBB == NextBlock(SwitchBB)) {
std::swap(CB.TrueBB, CB.FalseBB);
SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
}
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
// Insert the false branch. Do this even if it's a fall through branch,
// this makes it easier to do DAG optimizations which require inverting
// the branch condition.
BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
DAG.getBasicBlock(CB.FalseBB));
DAG.setRoot(BrCond);
}
/// visitJumpTable - Emit JumpTable node in the current MBB
void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
MVT::Other, Index.getValue(1),
Table, Index);
DAG.setRoot(BrJumpTable);
}
/// visitJumpTableHeader - This function emits the code needed to produce the
/// index into the JumpTable from the value being switched on.
void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
// Subtract the lowest switch case value from the value being switched on.
SDValue SwitchOp = getValue(JTH.SValue);
EVT VT = SwitchOp.getValueType();
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
DAG.getConstant(JTH.First, dl, VT));
// The SDNode we just created, which holds the value being switched on minus
// the smallest case value, needs to be copied to a virtual register so it
// can be used as an index into the jump table in a subsequent basic block.
// This value may be smaller or larger than the target's pointer type, and
// may therefore require extension or truncation.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
unsigned JumpTableReg =
FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
if (!JTH.OmitRangeCheck) {
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the
// largest case in the switch.
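// For example (illustrative): with cases covering 10..13, Sub is (x - 10)
// and the comparison below branches to the default block when Sub u> 3.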
SDValue CMP = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
Sub.getValueType()),
Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, CMP,
DAG.getBasicBlock(JT.Default));
// Avoid emitting unnecessary branches to the next block.
if (JT.MBB != NextBlock(SwitchBB))
BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
DAG.getBasicBlock(JT.MBB));
DAG.setRoot(BrCond);
} else {
// Avoid emitting unnecessary branches to the next block.
if (JT.MBB != NextBlock(SwitchBB))
DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
DAG.getBasicBlock(JT.MBB)));
else
DAG.setRoot(CopyTo);
}
}
/// Create a LOAD_STACK_GUARD node, and let it carry the target-specific global
/// variable if one exists.
static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
SDValue &Chain) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
MachineSDNode *Node =
DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
if (Global) {
MachinePointerInfo MPInfo(Global);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
MachineMemOperand *MemRef = MF.getMachineMemOperand(
MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy));
DAG.setNodeMemRefs(Node, {MemRef});
}
if (PtrTy != PtrMemTy)
return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
return SDValue(Node, 0);
}
/// Codegen a new tail for a stack protector check ParentMBB which has had its
/// tail spliced into a stack protector check success bb.
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineBasicBlock *ParentBB) {
// First create the loads to the guard/stack slot for the comparison.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI.getStackProtectorIndex();
SDValue Guard;
SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
MachineMemOperand::MOVolatile);
if (TLI.useStackGuardXorFP())
GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
// Retrieve the guard check function; it is null if the instrumentation is inlined.
if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
// The target provides a guard check function to validate the guard value.
// Generate a call to that function with the content of the guard slot as
// argument.
FunctionType *FnTy = GuardCheckFn->getFunctionType();
assert(FnTy->getNumParams() == 1 && "Invalid function signature");
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(DAG.getEntryNode())
.setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
getValue(GuardCheckFn), std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return;
}
// If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
// Otherwise, emit a volatile load to retrieve the stack guard value.
SDValue Chain = DAG.getEntryNode();
if (TLI.useLoadStackGuardNode()) {
Guard = getLoadStackGuard(DAG, dl, Chain);
} else {
const Value *IRGuard = TLI.getSDagStackGuard(M);
SDValue GuardPtr = getValue(IRGuard);
Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
MachinePointerInfo(IRGuard, 0), Align,
MachineMemOperand::MOVolatile);
}
// Perform the comparison via a getsetcc.
SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(),
Guard.getValueType()),
Guard, GuardVal, ISD::SETNE);
// If the guard and the stack slot value are not equal, branch to the failure MBB.
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, GuardVal.getOperand(0),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
// Otherwise branch to success MBB.
SDValue Br = DAG.getNode(ISD::BR, dl,
MVT::Other, BrCond,
DAG.getBasicBlock(SPD.getSuccessMBB()));
DAG.setRoot(Br);
}
/// Codegen the failure basic block for a stack protector check.
///
/// A failure stack protector machine basic block consists simply of a call to
/// __stack_chk_fail().
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setDiscardResult(true);
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
None, CallOptions, getCurSDLoc()).second;
// On PS4, the "return address" must still be within the calling function,
// even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
if (TM.getTargetTriple().isPS4CPU())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
// WebAssembly needs an unreachable instruction after a non-returning call,
// because the function return type can be different from __stack_chk_fail's
// return type (void).
if (TM.getTargetTriple().isWasm())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
DAG.setRoot(Chain);
}
/// visitBitTestHeader - This function emits the code needed to produce a value
/// suitable for "bit tests".
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
// Subtract the minimum value.
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
SDValue RangeSub =
DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
// Determine the type of the test operands.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool UsePtrType = false;
if (!TLI.isTypeLegal(VT)) {
UsePtrType = true;
} else {
for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
// Switch table case ranges are encoded into a series of masks.
// Just use the pointer type; it's guaranteed to fit.
UsePtrType = true;
break;
}
}
SDValue Sub = RangeSub;
if (UsePtrType) {
VT = TLI.getPointerTy(DAG.getDataLayout());
Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
}
B.RegVT = VT.getSimpleVT();
B.Reg = FuncInfo.CreateReg(B.RegVT);
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
if (!B.OmitRangeCheck)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
SDValue Root = CopyTo;
if (!B.OmitRangeCheck) {
// Conditional branch to the default block.
SDValue RangeCmp = DAG.getSetCC(dl,
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
RangeSub.getValueType()),
RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
ISD::SETUGT);
Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
DAG.getBasicBlock(B.Default));
}
// Avoid emitting unnecessary branches to the next block.
if (MBB != NextBlock(SwitchBB))
Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
DAG.setRoot(Root);
}
/// visitBitTestCase - This function produces one "bit test".
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
BranchProbability BranchProbToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
MVT VT = BB.RegVT;
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
SDValue Cmp;
unsigned PopCount = countPopulation(B.Mask);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
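// For example (illustrative): with Mask == 0b0100, countTrailingZeros(Mask)
// is 2, so the test below is simply "ShiftOp == 2".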
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
ISD::SETEQ);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
DAG.getConstant(1, dl, VT), ShiftOp);
// Emit bit tests and jumps
SDValue AndOp = DAG.getNode(ISD::AND, dl,
VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
}
// The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
// The branch probability from SwitchBB to NextMBB is BranchProbToNext.
addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
// It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
// one, as they are relative probabilities (and thus work more like weights),
// and hence we need to normalize them so that their sum becomes one.
SwitchBB->normalizeSuccProbs();
SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(),
Cmp, DAG.getBasicBlock(B.TargetBB));
// Avoid emitting unnecessary branches to the next block.
if (NextMBB != NextBlock(SwitchBB))
BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
DAG.getBasicBlock(NextMBB));
DAG.setRoot(BrAnd);
}
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
// Retrieve the successors, looking through artificial IR-level blocks like
// catchswitch.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
const BasicBlock *EHPadBB = I.getSuccessor(1);
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget,
LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower invokes with arbitrary operand bundles yet!");
const Value *Callee(I.getCalledOperand());
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
visitInlineAsm(I, EHPadBB);
else if (Fn && Fn->isIntrinsic()) {
switch (Fn->getIntrinsicID()) {
default:
llvm_unreachable("Cannot invoke this intrinsic");
case Intrinsic::donothing:
// Ignore invokes to @llvm.donothing: jump directly to the next BB.
case Intrinsic::seh_try_begin:
case Intrinsic::seh_scope_begin:
case Intrinsic::seh_try_end:
case Intrinsic::seh_scope_end:
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
break;
case Intrinsic::wasm_rethrow: {
// This is usually done in visitTargetIntrinsic, but this intrinsic is
// special because it can be invoked, so we manually lower it to a DAG
// node here.
SmallVector<SDValue, 8> Ops;
Ops.push_back(getRoot()); // inchain
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Ops.push_back(
DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
break;
}
}
} else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
// Currently we do not lower any intrinsic calls with deopt operand bundles.
// Eventually we will support lowering the @llvm.experimental.deoptimize
// intrinsic, and right now there are no plans to support other intrinsics
// with deopt state.
LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
} else {
LowerCallTo(I, getValue(Callee), false, false, EHPadBB);
}
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
// We already took care of the exported value for the statepoint instruction
// during the call to LowerStatepoint.
if (!isa<GCStatepointInst>(I)) {
CopyToExportRegsIfNeeded(&I);
}
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
BranchProbability EHPadBBProb =
BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
: BranchProbability::getZero();
findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
// Update successor info.
addSuccessorWithProb(InvokeMBB, Return);
for (auto &UnwindDest : UnwindDests) {
UnwindDest.first->setIsEHPad();
addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
}
InvokeMBB->normalizeSuccProbs();
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
DAG.getBasicBlock(Return)));
}
void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
"Cannot lower callbrs with arbitrary operand bundles yet!");
assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
visitInlineAsm(I);
CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
Target->setIsInlineAsmBrIndirectTarget();
}
CallBrMBB->normalizeSuccProbs();
// Drop into default successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Return)));
}
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
return;
// If landingpad's return type is token type, we don't create DAG nodes
// for its exception pointer and selector value. The extraction of exception
// pointer or selector value from token type landingpads is not currently
// supported.
if (LP.getType()->isTokenTy())
return;
SmallVector<EVT, 2> ValueVTs;
SDLoc dl = getCurSDLoc();
ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
// Get the two live-in registers as SDValues. The physregs have already been
// copied into virtual registers.
SDValue Ops[2];
if (FuncInfo.ExceptionPointerVirtReg) {
Ops[0] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), dl,
FuncInfo.ExceptionPointerVirtReg,
TLI.getPointerTy(DAG.getDataLayout())),
dl, ValueVTs[0]);
} else {
Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
}
Ops[1] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), dl,
FuncInfo.ExceptionSelectorVirtReg,
TLI.getPointerTy(DAG.getDataLayout())),
dl, ValueVTs[1]);
// Merge into one.
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
DAG.getVTList(ValueVTs), Ops);
setValue(&LP, Res);
}
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
MachineBasicBlock *Last) {
// Update JTCases.
for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i)
if (SL->JTCases[i].first.HeaderBB == First)
SL->JTCases[i].first.HeaderBB = Last;
// Update BitTestCases.
for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i)
if (SL->BitTestCases[i].Parent == First)
SL->BitTestCases[i].Parent = Last;
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
// Update machine-CFG edges with unique successors.
SmallSet<BasicBlock*, 32> Done;
for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
BasicBlock *BB = I.getSuccessor(i);
bool Inserted = Done.insert(BB).second;
if (!Inserted)
continue;
MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
addSuccessorWithProb(IndirectBrMBB, Succ);
}
IndirectBrMBB->normalizeSuccProbs();
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
MVT::Other, getControlRoot(),
getValue(I.getAddress())));
}
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
if (!DAG.getTarget().Options.TrapUnreachable)
return;
// We may be able to ignore unreachable behind a noreturn call.
if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
const BasicBlock &BB = *I.getParent();
if (&I != &BB.front()) {
BasicBlock::const_iterator PredI =
std::prev(BasicBlock::const_iterator(&I));
if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
if (Call->doesNotReturn())
return;
}
}
}
DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
SDValue Op = getValue(I.getOperand(0));
SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
Op, Flags);
setValue(&I, UnNodeValue);
}
void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
}
if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
Flags.setExact(ExactOp->isExact());
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, Flags);
setValue(&I, BinNodeValue);
}
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
Op1.getValueType(), DAG.getDataLayout());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
unsigned ShiftSize = ShiftTy.getSizeInBits();
unsigned Op2Size = Op2.getValueSizeInBits();
SDLoc DL = getCurSDLoc();
// If the operand is smaller than the shift count type, promote it.
if (ShiftSize > Op2Size)
Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
// If the operand is larger than the shift count type but the shift
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
// Otherwise we'll need to temporarily settle for some other convenient
// type. Type legalization will make adjustments once the shiftee is split.
else
Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
}
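// Illustration (not from the original source; the shift-amount type is
// target-dependent and assumed to be i8 here):
//   %r = shl i64 %x, %amt        ; %amt has type i64 in IR
// Op2Size (64) exceeds ShiftSize (8), but 8 >= Log2_32_Ceil(64), so the
// amount is truncated to the i8 shift-amount type before the node is built.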
bool nuw = false;
bool nsw = false;
bool exact = false;
if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
if (const OverflowingBinaryOperator *OFBinOp =
dyn_cast<const OverflowingBinaryOperator>(&I)) {
nuw = OFBinOp->hasNoUnsignedWrap();
nsw = OFBinOp->hasNoSignedWrap();
}
if (const PossiblyExactOperator *ExactOp =
dyn_cast<const PossiblyExactOperator>(&I))
exact = ExactOp->isExact();
}
SDNodeFlags Flags;
Flags.setExact(exact);
Flags.setNoSignedWrap(nsw);
Flags.setNoUnsignedWrap(nuw);
SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
Flags);
setValue(&I, Res);
}
void SelectionDAGBuilder::visitSDiv(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
SDNodeFlags Flags;
Flags.setExact(isa<PossiblyExactOperator>(&I) &&
cast<PossiblyExactOperator>(&I)->isExact());
setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
Op2, Flags));
}
void SelectionDAGBuilder::visitICmp(const User &I) {
ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
predicate = IC->getPredicate();
else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
predicate = ICmpInst::Predicate(IC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
auto &TLI = DAG.getTargetLoweringInfo();
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
// If a pointer's DAG type is larger than its memory type then the DAG values
// are zero-extended. This breaks signed comparisons so truncate back to the
// underlying type before doing the compare.
if (Op1.getValueType() != MemVT) {
Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
}
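// Illustration (hypothetical target where pointers are 32-bit in memory but
// carried zero-extended in 64-bit DAG values): a signed compare of two such
// pointers is performed on the truncated 32-bit values so the sign bit is
// interpreted correctly.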
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
void SelectionDAGBuilder::visitFCmp(const User &I) {
FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
predicate = FC->getPredicate();
else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
predicate = FCmpInst::Predicate(FC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
auto *FPMO = cast<FPMathOperator>(&I);
if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
SDNodeFlags Flags;
Flags.copyFMF(*FPMO);
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
}
// Check that every user of the select's condition is itself a select; only
// then can the feeding compare be folded away when forming a min/max.
static bool hasOnlySelectUsers(const Value *Cond) {
return llvm::all_of(Cond->users(), [](const Value *V) {
return isa<SelectInst>(V);
});
}
void SelectionDAGBuilder::visitSelect(const User &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
SmallVector<SDValue, 4> Values(NumValues);
SDValue Cond = getValue(I.getOperand(0));
SDValue LHSVal = getValue(I.getOperand(1));
SDValue RHSVal = getValue(I.getOperand(2));
SmallVector<SDValue, 1> BaseOps(1, Cond);
ISD::NodeType OpCode =
Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
bool IsUnaryAbs = false;
bool Negate = false;
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
// Min/max matching is only viable if all output VTs are the same.
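// Illustration (not from the original source):
//   %c = icmp slt i32 %a, %b
//   %s = select i1 %c, i32 %a, i32 %b
// matchSelectPattern recognizes this as SPF_SMIN, so the select can be
// emitted as a single ISD::SMIN node when the target supports it.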
if (is_splat(ValueVTs)) {
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
// We care about the legality of the operation after it has been type
// legalized.
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
VT = TLI.getTypeToTransformTo(Ctx, VT);
// If the vselect is legal, assume we want to leave this as a vector setcc +
// vselect. Otherwise, if this is going to be scalarized, we want to see if
// min/max is legal on the scalar type.
bool UseScalarMinMax = VT.isVector() &&
!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
Value *LHS, *RHS;
auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
ISD::NodeType Opc = ISD::DELETED_NODE;
switch (SPR.Flavor) {
case SPF_UMAX: Opc = ISD::UMAX; break;
case SPF_UMIN: Opc = ISD::UMIN; break;
case SPF_SMAX: Opc = ISD::SMAX; break;
case SPF_SMIN: Opc = ISD::SMIN; break;
case SPF_FMINNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
case SPNB_RETURNS_ANY: {
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
Opc = ISD::FMINNUM;
else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
Opc = ISD::FMINIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
ISD::FMINNUM : ISD::FMINIMUM;
break;
}
}
break;
case SPF_FMAXNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
Opc = ISD::FMAXNUM;
else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
Opc = ISD::FMAXIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
ISD::FMAXNUM : ISD::FMAXIMUM;
break;
}
break;
case SPF_NABS:
Negate = true;
LLVM_FALLTHROUGH;
case SPF_ABS:
IsUnaryAbs = true;
Opc = ISD::ABS;
break;
default: break;
}
if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
(TLI.isOperationLegalOrCustom(Opc, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
// If the underlying comparison instruction is used by any other
// instruction, the consumed instructions won't be destroyed, so it is
// not profitable to convert to a min/max.
hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
OpCode = Opc;
LHSVal = getValue(LHS);
RHSVal = getValue(RHS);
BaseOps.clear();
}
if (IsUnaryAbs) {
OpCode = Opc;
LHSVal = getValue(LHS);
BaseOps.clear();
}
}
if (IsUnaryAbs) {
for (unsigned i = 0; i != NumValues; ++i) {
SDLoc dl = getCurSDLoc();
EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i);
Values[i] =
DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
if (Negate)
Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
Values[i]);
}
} else {
for (unsigned i = 0; i != NumValues; ++i) {
SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
Values[i] = DAG.getNode(
OpCode, getCurSDLoc(),
LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags);
}
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitTrunc(const User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitZExt(const User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// ZExt also can't be a cast to bool for the same reason. So, nothing much to do.
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSExt(const User &I) {
// SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// SExt also can't be a cast to bool for the same reason. So, nothing much to do.
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
DAG.getTargetConstant(
0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
// FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToUI(const User &I) {
// FPToUI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToSI(const User &I) {
// FPToSI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitUIToFP(const User &I) {
// UIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSIToFP(const User &I) {
// SIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
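// Illustration (assuming 64-bit pointers): 'ptrtoint i8* %p to i32'
// truncates, 'ptrtoint i8* %p to i128' zero-extends, and
// 'ptrtoint i8* %p to i64' is a no-op.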
SDValue N = getValue(I.getOperand(0));
auto &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
EVT PtrMemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
setValue(&I, N);
}
void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
auto &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
setValue(&I, N);
}
void SelectionDAGBuilder::visitBitCast(const User &I) {
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
// BitCast assures us that source and destination are the same size so this is
// either a BITCAST or a no-op.
if (DestVT != N.getValueType())
setValue(&I, DAG.getNode(ISD::BITCAST, dl,
DestVT, N)); // convert types.
// Check if the original LLVM IR Operand was a ConstantInt, because getValue()
// might fold any kind of constant expression to an integer constant and that
// is not what we are looking for. Only recognize a bitcast of a genuine
// constant integer as an opaque constant.
else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
/*isOpaque*/true));
else
setValue(&I, N); // noop cast.
}
void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
SDValue N = getValue(SV);
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned SrcAS = SV->getType()->getPointerAddressSpace();
unsigned DestAS = I.getType()->getPointerAddressSpace();
if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
setValue(&I, N);
}
void SelectionDAGBuilder::visitInsertElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
InVec, InVal, InIdx));
}
void SelectionDAGBuilder::visitExtractElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
InVec, InIdx));
}
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
ArrayRef<int> Mask;
if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
Mask = SVI->getShuffleMask();
else
Mask = cast<ConstantExpr>(I).getShuffleMask();
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
if (all_of(Mask, [](int Elem) { return Elem == 0; }) &&
VT.isScalableVector()) {
// Canonical splat form of first element of first input vector.
SDValue FirstElt =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1,
DAG.getVectorIdxConstant(0, DL));
setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
return;
}
// For now, we only handle splats for scalable vectors.
// The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
// for targets that support a SPLAT_VECTOR for non-scalable vector types.
assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
unsigned SrcNumElts = SrcVT.getVectorNumElements();
unsigned MaskNumElts = Mask.size();
if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
return;
}
// Normalize the shuffle vector since mask and vector length don't match.
if (SrcNumElts < MaskNumElts) {
// The mask is longer than the source vectors. We can concatenate the source
// vectors to make the mask and vector lengths match.
if (MaskNumElts % SrcNumElts == 0) {
// Mask length is a multiple of the source vector length.
// Check if the shuffle is some kind of concatenation of the input
// vectors.
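// Illustration (not from the original source):
//   shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// has SrcNumElts == 2 and MaskNumElts == 4; each two-element piece of the
// mask reads sequentially from a single source, so this lowers to
// CONCAT_VECTORS(%a, %b).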
unsigned NumConcat = MaskNumElts / SrcNumElts;
bool IsConcat = true;
SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx < 0)
continue;
// Ensure the indices in each SrcVT sized piece are sequential and that
// the same source is used for the whole piece.
if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
(ConcatSrcs[i / SrcNumElts] >= 0 &&
ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
IsConcat = false;
break;
}
// Remember which source this index came from.
ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
}
// The shuffle is concatenating multiple vectors together. Just emit
// a CONCAT_VECTORS operation.
if (IsConcat) {
SmallVector<SDValue, 8> ConcatOps;
for (auto Src : ConcatSrcs) {
if (Src < 0)
ConcatOps.push_back(DAG.getUNDEF(SrcVT));
else if (Src == 0)
ConcatOps.push_back(Src1);
else
ConcatOps.push_back(Src2);
}
setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
return;
}
}
unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
PaddedMaskNumElts);
// Pad both vectors with undefs to make them the same length as the mask.
SDValue UndefVal = DAG.getUNDEF(SrcVT);
SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
MOps1[0] = Src1;
MOps2[0] = Src2;
Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
// Readjust mask for new input vector length.
SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts - PaddedMaskNumElts;
MappedOps[i] = Idx;
}
SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
// If the concatenated vector was padded, extract a subvector with the
// correct number of elements.
if (MaskNumElts != PaddedMaskNumElts)
Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
DAG.getVectorIdxConstant(0, DL));
setValue(&I, Result);
return;
}
if (SrcNumElts > MaskNumElts) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle.
int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
bool CanExtract = true;
for (int Idx : Mask) {
unsigned Input = 0;
if (Idx < 0)
continue;
if (Idx >= (int)SrcNumElts) {
Input = 1;
Idx -= SrcNumElts;
}
// If all the indices come from the same MaskNumElts sized portion of
// the sources we can use extract. Also make sure the extract wouldn't
// extract past the end of the source.
int NewStartIdx = alignDown(Idx, MaskNumElts);
if (NewStartIdx + MaskNumElts > SrcNumElts ||
(StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
CanExtract = false;
// Make sure we always update StartIdx as we use it to track if all
// elements are undef.
StartIdx[Input] = NewStartIdx;
}
if (StartIdx[0] < 0 && StartIdx[1] < 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
if (CanExtract) {
// Extract appropriate subvector and generate a vector shuffle
for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
DAG.getVectorIdxConstant(StartIdx[Input], DL));
}
}
// Calculate new mask.
SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
for (int &Idx : MappedOps) {
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
else if (Idx >= 0)
Idx -= StartIdx[0];
}
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
return;
}
}
// We can't use either concat vectors or extract subvectors, so fall back to
// replacing the shuffle with per-element extracts and a build vector.
EVT EltVT = VT.getVectorElementType();
SmallVector<SDValue,8> Ops;
for (int Idx : Mask) {
SDValue Res;
if (Idx < 0) {
Res = DAG.getUNDEF(EltVT);
} else {
SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src,
DAG.getVectorIdxConstant(Idx, DL));
}
Ops.push_back(Res);
}
setValue(&I, DAG.getBuildVector(VT, DL, Ops));
}
void SelectionDAGBuilder::visitInsertValue(const User &I) {
ArrayRef<unsigned> Indices;
if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
Indices = IV->getIndices();
else
Indices = cast<ConstantExpr>(&I)->getIndices();
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
Type *AggTy = I.getType();
Type *ValTy = Op1->getType();
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
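// Illustration (hypothetical IR): for
//   %r = insertvalue {i32, {float, float}} %agg, float %v, 1, 0
// the aggregate flattens to three scalar values and the indices {1, 0}
// map to LinearIndex == 1.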
unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
unsigned NumAggValues = AggValueVTs.size();
unsigned NumValValues = ValValueVTs.size();
SmallVector<SDValue, 4> Values(NumAggValues);
// Ignore an insertvalue that produces an empty object
if (!NumAggValues) {
setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
return;
}
SDValue Agg = getValue(Op0);
unsigned i = 0;
// Copy the beginning value(s) from the original aggregate.
for (; i != LinearIndex; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
// Copy values from the inserted value(s).
if (NumValValues) {
SDValue Val = getValue(Op1);
for (; i != LinearIndex + NumValValues; ++i)
Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
}
// Copy remaining value(s) from the original aggregate.
for (; i != NumAggValues; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(AggValueVTs), Values));
}
void SelectionDAGBuilder::visitExtractValue(const User &I) {
ArrayRef<unsigned> Indices;
if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
Indices = EV->getIndices();
else
Indices = cast<ConstantExpr>(&I)->getIndices();
const Value *Op0 = I.getOperand(0);
Type *AggTy = Op0->getType();
Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
unsigned NumValValues = ValValueVTs.size();
// Ignore an extractvalue that produces an empty object.
if (!NumValValues) {
setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
return;
}
SmallVector<SDValue, 4> Values(NumValValues);
SDValue Agg = getValue(Op0);
// Copy out the selected value(s).
for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
Values[i - LinearIndex] =
OutOfUndef ?
DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValValueVTs), Values));
}
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Value *Op0 = I.getOperand(0);
// Note that the pointer operand may be a vector of pointers. Take the scalar
// element which holds a pointer.
unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
SDValue N = getValue(Op0);
SDLoc dl = getCurSDLoc();
auto &TLI = DAG.getTargetLoweringInfo();
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
bool IsVectorGEP = I.getType()->isVectorTy();
ElementCount VectorElementCount =
IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
: ElementCount::getFixed(0);
if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
if (VectorElementCount.isScalable())
N = DAG.getSplatVector(VT, dl, N);
else
N = DAG.getSplatBuildVector(VT, dl, N);
}
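// Illustration (not from the original source): for
//   %p = getelementptr i32, i32* %base, <4 x i64> %idx
// the scalar %base has just been splatted to a vector-of-pointers DAG value
// so that the per-lane index arithmetic below operates on matching types.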
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
GTI != E; ++GTI) {
const Value *Idx = GTI.getOperand();
if (StructType *StTy = GTI.getStructTypeOrNull()) {
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
// IdxSize is the width of the arithmetic according to IR semantics.
// In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
// (and fix up the result later).
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
bool ElementScalable = ElementSize.isScalable();
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
const auto *C = dyn_cast<Constant>(Idx);
if (C && isa<VectorType>(C->getType()))
C = C->getSplatValue();
const auto *CI = dyn_cast_or_null<ConstantInt>(C);
if (CI && CI->isZero())
continue;
if (CI && !ElementScalable) {
APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
LLVMContext &Context = *DAG.getContext();
SDValue OffsVal;
if (IsVectorGEP)
OffsVal = DAG.getConstant(
Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
else
OffsVal = DAG.getConstant(Offs, dl, IdxTy);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
continue;
}
// N = N + Idx * ElementMul;
SDValue IdxN = getValue(Idx);
if (!IdxN.getValueType().isVector() && IsVectorGEP) {
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
VectorElementCount);
if (VectorElementCount.isScalable())
IdxN = DAG.getSplatVector(VT, dl, IdxN);
else
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
if (ElementScalable) {
EVT VScaleTy = N.getValueType().getScalarType();
SDValue VScale = DAG.getNode(
ISD::VSCALE, dl, VScaleTy,
DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
if (IsVectorGEP)
VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
} else {
// If this is a multiply by a power of two, turn it into a shl
// immediately. This is a very common case.
if (ElementMul != 1) {
if (ElementMul.isPowerOf2()) {
unsigned Amt = ElementMul.logBase2();
IdxN = DAG.getNode(ISD::SHL, dl,
N.getValueType(), IdxN,
DAG.getConstant(Amt, dl, IdxN.getValueType()));
} else {
SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
IdxN.getValueType());
IdxN = DAG.getNode(ISD::MUL, dl,
N.getValueType(), IdxN, Scale);
}
}
}
N = DAG.getNode(ISD::ADD, dl,
N.getValueType(), N, IdxN);
}
}
MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
if (IsVectorGEP) {
PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount);
PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount);
}
if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
setValue(&I, N);
}
void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// If this is a fixed sized alloca in the entry block of the function,
// allocate it statically on the stack.
if (FuncInfo.StaticAllocaMap.count(&I))
return; // getValue will auto-populate this.
SDLoc dl = getCurSDLoc();
Type *Ty = I.getAllocatedType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
uint64_t TySize = DL.getTypeAllocSize(Ty);
MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
SDValue AllocSize = getValue(I.getArraySize());
EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
AllocSize,
DAG.getConstant(TySize, dl, IntPtr));
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If it is greater than the stack
// alignment, we record it in the DYNAMIC_STACKALLOC node.
Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
if (*Alignment <= StackAlign)
Alignment = None;
const uint64_t StackAlignMask = StackAlign.value() - 1U;
// Round the size of the allocation up to the stack alignment size
// by adding SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
DAG.getConstant(StackAlignMask, dl, IntPtr), Flags);
// Mask out the low bits for alignment purposes.
AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
DAG.getConstant(~StackAlignMask, dl, IntPtr));
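// Illustration: with a 16-byte stack alignment, an AllocSize of N becomes
// (N + 15) & ~15, i.e. N rounded up to the next multiple of 16.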
SDValue Ops[] = {
getRoot(), AllocSize,
DAG.getConstant(Alignment ? Alignment->value() : 0, dl, IntPtr)};
SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
setValue(&I, DSA);
DAG.setRoot(DSA.getValue(1));
assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (I.isAtomic())
return visitAtomicLoad(I);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(SV)) {
if (Arg->hasSwiftErrorAttr())
return visitLoadFromSwiftError(I);
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
if (Alloca->isSwiftError())
return visitLoadFromSwiftError(I);
}
}
SDValue Ptr = getValue(SV);
Type *Ty = I.getType();
Align Alignment = I.getAlign();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
bool isVolatile = I.isVolatile();
SDValue Root;
bool ConstantMemory = false;
if (isVolatile)
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (NumValues > MaxParallelChains)
Root = getMemoryRoot();
else if (AA &&
AA->pointsToConstantMemory(MemoryLocation(
SV,
LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
} else {
// Do not serialize non-volatile loads against each other.
Root = DAG.getRoot();
}
SDLoc dl = getCurSDLoc();
if (isVolatile)
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
// An aggregate load cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
MachineMemOperand::Flags MMOFlags
= TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// Serializing loads here may result in excessive register pressure, and
// TokenFactor places arbitrary choke points on the scheduler. SD scheduling
// could recover a bit by hoisting nodes upward in the chain by recognizing
// they are side-effect free or do not alias. The optimizer should really
// avoid this case by converting large object/array copies to llvm.memcpy
// (MaxParallelChains should always remain as a failsafe).
if (ChainI == MaxParallelChains) {
assert(PendingLoads.empty() && "PendingLoads must be serialized first");
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
SDValue A = DAG.getNode(ISD::ADD, dl,
PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT),
Flags);
SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
MMOFlags, AAInfo, Ranges);
Chains[ChainI] = L.getValue(1);
if (MemVTs[i] != ValueVTs[i])
L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
Values[i] = L;
}
if (!ConstantMemory) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
if (isVolatile)
DAG.setRoot(Chain);
else
PendingLoads.push_back(Chain);
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
"call visitStoreToSwiftError when backend supports swifterror");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
const Value *SrcV = I.getOperand(0);
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
SrcV->getType(), ValueVTs, &Offsets);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
Register VReg =
SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
SDValue(Src.getNode(), Src.getResNo()));
DAG.setRoot(CopyNode);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
"call visitLoadFromSwiftError when backend supports swifterror");
assert(!I.isVolatile() &&
!I.hasMetadata(LLVMContext::MD_nontemporal) &&
!I.hasMetadata(LLVMContext::MD_invariant_load) &&
"Support volatile, non temporal, invariant for load_from_swift_error");
const Value *SV = I.getOperand(0);
Type *Ty = I.getType();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
assert(
(!AA ||
!AA->pointsToConstantMemory(MemoryLocation(
SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
AAInfo))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
ValueVTs, &Offsets);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
// Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
SDValue L = DAG.getCopyFromReg(
getRoot(), getCurSDLoc(),
SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
setValue(&I, L);
}
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
if (I.isAtomic())
return visitAtomicStore(I);
const Value *SrcV = I.getOperand(0);
const Value *PtrV = I.getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
if (Arg->hasSwiftErrorAttr())
return visitStoreToSwiftError(I);
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
if (Alloca->isSwiftError())
return visitStoreToSwiftError(I);
}
}
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
// Get the lowered operands. Note that we do this after
// checking if NumValues is zero, because with zero values
// the operands won't have values in the map.
SDValue Src = getValue(SrcV);
SDValue Ptr = getValue(PtrV);
SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
Align Alignment = I.getAlign();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
// An aggregate store cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
if (ChainI == MaxParallelChains) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
SDValue Add =
DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags);
SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
SDValue St =
DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
Alignment, MMOFlags, AAInfo);
Chains[ChainI] = St;
}
SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
DAG.setRoot(StoreNode);
}
void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
bool IsCompressing) {
SDLoc sdl = getCurSDLoc();
auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
Mask = I.getArgOperand(3);
};
auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// llvm.masked.compressstore.*(Src0, Ptr, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
Alignment = None;
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
MaybeAlign Alignment;
if (IsCompressing)
getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
// Get a uniform base for the Gather/Scatter intrinsic.
// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
// We try to represent it as a base pointer + vector of indices.
// Usually, the vector of pointers comes from a 'getelementptr' instruction.
// The first operand of the GEP may be a single pointer or a vector of pointers
// Example:
// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
// or
// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
//
// When the first GEP operand is a single pointer, it is the uniform base we
// are looking for. If the first operand of the GEP is a splat vector, we
// extract the splat value and use it as the uniform base.
// In all other cases the function returns 'false'.
static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
ISD::MemIndexType &IndexType, SDValue &Scale,
SelectionDAGBuilder *SDB, const BasicBlock *CurBB) {
SelectionDAG& DAG = SDB->DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
// Handle splat constant pointer.
if (auto *C = dyn_cast<Constant>(Ptr)) {
C = C->getSplatValue();
if (!C)
return false;
Base = SDB->getValue(C);
ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP || GEP->getParent() != CurBB)
return false;
if (GEP->getNumOperands() != 2)
return false;
const Value *BasePtr = GEP->getPointerOperand();
const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1);
// Make sure the base is scalar and the index is a vector.
if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
return false;
Base = SDB->getValue(BasePtr);
Index = SDB->getValue(IndexVal);
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(
DL.getTypeAllocSize(GEP->getResultElementType()),
SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
EVT VT = Src0.getValueType();
Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
->getMaybeAlignValue()
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
I.getParent());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
MemoryLocation::UnknownSize, Alignment, AAInfo);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
IndexType = ISD::SIGNED_UNSCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
}
SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
Ops, MMO, IndexType, false);
DAG.setRoot(Scatter);
setValue(&I, Scatter);
}
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();
auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
Mask = I.getArgOperand(2);
Src0 = I.getArgOperand(3);
};
auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = None;
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
MaybeAlign Alignment;
if (IsExpanding)
getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
MemoryLocation ML;
if (VT.isScalableVector())
ML = MemoryLocation::getAfter(PtrOperand);
else
ML = MemoryLocation(PtrOperand, LocationSize::precise(
DAG.getDataLayout().getTypeStoreSize(I.getType())),
AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
SDValue Load =
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
if (AddToChain)
PendingLoads.push_back(Load.getValue(1));
setValue(&I, Load);
}
void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
const Value *Ptr = I.getArgOperand(0);
SDValue Src0 = getValue(I.getArgOperand(3));
SDValue Mask = getValue(I.getArgOperand(2));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
->getMaybeAlignValue()
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
I.getParent());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
IndexType = ISD::SIGNED_UNSCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
}
SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
Ops, MMO, IndexType, ISD::NON_EXTLOAD);
PendingLoads.push_back(Gather.getValue(1));
setValue(&I, Gather);
}
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
AtomicOrdering FailureOrdering = I.getFailureOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering,
FailureOrdering);
SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
dl, MemVT, VTs, InChain,
getValue(I.getPointerOperand()),
getValue(I.getCompareOperand()),
getValue(I.getNewValOperand()), MMO);
SDValue OutChain = L.getValue(2);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
SDLoc dl = getCurSDLoc();
ISD::NodeType NT;
switch (I.getOperation()) {
default: llvm_unreachable("Unknown atomicrmw operation");
case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering);
SDValue L =
DAG.getAtomic(NT, dl, MemVT, InChain,
getValue(I.getPointerOperand()), getValue(I.getValOperand()),
MMO);
SDValue OutChain = L.getValue(1);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
SDLoc dl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Order = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
if (!TLI.supportsUnalignedAtomics() &&
I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue Ptr = getValue(I.getPointerOperand());
if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for loads to prevent future divergence.
SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
if (MemVT != VT)
L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
SDValue OutChain = L.getValue(1);
if (!I.isUnordered())
DAG.setRoot(OutChain);
else
PendingLoads.push_back(OutChain);
return;
}
SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
Ptr, MMO);
SDValue OutChain = L.getValue(1);
if (MemVT != VT)
L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
if (I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering);
SDValue Val = getValue(I.getValueOperand());
if (Val.getValueType() != MemVT)
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
SDValue Ptr = getValue(I.getPointerOperand());
if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for stores to prevent future divergence.
SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
DAG.setRoot(S);
return;
}
SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
Ptr, Val, MMO);
DAG.setRoot(OutChain);
}
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
unsigned Intrinsic) {
// Ignore the callsite's attributes. A specific call site may be marked with
// readnone, but the lowering code will expect the chain based on the
// definition.
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
bool OnlyLoad = HasChain && F->onlyReadsMemory();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
if (HasChain) { // If this intrinsic has side-effects, chainify it.
if (OnlyLoad) {
// We don't need to serialize loads against other loads.
Ops.push_back(DAG.getRoot());
} else {
Ops.push_back(getRoot());
}
}
  // Info is set by getTgtMemIntrinsic.
TargetLowering::IntrinsicInfo Info;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
DAG.getMachineFunction(),
Intrinsic);
  // Add the intrinsic ID as an integer operand if it's not a target memory
  // intrinsic with a custom opcode; generic INTRINSIC_VOID/INTRINSIC_W_CHAIN
  // nodes still need the ID.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
const Value *Arg = I.getArgOperand(i);
if (!I.paramHasAttr(i, Attribute::ImmArg)) {
Ops.push_back(getValue(Arg));
continue;
}
// Use TargetConstant instead of a regular constant for immarg.
EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
assert(CI->getBitWidth() <= 64 &&
"large intrinsic immediates not handled");
Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
} else {
Ops.push_back(
DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
}
}
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
SDVTList VTs = DAG.getVTList(ValueVTs);
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPMO);
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
// Create the node.
SDValue Result;
if (IsTgtIntrinsic) {
    // This is a target intrinsic that touches memory.
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.flags, Info.size, AAInfo);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
} else {
Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
}
if (HasChain) {
SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
if (OnlyLoad)
PendingLoads.push_back(Chain);
else
DAG.setRoot(Chain);
}
if (!I.getType()->isVoidTy()) {
if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
} else
Result = lowerRangeToAssertZExt(DAG, I, Result);
MaybeAlign Alignment = I.getRetAlign();
if (!Alignment)
Alignment = F->getAttributes().getRetAlignment();
// Insert `assertalign` node if there's an alignment.
if (InsertAssertAlign && Alignment) {
Result =
DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
}
setValue(&I, Result);
}
}
/// GetSignificand - Get the significand and build it into a floating-point
/// number with exponent of 1:
///
/// Op = (Op & 0x007fffff) | 0x3f800000;
///
/// where Op is the i32 bit pattern of the floating-point value.
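///
/// For example, for Op = 0x40490FDB (the bit pattern of 3.14159274f) this
/// yields (0x490FDB | 0x3f800000) = 0x3FC90FDB, i.e. 1.5707964f: the
/// significand of Op scaled into [1, 2).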
static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x007fffff, dl, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
DAG.getConstant(0x3f800000, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
/// GetExponent - Get the exponent:
///
/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
/// where Op is the i32 bit pattern of the floating-point value.
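///
/// For example, for Op = 0x40490FDB (the bit pattern of 3.14159274f) the
/// biased exponent field is 0x80 = 128, so the result is (float)(128 - 127)
/// = 1.0f, matching floor(log2(3.14159274)).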
static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
const TargetLowering &TLI, const SDLoc &dl) {
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x7f800000, dl, MVT::i32));
SDValue t1 = DAG.getNode(
ISD::SRL, dl, MVT::i32, t0,
DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
DAG.getConstant(127, dl, MVT::i32));
return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
}
/// getF32Constant - Get 32-bit floating point constant.
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
const SDLoc &dl) {
return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
MVT::f32);
}
static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
SelectionDAG &DAG) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
  // IntegerPartOfX = (int32_t)t0;
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
// FractionalPartOfX = t0 - (float)IntegerPartOfX;
SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
// IntegerPartOfX <<= 23;
IntegerPartOfX = DAG.getNode(
ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
DAG.getDataLayout())));
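  // Shifting the integer part into bit position 23 places it in the exponent
  // field of an IEEE-754 single; adding it to the bitcast polynomial result
  // below scales that result by 2^IntegerPartOfX.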
SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// TwoToFractionalPartOfX =
// 0.997535578f +
// (0.735607626f + 0.252464424f * x) * x;
//
// error 0.0144103317, which is 6 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3e814304, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
// 0.999892986f +
// (0.696457318f +
// (0.224338339f + 0.792043434e-1f * x) * x) * x;
//
// error 0.000107046256, which is 13 to 14 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3da235e3, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3e65b8f3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
// 0.999999982f +
// (0.693148872f +
// (0.240227044f +
// (0.554906021e-1f +
// (0.961591928e-2f +
// (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
// error 2.47208000*10^(-7), which is better than 18 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3924b03e, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3ab24b87, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3c1d8c17, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3d634a1d, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x3e75fe14, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234, dl));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000, dl));
}
// Add the exponent into the result in integer domain.
SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
}
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
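/// In limited-precision mode the identity exp(x) = 2^(x * log2(e)) is used so
/// the exp2 polynomial expansion above can be reused.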
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// t0 = Op * log2(e)
// TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
}
/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
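/// In limited-precision mode, x is decomposed as m * 2^e with m in [1, 2), so
/// log(x) = e * ln(2) + log(m), where log(m) is evaluated with a minimax
/// polynomial chosen for the requested precision.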
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2).
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent =
DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
SDValue LogOfMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// LogofMantissa =
// -1.1609546f +
// (1.4034025f - 0.23903021f * x) * x;
//
// error 0.0034276066, which is better than 8 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbe74c456, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3fb3a2b1, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f949a29, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// LogOfMantissa =
// -1.7417939f +
// (2.8212026f +
// (-1.4699568f +
// (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
//
// error 0.000061011436, which is 14 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbd67b6d6, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3ee4f4b8, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fbc278b, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40348e95, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fdef31a, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// LogOfMantissa =
// -2.1072184f +
// (4.2372794f +
// (-3.7029485f +
// (2.2781945f +
// (-0.87823314f +
// (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
//
// error 0.0000023660568, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbc91e5ac, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e4350aa, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f60d3e3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x4011cdf0, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x406cfd1c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x408797cb, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
getF32Constant(DAG, 0x4006dcab, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags);
}
/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
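/// In limited-precision mode, x is decomposed as m * 2^e with m in [1, 2), so
/// log2(x) = e + log2(m), where log2(m) is evaluated with a minimax
/// polynomial chosen for the requested precision.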
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
// Different possible minimax approximations of significand in
// floating-point for various degrees of accuracy over [1,2].
SDValue Log2ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
//
// error 0.0049451742, which is more than 7 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbeb08fe0, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x40019463, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fd6633d, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log2ofMantissa =
// -2.51285454f +
// (4.07009056f +
// (-2.12067489f +
// (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
//
// error 0.0000876136000, which is better than 13 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbda7262e, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f25280b, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x4007b923, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40823e2f, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x4020d29c, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log2ofMantissa =
// -3.0400495f +
// (6.1129976f +
// (-5.3420409f +
// (3.2865683f +
// (-1.2669343f +
// (0.27515199f -
// 0.25691327e-1f * x) * x) * x) * x) * x) * x;
//
// error 0.0000018516, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbcd2769e, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e8ce0b9, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fa22ae7, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40525723, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x40aaf200, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x40c39dad, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
getF32Constant(DAG, 0x4042902c, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags);
}
/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
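/// In limited-precision mode, x is decomposed as m * 2^e with m in [1, 2), so
/// log10(x) = e * log10(2) + log10(m), where log10(m) is evaluated with a
/// minimax polynomial chosen for the requested precision.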
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
getF32Constant(DAG, 0x3e9a209a, dl));
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
SDValue Log10ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// Log10ofMantissa =
// -0.50419619f +
// (0.60948995f - 0.10380950f * x) * x;
//
// error 0.0014886165, which is 6 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbdd49a13, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f1c0789, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f011300, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log10ofMantissa =
// -0.64831180f +
// (0.91751397f +
// (-0.31664806f + 0.47637168e-1f * x) * x) * x;
//
// error 0.00019228036, which is better than 12 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3d431f31, dl));
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3ea21fb2, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f6ae232, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f25f7c3, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log10ofMantissa =
// -0.84299375f +
// (1.5327582f +
// (-1.0688956f +
// (0.49102474f +
// (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
//
// error 0.0000037995730, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3c5d51ce, dl));
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e00685a, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3efb6798, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f88d192, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fc4316c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
getF32Constant(DAG, 0x3f57ce70, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags);
}
/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
return getLimitedPrecisionExp2(Op, dl, DAG);
// No special expansion.
return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags);
}
/// expandPow - Lower a pow intrinsic. Handles the special sequence for
/// limited-precision mode when the base is a constant exactly equal to 10.0f.
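/// In that case pow(10.0f, x) is rewritten as 2^(x * log2(10)) and lowered via
/// the exp2 polynomial expansion.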
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const TargetLowering &TLI,
SDNodeFlags Flags) {
bool IsExp10 = false;
if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
APFloat Ten(10.0f);
IsExp10 = LHSC->isExactlyValue(Ten);
}
}
// TODO: What fast-math-flags should be set on the FMUL node?
if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// #define LOG2OF10 3.3219281f
// t0 = Op * LOG2OF10;
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
getF32Constant(DAG, 0x40549a78, dl));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags);
}
/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
SelectionDAG &DAG) {
  // If RHS is a constant, we can expand this out to a multiplication tree;
  // otherwise we end up lowering to a call to __powidf2 (for example). When
  // optimizing for size, we only do this if the expansion would produce a
  // small number of multiplies; when not optimizing for size, we always do
  // the full expansion.
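  // For example, powi(x, 11): countPopulation(11) + Log2_32(11) = 3 + 3 = 6,
  // so the expansion is used even at -Os, and the binary decomposition below
  // accumulates x, then x^3 = x * x^2, then x^11 = x^3 * x^8 while CurSquare
  // steps through x, x^2, x^4, x^8.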
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
// Get the exponent as a positive value.
unsigned Val = RHSC->getSExtValue();
if ((int)Val < 0) Val = -Val;
// powi(x, 0) -> 1.0
if (Val == 0)
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
bool OptForSize = DAG.shouldOptForSize();
if (!OptForSize ||
// If optimizing for size, don't insert too many multiplies.
// This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
// powi(x,15) generates one more multiply than it should), but this has
// the benefit of being both really simple and much better than a libcall.
SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
// TODO: Intrinsics should have fast-math-flags that propagate to these
// nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
else
Res = CurSquare; // 1.0*CurSquare.
}
CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
CurSquare, CurSquare);
Val >>= 1;
}
// If the original was negative, invert the result, producing 1/(x*x*x).
if (RHSC->getSExtValue() < 0)
Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
return Res;
}
}
// Otherwise, expand to a libcall.
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
SDValue LHS, SDValue RHS, SDValue Scale,
SelectionDAG &DAG, const TargetLowering &TLI) {
EVT VT = LHS.getValueType();
bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
LLVMContext &Ctx = *DAG.getContext();
// If the type is legal but the operation isn't, this node might survive all
// the way to operation legalization. If we end up there and we do not have
// the ability to widen the type (if VT*2 is not legal), we cannot expand the
// node.
// Coax the legalizer into expanding the node during type legalization instead
// by bumping the size by one bit. This will force it to Promote, enabling the
// early expansion and avoiding the need to expand later.
// We don't have to do this if Scale is 0; that can always be expanded, unless
// it's a saturating signed operation. Those can experience true integer
// division overflow, a case which we must avoid.
// FIXME: We wouldn't have to do this (or any of the early
// expansion/promotion) if it was possible to expand a libcall of an
// illegal type during operation legalization. But it's not, so things
// get a bit hacky.
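  // For example, a saturating signed i32 division would be widened to i33
  // here, which the type legalizer then promotes to a legal width where the
  // early expansion can be performed safely.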
unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
if ((ScaleInt > 0 || (Saturating && Signed)) &&
(TLI.isTypeLegal(VT) ||
(VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
Opcode, VT, ScaleInt);
if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
EVT PromVT;
if (VT.isScalarInteger())
PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1);
else if (VT.isVector()) {
PromVT = VT.getVectorElementType();
PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1);
PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
} else
llvm_unreachable("Wrong VT for DIVFIX?");
if (Signed) {
LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT);
} else {
LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
}
EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
// For saturating operations, we need to shift up the LHS to get the
// proper saturation width, and then shift down again afterwards.
if (Saturating)
LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
DAG.getConstant(1, DL, ShiftTy));
SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
if (Saturating)
Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
DAG.getConstant(1, DL, ShiftTy));
return DAG.getZExtOrTrunc(Res, DL, VT);
}
}
return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale);
}
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>
// pairs.
static void
getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
const SDValue &N) {
switch (N.getOpcode()) {
case ISD::CopyFromReg: {
SDValue Op = N.getOperand(1);
Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
Op.getValueType().getSizeInBits());
return;
}
case ISD::BITCAST:
case ISD::AssertZext:
case ISD::AssertSext:
case ISD::TRUNCATE:
getUnderlyingArgRegs(Regs, N.getOperand(0));
return;
case ISD::BUILD_PAIR:
case ISD::BUILD_VECTOR:
case ISD::CONCAT_VECTORS:
for (SDValue Op : N->op_values())
getUnderlyingArgRegs(Regs, Op);
return;
default:
return;
}
}
/// If the DbgValueInst is a dbg_value of a function argument, create the
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
/// We don't currently support this for variadic dbg_values, as they shouldn't
/// appear for function arguments or in the prologue.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
// we've been asked to pursue.
auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
bool Indirect) {
if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) {
// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
// pointing at the VReg, which will be patched up later.
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
auto MIB = BuildMI(MF, DL, Inst);
MIB.addReg(Reg, RegState::Debug);
MIB.addImm(0);
MIB.addMetadata(Variable);
auto *NewDIExpr = FragExpr;
// We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
// the DIExpression.
if (Indirect)
NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore);
MIB.addMetadata(NewDIExpr);
return MIB;
} else {
// Create a completely standard DBG_VALUE.
auto &Inst = TII->get(TargetOpcode::DBG_VALUE);
return BuildMI(MF, DL, Inst, Indirect, Reg, Variable, FragExpr);
}
};
if (!IsDbgDeclare) {
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic is found in
// the entry block.
bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
if (!IsInEntryBlock)
return false;
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic describes a
// variable that also is a param.
//
// Although, if we are at the top of the entry block already, we can still
// emit using ArgDbgValue. This might catch some situations when the
// dbg.value refers to an argument that isn't used in the entry block, so
// any CopyToReg node would be optimized out and the only way to express
// this DBG_VALUE is by using the physical reg (or FI) as done in this
    // method. In short, we should only emit as ArgDbgValue if the Variable is
    // an argument to the current function and the dbg.value intrinsic is found
    // in the entry block.
bool VariableIsFunctionInputArg = Variable->isParameter() &&
!DL->getInlinedAt();
bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
if (!IsInPrologue && !VariableIsFunctionInputArg)
return false;
    // Here we assume that a function argument at the IR level can only be used
    // to describe one input parameter at the source level. If we, for example,
    // have source code like this
//
// struct A { long x, y; };
// void foo(struct A a, long b) {
// ...
// b = a.x;
// ...
// }
//
// and IR like this
//
// define void @foo(i32 %a1, i32 %a2, i32 %b) {
// entry:
// call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
// call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
// call void @llvm.dbg.value(metadata i32 %b, "b",
// ...
// call void @llvm.dbg.value(metadata i32 %a1, "b"
// ...
//
    // then the last dbg.value is describing a parameter "b" using a value that
    // is an argument. But since we have already used %a1 to describe a
    // parameter, we should not handle that last dbg.value here (that would
    // result in an incorrect hoisting of the DBG_VALUE to the function entry).
    // Notice that we allow one dbg.value per IR-level argument, to accommodate
    // the situation with fragments above.
if (VariableIsFunctionInputArg) {
unsigned ArgNo = Arg->getArgNo();
if (ArgNo >= FuncInfo.DescribedArgs.size())
FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
return false;
FuncInfo.DescribedArgs.set(ArgNo);
}
}
bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
if (!Op && N.getNode()) {
getUnderlyingArgRegs(ArgRegsAndSizes, N);
Register Reg;
if (ArgRegsAndSizes.size() == 1)
Reg = ArgRegsAndSizes.front().first;
if (Reg && Reg.isVirtual()) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
Register PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
Reg = PR;
}
if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
IsIndirect = IsDbgDeclare;
}
}
if (!Op && N.getNode()) {
// Check if frame index is available.
SDValue LCandidate = peekThroughBitcasts(N);
if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
if (FrameIndexSDNode *FINode =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
Op = MachineOperand::CreateFI(FINode->getIndex());
}
if (!Op) {
// Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
SplitRegs) {
unsigned Offset = 0;
for (auto RegAndSize : SplitRegs) {
// If the expression is already a fragment, the current register
// offset+size might extend beyond the fragment. In this case, only
// the register bits that are inside the fragment are relevant.
int RegFragmentSizeInBits = RegAndSize.second;
if (auto ExprFragmentInfo = Expr->getFragmentInfo()) {
uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits;
// The register is entirely outside the expression fragment,
// so is irrelevant for debug info.
if (Offset >= ExprFragmentSizeInBits)
break;
// The register is partially outside the expression fragment, only
// the low bits within the fragment are relevant for debug info.
if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) {
RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset;
}
}
auto FragmentExpr = DIExpression::createFragmentExpression(
Expr, Offset, RegFragmentSizeInBits);
Offset += RegAndSize.second;
// If a valid fragment expression cannot be created, the variable's
// correct value cannot be determined and so it is set as Undef.
if (!FragmentExpr) {
SDDbgValue *SDV = DAG.getConstantDbgValue(
Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
continue;
}
MachineInstr *NewMI =
MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, IsDbgDeclare);
FuncInfo.ArgDbgValues.push_back(NewMI);
}
};
// Check if ValueMap has reg number.
DenseMap<const Value *, Register>::const_iterator
VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
splitMultiRegDbgValue(RFV.getRegsAndSizes());
return true;
}
Op = MachineOperand::CreateReg(VMI->second, false);
IsIndirect = IsDbgDeclare;
} else if (ArgRegsAndSizes.size() > 1) {
// This was split due to the calling convention, and no virtual register
// mapping exists for the value.
splitMultiRegDbgValue(ArgRegsAndSizes);
return true;
}
}
if (!Op)
return false;
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
MachineInstr *NewMI = nullptr;
if (Op->isReg())
NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect);
else
NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op,
Variable, Expr);
FuncInfo.ArgDbgValues.push_back(NewMI);
return true;
}
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DILocalVariable *Variable,
DIExpression *Expr,
const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
// stack slot locations.
//
// Consider "int x = 0; int *px = &x;". There are two kinds of interesting
// debug values here after optimization:
//
// dbg.value(i32* %px, !"int *px", !DIExpression()), and
// dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
//
// Both describe the direct values of their associated variables.
return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
/*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
/*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
switch (Intrinsic) {
case Intrinsic::smul_fix:
return ISD::SMULFIX;
case Intrinsic::umul_fix:
return ISD::UMULFIX;
case Intrinsic::smul_fix_sat:
return ISD::SMULFIXSAT;
case Intrinsic::umul_fix_sat:
return ISD::UMULFIXSAT;
case Intrinsic::sdiv_fix:
return ISD::SDIVFIX;
case Intrinsic::udiv_fix:
return ISD::UDIVFIX;
case Intrinsic::sdiv_fix_sat:
return ISD::SDIVFIXSAT;
case Intrinsic::udiv_fix_sat:
return ISD::UDIVFIXSAT;
default:
llvm_unreachable("Unhandled fixed point intrinsic");
}
}
void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
const char *FunctionName) {
assert(FunctionName && "FunctionName must not be nullptr");
SDValue Callee = DAG.getExternalSymbol(
FunctionName,
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
/// Given a @llvm.call.preallocated.setup, return the corresponding
/// preallocated call.
static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
assert(cast<CallBase>(PreallocatedSetup)
->getCalledFunction()
->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
"expected call_preallocated_setup Value");
for (auto *U : PreallocatedSetup->users()) {
auto *UseCall = cast<CallBase>(U);
const Function *Fn = UseCall->getCalledFunction();
if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
return UseCall;
}
}
llvm_unreachable("expected corresponding call to preallocated setup/arg");
}
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc sdl = getCurSDLoc();
DebugLoc dl = getCurDebugLoc();
SDValue Res;
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
switch (Intrinsic) {
default:
// By default, turn this into a target intrinsic node.
visitTargetIntrinsic(I, Intrinsic);
return;
case Intrinsic::vscale: {
match(&I, m_VScale(DAG.getDataLayout()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I,
DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)));
return;
}
case Intrinsic::vastart: visitVAStart(I); return;
case Intrinsic::vaend: visitVAEnd(I); return;
case Intrinsic::vacopy: visitVACopy(I); return;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::addressofreturnaddress:
setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
return;
case Intrinsic::sponentry:
setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
TLI.getFrameIndexTy(DAG.getDataLayout())));
return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getFrameIndexTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::read_volatile_register:
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Res = DAG.getNode(ISD::READ_REGISTER, sdl,
DAG.getVTList(VT, MVT::Other), Chain, RegName);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::write_register: {
Value *Reg = I.getArgOperand(0);
Value *RegValue = I.getArgOperand(1);
SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
RegName, getValue(RegValue)));
return;
}
case Intrinsic::memcpy: {
const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memcpy defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
/* AlwaysInline */ false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), AAInfo);
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memcpy_inline: {
const auto &MCI = cast<MemCpyInlineInst>(I);
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
// @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
/* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), AAInfo);
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memset: {
const auto &MSI = cast<MemSetInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memset defines 0 and 1 to both mean no alignment.
Align Alignment = MSI.getDestAlign().valueOrOne();
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
MachinePointerInfo(I.getArgOperand(0)), AAInfo);
updateDAGForMaybeTailCall(MS);
return;
}
case Intrinsic::memmove: {
const auto &MMI = cast<MemMoveInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memmove defines 0 and 1 to both mean no alignment.
Align DstAlign = MMI.getDestAlign().valueOrOne();
Align SrcAlign = MMI.getSourceAlign().valueOrOne();
Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MMI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), AAInfo);
updateDAGForMaybeTailCall(MM);
return;
}
case Intrinsic::memcpy_element_unordered_atomic: {
const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
unsigned DstAlign = MI.getDestAlignment();
unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
SrcAlign, Length, LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memmove_element_unordered_atomic: {
auto &MI = cast<AtomicMemMoveInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
unsigned DstAlign = MI.getDestAlignment();
unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
SrcAlign, Length, LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memset_element_unordered_atomic: {
auto &MI = cast<AtomicMemSetInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
unsigned DstAlign = MI.getDestAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::call_preallocated_setup: {
const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
getRoot(), SrcValue);
setValue(&I, Res);
DAG.setRoot(Res);
return;
}
case Intrinsic::call_preallocated_arg: {
const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = SrcValue;
Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
MVT::i32); // arg index
SDValue Res = DAG.getNode(
ISD::PREALLOCATED_ARG, sdl,
DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
    // Assume dbg.addr and dbg.declare cannot currently use DIArgList, i.e.
    // they are non-variadic.
const auto &DI = cast<DbgVariableIntrinsic>(I);
assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
assert(Variable && "Missing variable");
LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
<< "\n");
// Check if address has undef value.
const Value *Address = DI.getVariableLocationOp(0);
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
<< " (bad/undef/unused-arg address)\n");
return;
}
bool isParameter = Variable->isParameter() || isa<Argument>(Address);
// Check if this variable can be described by a frame index, typically
// either as a static alloca or a byval parameter.
int FI = std::numeric_limits<int>::max();
if (const auto *AI =
dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
if (AI->isStaticAlloca()) {
auto I = FuncInfo.StaticAllocaMap.find(AI);
if (I != FuncInfo.StaticAllocaMap.end())
FI = I->second;
}
} else if (const auto *Arg = dyn_cast<Argument>(
Address->stripInBoundsConstantOffsets())) {
FI = FuncInfo.getArgumentFrameIndex(Arg);
}
// llvm.dbg.addr is control dependent and always generates indirect
// DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
// the MachineFunction variable table.
if (FI != std::numeric_limits<int>::max()) {
if (Intrinsic == Intrinsic::dbg_addr) {
SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
Variable, Expression, FI, getRoot().getNode(), /*IsIndirect*/ true,
dl, SDNodeOrder);
DAG.AddDbgValue(SDV, isParameter);
} else {
LLVM_DEBUG(dbgs() << "Skipping " << DI
<< " (variable info stashed in MF side table)\n");
}
return;
}
SDValue &N = NodeMap[Address];
if (!N.getNode() && isa<Argument>(Address))
// Check unused arguments map.
N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (isParameter && FINode) {
// Byval parameter. We have a frame index at this point.
SDV =
DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
/*IsIndirect*/ true, dl, SDNodeOrder);
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
return;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, dl, SDNodeOrder);
}
DAG.AddDbgValue(SDV, isParameter);
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
N)) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
<< " (could not emit func-arg dbg_value)\n");
}
}
return;
}
case Intrinsic::dbg_label: {
const DbgLabelInst &DI = cast<DbgLabelInst>(I);
DILabel *Label = DI.getLabel();
assert(Label && "Missing label");
SDDbgLabel *SDV;
SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
DAG.AddDbgLabel(SDV);
return;
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
SmallVector<Value *, 4> Values(DI.getValues());
if (Values.empty())
return;
if (std::count(Values.begin(), Values.end(), nullptr))
return;
bool IsVariadic = DI.hasArgList();
if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(),
SDNodeOrder, IsVariadic))
addDanglingDebugInfo(&DI, dl, SDNodeOrder);
return;
}
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, sdl, MVT::i32);
setValue(&I, Res);
return;
}
case Intrinsic::eh_return_i32:
case Intrinsic::eh_return_i64:
DAG.getMachineFunction().setCallsEHReturn(true);
DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
MVT::Other,
getControlRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
return;
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().setCallsUnwindInit(true);
return;
case Intrinsic::eh_dwarf_cfa:
setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
MMI.setCurrentCallSite(CI->getZExtValue());
return;
}
case Intrinsic::eh_sjlj_functioncontext: {
// Get and store the index of the function context.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
AllocaInst *FnCtx =
cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
int FI = FuncInfo.StaticAllocaMap[FnCtx];
MFI.setFunctionContextIndex(FI);
return;
}
case Intrinsic::eh_sjlj_setjmp: {
SDValue Ops[2];
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
DAG.getVTList(MVT::i32, MVT::Other), Ops);
setValue(&I, Op.getValue(0));
DAG.setRoot(Op.getValue(1));
return;
}
case Intrinsic::eh_sjlj_longjmp:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
getRoot(), getValue(I.getArgOperand(0))));
return;
case Intrinsic::eh_sjlj_setup_dispatch:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
getRoot()));
return;
case Intrinsic::masked_gather:
visitMaskedGather(I);
return;
case Intrinsic::masked_load:
visitMaskedLoad(I);
return;
case Intrinsic::masked_scatter:
visitMaskedScatter(I);
return;
case Intrinsic::masked_store:
visitMaskedStore(I);
return;
case Intrinsic::masked_expandload:
visitMaskedLoad(I, true /* IsExpanding */);
return;
case Intrinsic::masked_compressstore:
visitMaskedStore(I, true /* IsCompressing */);
return;
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
return;
case Intrinsic::log:
setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log2:
setValue(&I,
expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log10:
setValue(&I,
expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp:
setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp2:
setValue(&I,
expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG, TLI, Flags));
return;
case Intrinsic::sqrt:
case Intrinsic::fabs:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::canonicalize: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::fabs: Opcode = ISD::FABS; break;
case Intrinsic::sin: Opcode = ISD::FSIN; break;
case Intrinsic::cos: Opcode = ISD::FCOS; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
case Intrinsic::round: Opcode = ISD::FROUND; break;
case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
}
setValue(&I, DAG.getNode(Opcode, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::lround:
case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::lround: Opcode = ISD::LROUND; break;
case Intrinsic::llround: Opcode = ISD::LLROUND; break;
case Intrinsic::lrint: Opcode = ISD::LRINT; break;
case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
}
EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
getValue(I.getArgOperand(0))));
return;
}
case Intrinsic::minnum:
setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maxnum:
setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::minimum:
setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maximum:
setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::arithmetic_fence: {
setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::fma:
setValue(&I, DAG.getNode(
ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
return;
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#include "llvm/IR/VPIntrinsics.def"
visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
return;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
} else {
// TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(
ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags);
SDValue Add = DAG.getNode(ISD::FADD, sdl,
getValue(I.getArgOperand(0)).getValueType(),
Mul, getValue(I.getArgOperand(2)), Flags);
setValue(&I, Add);
}
return;
}
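// For example, "call float @llvm.fmuladd.f32(float %a, float %b, float %c)"
// (illustrative IR) becomes a single ISD::FMA node when fusion is allowed and
// the target reports FMA as faster than separate multiply/add; otherwise it
// is split into the ISD::FMUL followed by ISD::FADD built above.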
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
getValue(I.getArgOperand(0)),
DAG.getTargetConstant(0, sdl,
MVT::i32))));
return;
case Intrinsic::convert_from_fp16:
setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType()),
DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
getValue(I.getArgOperand(0)))));
return;
case Intrinsic::fptosi_sat: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT,
getValue(I.getArgOperand(0)),
DAG.getValueType(VT.getScalarType())));
return;
}
case Intrinsic::fptoui_sat: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, VT,
getValue(I.getArgOperand(0)),
DAG.getValueType(VT.getScalarType())));
return;
}
case Intrinsic::set_rounding:
Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
{getRoot(), getValue(I.getArgOperand(0))});
setValue(&I, Res);
DAG.setRoot(Res.getValue(0));
return;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
return;
}
case Intrinsic::readcyclecounter: {
SDValue Op = getRoot();
Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
DAG.getVTList(MVT::i64, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::bitreverse:
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
sdl, Ty, Arg));
return;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
sdl, Ty, Arg));
return;
}
case Intrinsic::ctpop: {
SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
return;
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
bool IsFSHL = Intrinsic == Intrinsic::fshl;
SDValue X = getValue(I.getArgOperand(0));
SDValue Y = getValue(I.getArgOperand(1));
SDValue Z = getValue(I.getArgOperand(2));
EVT VT = X.getValueType();
if (X == Y) {
auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
} else {
auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
}
return;
}
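// A funnel shift whose two data operands are the same value is a rotate, so
// e.g. "llvm.fshl.i32(i32 %x, i32 %x, i32 %z)" (illustrative IR) is emitted
// as ISD::ROTL, and the corresponding fshr form as ISD::ROTR, instead of a
// generic funnel-shift node.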
case Intrinsic::sadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::uadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::ssub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::usub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::sshl_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::ushl_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::smul_fix:
case Intrinsic::umul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
Op1.getValueType(), Op1, Op2, Op3));
return;
}
case Intrinsic::sdiv_fix:
case Intrinsic::udiv_fix:
case Intrinsic::sdiv_fix_sat:
case Intrinsic::udiv_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
Op1, Op2, Op3, DAG, TLI));
return;
}
case Intrinsic::smax: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::smin: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::umax: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::umin: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::abs: {
// TODO: Preserve "int min is poison" arg in SDAG?
SDValue Op1 = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1));
return;
}
case Intrinsic::stacksave: {
SDValue Op = getRoot();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::stackrestore:
Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
return;
case Intrinsic::get_dynamic_area_offset: {
SDValue Op = getRoot();
EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
// The result type for @llvm.get.dynamic.area.offset should match PtrTy for
// the target.
if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
" intrinsic!");
Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
Op);
DAG.setRoot(Op);
setValue(&I, Res);
return;
}
case Intrinsic::stackguard: {
MachineFunction &MF = DAG.getMachineFunction();
const Module &M = *MF.getFunction().getParent();
SDValue Chain = getRoot();
if (TLI.useLoadStackGuardNode()) {
Res = getLoadStackGuard(DAG, sdl, Chain);
} else {
EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
Align Align = DL->getPrefTypeAlign(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
MachinePointerInfo(Global, 0), Align,
MachineMemOperand::MOVolatile);
}
if (TLI.useStackGuardXorFP())
Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
DAG.setRoot(Chain);
setValue(&I, Res);
return;
}
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
SDValue Src, Chain = getRoot();
if (TLI.useLoadStackGuardNode())
Src = getLoadStackGuard(DAG, sdl, Chain);
else
Src = getValue(I.getArgOperand(0)); // The guard's value.
AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
int FI = FuncInfo.StaticAllocaMap[Slot];
MFI.setStackProtectorIndex(FI);
EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
Res = DAG.getStore(
Chain, sdl, Src, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MaybeAlign(), MachineMemOperand::MOVolatile);
setValue(&I, Res);
DAG.setRoot(Res);
return;
}
case Intrinsic::objectsize:
llvm_unreachable("llvm.objectsize.* should have been lowered already");
case Intrinsic::is_constant:
llvm_unreachable("llvm.is.constant.* should have been lowered already");
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return;
case Intrinsic::assume:
case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
// Discard annotate attributes, noalias scope declarations, assumptions, and
// artificial side-effects.
return;
case Intrinsic::codeview_annotation: {
// Emit a label associated with this metadata.
MachineFunction &MF = DAG.getMachineFunction();
MCSymbol *Label =
MF.getMMI().getContext().createTempSymbol("annotation", true);
Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
DAG.setRoot(Res);
return;
}
case Intrinsic::init_trampoline: {
const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
SDValue Ops[6];
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
Ops[5] = DAG.getSrcValue(F);
Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
DAG.setRoot(Res);
return;
}
case Intrinsic::adjust_trampoline:
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::gcroot: {
assert(DAG.getMachineFunction().getFunction().hasGC() &&
"only valid in functions with gc specified, enforced by Verifier");
assert(GFI && "implied by previous");
const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
GFI->addStackRoot(FI->getIndex(), TypeMap);
return;
}
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds:
Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
case Intrinsic::expect:
// Just replace __builtin_expect(exp, c) with EXP.
setValue(&I, getValue(I.getArgOperand(0)));
return;
case Intrinsic::ubsantrap:
case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName =
I.getAttributes()
.getAttribute(AttributeList::FunctionIndex, "trap-func-name")
.getValueAsString();
if (TrapFuncName.empty()) {
switch (Intrinsic) {
case Intrinsic::trap:
DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
break;
case Intrinsic::debugtrap:
DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
break;
case Intrinsic::ubsantrap:
DAG.setRoot(DAG.getNode(
ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
DAG.getTargetConstant(
cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
MVT::i32)));
break;
default: llvm_unreachable("unknown trap intrinsic");
}
return;
}
TargetLowering::ArgListTy Args;
if (Intrinsic == Intrinsic::ubsantrap) {
Args.push_back(TargetLoweringBase::ArgListEntry());
Args[0].Val = I.getArgOperand(0);
Args[0].Node = getValue(Args[0].Val);
Args[0].Ty = Args[0].Val->getType();
}
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(),
TLI.getPointerTy(DAG.getDataLayout())),
std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return;
}
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow: {
ISD::NodeType Op;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
}
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
EVT ResultVT = Op1.getValueType();
EVT OverflowVT = MVT::i1;
if (ResultVT.isVector())
OverflowVT = EVT::getVectorVT(
*Context, OverflowVT, ResultVT.getVectorElementCount());
SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
return;
}
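// E.g. "llvm.uadd.with.overflow.i32" yields a two-result UADDO node with
// value types {i32, i1}; for vector operands the i1 overflow type is widened
// above to a vector of i1 with the same element count.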
case Intrinsic::prefetch: {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
Ops[0] = DAG.getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = getValue(I.getArgOperand(3));
SDValue Result = DAG.getMemIntrinsicNode(
ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
/* align */ None, Flags);
// Chain the prefetch in parallel with any pending loads, to stay out of
// the way of later optimizations.
PendingLoads.push_back(Result);
Result = getRoot();
DAG.setRoot(Result);
return;
}
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
// Stack coloring is not enabled at -O0, so discard region information.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
const int64_t ObjectSize =
cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
Value *const ObjectPtr = I.getArgOperand(1);
SmallVector<const Value *, 4> Allocas;
getUnderlyingObjects(ObjectPtr, Allocas);
for (const Value *Alloca : Allocas) {
const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Alloca);
// Could not find an Alloca.
if (!LifetimeObject)
continue;
// First check that the Alloca is static, otherwise it won't have a
// valid frame index.
auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
if (SI == FuncInfo.StaticAllocaMap.end())
return;
const int FrameIndex = SI->second;
int64_t Offset;
if (GetPointerBaseWithConstantOffset(
ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
Offset = -1; // Cannot determine offset from alloca to lifetime object.
Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
Offset);
DAG.setRoot(Res);
}
return;
}
case Intrinsic::pseudoprobe: {
auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr);
DAG.setRoot(Res);
return;
}
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
return;
case Intrinsic::invariant_end:
// Discard region information.
return;
case Intrinsic::clear_cache:
// FunctionName may be null.
if (const char *FunctionName = TLI.getClearCacheBuiltinName())
lowerCallToExternalSymbol(I, FunctionName);
return;
case Intrinsic::donothing:
case Intrinsic::seh_try_begin:
case Intrinsic::seh_scope_begin:
case Intrinsic::seh_try_end:
case Intrinsic::seh_scope_end:
// ignore
return;
case Intrinsic::experimental_stackmap:
visitStackmap(I);
return;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(I);
return;
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(cast<GCStatepointInst>(I));
return;
case Intrinsic::experimental_gc_result:
visitGCResult(cast<GCResultInst>(I));
return;
case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
return;
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:
llvm_unreachable("instrprof failed to lower a value profiling call");
case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
AllocaInst *Slot = cast<AllocaInst>(Arg);
assert(FuncInfo.StaticAllocaMap.count(Slot) &&
"can only escape static allocas");
int FI = FuncInfo.StaticAllocaMap[Slot];
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
TII->get(TargetOpcode::LOCAL_ESCAPE))
.addSym(FrameAllocSym)
.addFrameIndex(FI);
}
return;
}
case Intrinsic::localrecover: {
// i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
MachineFunction &MF = DAG.getMachineFunction();
// Get the symbol that defines the frame offset.
auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
unsigned IdxVal =
unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
Value *FP = I.getArgOperand(1);
SDValue FPVal = getValue(FP);
EVT PtrVT = FPVal.getValueType();
// Create an MCSymbol for the label to avoid any target lowering
// that would make this PC-relative.
SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
SDValue OffsetVal =
DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
// Add the offset to the FP.
SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
setValue(&I, Add);
return;
}
case Intrinsic::eh_exceptionpointer:
case Intrinsic::eh_exceptioncode: {
// Get the exception pointer vreg, copy from it, and resize it to fit.
const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
SDValue N =
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
if (Intrinsic == Intrinsic::eh_exceptioncode)
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
setValue(&I, N);
return;
}
case Intrinsic::xray_customevent: {
// Here we want to make sure that the intrinsic behaves as if it had a
// specific calling convention; currently this is only supported for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64)
return;
SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
SDValue LogEntryVal = getValue(I.getArgOperand(0));
SDValue StrSizeVal = getValue(I.getArgOperand(1));
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Chain = getRoot();
Ops.push_back(LogEntryVal);
Ops.push_back(StrSizeVal);
Ops.push_back(Chain);
// We need to enforce the calling convention for the call site so that
// argument ordering is handled correctly and register allocation can see
// which registers may be clobbered and must be preserved across calls to
// the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
DL, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
return;
}
case Intrinsic::xray_typedevent: {
// Here we want to make sure that the intrinsic behaves as if it had a
// specific calling convention; currently this is only supported for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64)
return;
SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
// It's unclear to me how manipulating the selection DAG here forces callers
// to provide arguments in registers instead of on the stack.
SDValue LogTypeId = getValue(I.getArgOperand(0));
SDValue LogEntryVal = getValue(I.getArgOperand(1));
SDValue StrSizeVal = getValue(I.getArgOperand(2));
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Chain = getRoot();
Ops.push_back(LogTypeId);
Ops.push_back(LogEntryVal);
Ops.push_back(StrSizeVal);
Ops.push_back(Chain);
// We need to enforce the calling convention for the call site so that
// argument ordering is handled correctly and register allocation can see
// which registers may be clobbered and must be preserved across calls to
// the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(
TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
return;
}
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
return;
case Intrinsic::experimental_stepvector:
visitStepVector(I);
return;
case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul:
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
return;
case Intrinsic::icall_branch_funnel: {
SmallVector<SDValue, 16> Ops;
Ops.push_back(getValue(I.getArgOperand(0)));
int64_t Offset;
auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(1), Offset, DAG.getDataLayout()));
if (!Base)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
struct BranchFunnelTarget {
int64_t Offset;
SDValue Target;
};
SmallVector<BranchFunnelTarget, 8> Targets;
for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(Op), Offset, DAG.getDataLayout()));
if (ElemBase != Base)
report_fatal_error("all llvm.icall.branch.funnel operands must refer "
"to the same GlobalValue");
SDValue Val = getValue(I.getArgOperand(Op + 1));
auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
if (!GA)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Targets.push_back({Offset, DAG.getTargetGlobalAddress(
GA->getGlobal(), getCurSDLoc(),
Val.getValueType(), GA->getOffset())});
}
llvm::sort(Targets,
[](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
return T1.Offset < T2.Offset;
});
for (auto &T : Targets) {
Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
Ops.push_back(T.Target);
}
Ops.push_back(DAG.getRoot()); // Chain
SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
getCurSDLoc(), MVT::Other, Ops),
0);
DAG.setRoot(N);
setValue(&I, N);
HasTailCall = true;
return;
}
case Intrinsic::wasm_landingpad_index:
// The information this intrinsic contained has been transferred to
// MachineFunction in SelectionDAGISel::PrepareEHLandingPad, so we can
// safely delete it now.
return;
case Intrinsic::aarch64_settag:
case Intrinsic::aarch64_settag_zero: {
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
SDValue Val = TSI.EmitTargetCodeForSetTag(
DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
ZeroMemory);
DAG.setRoot(Val);
setValue(&I, Val);
return;
}
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
SDValue Const = getValue(I.getOperand(1));
EVT PtrVT = Ptr.getValueType();
setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr,
DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT)));
return;
}
case Intrinsic::get_active_lane_mask: {
auto DL = getCurSDLoc();
SDValue Index = getValue(I.getOperand(0));
SDValue TripCount = getValue(I.getOperand(1));
Type *ElementTy = I.getOperand(0)->getType();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned VecWidth = VT.getVectorNumElements();
SmallVector<SDValue, 16> OpsTripCount;
SmallVector<SDValue, 16> OpsIndex;
SmallVector<SDValue, 16> OpsStepConstants;
for (unsigned i = 0; i < VecWidth; i++) {
OpsTripCount.push_back(TripCount);
OpsIndex.push_back(Index);
OpsStepConstants.push_back(
DAG.getConstant(i, DL, EVT::getEVT(ElementTy)));
}
EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth);
auto VecTy = EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth));
SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex);
SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
SDValue VectorInduction = DAG.getNode(
ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount);
SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
VectorTripCount, ISD::CondCode::SETULT);
setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
SetCC));
return;
}
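// In other words, for a fixed-width <N x i1> result the mask is computed as
// lane[i] = ((Index + i) ult TripCount), and the UADDO carry is used above
// to force lanes whose index computation wrapped to false.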
case Intrinsic::experimental_vector_insert: {
auto DL = getCurSDLoc();
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
SDValue Index = getValue(I.getOperand(2));
// The intrinsic's index type is i64, but the SDNode requires an index type
// suitable for the target. Convert the index as required.
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
cast<ConstantSDNode>(Index)->getZExtValue(), DL);
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
Index));
return;
}
case Intrinsic::experimental_vector_extract: {
auto DL = getCurSDLoc();
SDValue Vec = getValue(I.getOperand(0));
SDValue Index = getValue(I.getOperand(1));
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
// The intrinsic's index type is i64, but the SDNode requires an index type
// suitable for the target. Convert the index as required.
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
cast<ConstantSDNode>(Index)->getZExtValue(), DL);
setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
return;
}
case Intrinsic::experimental_vector_reverse:
visitVectorReverse(I);
return;
case Intrinsic::experimental_vector_splice:
visitVectorSplice(I);
return;
}
}
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI) {
SDLoc sdl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
ValueVTs.push_back(MVT::Other); // Out chain
// We do not need to serialize constrained FP intrinsics against
// each other or against (nonvolatile) loads, so they can be
// chained like loads.
SDValue Chain = DAG.getRoot();
SmallVector<SDValue, 4> Opers;
Opers.push_back(Chain);
if (FPI.isUnaryOp()) {
Opers.push_back(getValue(FPI.getArgOperand(0)));
} else if (FPI.isTernaryOp()) {
Opers.push_back(getValue(FPI.getArgOperand(0)));
Opers.push_back(getValue(FPI.getArgOperand(1)));
Opers.push_back(getValue(FPI.getArgOperand(2)));
} else {
Opers.push_back(getValue(FPI.getArgOperand(0)));
Opers.push_back(getValue(FPI.getArgOperand(1)));
}
auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
assert(Result.getNode()->getNumValues() == 2);
// Push node to the appropriate list so that future instructions can be
// chained up correctly.
SDValue OutChain = Result.getValue(1);
switch (EB) {
case fp::ExceptionBehavior::ebIgnore:
// The only reason why ebIgnore nodes still need to be chained is that
// they might depend on the current rounding mode, and therefore must
// not be moved across instructions that may change that mode.
LLVM_FALLTHROUGH;
case fp::ExceptionBehavior::ebMayTrap:
// These must not be moved across calls or instructions that may change
// floating-point exception masks.
PendingConstrainedFP.push_back(OutChain);
break;
case fp::ExceptionBehavior::ebStrict:
// These must not be moved across calls or instructions that may change
// floating-point exception masks or read floating-point exception flags.
// In addition, they cannot be optimized out even if unused.
PendingConstrainedFPStrict.push_back(OutChain);
break;
}
};
SDVTList VTs = DAG.getVTList(ValueVTs);
fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
SDNodeFlags Flags;
if (EB == fp::ExceptionBehavior::ebIgnore)
Flags.setNoFPExcept(true);
if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI))
Flags.copyFMF(*FPOp);
unsigned Opcode;
switch (FPI.getIntrinsicID()) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case Intrinsic::INTRINSIC: \
Opcode = ISD::STRICT_##DAGN; \
break;
#include "llvm/IR/ConstrainedOps.def"
case Intrinsic::experimental_constrained_fmuladd: {
Opcode = ISD::STRICT_FMA;
// Break fmuladd into fmul and fadd.
if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
!TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
ValueVTs[0])) {
Opers.pop_back();
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
pushOutChain(Mul, EB);
Opcode = ISD::STRICT_FADD;
Opers.clear();
Opers.push_back(Mul.getValue(1));
Opers.push_back(Mul.getValue(0));
Opers.push_back(getValue(FPI.getArgOperand(2)));
}
break;
}
}
// A few strict DAG nodes carry additional operands that are not
// set up by the default code above.
switch (Opcode) {
default: break;
case ISD::STRICT_FP_ROUND:
Opers.push_back(
DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
break;
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
Opers.push_back(DAG.getCondCode(Condition));
break;
}
}
SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
pushOutChain(Result, EB);
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
}
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
Optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN:
#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID;
#define END_REGISTER_VP_INTRINSIC(...) break;
#include "llvm/IR/VPIntrinsics.def"
}
if (!ResOPC.hasValue())
llvm_unreachable(
"Inconsistency: no SDNode available for this VPIntrinsic!");
return ResOPC.getValue();
}
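// The mapping above is generated from VPIntrinsics.def: each
// BEGIN_REGISTER_VP_INTRINSIC case label falls through to the ResOPC
// assignment emitted for the matching BEGIN_REGISTER_VP_SDNODE entry, so
// every VP intrinsic that registers an SDNode resolves to exactly one ISD
// opcode.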
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
SDLoc DL = getCurSDLoc();
unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
SDVTList VTs = DAG.getVTList(ValueVTs);
auto EVLParamPos =
VPIntrinsic::getVectorLengthParamPos(VPIntrin.getIntrinsicID());
MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
"Unexpected target EVL type");
// Request operands.
SmallVector<SDValue, 7> OpValues;
for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) {
auto Op = getValue(VPIntrin.getArgOperand(I));
if (I == EVLParamPos)
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
OpValues.push_back(Op);
}
SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
setValue(&VPIntrin, Result);
}
SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
const BasicBlock *EHPadBB,
MCSymbol *&BeginLabel) {
MachineFunction &MF = DAG.getMachineFunction();
MachineModuleInfo &MMI = MF.getMMI();
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
BeginLabel = MMI.getContext().createTempSymbol();
// For SjLj, keep track of which landing pads go with which invokes
// so as to maintain the ordering of pads in the LSDA.
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
// Now that the call site is handled, stop tracking it.
MMI.setCurrentCallSite(0);
}
return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel);
}
SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB,
MCSymbol *BeginLabel) {
assert(BeginLabel && "BeginLabel should've been set");
MachineFunction &MF = DAG.getMachineFunction();
MachineModuleInfo &MMI = MF.getMMI();
// Insert a label at the end of the invoke call to mark the try range. This
// can be used to detect deletion of the invoke via the MachineModuleInfo.
MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel);
// Inform MachineModuleInfo of range.
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
// Some platforms (e.g. wasm) use funclet-style IR but do not actually use
// outlined funclets or their LSDA info style.
if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
assert(II && "II should've been set");
WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
EHInfo->addIPToStateRange(II, BeginLabel, EndLabel);
} else if (!isScopedEHPersonality(Pers)) {
assert(EHPadBB);
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
return Chain;
}
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
MCSymbol *BeginLabel = nullptr;
if (EHPadBB) {
// Both PendingLoads and PendingExports must be flushed here;
// this call might not return.
(void)getRoot();
DAG.setRoot(lowerStartEH(getControlRoot(), EHPadBB, BeginLabel));
CLI.setChain(getRoot());
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
assert((CLI.IsTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
"Null value expected with tail call!");
if (!Result.second.getNode()) {
// As a special case, a null chain means that a tail call has been emitted
// and the DAG root is already updated.
HasTailCall = true;
// Since there's no actual continuation from this block, nothing can be
// relying on us setting vregs for the exported values.
PendingExports.clear();
} else {
DAG.setRoot(Result.second);
}
if (EHPadBB) {
DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB,
BeginLabel));
}
return Result;
}
void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
bool isTailCall,
bool isMustTailCall,
const BasicBlock *EHPadBB) {
auto &DL = DAG.getDataLayout();
FunctionType *FTy = CB.getFunctionType();
Type *RetTy = CB.getType();
TargetLowering::ArgListTy Args;
Args.reserve(CB.arg_size());
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (isTailCall) {
// Avoid emitting tail calls in functions with the disable-tail-calls
// attribute.
auto *Caller = CB.getParent()->getParent();
if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
"true" && !isMustTailCall)
isTailCall = false;
// We can't tail call inside a function with a swifterror argument; lowering
// does not support this yet. The swifterror value would have to be moved
// into the swifterror register before the call.
if (TLI.supportSwiftError() &&
Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
isTailCall = false;
}
for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
TargetLowering::ArgListEntry Entry;
const Value *V = *I;
// Skip empty types
if (V->getType()->isEmptyTy())
continue;
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
Entry.setAttributes(&CB, I - CB.arg_begin());
// Use swifterror virtual register as input to the call.
if (Entry.IsSwiftError && TLI.supportSwiftError()) {
SwiftErrorVal = V;
// Find the virtual register for the actual swifterror argument and use it
// in place of the Value.
Entry.Node =
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
// If we have an explicit sret argument that is an Instruction (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
if (Entry.IsSRet && isa<Instruction>(V))
isTailCall = false;
}
// If call site has a cfguardtarget operand bundle, create and add an
// additional ArgListEntry.
if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
TargetLowering::ArgListEntry Entry;
Value *V = Bundle->Inputs[0];
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode;
Entry.Ty = V->getType();
Entry.IsCFGuardTarget = true;
Args.push_back(Entry);
}
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.
if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
isTailCall = false;
// Disable tail calls if there is a swifterror argument. Targets have not
// been updated to support tail calls in this case.
if (TLI.supportSwiftError() && SwiftErrorVal)
isTailCall = false;
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
.setCallee(RetTy, FTy, Callee, std::move(Args), CB)
.setTailCall(isTailCall)
.setConvergent(CB.isConvergent())
.setIsPreallocated(
CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
setValue(&CB, Result.first);
}
// The last element of CLI.InVals has the SDValue for swifterror return.
// Here we copy it to a virtual register and update SwiftErrorMap for
// book-keeping.
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
Register VReg =
SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
DAG.setRoot(CopyNode);
}
}
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
// Cast pointer to the type we really want to load.
Type *LoadTy =
Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
if (LoadVT.isVector())
LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements());
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
return Builder.getValue(LoadCst);
}
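// E.g. for a pointer into a string literal, the fold above can return the
// constant value directly, so no load node is emitted at all.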
// Otherwise, we have to emit the load. If the pointer refers to memory that
// is constant but cannot be folded, the input chain can be the entry node.
SDValue Root;
bool ConstantMemory = false;
// Do not serialize (non-volatile) loads of constant memory with anything.
if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
Root = Builder.DAG.getEntryNode();
ConstantMemory = true;
} else {
// Do not serialize non-volatile loads against each other.
Root = Builder.DAG.getRoot();
}
SDValue Ptr = Builder.getValue(PtrVal);
SDValue LoadVal =
Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr,
MachinePointerInfo(PtrVal), Align(1));
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
return LoadVal;
}
/// Record the value for an instruction that produces an integer result,
/// converting the type where necessary.
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
if (IsSigned)
Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
else
Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
setValue(&I, Value);
}
/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
if (CSize && CSize->getZExtValue() == 0) {
EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
return true;
}
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
return true;
}
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
return false;
// If the target has a fast compare for the given size, it will return a
// preferred load type for that size. Require that the load VT is legal and
// that the target supports unaligned loads of that type. Otherwise, return
// INVALID.
auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT LVT = TLI.hasFastEqualityCompare(NumBits);
if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
// TODO: Check alignment of src and dest ptrs.
unsigned DstAS = LHS->getType()->getPointerAddressSpace();
unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
if (!TLI.isTypeLegal(LVT) ||
!TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
!TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
}
return LVT;
};
// This turns into unaligned loads. We only do this if the target natively
// supports the MVT we'll be loading or if it is small enough (<= 4 bytes)
// that we'll only produce a small number of byte loads.
MVT LoadVT;
unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
switch (NumBitsToCompare) {
default:
return false;
case 16:
LoadVT = MVT::i16;
break;
case 32:
LoadVT = MVT::i32;
break;
case 64:
case 128:
case 256:
LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
break;
}
if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
// Bitcast to a wide integer type if the loads are vectors.
if (LoadVT.isVector()) {
EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
LoadL = DAG.getBitcast(CmpVT, LoadL);
LoadR = DAG.getBitcast(CmpVT, LoadR);
}
SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
processIntegerCallValue(I, Cmp, false);
return true;
}
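// With this fast path, a zero-equality comparison such as
// "memcmp(p, q, 4) == 0" (illustrative) lowers to one i32 load from each
// pointer plus a single SETNE, instead of a call to the library routine.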
/// See if we can lower a memchr call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
const Value *Src = I.getArgOperand(0);
const Value *Char = I.getArgOperand(1);
const Value *Length = I.getArgOperand(2);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Src), getValue(Char), getValue(Length),
MachinePointerInfo(Src));
if (Res.first.getNode()) {
setValue(&I, Res.first);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a mempcpy call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne();
Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne();
// DAG::getMemcpy needs Alignment to be defined.
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = false;
SDLoc sdl = getCurSDLoc();
// In the mempcpy context we need to pass in a false value for isTailCall
// because the return pointer needs to be adjusted by the size of
// the copied memory.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), AAInfo);
assert(MC.getNode() != nullptr &&
"** memcpy should not be lowered as TailCall in mempcpy context **");
DAG.setRoot(MC);
// Check if Size needs to be truncated or extended.
Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
// Adjust return pointer to point just past the last dst byte.
SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
Dst, Size);
setValue(&I, DstPlusSize);
return true;
}
/// See if we can lower a strcpy call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0),
MachinePointerInfo(Arg1), isStpcpy);
if (Res.first.getNode()) {
setValue(&I, Res.first);
DAG.setRoot(Res.second);
return true;
}
return false;
}
/// See if we can lower a strcmp call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0),
MachinePointerInfo(Arg1));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a strlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), MachinePointerInfo(Arg0));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, false);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a strnlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, false);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a unary floating-point operation into an SDNode with
/// the specified Opcode. If so, return true and lower it, otherwise return
/// false and it will be lowered like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
unsigned Opcode) {
// We already checked this call's prototype; verify it doesn't modify errno.
if (!I.onlyReadsMemory())
return false;
SDNodeFlags Flags;
Flags.copyFMF(cast<FPMathOperator>(I));
SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I,
DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags));
return true;
}
/// See if we can lower a binary floating-point operation into an SDNode with
/// the specified Opcode. If so, return true and lower it. Otherwise return
/// false, and it will be lowered like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
unsigned Opcode) {
// We already checked this call's prototype; verify it doesn't modify errno.
if (!I.onlyReadsMemory())
return false;
SDNodeFlags Flags;
Flags.copyFMF(cast<FPMathOperator>(I));
SDValue Tmp0 = getValue(I.getArgOperand(0));
SDValue Tmp1 = getValue(I.getArgOperand(1));
EVT VT = Tmp0.getValueType();
setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags));
return true;
}
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
if (I.isInlineAsm()) {
visitInlineAsm(I);
return;
}
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID();
if (!IID)
if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
IID = II->getIntrinsicID(F);
if (IID) {
visitIntrinsicCall(I, IID);
return;
}
}
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if the call is marked
// nobuiltin or the call site requires strict floating-point semantics.
LibFunc Func;
if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
F->hasName() && LibInfo->getLibFunc(*F, Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
case LibFunc_bcmp:
if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_copysign:
case LibFunc_copysignf:
case LibFunc_copysignl:
// We already checked this call's prototype; verify it doesn't modify
// errno.
if (I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
LHS.getValueType(), LHS, RHS));
return;
}
break;
case LibFunc_fabs:
case LibFunc_fabsf:
case LibFunc_fabsl:
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
case LibFunc_fmin:
case LibFunc_fminf:
case LibFunc_fminl:
if (visitBinaryFloatCall(I, ISD::FMINNUM))
return;
break;
case LibFunc_fmax:
case LibFunc_fmaxf:
case LibFunc_fmaxl:
if (visitBinaryFloatCall(I, ISD::FMAXNUM))
return;
break;
case LibFunc_sin:
case LibFunc_sinf:
case LibFunc_sinl:
if (visitUnaryFloatCall(I, ISD::FSIN))
return;
break;
case LibFunc_cos:
case LibFunc_cosf:
case LibFunc_cosl:
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
case LibFunc_sqrt:
case LibFunc_sqrtf:
case LibFunc_sqrtl:
case LibFunc_sqrt_finite:
case LibFunc_sqrtf_finite:
case LibFunc_sqrtl_finite:
if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
break;
case LibFunc_floor:
case LibFunc_floorf:
case LibFunc_floorl:
if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
break;
case LibFunc_nearbyint:
case LibFunc_nearbyintf:
case LibFunc_nearbyintl:
if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
break;
case LibFunc_ceil:
case LibFunc_ceilf:
case LibFunc_ceill:
if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
break;
case LibFunc_rint:
case LibFunc_rintf:
case LibFunc_rintl:
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
case LibFunc_round:
case LibFunc_roundf:
case LibFunc_roundl:
if (visitUnaryFloatCall(I, ISD::FROUND))
return;
break;
case LibFunc_trunc:
case LibFunc_truncf:
case LibFunc_truncl:
if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
break;
case LibFunc_log2:
case LibFunc_log2f:
case LibFunc_log2l:
if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
break;
case LibFunc_exp2:
case LibFunc_exp2f:
case LibFunc_exp2l:
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
case LibFunc_memcmp:
if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_mempcpy:
if (visitMemPCpyCall(I))
return;
break;
case LibFunc_memchr:
if (visitMemChrCall(I))
return;
break;
case LibFunc_strcpy:
if (visitStrCpyCall(I, false))
return;
break;
case LibFunc_stpcpy:
if (visitStrCpyCall(I, true))
return;
break;
case LibFunc_strcmp:
if (visitStrCmpCall(I))
return;
break;
case LibFunc_strlen:
if (visitStrLenCall(I))
return;
break;
case LibFunc_strnlen:
if (visitStrNLenCall(I))
return;
break;
}
}
}
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
// CFGuardTarget bundles are lowered in LowerCallTo.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
else
// Check if we can potentially perform a tail call. More detailed checking
// is done within LowerCallTo, after more information about the call is
// known.
LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
namespace {
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
public:
/// CallOperand - If this is the result output operand or a clobber,
/// this is null; otherwise it is the incoming operand to the CallInst.
/// This gets modified as the asm is processed.
SDValue CallOperand;
/// AssignedRegs - If this is a register or register class operand, this
/// contains the set of registers corresponding to the operand.
RegsForValue AssignedRegs;
explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
: TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
}
/// Whether or not this operand accesses memory
bool hasMemory(const TargetLowering &TLI) const {
// Indirect operand accesses access memory.
if (isIndirect)
return true;
for (const auto &Code : Codes)
if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
return true;
return false;
}
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL) const {
if (!CallOperandVal) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
return TLI.getProgramPointerTy(DL);
llvm::Type *OpTy = CallOperandVal->getType();
// FIXME: code duplicated from TargetLowering::ParseConstraints().
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (isIndirect) {
PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
if (!PtrTy)
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
}
// Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
if (StructType *STy = dyn_cast<StructType>(OpTy))
if (STy->getNumElements() == 1)
OpTy = STy->getElementType(0);
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
unsigned BitSize = DL.getTypeSizeInBits(OpTy);
switch (BitSize) {
default: break;
case 1:
case 8:
case 16:
case 32:
case 64:
case 128:
OpTy = IntegerType::get(Context, BitSize);
break;
}
}
return TLI.getAsmOperandValueType(DL, OpTy, true);
}
};
} // end anonymous namespace
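// Illustrative sketch of getCallOperandValEVT() above (hypothetical types):
// a struct such as { i32, i32 } is not a single value type but is 64 bits
// wide, so it is tiled as i64 for constraint purposes, while { <16 x i8> }
// is first unwrapped to the vector it contains.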
/// Make sure that the output operand \p OpInfo and its corresponding input
/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
/// out).
static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &MatchingOpInfo,
SelectionDAG &DAG) {
if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
return;
const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
const auto &TLI = DAG.getTargetLoweringInfo();
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
std::pair<unsigned, const TargetRegisterClass *> InputRC =
TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
MatchingOpInfo.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
MatchingOpInfo.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
// FIXME: error out in a more elegant fashion
report_fatal_error("Unsupported asm: input constraint"
" with a matching output constraint of"
" incompatible type!");
}
MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
}
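// Hypothetical example of the matching-constraint case handled above: in
//   %r = call i32 asm "add $0, $1", "=r,0,r"(i32 %a, i32 %b)
// the "0" input is tied to the "=r" output, so both operands must end up
// with the same ConstraintVT and a compatible register class.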
/// Get a direct memory input to behave well as an indirect operand.
/// This may introduce stores, hence the need for a \p Chain.
/// \return The (possibly updated) chain.
static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
SDISelAsmOperandInfo &OpInfo,
SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we don't have an indirect input, put it in the constant pool if we can,
// otherwise spill it to a stack slot.
// TODO: This isn't quite right. We need to handle these according to
// the addressing mode that the constraint wants. Also, this may take
// an additional register for the computation and we don't want that
// either.
// If the operand is a float, integer, or vector constant, spill to a
// constant pool entry to get its address.
const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(
cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
return Chain;
}
// Otherwise, create a stack slot and emit a store to it before the asm.
Type *Ty = OpVal->getType();
auto &DL = DAG.getDataLayout();
uint64_t TySize = DL.getTypeAllocSize(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo().CreateStackObject(
TySize, DL.getPrefTypeAlign(Ty), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI),
TLI.getMemValueType(DL, Ty));
OpInfo.CallOperand = StackSlot;
return Chain;
}
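// Sketch of the helper above (illustrative values): a direct "m" input that
// is a ConstantFP such as double 1.0 is given a constant-pool address, while
// a plain SSA value is stored to a freshly created stack slot and the slot's
// frame index is used as the operand address.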
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
/// register allocator to handle the assignment process. However, if the asm
/// uses features that we can't model on machineinstrs, we have SDISel do the
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// No work to do for memory operations.
if (OpInfo.ConstraintType == TargetLowering::C_Memory)
return;
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
unsigned AssignedReg;
const TargetRegisterClass *RC;
std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
// RC is unset only on failure. Return immediately.
if (!RC)
return;
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
// remember that AX is actually i16 to get the right extension.
const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
// If this is an FP operand in an integer register (or vice versa), or more
// generally if the operand value disagrees with the register class we plan
// to stick it in, fix the operand type.
//
// If this is an input value, the bitcast to the new type is done now.
// Bitcast for output value is done at the end of visitInlineAsm().
if ((OpInfo.Type == InlineAsm::isOutput ||
OpInfo.Type == InlineAsm::isInput) &&
!TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types). Note: output bitcast is done at the end of
// visitInlineAsm().
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
// Exclude indirect inputs while they are unsupported because the code
// to perform the load is missing and thus OpInfo.CallOperand still
// refers to the input address rather than the pointed-to value.
if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
// If the operand is an FP value and we want it in integer registers,
// use the corresponding integer type. This turns an f64 value into
// i64, which can be passed with two i32 values on a 32-bit machine.
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
if (OpInfo.Type == InlineAsm::isInput)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
OpInfo.ConstraintVT = VT;
}
}
}
// No need to allocate a matching input constraint since the constraint it's
// matching to has already been allocated.
if (OpInfo.isMatchingInputConstraint())
return;
EVT ValueVT = OpInfo.ConstraintVT;
if (OpInfo.ConstraintVT == MVT::Other)
ValueVT = RegVT;
// Initialize NumRegs.
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other)
NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT);
// If this is a constraint for a specific physical register, like {r17},
// assign it now.
// If this is associated with a specific register, initialize the iterator to
// the correct place. If virtual, make sure we have enough registers.
// Initialize iterator if necessary
TargetRegisterClass::iterator I = RC->begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
// Do not check for single registers.
if (AssignedReg) {
for (; *I != AssignedReg; ++I)
assert(I != RC->end() && "AssignedReg should be member of RC");
}
for (; NumRegs; --NumRegs, ++I) {
assert(I != RC->end() && "Ran out of registers to allocate!");
Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
Regs.push_back(R);
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
}
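// Rough example restating the comments above with concrete values: on a
// 32-bit target an f64 operand constrained to integer registers is bitcast
// to i64 here and later expanded into two i32 registers, and an "{ax}"
// constraint used with an i32 value is remembered as i16-typed AX so the
// correct extension can be applied.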
static unsigned
findMatchingInlineAsmOperand(unsigned OperandNo,
const std::vector<SDValue> &AsmNodeOperands) {
// Scan until we find the definition we already emitted of this operand.
unsigned CurOp = InlineAsm::Op_FirstOperand;
for (; OperandNo; --OperandNo) {
// Advance to the next operand.
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
assert((InlineAsm::isRegDefKind(OpFlag) ||
InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
InlineAsm::isMemKind(OpFlag)) &&
"Skipped past definitions?");
CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
}
return CurOp;
}
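// For orientation (informal summary): the INLINEASM node built below lays
// out its operands as
//   0: input chain, 1: asm string, 2: srcloc MDNode, 3: extra-info flags,
// followed by one flag word per constraint group and then that group's
// operands; the scan above walks those flag words to locate a matched
// definition.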
namespace {
class ExtraFlags {
unsigned Flags = 0;
public:
explicit ExtraFlags(const CallBase &Call) {
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
if (IA->hasSideEffects())
Flags |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
Flags |= InlineAsm::Extra_IsAlignStack;
if (Call.isConvergent())
Flags |= InlineAsm::Extra_IsConvergent;
Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
}
void update(const TargetLowering::AsmOperandInfo &OpInfo) {
// Ideally, we would only check against memory constraints. However, the
// meaning of an Other constraint can be target-specific and we can't easily
// reason about it. Therefore, be conservative and set MayLoad/MayStore
// for Other constraints as well.
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
OpInfo.ConstraintType == TargetLowering::C_Other) {
if (OpInfo.Type == InlineAsm::isInput)
Flags |= InlineAsm::Extra_MayLoad;
else if (OpInfo.Type == InlineAsm::isOutput)
Flags |= InlineAsm::Extra_MayStore;
else if (OpInfo.Type == InlineAsm::isClobber)
Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
}
}
unsigned get() const { return Flags; }
};
} // end anonymous namespace
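// Hedged example of the flag computation above: for a GNU asm statement like
//   asm volatile("" : : "m"(x));
// the constructor records Extra_HasSideEffects (volatile) and update() adds
// Extra_MayLoad because the "m" input is a memory constraint.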
/// visitInlineAsm - Handle a call to an InlineAsm object.
void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
const BasicBlock *EHPadBB) {
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
/// ConstraintOperands - Information about all of the constraints.
SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call);
// First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
// AsmDialect, MayLoad, MayStore).
bool HasSideEffect = IA->hasSideEffects();
ExtraFlags ExtraInfo(Call);
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
unsigned NumMatchingOps = 0;
for (auto &T : TargetConstraints) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
// Compute the value type for each operand.
if (OpInfo.Type == InlineAsm::isInput ||
(OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
// Process the call argument. BasicBlocks are labels, currently appearing
// only in asm's.
if (isa<CallBrInst>(Call) &&
ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() -
cast<CallBrInst>(&Call)->getNumIndirectDests() -
NumMatchingOps) &&
(NumMatchingOps == 0 ||
ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() -
NumMatchingOps))) {
const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
} else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
} else {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
DAG.getDataLayout());
OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
} else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
OpInfo.ConstraintVT = TLI.getSimpleValueType(
DAG.getDataLayout(), STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
OpInfo.ConstraintVT = TLI.getAsmOperandValueType(
DAG.getDataLayout(), Call.getType()).getSimpleVT();
}
++ResNo;
} else {
OpInfo.ConstraintVT = MVT::Other;
}
if (OpInfo.hasMatchingInput())
++NumMatchingOps;
if (!HasSideEffect)
HasSideEffect = OpInfo.hasMemory(TLI);
// Determine if this InlineAsm MayLoad or MayStore based on the constraints.
// FIXME: Could we compute this on OpInfo rather than T?
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(T, SDValue());
if (T.ConstraintType == TargetLowering::C_Immediate &&
OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
// We've delayed emitting a diagnostic for constraints like "n" because
// inlining could cause an integer constant to show up.
return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) +
"' expects an integer constant "
"expression");
ExtraInfo.update(T);
}
// We won't need to flush pending loads if this asm doesn't touch
// memory and is nonvolatile.
SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow();
if (EmitEHLabels) {
assert(EHPadBB && "InvokeInst must have an EHPadBB");
}
bool IsCallBr = isa<CallBrInst>(Call);
if (IsCallBr || EmitEHLabels) {
// If this is a callbr or invoke we need to flush pending exports since
// inlineasm_br and invoke are terminators.
// We need to do this before nodes are glued to the inlineasm_br node.
Chain = getControlRoot();
}
MCSymbol *BeginLabel = nullptr;
if (EmitEHLabels) {
Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
}
// Second pass over the constraints: compute which constraint option to use.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
// error.
if (OpInfo.hasMatchingInput()) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
patchMatchingInput(OpInfo, Input, DAG);
}
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.Type == InlineAsm::isClobber)
continue;
// If this is a memory input, and if the operand is not indirect, do what we
// need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
!OpInfo.isIndirect) {
assert((OpInfo.isMultipleAlternative ||
(OpInfo.Type == InlineAsm::isInput)) &&
"Can only indirectify direct input operands!");
// Memory operands really want the address of the value.
Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
// There is no longer a Value* corresponding to this operand.
OpInfo.CallOperandVal = nullptr;
// It is now an indirect operand.
OpInfo.isIndirect = true;
}
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
std::vector<SDValue> AsmNodeOperands;
AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout())));
// If we have a !srcloc metadata node associated with it, we want to attach
// this to the ultimately generated inline asm machineinstr. To do this, we
// pass the (potentially null) srcloc MDNode in as the third operand.
const MDNode *SrcLoc = Call.getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
// Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
// bits as operand 3.
AsmNodeOperands.push_back(DAG.getTargetConstant(
ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
// Third pass: Loop over operands to prepare DAG-level operands. As part of
// this, assign virtual and physical registers for inputs and outputs.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// Assign Registers.
SDISelAsmOperandInfo &RefOpInfo =
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
: OpInfo;
GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
auto DetectWriteToReservedRegister = [&]() {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
if (Register::isPhysicalRegister(Reg) &&
TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
const char *RegName = TRI.getName(Reg);
emitInlineAsmError(Call, "write to reserved register '" +
Twine(RegName) + "'");
return true;
}
}
return false;
};
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this output.
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
} else {
// Otherwise, this outputs to a register (directly for C_Register /
// C_RegisterClass, and a target-defined fashion for
// C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
Call, "couldn't allocate output register for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
if (DetectWriteToReservedRegister())
return;
// Add information to the INLINEASM node to know that this register is
// set.
OpInfo.AssignedRegs.AddInlineAsmOperands(
OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
: InlineAsm::Kind_RegDef,
false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
}
break;
case InlineAsm::isInput: {
SDValue InOperandVal = OpInfo.CallOperand;
if (OpInfo.isMatchingInputConstraint()) {
// If this is required to match an output register we have already set,
// just use its register.
auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
AsmNodeOperands);
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
if (InlineAsm::isRegDefKind(OpFlag) ||
InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
if (OpInfo.isIndirect) {
// This happens on gcc/testsuite/gcc.dg/pr8788-1.c
emitInlineAsmError(Call, "inline asm not supported yet: "
"don't know how to handle tied "
"indirect register inputs");
return;
}
SmallVector<unsigned, 4> Regs;
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
Register TiedReg = R->getReg();
MVT RegVT = R->getSimpleValueType(0);
- const TargetRegisterClass *RC = TiedReg.isVirtual() ?
- MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg);
+ const TargetRegisterClass *RC =
+ TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
+ : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
+ : TRI.getMinimalPhysRegClass(TiedReg);
unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(MRI.createVirtualRegister(RC));
RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to copy the input value into the
// registers tied to the matched output.
MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call);
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
break;
}
assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
"Unexpected number of operands");
// Add information to the INLINEASM node to know about this input.
// See InlineAsm.h isUseOperandTiedToDef.
OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
OpInfo.getMatchedOperand());
AsmNodeOperands.push_back(DAG.getTargetConstant(
OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
break;
}
// Treat indirect 'X' constraint as memory.
if (OpInfo.ConstraintType == TargetLowering::C_Other &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
if (isa<ConstantSDNode>(InOperandVal)) {
emitInlineAsmError(Call, "value out of range for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
emitInlineAsmError(Call,
"invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType =
InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(
ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
llvm::append_range(AsmNodeOperands, Ops);
break;
}
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
assert(InOperandVal.getValueType() ==
TLI.getPointerTy(DAG.getDataLayout()) &&
"Memory operands expect pointer values");
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(InOperandVal);
break;
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
"Unknown constraint type!");
// TODO: Support this.
if (OpInfo.isIndirect) {
emitInlineAsmError(
Call, "Don't know how to handle indirect register inputs yet "
"for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(Call,
"couldn't allocate input reg for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
if (DetectWriteToReservedRegister())
return;
SDLoc dl = getCurSDLoc();
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
&Call);
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
dl, DAG, AsmNodeOperands);
break;
}
case InlineAsm::isClobber:
// Add the clobbered value to the operand list, so that the register
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
false, 0, getCurSDLoc(), DAG,
AsmNodeOperands);
break;
}
}
// Finish up input operands. Set the input chain and add the flag last.
AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
Flag = Chain.getValue(1);
// Do additional work to generate outputs.
SmallVector<EVT, 1> ResultVTs;
SmallVector<SDValue, 1> ResultValues;
SmallVector<SDValue, 8> OutChains;
llvm::Type *CallResultType = Call.getType();
ArrayRef<Type *> ResultTypes;
if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
ResultTypes = StructResult->elements();
else if (!CallResultType->isVoidTy())
ResultTypes = makeArrayRef(CallResultType);
auto CurResultType = ResultTypes.begin();
auto handleRegAssign = [&](SDValue V) {
assert(CurResultType != ResultTypes.end() && "Unexpected value");
assert((*CurResultType)->isSized() && "Unexpected unsized type");
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
++CurResultType;
// If the type of the inline asm call site return value differs from, but has
// the same size as, the type of the asm output, bitcast it. One example of this
// is for vectors with different width / number of elements. This can
// happen for register classes that can contain multiple different value
// types. The preg or vreg allocated may not have the same VT as was
// expected.
//
// This can also happen for a return value that disagrees with the register
// class it is put in, eg. a double in a general-purpose register on a
// 32-bit machine.
if (ResultVT != V.getValueType() &&
ResultVT.getSizeInBits() == V.getValueSizeInBits())
V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
V.getValueType().isInteger()) {
// If a result value was tied to an input value, the computed result
// may have a wider width than the expected result. Extract the
// relevant portion.
V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
}
assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
ResultVTs.push_back(ResultVT);
ResultValues.push_back(V);
};
// Deal with output operands.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
if (OpInfo.Type == InlineAsm::isOutput) {
SDValue Val;
// Skip trivial output operands.
if (OpInfo.AssignedRegs.Regs.empty())
continue;
switch (OpInfo.ConstraintType) {
case TargetLowering::C_Register:
case TargetLowering::C_RegisterClass:
Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
Chain, &Flag, &Call);
break;
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
OpInfo, DAG);
break;
case TargetLowering::C_Memory:
break; // Already handled.
case TargetLowering::C_Unknown:
assert(false && "Unexpected unknown constraint");
}
// Indirect outputs manifest as stores. Record output chains.
if (OpInfo.isIndirect) {
const Value *Ptr = OpInfo.CallOperandVal;
assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
MachinePointerInfo(Ptr));
OutChains.push_back(Store);
} else {
// generate CopyFromRegs to associated registers.
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (Val.getOpcode() == ISD::MERGE_VALUES) {
for (const SDValue &V : Val->op_values())
handleRegAssign(V);
} else
handleRegAssign(Val);
}
}
}
// Set results.
if (!ResultValues.empty()) {
assert(CurResultType == ResultTypes.end() &&
"Mismatch in number of ResultTypes");
assert(ResultValues.size() == ResultTypes.size() &&
"Mismatch in number of output operands in asm result");
SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ResultVTs), ResultValues);
setValue(&Call, V);
}
// Collect store chains.
if (!OutChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
if (EmitEHLabels) {
Chain = lowerEndEH(Chain, cast<InvokeInst>(&Call), EHPadBB, BeginLabel);
}
// Only update the root if the inline assembly has a memory effect.
if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr ||
EmitEHLabels)
DAG.setRoot(Chain);
}
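// End-to-end illustration (hypothetical IR): for
//   %v = call i32 asm "mov $1, $0", "=r,r"(i32 %x)
// the routine above emits an ISD::INLINEASM node, copies %x into the
// allocated input register, reads the "=r" output back with CopyFromReg,
// and binds the merged result to %v via setValue().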
void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
const Twine &Message) {
LLVMContext &Ctx = *DAG.getContext();
Ctx.emitError(&Call, Message);
// Make sure we leave the DAG in a valid state
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs);
if (ValueVTs.empty())
return;
SmallVector<SDValue, 1> Ops;
for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
}
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
SDValue V = DAG.getVAArg(
TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
DL.getABITypeAlign(I.getType()).value());
DAG.setRoot(V.getValue(1));
if (I.getType()->isPointerTy())
V = DAG.getPtrExtOrTrunc(
V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
setValue(&I, V);
}
void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
DAG.getSrcValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(1))));
}
SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
const Instruction &I,
SDValue Op) {
const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
if (!Range)
return Op;
ConstantRange CR = getConstantRangeFromMetadata(*Range);
if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
return Op;
APInt Lo = CR.getUnsignedMin();
if (!Lo.isMinValue())
return Op;
APInt Hi = CR.getUnsignedMax();
unsigned Bits = std::max(Hi.getActiveBits(),
static_cast<unsigned>(IntegerType::MIN_INT_BITS));
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
SDLoc SL = getCurSDLoc();
SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
DAG.getValueType(SmallVT));
unsigned NumVals = Op.getNode()->getNumValues();
if (NumVals == 1)
return ZExt;
SmallVector<SDValue, 4> Ops;
Ops.push_back(ZExt);
for (unsigned I = 1; I != NumVals; ++I)
Ops.push_back(Op.getValue(I));
return DAG.getMergeValues(Ops, SL);
}
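// Hypothetical example for the helper above: an instruction carrying
//   !range !{i64 0, i64 256}
// has an unsigned maximum of 255, so its value is wrapped in an
// ISD::AssertZext to i8, letting later combines drop redundant
// zero-extensions.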
/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
/// the call being lowered.
///
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
// Populate the argument list.
// Attributes for args start at offset 1, after the return attribute.
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
ArgI != ArgE; ++ArgI) {
const Value *V = Call->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
Entry.setAttributes(Call, ArgI);
Args.push_back(Entry);
}
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
.setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
.setDiscardResult(Call->use_empty())
.setIsPatchPoint(IsPatchPoint)
.setIsPreallocated(
Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
}
/// Add a stack map intrinsic call's live variable operands to a stackmap
/// or patchpoint target node's operand list.
///
/// Constants are converted to TargetConstants purely as an optimization to
/// avoid constant materialization and register allocation.
///
/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
/// generate address computation nodes, and so FinalizeISel can convert the
/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
/// address materialization and register allocation, but may also be required
/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
/// alloca in the entry block, then the runtime may assume that the alloca's
/// StackMap location can be read immediately after compilation and that the
/// location is valid at any point during execution (this is similar to the
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) {
SDValue OpVal = Builder.getValue(Call.getArgOperand(i));
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
Ops.push_back(
Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
Ops.push_back(
Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
Ops.push_back(Builder.DAG.getTargetFrameIndex(
FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
} else
Ops.push_back(OpVal);
}
}
/// Lower llvm.experimental.stackmap directly to its target opcode.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
// [live variables...])
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
SDValue Chain, InFlag, Callee, NullPtr;
SmallVector<SDValue, 32> Ops;
SDLoc DL = getCurSDLoc();
Callee = getValue(CI.getCalledOperand());
NullPtr = DAG.getIntPtrConstant(0, DL, true);
// The stackmap intrinsic only records the live variables (the arguments
// passed to it) and emits NOPS (if requested). Unlike the patchpoint
// intrinsic, this won't be lowered to a function call. This means we don't
// have to worry about calling conventions and target specific lowering code.
// Instead we perform the call lowering right here.
//
// chain, flag = CALLSEQ_START(chain, 0, 0)
// chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
// chain, flag = CALLSEQ_END(chain, 0, 0, flag)
//
Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
InFlag = Chain.getValue(1);
// Add the <id> and <numBytes> constants.
SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
MVT::i32));
// Push live variables for the stack map.
addStackMapLiveVars(CI, 2, DL, Ops, *this);
// We are not pushing any register mask info here on the operands list,
// because the stackmap doesn't clobber anything.
// Push the chain and the glue flag.
Ops.push_back(Chain);
Ops.push_back(InFlag);
// Create the STACKMAP node.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
Chain = SDValue(SM, 0);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
// Stackmaps don't generate values, so nothing goes into the NodeMap.
// Set the root to the target-lowered call chain.
DAG.setRoot(Chain);
// Inform the Frame Information that we have a stackmap in this function.
FuncInfo.MF->getFrameInfo().setHasStackMap();
}
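// Informal usage example for the lowering above:
//   call void @llvm.experimental.stackmap(i64 7, i32 4, i32* %p, i64 42)
// records ID 7 and four shadow bytes; %p becomes a stack-map live value and
// the constant 42 is encoded via StackMaps::ConstantOp rather than occupying
// a register.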
/// Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
// i32 <numArgs>,
// [Args...],
// [live variables...])
CallingConv::ID CC = CB.getCallingConv();
bool IsAnyRegCC = CC == CallingConv::AnyReg;
bool HasDef = !CB.getType()->isVoidTy();
SDLoc dl = getCurSDLoc();
SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos));
// Handle immediate and symbolic callees.
if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
/*isTarget=*/true);
else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
SDLoc(SymbolicCallee),
SymbolicCallee->getValueType(0));
// Get the real number of arguments participating in the call <numArgs>
SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos));
unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// Intrinsics include all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType();
TargetLowering::CallLoweringInfo CLI(DAG);
populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
ReturnTy, true);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
CallEnd = CallEnd->getOperand(0).getNode();
/// Get a call instruction from the call sequence chain.
/// Tail calls are not allowed.
assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
"Expected a callseq node.");
SDNode *Call = CallEnd->getOperand(0).getNode();
bool HasGlue = Call->getGluedNode();
// Replace the target specific call node with the patchable intrinsic.
SmallVector<SDValue, 8> Ops;
// Add the <id> and <numBytes> constants.
SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
MVT::i32));
// Add the callee.
Ops.push_back(Callee);
// Adjust <numArgs> to account for any arguments that have been passed on the
// stack instead.
// Call Node: Chain, Target, {Args}, RegMask, [Glue]
unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
// Add the calling convention
Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
// Add the arguments we omitted previously. The register allocator should
// place these in any free register.
if (IsAnyRegCC)
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
Ops.push_back(getValue(CB.getArgOperand(i)));
// Push the arguments from the call instruction up to the register mask.
SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
Ops.append(Call->op_begin() + 2, e);
// Push live variables for the stack map.
addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this);
// Push the register mask info.
if (HasGlue)
Ops.push_back(*(Call->op_end()-2));
else
Ops.push_back(*(Call->op_end()-1));
// Push the chain (this is originally the first operand of the call, but
// now becomes the last or second-to-last operand).
Ops.push_back(*(Call->op_begin()));
// Push the glue flag (last operand).
if (HasGlue)
Ops.push_back(*(Call->op_end()-1));
SDVTList NodeTys;
if (IsAnyRegCC && HasDef) {
// Create the return types based on the intrinsic definition
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 3> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs);
assert(ValueVTs.size() == 1 && "Expected only one return value type.");
// There is always a chain and a glue type at the end
ValueVTs.push_back(MVT::Other);
ValueVTs.push_back(MVT::Glue);
NodeTys = DAG.getVTList(ValueVTs);
} else
NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// Replace the target specific call node with a PATCHPOINT node.
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
dl, NodeTys, Ops);
// Update the NodeMap.
if (HasDef) {
if (IsAnyRegCC)
setValue(&CB, SDValue(MN, 0));
else
setValue(&CB, Result.first);
}
// Fixup the consumers of the intrinsic. The chain and glue may be used in the
// call sequence. Furthermore the location of the chain and glue can change
// when the AnyReg calling convention is used and the intrinsic returns a
// value.
if (IsAnyRegCC && HasDef) {
SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
} else
DAG.ReplaceAllUsesWith(Call, MN);
DAG.DeleteNode(Call);
// Inform the Frame Information that we have a patchpoint in this function.
FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
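// Informal example for the patchpoint lowering above (hypothetical IR):
//   %r = call i64 (i64, i32, i8*, i32, ...)
//        @llvm.experimental.patchpoint.i64(i64 3, i32 12, i8* %target,
//                                          i32 2, i64 %a, i64 %b)
// reserves 12 bytes of encoding space, passes %a and %b according to the
// chosen calling convention (or leaves them to the register allocator under
// anyregcc), and records any trailing operands as stack-map live values.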
void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2;
if (I.getNumArgOperands() > 1)
Op2 = getValue(I.getArgOperand(1));
SDLoc dl = getCurSDLoc();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Res;
SDNodeFlags SDFlags;
if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
SDFlags.copyFMF(*FPMO);
switch (Intrinsic) {
case Intrinsic::vector_reduce_fadd:
if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags),
SDFlags);
else
Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags);
break;
case Intrinsic::vector_reduce_fmul:
if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags),
SDFlags);
else
Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags);
break;
case Intrinsic::vector_reduce_add:
Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_mul:
Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_and:
Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_or:
Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_xor:
Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_smax:
Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_smin:
Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_umax:
Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_umin:
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_fmax:
Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
break;
case Intrinsic::vector_reduce_fmin:
Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
}
setValue(&I, Res);
}
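// Sketch of the fadd special case above (hypothetical IR): with reassoc set,
//   %s = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %acc,
//                                                          <4 x float> %v)
// becomes FADD(%acc, VECREDUCE_FADD(%v)); without reassoc the ordered
// VECREDUCE_SEQ_FADD node is used instead.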
/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
if (CLI.RetZExt)
Attrs.push_back(Attribute::ZExt);
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
Attrs);
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Handle the incoming return values from the call.
CLI.Ins.clear();
Type *OrigRetTy = CLI.RetTy;
SmallVector<EVT, 4> RetTys;
SmallVector<uint64_t, 4> Offsets;
auto &DL = CLI.DAG.getDataLayout();
ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
if (CLI.IsPostTypeLegalization) {
// If we are lowering a libcall after legalization, split the return type.
SmallVector<EVT, 4> OldRetTys;
SmallVector<uint64_t, 4> OldOffsets;
RetTys.swap(OldRetTys);
Offsets.swap(OldOffsets);
for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
EVT RetVT = OldRetTys[i];
uint64_t Offset = OldOffsets[i];
MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
RetTys.append(NumRegs, RegisterVT);
for (unsigned j = 0; j != NumRegs; ++j)
Offsets.push_back(Offset + j * RegisterVTByteSZ);
}
}
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
bool CanLowerReturn =
this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
CLI.IsVarArg, Outs, CLI.RetTy->getContext());
SDValue DemoteStackSlot;
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
// FIXME: equivalent assert?
// assert(!CS.hasInAllocaArgument() &&
// "sret demotion is incompatible with inalloca");
uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
Align Alignment = DL.getPrefTypeAlign(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
DemoteStackIdx =
MF.getFrameInfo().CreateStackObject(TySize, Alignment, false);
Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
DL.getAllocaAddrSpace());
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
Entry.IsSExt = false;
Entry.IsZExt = false;
Entry.IsInReg = false;
Entry.IsSRet = true;
Entry.IsNest = false;
Entry.IsByVal = false;
Entry.IsByRef = false;
Entry.IsReturned = false;
Entry.IsSwiftSelf = false;
Entry.IsSwiftAsync = false;
Entry.IsSwiftError = false;
Entry.IsCFGuardTarget = false;
Entry.Alignment = Alignment;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.NumFixedArgs += 1;
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
// sret demotion isn't compatible with tail-calls, since the sret argument
// points into the callers stack frame.
CLI.IsTailCall = false;
} else {
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
ISD::ArgFlagsTy Flags;
if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
if (I == RetTys.size() - 1)
Flags.setInConsecutiveRegsLast();
}
EVT VT = RetTys[I];
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.Flags = Flags;
MyFlags.VT = RegisterVT;
MyFlags.ArgVT = VT;
MyFlags.Used = CLI.IsReturnValueUsed;
if (CLI.RetTy->isPointerTy()) {
MyFlags.Flags.setPointer();
MyFlags.Flags.setPointerAddrSpace(
cast<PointerType>(CLI.RetTy)->getAddressSpace());
}
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
if (CLI.RetZExt)
MyFlags.Flags.setZExt();
if (CLI.IsInReg)
MyFlags.Flags.setInReg();
CLI.Ins.push_back(MyFlags);
}
}
}
// We push in swifterror return as the last element of CLI.Ins.
ArgListTy &Args = CLI.getArgs();
if (supportSwiftError()) {
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
if (Args[i].IsSwiftError) {
ISD::InputArg MyFlags;
MyFlags.VT = getPointerTy(DL);
MyFlags.ArgVT = EVT(getPointerTy(DL));
MyFlags.Flags.setSwiftError();
CLI.Ins.push_back(MyFlags);
}
}
}
// Handle all of the outgoing arguments.
CLI.Outs.clear();
CLI.OutVals.clear();
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
// FIXME: Split arguments if CLI.IsPostTypeLegalization
Type *FinalType = Args[i].Ty;
if (Args[i].IsByVal)
FinalType = Args[i].IndirectType;
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
FinalType, CLI.CallConv, CLI.IsVarArg, DL);
for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
Flags.setOrigAlign(OriginalAlignment);
if (Args[i].Ty->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(Args[i].Ty)->getAddressSpace());
}
if (Args[i].IsZExt)
Flags.setZExt();
if (Args[i].IsSExt)
Flags.setSExt();
if (Args[i].IsInReg) {
// If we are using vectorcall calling convention, a structure that is
// passed InReg - is surely an HVA
if (CLI.CallConv == CallingConv::X86_VectorCall &&
isa<StructType>(FinalType)) {
// The first value of a structure is marked
if (0 == Value)
Flags.setHvaStart();
Flags.setHva();
}
// Set InReg Flag
Flags.setInReg();
}
if (Args[i].IsSRet)
Flags.setSRet();
if (Args[i].IsSwiftSelf)
Flags.setSwiftSelf();
if (Args[i].IsSwiftAsync)
Flags.setSwiftAsync();
if (Args[i].IsSwiftError)
Flags.setSwiftError();
if (Args[i].IsCFGuardTarget)
Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
if (Args[i].IsByRef)
Flags.setByRef();
if (Args[i].IsPreallocated) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If
// we port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
// and how many bytes a callee cleanup function will pop. If we port
// inalloca to more targets, we'll have to add custom inalloca handling
// in the various CC lowering callbacks.
Flags.setByVal();
}
Align MemAlign;
if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
unsigned FrameSize = DL.getTypeAllocSize(Args[i].IndirectType);
Flags.setByValSize(FrameSize);
// For ByVal, the alignment should come from the frontend; the backend will
// guess if this info is not there, but there are cases it cannot get right.
if (auto MA = Args[i].Alignment)
MemAlign = *MA;
else
MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL));
} else if (auto MA = Args[i].Alignment) {
MemAlign = *MA;
} else {
MemAlign = OriginalAlignment;
}
Flags.setMemAlign(MemAlign);
if (Args[i].IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (Args[i].IsSExt)
ExtendKind = ISD::SIGN_EXTEND;
else if (Args[i].IsZExt)
ExtendKind = ISD::ZERO_EXTEND;
// Conservatively only handle 'returned' on non-vectors that can be lowered,
// for now.
if (Args[i].IsReturned && !Op.getValueType().isVector() &&
CanLowerReturn) {
assert((CLI.RetTy == Args[i].Ty ||
(CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
CLI.RetTy->getPointerAddressSpace() ==
Args[i].Ty->getPointerAddressSpace())) &&
RetTys.size() == NumValues && "unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
// either the register MVT and the actual EVT are the same size or that
// the return value and argument are extended in the same way; in these
// cases it's safe to pass the argument register value unchanged as the
// return register value (although it's at the target's option whether
// to do so)
// TODO: allow code generation to take advantage of partially preserved
// registers rather than clobbering the entire register when the
// parameter extension method is not compatible with the return
// extension method
if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
(ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
CLI.RetZExt == Args[i].IsZExt))
Flags.setReturned();
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB,
CLI.CallConv, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// If it isn't the first piece, the alignment must be 1.
// For scalable vectors the scalable part is currently handled
// by individual targets, so we just use the known minimum size here.
ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
i < CLI.NumFixedArgs, i,
j*Parts[j].getValueType().getStoreSize().getKnownMinSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
MyFlags.Flags.setOrigAlign(Align(1));
if (j == NumParts - 1)
MyFlags.Flags.setSplitEnd();
}
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
if (NeedsRegBlock && Value == NumValues - 1)
CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
}
}
SmallVector<SDValue, 4> InVals;
CLI.Chain = LowerCall(CLI, InVals);
// Update CLI.InVals to use outside of this function.
CLI.InVals = InVals;
// Verify that the target's LowerCall behaved as expected.
assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
"LowerCall didn't return a valid chain!");
assert((!CLI.IsTailCall || InVals.empty()) &&
"LowerCall emitted a return value for a tail call!");
assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
"LowerCall didn't emit the correct number of values!");
// For a tail call, the return value is merely live-out and there aren't
// any nodes in the DAG representing it. Return a special value to
// indicate that a tail call has been emitted and no more Instructions
// should be processed in the current block.
if (CLI.IsTailCall) {
CLI.DAG.setRoot(CLI.Chain);
return std::make_pair(SDValue(), SDValue());
}
#ifndef NDEBUG
for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() && "LowerCall emitted a null value!");
assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
}
#endif
SmallVector<SDValue, 4> ReturnValues;
if (!CanLowerReturn) {
// The instruction result is the result of loading from the
// hidden sret parameter.
SmallVector<EVT, 1> PVTs;
Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
EVT PtrVT = PVTs[0];
unsigned NumValues = RetTys.size();
ReturnValues.resize(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
MachineFunction &MF = CLI.DAG.getMachineFunction();
Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
PtrVT), Flags);
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
DemoteStackIdx, Offsets[i]),
HiddenSRetAlign);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
} else {
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
Optional<ISD::NodeType> AssertOp;
if (CLI.RetSExt)
AssertOp = ISD::AssertSext;
else if (CLI.RetZExt)
AssertOp = ISD::AssertZext;
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT, nullptr,
CLI.CallConv, AssertOp));
CurReg += NumRegs;
}
// For a function returning void, there is no return value. We can't create
// such a node, so we just return a null return value in that case; nothing
// will actually look at it.
if (ReturnValues.empty())
return std::make_pair(SDValue(), CLI.Chain);
}
SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
CLI.DAG.getVTList(RetTys), ReturnValues);
return std::make_pair(Res, CLI.Chain);
}
/// Places new result values for the node in Results (their number
/// and types must exactly match those of the original return values of
/// the node), or leaves Results empty, which indicates that the node is not
/// to be custom lowered after all.
void TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res = LowerOperation(SDValue(N, 0), DAG);
if (!Res.getNode())
return;
// If the original node has one result, take the return value from
// LowerOperation as is. It might not be result number 0.
if (N->getNumValues() == 1) {
Results.push_back(Res);
return;
}
// If the original node has multiple results, then the return node should
// have the same number of results.
assert((N->getNumValues() == Res->getNumValues()) &&
"Lowering returned the wrong number of results!");
// Place the new result values based on N's result numbers.
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
Results.push_back(Res.getValue(I));
}
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
}
void
SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
SDValue Op = getNonRegisterValue(V);
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
"Copy from a reg to the same reg!");
assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is an InlineAsm we have to match the registers required, not the
// notional registers required by the type.
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
FuncInfo.PreferredExtendType.end())
? ISD::ANY_EXTEND
: FuncInfo.PreferredExtendType[V];
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
#include "llvm/CodeGen/SelectionDAGISel.h"
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
if (FastISel)
return A->use_empty();
const BasicBlock &Entry = A->getParent()->front();
for (const User *U : A->users())
if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
return true;
}
using ArgCopyElisionMapTy =
DenseMap<const Argument *,
std::pair<const AllocaInst *, const StoreInst *>>;
/// Scan the entry block of the function in FuncInfo for arguments that look
/// like copies into a local alloca. Record any copied arguments in
/// ArgCopyElisionCandidates.
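// As a hedged illustration (names are hypothetical, not taken from any test
// case), the pattern being searched for looks like:
//   define void @f(i32 %x) {
//   entry:
//     %x.addr = alloca i32
//     store i32 %x, i32* %x.addr
//     ...
// Here %x.addr is fully initialized by a single store of the argument %x, so
// the pair (%x, {%x.addr, store}) is recorded as an elision candidate.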
static void
findArgumentCopyElisionCandidates(const DataLayout &DL,
FunctionLoweringInfo *FuncInfo,
ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
// Record the state of every static alloca used in the entry block. Argument
// allocas are all used in the entry block, so we need approximately as many
// entries as we have arguments.
enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
unsigned NumArgs = FuncInfo->Fn->arg_size();
StaticAllocas.reserve(NumArgs * 2);
auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
if (!V)
return nullptr;
V = V->stripPointerCasts();
const auto *AI = dyn_cast<AllocaInst>(V);
if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
return nullptr;
auto Iter = StaticAllocas.insert({AI, Unknown});
return &Iter.first->second;
};
// Look for stores of arguments to static allocas. Look through bitcasts and
// GEPs to handle type coercions, as long as the alloca is fully initialized
// by the store. Any non-store use of an alloca escapes it and any subsequent
// unanalyzed store might write it.
// FIXME: Handle structs initialized with multiple stores.
for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
// Look for stores, and handle non-store uses conservatively.
const auto *SI = dyn_cast<StoreInst>(&I);
if (!SI) {
// We will look through cast uses, so ignore them completely.
if (I.isCast())
continue;
// Ignore debug info and pseudo op intrinsics, they don't escape or store
// to allocas.
if (I.isDebugOrPseudoInst())
continue;
// This is an unknown instruction. Assume it escapes or writes to all
// static alloca operands.
for (const Use &U : I.operands()) {
if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
*Info = StaticAllocaInfo::Clobbered;
}
continue;
}
// If the stored value is a static alloca, mark it as escaped.
if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
*Info = StaticAllocaInfo::Clobbered;
// Check if the destination is a static alloca.
const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
if (!Info)
continue;
const AllocaInst *AI = cast<AllocaInst>(Dst);
// Skip allocas that have been initialized or clobbered.
if (*Info != StaticAllocaInfo::Unknown)
continue;
// Check if the stored value is an argument, and that this store fully
// initializes the alloca.
// If the argument type has padding bits we can't directly forward a pointer
// as the upper bits may contain garbage.
// Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
!DL.typeSizeEqualsStoreSize(Arg->getType()) ||
ArgCopyElisionCandidates.count(Arg)) {
*Info = StaticAllocaInfo::Clobbered;
continue;
}
LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
<< '\n');
// Mark this alloca and store for argument copy elision.
*Info = StaticAllocaInfo::Elidable;
ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
// Stop scanning if we've seen all arguments. This will happen early in -O0
// builds, which is useful, because -O0 builds have large entry blocks and
// many allocas.
if (ArgCopyElisionCandidates.size() == NumArgs)
break;
}
}
/// Try to elide argument copies from memory into a local alloca. Succeeds if
/// ArgVal is a load from a suitable fixed stack object.
static void tryToElideArgumentCopy(
FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
SDValue ArgVal, bool &ArgHasUses) {
// Check if this is a load from a fixed stack object.
auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
if (!LNode)
return;
auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
if (!FINode)
return;
// Check that the fixed stack object is the right size and alignment.
// Look at the alignment that the user wrote on the alloca instead of looking
// at the stack object.
auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
assert(ArgCopyIter != ArgCopyElisionCandidates.end());
const AllocaInst *AI = ArgCopyIter->second.first;
int FixedIndex = FINode->getIndex();
int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
int OldIndex = AllocaIndex;
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
LLVM_DEBUG(
dbgs() << " argument copy elision failed due to bad fixed stack "
"object size\n");
return;
}
Align RequiredAlignment = AI->getAlign();
if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
"greater than stack argument alignment ("
<< DebugStr(RequiredAlignment) << " vs "
<< DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
return;
}
// Perform the elision. Delete the old stack object and replace its only use
// in the variable info map. Mark the stack object as mutable.
LLVM_DEBUG({
dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
<< " Replacing frame index " << OldIndex << " with " << FixedIndex
<< '\n';
});
MFI.RemoveStackObject(OldIndex);
MFI.setIsImmutableObjectIndex(FixedIndex, false);
AllocaIndex = FixedIndex;
ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
Chains.push_back(ArgVal.getValue(1));
// Avoid emitting code for the store implementing the copy.
const StoreInst *SI = ArgCopyIter->second.second;
ElidedArgCopyInstrs.insert(SI);
// Check for uses of the argument again so that we can avoid exporting ArgVal
// if it isn't used by anything other than the store.
for (const Value *U : Arg.users()) {
if (U != SI) {
ArgHasUses = true;
break;
}
}
}
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
const DataLayout &DL = DAG.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
// In Naked functions we aren't going to save any registers.
if (F.hasFnAttribute(Attribute::Naked))
return;
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
F.getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
// NOTE: We assume that a pointer will never break down into more than one VT
// or more than one register.
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
ISD::InputArg::NoArgIndex, 0);
Ins.push_back(RetArg);
}
// Look for stores of arguments to static allocas. Mark such arguments with a
// flag to ask the target to give us the memory location of that argument if
// available.
ArgCopyElisionMapTy ArgCopyElisionCandidates;
findArgumentCopyElisionCandidates(DL, FuncInfo.get(),
ArgCopyElisionCandidates);
// Set up the incoming argument description vector.
for (const Argument &Arg : F.args()) {
unsigned ArgNo = Arg.getArgNo();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
bool isArgValueUsed = !Arg.use_empty();
unsigned PartBase = 0;
Type *FinalType = Arg.getType();
if (Arg.hasAttribute(Attribute::ByVal))
FinalType = Arg.getParamByValType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
FinalType, F.getCallingConv(), F.isVarArg(), DL);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
if (Arg.getType()->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(Arg.getType())->getAddressSpace());
}
if (Arg.hasAttribute(Attribute::ZExt))
Flags.setZExt();
if (Arg.hasAttribute(Attribute::SExt))
Flags.setSExt();
if (Arg.hasAttribute(Attribute::InReg)) {
// If we are using the vectorcall calling convention, a structure that is
// passed InReg is surely an HVA (homogeneous vector aggregate).
if (F.getCallingConv() == CallingConv::X86_VectorCall &&
isa<StructType>(Arg.getType())) {
// The first value of a structure is marked as the start of the HVA.
if (0 == Value)
Flags.setHvaStart();
Flags.setHva();
}
// Set InReg Flag
Flags.setInReg();
}
if (Arg.hasAttribute(Attribute::StructRet))
Flags.setSRet();
if (Arg.hasAttribute(Attribute::SwiftSelf))
Flags.setSwiftSelf();
if (Arg.hasAttribute(Attribute::SwiftAsync))
Flags.setSwiftAsync();
if (Arg.hasAttribute(Attribute::SwiftError))
Flags.setSwiftError();
if (Arg.hasAttribute(Attribute::ByVal))
Flags.setByVal();
if (Arg.hasAttribute(Attribute::ByRef))
Flags.setByRef();
if (Arg.hasAttribute(Attribute::InAlloca)) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
// and how many bytes a callee cleanup function will pop. If we port
// inalloca to more targets, we'll have to add custom inalloca handling
// in the various CC lowering callbacks.
Flags.setByVal();
}
if (Arg.hasAttribute(Attribute::Preallocated)) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If
// we port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
const Align OriginalAlignment(
TLI->getABIAlignmentForCallingConv(ArgTy, DL));
Flags.setOrigAlign(OriginalAlignment);
Align MemAlign;
Type *ArgMemTy = nullptr;
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
Flags.isByRef()) {
if (!ArgMemTy)
ArgMemTy = Arg.getPointeeInMemoryValueType();
uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy);
// For in-memory arguments, the size and alignment should be passed from the
// frontend. The backend will guess if this info is missing, but there are
// cases it cannot get right.
if (auto ParamAlign = Arg.getParamStackAlign())
MemAlign = *ParamAlign;
else if ((ParamAlign = Arg.getParamAlign()))
MemAlign = *ParamAlign;
else
MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
if (Flags.isByRef())
Flags.setByRefSize(MemSize);
else
Flags.setByValSize(MemSize);
} else if (auto ParamAlign = Arg.getParamStackAlign()) {
MemAlign = *ParamAlign;
} else {
MemAlign = OriginalAlignment;
}
Flags.setMemAlign(MemAlign);
if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
if (Arg.hasAttribute(Attribute::Returned))
Flags.setReturned();
MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
unsigned NumRegs = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
// For scalable vectors, use the minimum size; individual targets
// are responsible for handling scalable vector arguments and
// return values.
ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// If it isn't the first piece, the alignment must be 1.
else if (i > 0) {
MyFlags.Flags.setOrigAlign(Align(1));
if (i == NumRegs - 1)
MyFlags.Flags.setSplitEnd();
}
Ins.push_back(MyFlags);
}
if (NeedsRegBlock && Value == NumValues - 1)
Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
PartBase += VT.getStoreSize().getKnownMinSize();
}
}
// Call the target to set up the argument values.
SmallVector<SDValue, 8> InVals;
SDValue NewRoot = TLI->LowerFormalArguments(
DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
// Verify that the target's LowerFormalArguments behaved as expected.
assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
"LowerFormalArguments didn't return a valid chain!");
assert(InVals.size() == Ins.size() &&
"LowerFormalArguments didn't emit the correct number of values!");
LLVM_DEBUG({
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerFormalArguments emitted a null value!");
assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerFormalArguments emitted a value with the wrong type!");
}
});
// Update the DAG with the new chain value resulting from argument lowering.
DAG.setRoot(NewRoot);
// Set up the argument values.
unsigned i = 0;
if (!FuncInfo->CanLowerReturn) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
F.getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
Optional<ISD::NodeType> AssertOp = None;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
nullptr, F.getCallingConv(), AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
Register SRetReg =
RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
FuncInfo->DemoteRegister = SRetReg;
NewRoot =
SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
DAG.setRoot(NewRoot);
// i indexes lowered arguments. Bump it past the hidden sret argument.
++i;
}
SmallVector<SDValue, 4> Chains;
DenseMap<int, int> ArgCopyElisionFrameIndexMap;
for (const Argument &Arg : F.args()) {
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
continue;
bool ArgHasUses = !Arg.use_empty();
// Elide the copying store if the target loaded this argument from a
// suitable fixed stack object.
if (Ins[i].Flags.isCopyElisionCandidate()) {
tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
InVals[i], ArgHasUses);
}
// If this argument is unused, remember its value anyway; it is used to
// generate debugging information.
bool isSwiftErrorArg =
TLI->supportSwiftError() &&
Arg.hasAttribute(Attribute::SwiftError);
if (!ArgHasUses && !isSwiftErrorArg) {
SDB->setUnusedArgValue(&Arg, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
EVT VT = ValueVTs[Val];
MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
F.getCallingConv(), VT);
unsigned NumParts = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
// Even an apparently 'unused' swifterror argument needs to be returned, so
// we generate a copy for it that can be used on return from the function.
if (ArgHasUses || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (Arg.hasAttribute(Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
AssertOp = ISD::AssertZext;
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr,
F.getCallingConv(), AssertOp));
}
i += NumParts;
}
// We don't need to do anything else for unused arguments.
if (ArgValues.empty())
continue;
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
SDB->setValue(&Arg, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
// We want to associate the argument with the frame index, among the
// involved operands, that corresponds to the lowest address. The
// getCopyFromParts function, called earlier, is swapping the order of
// the operands to BUILD_PAIR depending on endianness. The result of
// that swapping is that the least significant bits of the argument will
// be in the first operand of the BUILD_PAIR node, and the most
// significant bits will be in the second operand.
unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
// Analyses past this point are naive and don't expect an assertion.
if (Res.getOpcode() == ISD::AssertZext)
Res = Res.getOperand(0);
// Update the SwiftErrorVRegDefMap.
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg))
SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
Reg);
}
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
if (Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg)) {
FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
}
if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(&Arg);
SDB->CopyToExportRegsIfNeeded(&Arg);
}
}
if (!Chains.empty()) {
Chains.push_back(NewRoot);
NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
DAG.setRoot(NewRoot);
assert(i == InVals.size() && "Argument register count mismatch!");
// If any argument copy elisions occurred and we have debug info, update the
// stale frame indices used in the dbg.declare variable info table.
MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
if (I != ArgCopyElisionFrameIndexMap.end())
VI.Slot = I->second;
}
}
// Finally, if the target has anything special to do, allow it to do so.
emitFunctionEntryCode();
}
/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
/// ensure constants are generated when needed. Remember the virtual registers
/// that need to be added to the Machine PHI nodes as input. We cannot just
/// directly add them, because expansion might result in multiple MBBs for one
/// BB. As such, the start of the BB might correspond to a different MBB than
/// the end.
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
// Check PHI nodes in successors that expect a value to be available from this
// block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
if (!isa<PHINode>(SuccBB->begin())) continue;
MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
if (!SuccsHandled.insert(SuccMBB).second)
continue;
MachineBasicBlock::iterator MBBI = SuccMBB->begin();
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
for (const PHINode &PN : SuccBB->phis()) {
// Ignore dead PHI nodes.
if (PN.use_empty())
continue;
// Skip empty types
if (PN.getType()->isEmptyTy())
continue;
unsigned Reg;
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
RegOut = FuncInfo.CreateRegs(C);
CopyValueToVirtualRegister(C, RegOut);
}
Reg = RegOut;
} else {
DenseMap<const Value *, Register>::iterator I =
FuncInfo.ValueMap.find(PHIOp);
if (I != FuncInfo.ValueMap.end())
Reg = I->second;
else {
assert(isa<AllocaInst>(PHIOp) &&
FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
Reg = FuncInfo.CreateRegs(PHIOp);
CopyValueToVirtualRegister(PHIOp, Reg);
}
}
// Remember that this register needs to be added to the machine PHI node as
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
for (unsigned i = 0, e = NumRegisters; i != e; ++i)
FuncInfo.PHINodesToUpdate.push_back(
std::make_pair(&*MBBI++, Reg + i));
Reg += NumRegisters;
}
}
}
ConstantsOut.clear();
}
/// Add a successor MBB to ParentMBB, creating a new MachineBasicBlock for BB
/// if SuccMBB is null.
MachineBasicBlock *
SelectionDAGBuilder::StackProtectorDescriptor::
AddSuccessorMBB(const BasicBlock *BB,
MachineBasicBlock *ParentMBB,
bool IsLikely,
MachineBasicBlock *SuccMBB) {
// If SuccBB has not been created yet, create it.
if (!SuccMBB) {
MachineFunction *MF = ParentMBB->getParent();
MachineFunction::iterator BBI(ParentMBB);
SuccMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(++BBI, SuccMBB);
}
// Add it as a successor of ParentMBB.
ParentMBB->addSuccessor(
SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
return SuccMBB;
}
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
return nullptr;
return &*I;
}
/// During lowering new call nodes can be created (such as memset, etc.).
/// Those will become new roots of the current DAG, but complications arise
/// when they are tail calls. In such cases, the call lowering will update
/// the root, but the builder still needs to know that a tail call has been
/// lowered in order to avoid generating an additional return.
void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
// If the node is null, we do have a tail call.
if (MaybeTC.getNode() != nullptr)
DAG.setRoot(MaybeTC);
else
HasTailCall = true;
}
void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB) {
MachineFunction *CurMF = FuncInfo.MF;
MachineBasicBlock *NextMBB = nullptr;
MachineFunction::iterator BBI(W.MBB);
if (++BBI != FuncInfo.MF->end())
NextMBB = &*BBI;
unsigned Size = W.LastCluster - W.FirstCluster + 1;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (Size == 2 && W.MBB == SwitchMBB) {
// If any two of the cases have the same destination, and if one value
// is the same as the other except for a single bit that is unset in one and
// set in the other,
// use bit manipulation to do two compares at once. For example:
// "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
// TODO: This could be extended to merge any 2 cases in switches with 3
// cases.
// TODO: Handle cases where W.CaseBB != SwitchBB.
CaseCluster &Small = *W.FirstCluster;
CaseCluster &Big = *W.LastCluster;
if (Small.Low == Small.High && Big.Low == Big.High &&
Small.MBB == Big.MBB) {
const APInt &SmallValue = Small.Low->getValue();
const APInt &BigValue = Big.Low->getValue();
// Check that there is only one bit different.
APInt CommonBit = BigValue ^ SmallValue;
if (CommonBit.isPowerOf2()) {
SDValue CondLHS = getValue(Cond);
EVT VT = CondLHS.getValueType();
SDLoc DL = getCurSDLoc();
SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
DAG.getConstant(CommonBit, DL, VT));
SDValue Cond = DAG.getSetCC(
DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
ISD::SETEQ);
// Update successor info.
// Both Small and Big will jump to Small.BB, so we sum up the
// probabilities.
addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
if (BPI)
addSuccessorWithProb(
SwitchMBB, DefaultMBB,
// The default destination is the first successor in IR.
BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
else
addSuccessorWithProb(SwitchMBB, DefaultMBB);
// Insert the true branch.
SDValue BrCond =
DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(Small.MBB));
// Insert the false branch.
BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
DAG.getBasicBlock(DefaultMBB));
DAG.setRoot(BrCond);
return;
}
}
}
if (TM.getOptLevel() != CodeGenOpt::None) {
// Here, we order cases by probability so the most likely case will be
// checked first. However, two clusters can have the same probability in
// which case their relative ordering is non-deterministic. So we use Low
// as a tie-breaker as clusters are guaranteed to never overlap.
llvm::sort(W.FirstCluster, W.LastCluster + 1,
[](const CaseCluster &a, const CaseCluster &b) {
return a.Prob != b.Prob ?
a.Prob > b.Prob :
a.Low->getValue().slt(b.Low->getValue());
});
// Rearrange the case blocks so that the last one falls through if possible
// without changing the order of probabilities.
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
--I;
if (I->Prob > W.LastCluster->Prob)
break;
if (I->Kind == CC_Range && I->MBB == NextMBB) {
std::swap(*I, *W.LastCluster);
break;
}
}
}
// Compute total probability.
BranchProbability DefaultProb = W.DefaultProb;
BranchProbability UnhandledProbs = DefaultProb;
for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
UnhandledProbs += I->Prob;
MachineBasicBlock *CurMBB = W.MBB;
for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
bool FallthroughUnreachable = false;
MachineBasicBlock *Fallthrough;
if (I == W.LastCluster) {
// For the last cluster, fall through to the default destination.
Fallthrough = DefaultMBB;
FallthroughUnreachable = isa<UnreachableInst>(
DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
} else {
Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
CurMF->insert(BBI, Fallthrough);
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
UnhandledProbs -= I->Prob;
switch (I->Kind) {
case CC_JumpTable: {
// FIXME: Optimize away range check based on pivot comparisons.
JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
// The jump block hasn't been inserted yet; insert it here.
MachineBasicBlock *JumpMBB = JT->MBB;
CurMF->insert(BBI, JumpMBB);
auto JumpProb = I->Prob;
auto FallthroughProb = UnhandledProbs;
// If the default statement is a target of the jump table, we evenly
// distribute the default probability to successors of CurMBB. Also
// update the probability on the edge from JumpMBB to Fallthrough.
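// Illustrative numbers (hypothetical): if DefaultProb is 20%, the jump edge
// gains 10%, the fallthrough edge loses 10%, and the edge from JumpMBB to
// DefaultMBB is set to 10%.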
for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
SE = JumpMBB->succ_end();
SI != SE; ++SI) {
if (*SI == DefaultMBB) {
JumpProb += DefaultProb / 2;
FallthroughProb -= DefaultProb / 2;
JumpMBB->setSuccProbability(SI, DefaultProb / 2);
JumpMBB->normalizeSuccProbs();
break;
}
}
if (FallthroughUnreachable) {
// Skip the range check if the fallthrough block is unreachable.
JTH->OmitRangeCheck = true;
}
if (!JTH->OmitRangeCheck)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
// The jump table header will be inserted into our current block; it will do
// the range check and fall through to our fallthrough block.
JTH->HeaderBB = CurMBB;
JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
// If we're in the right place, emit the jump table header right now.
if (CurMBB == SwitchMBB) {
visitJumpTableHeader(*JT, *JTH, SwitchMBB);
JTH->Emitted = true;
}
break;
}
case CC_BitTests: {
// FIXME: Optimize away range check based on pivot comparisons.
BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
// The bit test blocks haven't been inserted yet; insert them here.
for (BitTestCase &BTC : BTB->Cases)
CurMF->insert(BBI, BTC.ThisBB);
// Fill in fields of the BitTestBlock.
BTB->Parent = CurMBB;
BTB->Default = Fallthrough;
BTB->DefaultProb = UnhandledProbs;
// If the cases in the bit test don't form a contiguous range, we evenly
// distribute the probability of the edge to Fallthrough between the two
// successors of CurMBB.
if (!BTB->ContiguousRange) {
BTB->Prob += DefaultProb / 2;
BTB->DefaultProb -= DefaultProb / 2;
}
if (FallthroughUnreachable) {
// Skip the range check if the fallthrough block is unreachable.
BTB->OmitRangeCheck = true;
}
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
BTB->Emitted = true;
}
break;
}
case CC_Range: {
const Value *RHS, *LHS, *MHS;
ISD::CondCode CC;
if (I->Low == I->High) {
// Check Cond == I->Low.
CC = ISD::SETEQ;
LHS = Cond;
RHS=I->Low;
MHS = nullptr;
} else {
// Check I->Low <= Cond <= I->High.
CC = ISD::SETLE;
LHS = I->Low;
MHS = Cond;
RHS = I->High;
}
// If Fallthrough is unreachable, fold away the comparison.
if (FallthroughUnreachable)
CC = ISD::SETTRUE;
// The false probability is the sum of all unhandled cases.
CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
getCurSDLoc(), I->Prob, UnhandledProbs);
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
SL->SwitchCases.push_back(CB);
break;
}
}
CurMBB = Fallthrough;
}
}
unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
CaseClusterIt First,
CaseClusterIt Last) {
return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
if (X.Prob != CC.Prob)
return X.Prob > CC.Prob;
// Ties are broken by comparing the case value.
return X.Low->getValue().slt(CC.Low->getValue());
});
}
void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
const SwitchWorkListItem &W,
Value *Cond,
MachineBasicBlock *SwitchMBB) {
assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
"Clusters not sorted?");
assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
// Balance the tree based on branch probabilities to create a near-optimal (in
// terms of search time given key frequency) binary search tree. See e.g. Kurt
// Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
CaseClusterIt LastLeft = W.FirstCluster;
CaseClusterIt FirstRight = W.LastCluster;
auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
// Move LastLeft and FirstRight towards each other from opposite directions to
// find a partitioning of the clusters which balances the probability on both
// sides. If LeftProb and RightProb are equal, alternate which side is
// taken to ensure 0-probability nodes are distributed evenly.
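// Worked example (illustrative only, assuming a zero default probability):
// with cluster probabilities {0.1, 0.4, 0.2, 0.3}, the loop below first grows
// the left side (0.1 < 0.3) to {0.1, 0.4} = 0.5, then grows the right side
// (0.5 >= 0.3) to {0.2, 0.3} = 0.5, and stops with a 0.5 / 0.5 split.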
unsigned I = 0;
while (LastLeft + 1 < FirstRight) {
if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
LeftProb += (++LastLeft)->Prob;
else
RightProb += (--FirstRight)->Prob;
I++;
}
while (true) {
// Our binary search tree differs from a typical BST in that ours can have up
// to three values in each leaf. The pivot selection above doesn't take that
// into account, which means the tree might require more nodes and be less
// efficient. We compensate for this here.
unsigned NumLeft = LastLeft - W.FirstCluster + 1;
unsigned NumRight = W.LastCluster - FirstRight + 1;
if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
// If one side has less than 3 clusters, and the other has more than 3,
// consider taking a cluster from the other side.
if (NumLeft < NumRight) {
// Consider moving the first cluster on the right to the left side.
CaseCluster &CC = *FirstRight;
unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
if (LeftSideRank <= RightSideRank) {
// Moving the cluster to the left does not demote it.
++LastLeft;
++FirstRight;
continue;
}
} else {
assert(NumRight < NumLeft);
// Consider moving the last element on the left to the right side.
CaseCluster &CC = *LastLeft;
unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
if (RightSideRank <= LeftSideRank) {
// Moving the cluster to the right does not demote it.
--LastLeft;
--FirstRight;
continue;
}
}
}
break;
}
assert(LastLeft + 1 == FirstRight);
assert(LastLeft >= W.FirstCluster);
assert(FirstRight <= W.LastCluster);
// Use the first element on the right as pivot since we will make less-than
// comparisons against it.
CaseClusterIt PivotCluster = FirstRight;
assert(PivotCluster > W.FirstCluster);
assert(PivotCluster <= W.LastCluster);
CaseClusterIt FirstLeft = W.FirstCluster;
CaseClusterIt LastRight = W.LastCluster;
const ConstantInt *Pivot = PivotCluster->Low;
// New blocks will be inserted immediately after the current one.
MachineFunction::iterator BBI(W.MBB);
++BBI;
// We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
// we can branch to its destination directly if it's squeezed exactly in
// between the known lower bound and Pivot - 1.
MachineBasicBlock *LeftMBB;
if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
FirstLeft->Low == W.GE &&
(FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
LeftMBB = FirstLeft->MBB;
} else {
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, LeftMBB);
WorkList.push_back(
{LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
// single cluster, RHS.Low == Pivot, and we can branch to its destination
// directly if RHS.High equals the current upper bound.
MachineBasicBlock *RightMBB;
if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
RightMBB = FirstRight->MBB;
} else {
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, RightMBB);
WorkList.push_back(
{RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Create the CaseBlock record that will be used to lower the branch.
CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
getCurSDLoc(), LeftProb, RightProb);
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
SL->SwitchCases.push_back(CB);
}
// Scale CaseProb after peeling a case with probability PeeledCaseProb from
// the switch statement.
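// Worked example (illustrative only): if the peeled case carried 60% of the
// probability, the remaining switch carries 40%, so a case that originally
// had probability 0.2 is rescaled to roughly 0.2 / 0.4 = 0.5 within the
// peeled-off switch; the std::max below merely clamps the result to <= 1.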
static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
BranchProbability PeeledCaseProb) {
if (PeeledCaseProb == BranchProbability::getOne())
return BranchProbability::getZero();
BranchProbability SwitchProb = PeeledCaseProb.getCompl();
uint32_t Numerator = CaseProb.getNumerator();
uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
return BranchProbability(Numerator, std::max(Numerator, Denominator));
}
// Try to peel the top probability case if it exceeds the threshold.
// Return current MachineBasicBlock for the switch statement if the peeling
// does not occur.
// If the peeling is performed, return the newly created MachineBasicBlock
// for the peeled switch statement. Also update Clusters to remove the peeled
// case. PeeledCaseProb is the BranchProbability for the peeled case.
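// Illustrative example (numbers are hypothetical): if one case carries 80%
// of the probability and that exceeds the peeling threshold, it is lowered
// as its own single-cluster work item ahead of the rest of the switch, and
// the remaining clusters are rescaled against the leftover 20% via
// scaleCaseProbality().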
MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
const SwitchInst &SI, CaseClusterVector &Clusters,
BranchProbability &PeeledCaseProb) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
// Don't peel if there is only one cluster or when optimizing for size.
if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
TM.getOptLevel() == CodeGenOpt::None ||
SwitchMBB->getParent()->getFunction().hasMinSize())
return SwitchMBB;
BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
unsigned PeeledCaseIndex = 0;
bool SwitchPeeled = false;
for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
CaseCluster &CC = Clusters[Index];
if (CC.Prob < TopCaseProb)
continue;
TopCaseProb = CC.Prob;
PeeledCaseIndex = Index;
SwitchPeeled = true;
}
if (!SwitchPeeled)
return SwitchMBB;
LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
<< TopCaseProb << "\n");
// Record the MBB for the peeled switch statement.
MachineFunction::iterator BBI(SwitchMBB);
++BBI;
MachineBasicBlock *PeeledSwitchMBB =
FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
ExportFromCurrentBlock(SI.getCondition());
auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
nullptr, nullptr, TopCaseProb.getCompl()};
lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
Clusters.erase(PeeledCaseIt);
for (CaseCluster &CC : Clusters) {
LLVM_DEBUG(
dbgs() << "Scale the probability for one cluster, before scaling: "
<< CC.Prob << "\n");
CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
}
PeeledCaseProb = TopCaseProb;
return PeeledSwitchMBB;
}
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Extract cases from the switch.
BranchProbabilityInfo *BPI = FuncInfo.BPI;
CaseClusterVector Clusters;
Clusters.reserve(SI.getNumCases());
for (auto I : SI.cases()) {
MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
const ConstantInt *CaseVal = I.getCaseValue();
BranchProbability Prob =
BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
: BranchProbability(1, SI.getNumCases() + 1);
Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
}
MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
// Cluster adjacent cases with the same destination. We do this at all
// optimization levels because it's cheap to do and will make codegen faster
// if there are many clusters.
sortAndRangeify(Clusters);
// The branch probability of the peeled case.
BranchProbability PeeledCaseProb = BranchProbability::getZero();
MachineBasicBlock *PeeledSwitchMBB =
peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
// If there is only the default destination, jump there directly.
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
if (Clusters.empty()) {
assert(PeeledSwitchMBB == SwitchMBB);
SwitchMBB->addSuccessor(DefaultMBB);
if (DefaultMBB != NextBlock(SwitchMBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
}
return;
}
SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
for (const CaseCluster &C : Clusters) {
if (C.Kind == CC_JumpTable)
dbgs() << "JT:";
if (C.Kind == CC_BitTests)
dbgs() << "BT:";
C.Low->getValue().print(dbgs(), true);
if (C.Low != C.High) {
dbgs() << '-';
C.High->getValue().print(dbgs(), true);
}
dbgs() << ' ';
}
dbgs() << '\n';
});
assert(!Clusters.empty());
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
// Scale the branch probability for DefaultMBB if the peel occurs and
// DefaultMBB is not replaced.
if (PeeledCaseProb != BranchProbability::getZero() &&
DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
WorkList.push_back(
{PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.pop_back_val();
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
!DefaultMBB->getParent()->getFunction().hasMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
continue;
}
lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
}
}
void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto DL = getCurSDLoc();
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getStepVector(DL, ResultVT));
}
void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDLoc DL = getCurSDLoc();
SDValue V = getValue(I.getOperand(0));
assert(VT == V.getValueType() && "Malformed vector.reverse!");
if (VT.isScalableVector()) {
setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
return;
}
// Use VECTOR_SHUFFLE for the fixed-length vector
// to maintain existing behavior.
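// For example (illustrative), a 4-element fixed-length vector gets the
// reversal mask <3, 2, 1, 0>.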
SmallVector<int, 8> Mask;
unsigned NumElts = VT.getVectorMinNumElements();
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(NumElts - 1 - i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
}
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
SmallVector<SDValue, 4> Values(NumValues);
SDValue Op = getValue(I.getOperand(0));
for (unsigned i = 0; i != NumValues; ++i)
Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i],
SDValue(Op.getNode(), Op.getResNo() + i));
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDLoc DL = getCurSDLoc();
SDValue V1 = getValue(I.getOperand(0));
SDValue V2 = getValue(I.getOperand(1));
int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue();
// VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
if (VT.isScalableVector()) {
MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
DAG.getConstant(Imm, DL, IdxVT)));
return;
}
unsigned NumElts = VT.getVectorNumElements();
if ((-Imm > NumElts) || (Imm >= NumElts)) {
// Result is undefined if immediate is out-of-bounds.
setValue(&I, DAG.getUNDEF(VT));
return;
}
uint64_t Idx = (NumElts + Imm) % NumElts;
// Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
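// Worked example (illustrative): with NumElts = 4 and Imm = 1, Idx = 1 and
// the shuffle mask is <1, 2, 3, 4>, i.e. the last three elements of V1
// followed by the first element of V2; with Imm = -1, Idx = 3 and the mask
// <3, 4, 5, 6> selects the last element of V1 followed by the first three
// elements of V2.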
SmallVector<int, 8> Mask;
for (unsigned i = 0; i < NumElts; ++i)
Mask.push_back(Idx + i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5e1786958b6f..7f80ce37e28a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1,8961 +1,8964 @@
//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
: TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
bool TargetLowering::isPositionIndependent() const {
return getTargetMachine().isPositionIndependent();
}
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
SDValue &Chain) const {
const Function &F = DAG.getMachineFunction().getFunction();
// First, check if tail calls have been disabled in this function.
if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
return false;
// Conservatively require the attributes of the call to match those of
// the return. Ignore the following attributes because they don't affect the
// call sequence.
AttrBuilder CallerAttrs(F.getAttributes(), AttributeList::ReturnIndex);
for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
Attribute::DereferenceableOrNull, Attribute::NoAlias,
Attribute::NonNull})
CallerAttrs.removeAttribute(Attr);
if (CallerAttrs.hasAttributes())
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
if (CallerAttrs.contains(Attribute::ZExt) ||
CallerAttrs.contains(Attribute::SExt))
return false;
// Check if the only use is a function return node.
return isUsedByReturnOnly(Node, Chain);
}
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
const uint32_t *CallerPreservedMask,
const SmallVectorImpl<CCValAssign> &ArgLocs,
const SmallVectorImpl<SDValue> &OutVals) const {
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
MCRegister Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
continue;
// Check that we pass through the same value that the caller received in
// this register. (We look for a CopyFromReg reading a virtual register that
// is used for the function live-in value of register Reg.)
SDValue Value = OutVals[I];
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
return false;
}
return true;
}
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
unsigned ArgIdx) {
IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
Alignment = Call->getParamStackAlign(ArgIdx);
IndirectType = nullptr;
assert(IsByVal + IsPreallocated + IsInAlloca <= 1 &&
"multiple ABI attributes?");
if (IsByVal) {
IndirectType = Call->getParamByValType(ArgIdx);
if (!Alignment)
Alignment = Call->getParamAlign(ArgIdx);
}
if (IsPreallocated)
IndirectType = Call->getParamPreallocatedType(ArgIdx);
if (IsInAlloca)
IndirectType = Call->getParamInAllocaType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
ArrayRef<SDValue> Ops,
MakeLibCallOptions CallOptions,
const SDLoc &dl,
SDValue InChain) const {
if (!InChain)
InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0; i < Ops.size(); ++i) {
SDValue NewOp = Ops[i];
Entry.Node = NewOp;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
CallOptions.IsSExt);
Entry.IsZExt = !Entry.IsSExt;
if (CallOptions.IsSoften &&
!shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
Entry.IsSExt = Entry.IsZExt = false;
}
Args.push_back(Entry);
}
if (LC == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
bool zeroExtend = !signExtend;
if (CallOptions.IsSoften &&
!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
signExtend = zeroExtend = false;
}
CLI.setDebugLoc(dl)
.setChain(InChain)
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setNoReturn(CallOptions.DoesNotReturn)
.setDiscardResult(!CallOptions.IsReturnValueUsed)
.setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
.setSExtResult(signExtend)
.setZExtResult(zeroExtend);
return LowerCallTo(CLI);
}
bool TargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS, const AttributeList &FuncAttributes) const {
if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
return false;
EVT VT = getOptimalMemOpType(Op, FuncAttributes);
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater than or
// equal to DstAlign (or zero).
VT = MVT::i64;
if (Op.isFixedDstAlign())
while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
!allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());
// Find the largest legal integer type.
MVT LVT = MVT::i64;
while (!isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
// If the type we've chosen is larger than the largest legal integer type
// then use that instead.
if (VT.bitsGT(LVT))
VT = LVT;
}
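// Illustrative walk-through (assumptions: VT ended up as i32, overlapping
// accesses are not used, and i16/i8 are safe mem-op types): a 15-byte
// operation is broken into {i32, i32, i32, i16, i8}; when overlapping
// accesses are allowed and fast, the tail may instead become one unaligned
// i32 that overlaps the previous access.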
unsigned NumMemOps = 0;
uint64_t Size = Op.size();
while (Size) {
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector loads/stores for the left-over pieces.
EVT NewVT = VT;
unsigned NewVTSize;
bool Found = false;
if (VT.isVector() || VT.isFloatingPoint()) {
NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
isSafeMemOpType(NewVT.getSimpleVT()))
Found = true;
else if (NewVT == MVT::i64 &&
isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
isSafeMemOpType(MVT::f64)) {
// i64 is usually not legal on 32-bit targets, but f64 may be.
NewVT = MVT::f64;
Found = true;
}
}
if (!Found) {
do {
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
if (NewVT == MVT::i8)
break;
} while (!isSafeMemOpType(NewVT.getSimpleVT()));
}
NewVTSize = NewVT.getSizeInBits() / 8;
// If the new VT cannot cover all of the remaining bits, then consider
// issuing one (or a pair of) unaligned and overlapping loads / stores.
bool Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast)
VTSize = Size;
else {
VT = NewVT;
VTSize = NewVTSize;
}
}
if (++NumMemOps > Limit)
return false;
MemOps.push_back(VT);
Size -= VTSize;
}
return true;
}
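// For illustration: a 15-byte memcpy on a target whose preferred type resolves
// to i64 and which disallows overlapping accesses typically yields
// MemOps = { i64, i32, i16, i8 } (8 + 4 + 2 + 1 bytes). When Op.allowOverlap()
// holds and misaligned i64 accesses are fast, the 7-byte tail is instead
// covered by a second, overlapping i64 operation.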
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS) const {
SDValue Chain;
return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
OldRHS, Chain);
}
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS,
SDValue &Chain,
bool IsSignaling) const {
// FIXME: Currently we cannot really respect all IEEE predicates because libgcc
// does not provide the required routines. We can update this code when libgcc
// provides such functions.
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
// Expand into one or more soft-fp libcall(s).
RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
bool ShouldInvertCC = false;
switch (CCCode) {
case ISD::SETEQ:
case ISD::SETOEQ:
LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
break;
case ISD::SETNE:
case ISD::SETUNE:
LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
(VT == MVT::f64) ? RTLIB::UNE_F64 :
(VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
break;
case ISD::SETGE:
case ISD::SETOGE:
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
(VT == MVT::f64) ? RTLIB::OGE_F64 :
(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
break;
case ISD::SETLT:
case ISD::SETOLT:
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 :
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
break;
case ISD::SETLE:
case ISD::SETOLE:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 :
(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
break;
case ISD::SETGT:
case ISD::SETOGT:
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETO:
ShouldInvertCC = true;
LLVM_FALLTHROUGH;
case ISD::SETUO:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
break;
case ISD::SETONE:
// SETONE = O && UNE
ShouldInvertCC = true;
LLVM_FALLTHROUGH;
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
break;
default:
// Invert CC for unordered comparisons.
ShouldInvertCC = true;
switch (CCCode) {
case ISD::SETULT:
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
(VT == MVT::f64) ? RTLIB::OGE_F64 :
(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
break;
case ISD::SETULE:
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETUGT:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 :
(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
break;
case ISD::SETUGE:
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 :
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
break;
default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
// Use the target-specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = {NewLHS, NewRHS};
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpsVT[2] = { OldLHS.getValueType(),
OldRHS.getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
NewLHS = Call.first;
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
if (ShouldInvertCC) {
assert(RetVT.isInteger());
CCCode = getSetCCInverse(CCCode, RetVT);
}
if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
// Update Chain.
Chain = Call.second;
} else {
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
CCCode = getCmpLibcallCC(LC2);
if (ShouldInvertCC)
CCCode = getSetCCInverse(CCCode, RetVT);
NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
Call2.second);
NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
Tmp.getValueType(), Tmp, NewLHS);
NewRHS = SDValue();
}
}
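// For illustration: softening (setcc f32 a, b, setuo) emits a single call to
// the RTLIB::UO_F32 routine (__unordsf2 in libgcc/compiler-rt) and tests its
// result against 0 with that libcall's condition code. A two-call case such as
// SETUEQ becomes "__unordsf2(a, b) != 0 || __eqsf2(a, b) == 0"; when the
// condition was inverted (e.g. SETONE) the two results are combined with AND
// instead of OR.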
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
// In non-pic modes, just use the address of a block.
if (!isPositionIndependent())
return MachineJumpTableInfo::EK_BlockAddress;
// In PIC mode, if the target supports a GPRel32 directive, use it.
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
// Otherwise, use a label difference.
return MachineJumpTableInfo::EK_LabelDifference32;
}
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
// If our PIC model is GP relative, use the global offset table as the base.
unsigned JTEncoding = getJumpTableEncoding();
if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
(JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
return Table;
}
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,MCContext &Ctx) const{
// The normal PIC reloc base is the label at the start of the jump table.
return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
const TargetMachine &TM = getTargetMachine();
const GlobalValue *GV = GA->getGlobal();
// If the address is not even local to this DSO, we will have to load it from
// a GOT and then add the offset.
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return false;
// If the code is position independent we will have to add a base register.
if (isPositionIndependent())
return false;
// Otherwise we can do it.
return true;
}
//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
const APInt &DemandedBits,
const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
// Do target-specific constant optimization.
if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return TLO.New.getNode();
// FIXME: ISD::SELECT, ISD::SELECT_CC
switch (Opcode) {
default:
break;
case ISD::XOR:
case ISD::AND:
case ISD::OR: {
auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!Op1C || Op1C->isOpaque())
return false;
// If this is a 'not' op, don't touch it because that's a canonical form.
const APInt &C = Op1C->getAPIntValue();
if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
return false;
if (!C.isSubsetOf(DemandedBits)) {
EVT VT = Op.getValueType();
SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
}
break;
}
}
return false;
}
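// For illustration: given (and X, 0xFF00) where only the bits 0xF000 are
// demanded, the constant is not a subset of the demanded bits, so it is shrunk
// to 0xF000 and the node is rebuilt as (and X, 0xF000).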
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
const APInt &DemandedBits,
TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ANY_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
const APInt &Demanded,
TargetLoweringOpt &TLO) const {
assert(Op.getNumOperands() == 2 &&
"ShrinkDemandedOp only supports binary operators!");
assert(Op.getNode()->getNumValues() == 1 &&
"ShrinkDemandedOp only supports nodes with one result!");
SelectionDAG &DAG = TLO.DAG;
SDLoc dl(Op);
// Early return, as this function cannot handle vector types.
if (Op.getValueType().isVector())
return false;
// Don't do this if the node has another user, which may require the
// full value.
if (!Op.getNode()->hasOneUse())
return false;
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned DemandedSize = Demanded.getActiveBits();
unsigned SmallVTBits = DemandedSize;
if (!isPowerOf2_32(SmallVTBits))
SmallVTBits = NextPowerOf2(SmallVTBits);
for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
TLI.isZExtFree(SmallVT, Op.getValueType())) {
// We found a type with free casts.
SDValue X = DAG.getNode(
Op.getOpcode(), dl, SmallVT,
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
return TLO.CombineTo(Op, Z);
}
}
return false;
}
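// For illustration: for a 64-bit (add X, Y) of which only the low 13 bits are
// demanded, the loop tries SmallVTBits = 16 and then 32; if truncating i64 to
// i16 and zero-extending i16 back to i64 are both free, the node is rebuilt as
// (any_extend (add (trunc X), (trunc Y))) in i16.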
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
KnownBits Known;
bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
if (Simplified) {
DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
}
return Simplified;
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
KnownBits &Known,
TargetLoweringOpt &TLO,
unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
// TODO: We can probably do more work on calculating the known bits and
// simplifying the operations for scalable vectors, but for now we just
// bail out.
if (VT.isScalableVector()) {
// Pretend we don't know anything for now.
Known = KnownBits(DemandedBits.getBitWidth());
return false;
}
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
}
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return SDValue();
// Ignore UNDEFs.
if (Op.isUndef())
return SDValue();
// Not demanding any bits/elts from Op.
if (DemandedBits == 0 || DemandedElts == 0)
return DAG.getUNDEF(Op.getValueType());
unsigned NumElts = DemandedElts.getBitWidth();
unsigned BitWidth = DemandedBits.getBitWidth();
KnownBits LHSKnown, RHSKnown;
switch (Op.getOpcode()) {
case ISD::BITCAST: {
SDValue Src = peekThroughBitcasts(Op.getOperand(0));
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
if (SrcVT == DstVT)
return Src;
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
if (NumSrcEltBits == NumDstEltBits)
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
// TODO - bigendian once we have test coverage.
if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
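// For illustration, assuming a little-endian v4i32 -> v2i64 bitcast: Scale is
// 2, and demanding only the low byte of i64 element 0 translates to demanding
// the low byte of i32 element 0 (chunk i = 0 of dst element j = 0).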
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
if (!Sub.isNullValue()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
DemandedSrcElts.setBit((j * Scale) + i);
}
}
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
}
// TODO - bigendian once we have test coverage.
if ((NumSrcEltBits % NumDstEltBits) == 0 &&
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * NumDstEltBits;
DemandedSrcBits.insertBits(DemandedBits, Offset);
DemandedSrcElts.setBit(i / Scale);
}
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
}
break;
}
case ISD::AND: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// context.
if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return Op.getOperand(1);
break;
}
case ISD::OR: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
// context.
if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
return Op.getOperand(1);
break;
}
case ISD::XOR: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known zero on one side, return the
// other.
if (DemandedBits.isSubsetOf(RHSKnown.Zero))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(LHSKnown.Zero))
return Op.getOperand(1);
break;
}
case ISD::SHL: {
// If we are only demanding sign bits then we can use the shift source
// directly.
if (const APInt *MaxSA =
DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
SDValue Op0 = Op.getOperand(0);
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return Op0;
}
break;
}
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
// if we don't care about FP signed-zero. The use of SETLT with FP means
// that we don't care about NaNs.
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
return Op0;
}
break;
}
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExBits = ExVT.getScalarSizeInBits();
if (DemandedBits.getActiveBits() <= ExBits)
return Op0;
// If the input is already sign extended, just drop the extension.
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
if (NumSignBits >= (BitWidth - ExBits + 1))
return Op0;
break;
}
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
// If we only want the lowest element and none of the extended bits, then we
// can return the bitcasted source vector.
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
DAG.getDataLayout().isLittleEndian() &&
DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
return DAG.getBitcast(DstVT, Src);
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
// If we don't demand the inserted element, return the base vector.
SDValue Vec = Op.getOperand(0);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
!DemandedElts[CIdx->getZExtValue()])
return Vec;
break;
}
case ISD::INSERT_SUBVECTOR: {
// If we don't demand the inserted subvector, return the base vector.
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
return Vec;
break;
}
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// If all the demanded elts are from one operand and are inline,
// then we can use the operand directly.
bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
for (unsigned i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (M < 0 || !DemandedElts[i])
continue;
AllUndef = false;
IdentityLHS &= (M == (int)i);
IdentityRHS &= ((M - NumElts) == i);
}
if (AllUndef)
return DAG.getUNDEF(Op.getValueType());
if (IdentityLHS)
return Op.getOperand(0);
if (IdentityRHS)
return Op.getOperand(1);
break;
}
default:
if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth))
return V;
break;
}
return SDValue();
}
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
unsigned Depth) const {
APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth, bool AssumeSingleUse) const {
unsigned BitWidth = OriginalDemandedBits.getBitWidth();
assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
// Don't know anything.
Known = KnownBits(BitWidth);
// TODO: We can probably do more work on calculating the known bits and
// simplifying the operations for scalable vectors, but for now we just
// bail out.
if (Op.getValueType().isScalableVector())
return false;
unsigned NumElts = OriginalDemandedElts.getBitWidth();
assert((!Op.getValueType().isVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
"Unexpected vector size");
APInt DemandedBits = OriginalDemandedBits;
APInt DemandedElts = OriginalDemandedElts;
SDLoc dl(Op);
auto &DL = TLO.DAG.getDataLayout();
// Undef operand.
if (Op.isUndef())
return false;
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
return false;
}
if (Op.getOpcode() == ISD::ConstantFP) {
// We know all of the bits for a floating point constant!
Known = KnownBits::makeConstant(
cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
return false;
}
// Other users may use these bits.
EVT VT = Op.getValueType();
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
// If not at the root, just compute the Known bits to
// simplify things downstream.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedBits/Elts to all bits.
DemandedBits = APInt::getAllOnesValue(BitWidth);
DemandedElts = APInt::getAllOnesValue(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
} else if (Depth >= SelectionDAG::MaxRecursionDepth) {
// Limit search depth.
return false;
}
KnownBits Known2;
switch (Op.getOpcode()) {
case ISD::TargetConstant:
llvm_unreachable("Can't simplify this node");
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0])
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
KnownBits SrcKnown;
SDValue Src = Op.getOperand(0);
unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
return true;
// Upper elements are undef, so only get the knownbits if we just demand
// the bottom element.
if (DemandedElts == 1)
Known = SrcKnown.anyextOrTrunc(BitWidth);
break;
}
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded element.
// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
case ISD::LOAD: {
auto *LD = cast<LoadSDNode>(Op);
if (getTargetConstantFromLoad(LD)) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
}
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
// If this is a ZEXTLoad and we are looking at the loaded value.
EVT MemVT = LD->getMemoryVT();
unsigned MemBits = MemVT.getScalarSizeInBits();
Known.Zero.setBitsFrom(MemBits);
return false; // Don't fall through, will infinitely loop.
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
// If index isn't constant, assume we need all vector elements AND the
// inserted element.
APInt DemandedVecElts(DemandedElts);
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
unsigned Idx = CIdx->getZExtValue();
DemandedVecElts.clearBit(Idx);
// Inserted element is not required.
if (!DemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
}
KnownBits KnownScl;
unsigned NumSclBits = Scl.getScalarValueSizeInBits();
APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
return true;
Known = KnownScl.anyextOrTrunc(BitWidth);
KnownBits KnownVec;
if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
Depth + 1))
return true;
if (!!DemandedVecElts)
Known = KnownBits::commonBits(Known, KnownVec);
return false;
}
case ISD::INSERT_SUBVECTOR: {
// Demand any elements from the subvector and the remainder from the src it's
// inserted into.
SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
KnownBits KnownSub, KnownSrc;
if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
Depth + 1))
return true;
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedSubElts)
Known = KnownBits::commonBits(Known, KnownSub);
if (!!DemandedSrcElts)
Known = KnownBits::commonBits(Known, KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
!DemandedSrcElts.isAllOnesValue()) {
SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSub || NewSrc) {
NewSub = NewSub ? NewSub : Sub;
NewSrc = NewSrc ? NewSrc : Src;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
Op.getOperand(2));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
if (Src.getValueType().isScalableVector())
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
Depth + 1))
return true;
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (DemandedSrc) {
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
Op.getOperand(1));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
EVT SubVT = Op.getOperand(0).getValueType();
unsigned NumSubVecs = Op.getNumOperands();
unsigned NumSubElts = SubVT.getVectorNumElements();
for (unsigned i = 0; i != NumSubVecs; ++i) {
APInt DemandedSubElts =
DemandedElts.extractBits(NumSubElts, i * NumSubElts);
if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
Known2, TLO, Depth + 1))
return true;
// Known bits are shared by every demanded subvector element.
if (!!DemandedSubElts)
Known = KnownBits::commonBits(Known, Known2);
}
break;
}
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from shuffle operands.
APInt DemandedLHS(NumElts, 0);
APInt DemandedRHS(NumElts, 0);
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
continue;
int M = ShuffleMask[i];
if (M < 0) {
// For UNDEF elements, we don't know anything about the common state of
// the shuffle result.
DemandedLHS.clearAllBits();
DemandedRHS.clearAllBits();
break;
}
assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
if (M < (int)NumElts)
DemandedLHS.setBit(M);
else
DemandedRHS.setBit(M - NumElts);
}
if (!!DemandedLHS || !!DemandedRHS) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedLHS) {
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
Depth + 1))
return true;
Known = KnownBits::commonBits(Known, Known2);
}
if (!!DemandedRHS) {
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
Depth + 1))
return true;
Known = KnownBits::commonBits(Known, Known2);
}
// Attempt to avoid multi-use ops if we don't need anything from them.
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::AND: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS; here we're using information from the LHS to simplify
// the RHS.
if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
// Do not increment Depth here; that can cause an infinite loop.
KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
if ((LHSKnown.Zero & DemandedBits) ==
(~RHSC->getAPIntValue() & DemandedBits))
return TLO.CombineTo(Op, Op0);
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
DemandedElts, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
// constant, but if this 'and' is only clearing bits that were just set by
// the xor, then this 'and' can be eliminated by shrinking the mask of
// the xor. For example, for a 32-bit X:
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
LHSKnown.One == ~RHSC->getAPIntValue()) {
SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
return TLO.CombineTo(Op, Xor);
}
}
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
return TLO.CombineTo(Op, Op1);
// If all of the demanded bits in the inputs are known zeros, return zero.
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
Known &= Known2;
break;
}
case ISD::OR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
Known |= Known2;
break;
}
case ISD::XOR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// If all of the unknown bits are known to be zero on one side or the other,
// turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
if (C) {
// If one side is a constant, and all of the set bits in the constant are
// also known set on the other side, turn this into an AND, as we know
// the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
// NB: it is okay if more bits are known than are requested
if (C->getAPIntValue() == Known2.One) {
SDValue ANDC =
TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
}
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
if (!C->isAllOnesValue() &&
DemandedBits.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
}
}
// If we can't turn this into a 'not', try to shrink the constant.
if (!C || !C->isAllOnesValue())
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
Known ^= Known2;
break;
}
case ISD::SELECT:
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
// if we don't care about FP signed-zero. The use of SETLT with FP means
// that we don't care about NaNs.
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
return TLO.CombineTo(Op, Op0);
// TODO: Should we check for other forms of sign-bit comparisons?
// Examples: X <= -1, X >= 0
}
if (getBooleanContents(Op0.getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
}
case ISD::SHL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
// TODO - support non-uniform vector amounts.
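// For illustration: ((X >>u 4) << 6) with the low 6 bits undemanded becomes
// (X << 2), while ((X >>u 6) << 4) with the low 4 bits undemanded becomes
// (X >>u 2).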
if (Op0.getOpcode() == ISD::SRL) {
if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
}
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
// Convert (shl (anyext x), c) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = Op0.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getScalarSizeInBits();
if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
ShTy = InnerVT;
SDValue NarrowShl =
TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
TLO.DAG.getConstant(ShAmt, dl, ShTy));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
// TODO - support non-uniform vector amounts.
if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse()) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
unsigned InnerShAmt = SA2->getZExtValue();
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
DemandedBits.getActiveBits() <=
(InnerBits - InnerShAmt + ShAmt) &&
DemandedBits.countTrailingZeros() >= ShAmt) {
SDValue NewSA =
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
}
}
}
}
APInt InDemandedMask = DemandedBits.lshr(ShAmt);
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits known zero.
Known.Zero.setLowBits(ShAmt);
// Try shrinking the operation as long as the shift amount will still be
// in range.
if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
}
// If we are only demanding sign bits then we can use the shift source
// directly.
if (const APInt *MaxSA =
TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return TLO.CombineTo(Op, Op0);
}
break;
}
case ISD::SRL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SHL) {
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
}
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits known zero.
Known.Zero.setHighBits(ShAmt);
}
break;
}
case ISD::SRA: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
// If we only want bits that already match the signbit then we don't need
// to shift.
unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
NumHiDemandedBits)
return TLO.CombineTo(Op, Op0);
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
if (DemandedBits.isOneValue())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
if (DemandedBits.countLeadingZeros() < ShAmt)
InDemandedMask.setSignBit();
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
DemandedBits.countLeadingZeros() >= ShAmt) {
SDNodeFlags Flags;
Flags.setExact(Op->getFlags().hasExact());
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
}
int Log2 = DemandedBits.exactLogBase2();
if (Log2 >= 0) {
// The bit must come from the sign.
SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
}
if (Known.One[BitWidth - ShAmt - 1])
// New bits are known one.
Known.One.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
}
break;
}
case ISD::FSHL:
case ISD::FSHR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
unsigned Amt = SA->getAPIntValue().urem(BitWidth);
// For fshl, 0-shift returns the 1st arg.
// For fshr, 0-shift returns the 2nd arg.
if (Amt == 0) {
if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
Known, TLO, Depth + 1))
return true;
break;
}
// fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
// fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
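// For illustration, with BitWidth = 32 and Amt = 8 for fshl: a demanded mask M
// on the result requires (M >> 8) from Op0 and (M << 24) from Op1.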
APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
Depth + 1))
return true;
Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
}
// For pow-2 bitwidths, the shift amount is used modulo BitWidth, so only its
// low log2(BitWidth) bits are demanded.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
}
break;
}
case ISD::ROTL:
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// If we're rotating a 0/-1 value, then it stays a 0/-1 value.
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
return TLO.CombineTo(Op, Op0);
// For pow-2 bitwidths, the rotate amount is used modulo BitWidth, so only its
// low log2(BitWidth) bits are demanded.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
}
break;
}
case ISD::UMIN: {
// Check if one arg is always less than (or equal) to the other arg.
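// For illustration: if the known bits prove Op0 <= 15 (bits 4 and above zero)
// and Op1 >= 16 (bit 4 known one), KnownBits::ule(Known0, Known1) is
// definitively true and the umin folds to Op0.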
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umin(Known0, Known1);
if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
break;
}
case ISD::UMAX: {
// Check if one arg is always greater than (or equal) to the other arg.
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umax(Known0, Known1);
if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
break;
}
case ISD::BITREVERSE: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.reverseBits();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
Known.One = Known2.One.reverseBits();
Known.Zero = Known2.Zero.reverseBits();
break;
}
case ISD::BSWAP: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.byteSwap();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
Known.One = Known2.One.byteSwap();
Known.Zero = Known2.Zero.byteSwap();
break;
}
case ISD::CTPOP: {
// If only 1 bit is demanded, replace with PARITY as long as we're before
// op legalization.
// FIXME: Limit to scalars for now.
if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
Op.getOperand(0)));
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExVTBits = ExVT.getScalarSizeInBits();
// If we only care about the highest bit, don't bother shifting right.
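// For illustration: (sext_inreg X, i8) in i32 with only the sign mask (bit 31)
// demanded is rebuilt as (shl X, 24), moving bit 7 of X into the sign position
// (unless X is already sufficiently sign extended, see below).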
if (DemandedBits.isSignMask()) {
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
// However, if the input is already sign extended, we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
// Compute the correct shift amount type, which must be getShiftAmountTy
// for scalar types after legalization.
EVT ShiftAmtTy = VT;
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
SDValue ShiftAmt =
TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
}
}
// If none of the extended bits are demanded, eliminate the sextinreg.
if (DemandedBits.getActiveBits() <= ExVTBits)
return TLO.CombineTo(Op, Op0);
APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits.setBit(ExVTBits - 1);
if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
// If the input sign bit is known zero, convert this into a zero extension.
if (Known.Zero[ExVTBits - 1])
return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
if (Known.One[ExVTBits - 1]) { // Input sign bit known set
Known.One.setBitsFrom(ExVTBits);
Known.Zero &= Mask;
} else { // Input sign bit unknown
Known.Zero &= Mask;
Known.One &= Mask;
}
break;
}
case ISD::BUILD_PAIR: {
EVT HalfVT = Op.getOperand(0).getValueType();
unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
KnownBits KnownLo, KnownHi;
if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
return true;
Known.Zero = KnownLo.Zero.zext(BitWidth) |
KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
Known.One = KnownLo.One.zext(BitWidth) |
KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
break;
}
case ISD::ZERO_EXTEND:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::SIGN_EXTEND:
case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
InDemandedBits.setBit(InBits - 1);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
// If the sign bit is known one, the top bits match.
Known = Known.sext(BitWidth);
// If the sign bit is known zero, convert this to a zero extend.
if (Known.isNonNegative()) {
unsigned Opc =
IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::ANY_EXTEND:
case ISD::ANY_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
// If we only need the bottom element then we can just bitcast.
// TODO: Handle ANY_EXTEND?
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.anyext(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::TRUNCATE: {
SDValue Src = Op.getOperand(0);
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
APInt TruncMask = DemandedBits.zext(OperandBitWidth);
if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
Known = Known.trunc(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Src.getNode()->hasOneUse()) {
switch (Src.getOpcode()) {
default:
break;
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
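// e.g. (trunc (srl i64 %x, 8) to i32) where bits 24-31 of the result are
// not demanded can become (srl (trunc %x to i32), 8).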
if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
const APInt *ShAmtC =
TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
if (!ShAmtC || ShAmtC->uge(BitWidth))
break;
uint64_t ShVal = ShAmtC->getZExtValue();
APInt HighBits =
APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
HighBits.lshrInPlace(ShVal);
HighBits = HighBits.trunc(BitWidth);
if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
SDValue NewShAmt = TLO.DAG.getConstant(
ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
}
break;
}
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
case ISD::AssertZext: {
// AssertZext demands all of the high bits, plus any of the low bits
// demanded by its users.
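// e.g. for (AssertZext i32 %x, i8) we query the operand with bits 8-31 plus
// the demanded low bits, and can report bits 8-31 of the result as zero.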
EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero |= ~InMask;
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue Src = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
unsigned EltBitWidth = Src.getScalarValueSizeInBits();
if (SrcEltCnt.isScalable())
return false;
// Demand the bits from every vector element without a constant index.
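// e.g. (extract_vector_elt <4 x i32> %v, 2) only needs element 2 of %v; a
// variable index forces all four elements to be treated as demanded.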
unsigned NumSrcElts = SrcEltCnt.getFixedValue();
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
// If BitWidth > EltBitWidth the value is any-extended, so we do not know
// anything about the extended bits.
APInt DemandedSrcBits = DemandedBits;
if (BitWidth > EltBitWidth)
DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
Depth + 1))
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedSrcBits.isAllOnesValue() ||
!DemandedSrcElts.isAllOnesValue()) {
if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
return TLO.CombineTo(Op, NewOp);
}
}
Known = Known2;
if (BitWidth > EltBitWidth)
Known = Known.anyext(BitWidth);
break;
}
case ISD::BITCAST: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
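// e.g. (bitcast f32 %x to i32) with only bit 31 demanded can become
// (shl (fgetsign %x), 31).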
if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
SrcVT.isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
SrcVT != MVT::f128) {
// Cannot eliminate/lower SHL for f128 yet.
EVT Ty = OpVTLegal ? VT : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
unsigned ShVal = Op.getValueSizeInBits() - 1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
}
}
// Bitcast from a vector using SimplifyDemandedBits/SimplifyDemandedVectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
// TODO - bigendian once we have test coverage.
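// e.g. for a little-endian (bitcast <4 x i16> %v to <2 x i32>), demanded
// result element j pulls in source elements 2*j and 2*j+1, with the low 16
// demanded bits mapping to the even element and the high 16 to the odd one.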
if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
if (!Sub.isNullValue()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
DemandedSrcElts.setBit((j * Scale) + i);
}
}
APInt KnownSrcUndef, KnownSrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
KnownSrcZero, TLO, Depth + 1))
return true;
KnownBits KnownSrcBits;
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
} else if ((NumSrcEltBits % BitWidth) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * BitWidth;
DemandedSrcBits.insertBits(DemandedBits, Offset);
DemandedSrcElts.setBit(i / Scale);
}
if (SrcVT.isVector()) {
APInt KnownSrcUndef, KnownSrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
KnownSrcZero, TLO, Depth + 1))
return true;
}
KnownBits KnownSrcBits;
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
break;
}
case ISD::ADD:
case ISD::MUL:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
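// e.g. if only the low 8 bits of an i32 add are demanded, only the low 8
// bits of each operand matter, since carries never propagate downwards.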
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
SDNodeFlags Flags = Op.getNode()->getFlags();
unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
Depth + 1) ||
SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
return TLO.CombineTo(Op, NewOp);
}
return true;
}
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
return TLO.CombineTo(Op, NewOp);
}
}
// If we have a constant operand, we may be able to turn it into -1 if we
// do not demand the high bits. This can make the constant smaller to
// encode, allow more general folding, or match specialized instruction
// patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
// is probably not useful (and could be detrimental).
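// e.g. with only the low 8 bits demanded, (add %x, 255) can use -1 instead,
// i.e. (add %x, -1) == %x - 1, which feeds patterns such as x86's 'blsr'
// (x & (x - 1)).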
ConstantSDNode *C = isConstOrConstSplat(Op1);
APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
if (C && !C->isAllOnesValue() && !C->isOne() &&
(C->getAPIntValue() | HighMask).isAllOnesValue()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
return TLO.CombineTo(Op, NewOp);
}
LLVM_FALLTHROUGH;
}
default:
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
Known, TLO, Depth))
return true;
break;
}
// Just use computeKnownBits to compute output bits.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
// If we know the value of all of the demanded bits, return this as a
// constant.
if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDNode *Op :
llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
if (C->isOpaque())
return false;
}
if (VT.isInteger())
return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
if (VT.isFloatingPoint())
return TLO.CombineTo(
Op,
TLO.DAG.getConstantFP(
APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
}
return false;
}
bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
const APInt &DemandedElts,
APInt &KnownUndef,
APInt &KnownZero,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
bool Simplified =
SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
if (Simplified) {
DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
}
return Simplified;
}
/// Given a vector binary operation and known undefined elements for each input
/// operand, compute whether each element of the output is undefined.
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
const APInt &UndefOp0,
const APInt &UndefOp1) {
EVT VT = BO.getValueType();
assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
"Vector binop only");
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
assert(UndefOp0.getBitWidth() == NumElts &&
UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
const APInt &UndefVals) {
if (UndefVals[Index])
return DAG.getUNDEF(EltVT);
if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
// Try hard to make sure that the getNode() call is not creating temporary
// nodes. Ignore opaque integers because they do not constant fold.
SDValue Elt = BV->getOperand(Index);
auto *C = dyn_cast<ConstantSDNode>(Elt);
if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
return Elt;
}
return SDValue();
};
APInt KnownUndef = APInt::getNullValue(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// If both inputs for this element are either constant or undef and match
// the element type, compute the constant/undef result for this element of
// the vector.
// TODO: Ideally we would use FoldConstantArithmetic() here, but that does
// not handle FP constants. The code within getNode() should be refactored
// to avoid the danger of creating a bogus temporary node here.
SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
KnownUndef.setBit(i);
}
return KnownUndef;
}
bool TargetLowering::SimplifyDemandedVectorElts(
SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
unsigned Opcode = Op.getOpcode();
APInt DemandedElts = OriginalDemandedElts;
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
KnownUndef = KnownZero = APInt::getNullValue(NumElts);
// TODO: For now we assume we know nothing about scalable vectors.
if (VT.isScalableVector())
return false;
assert(VT.getVectorNumElements() == NumElts &&
"Mask size mismatches value type element count!");
// Undef operand.
if (Op.isUndef()) {
KnownUndef.setAllBits();
return false;
}
// If Op has other users, assume that all elements are needed.
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
DemandedElts.setAllBits();
// Not demanding any elements from Op.
if (DemandedElts == 0) {
KnownUndef.setAllBits();
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
}
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;
SDLoc DL(Op);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
// Helper for demanding the specified elements and all the bits of both binary
// operands.
auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
TLO.DAG, Depth + 1);
SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
TLO.DAG, Depth + 1);
if (NewOp0 || NewOp1) {
SDValue NewOp = TLO.DAG.getNode(
Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
return TLO.CombineTo(Op, NewOp);
}
return false;
};
switch (Opcode) {
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0]) {
KnownUndef.setAllBits();
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
}
SDValue ScalarSrc = Op.getOperand(0);
if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue Src = ScalarSrc.getOperand(0);
SDValue Idx = ScalarSrc.getOperand(1);
EVT SrcVT = Src.getValueType();
ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
if (SrcEltCnt.isScalable())
return false;
unsigned NumSrcElts = SrcEltCnt.getFixedValue();
if (isNullConstant(Idx)) {
APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
}
}
KnownUndef.setHighBits(NumElts - 1);
break;
}
case ISD::BITCAST: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
// We only handle vectors here.
// TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
if (!SrcVT.isVector())
break;
// Fast handling of 'identity' bitcasts.
unsigned NumSrcElts = SrcVT.getVectorNumElements();
if (NumSrcElts == NumElts)
return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1);
APInt SrcZero, SrcUndef;
APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
// When bitcasting from a 'large element' src vector to a 'small element'
// vector, we must demand a source element if any DemandedElt maps to it.
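// e.g. for (bitcast <2 x i64> %v to <4 x i32>), demanded output elements
// 0-1 map to source element 0 and elements 2-3 map to source element 1.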
if ((NumElts % NumSrcElts) == 0) {
unsigned Scale = NumElts / NumSrcElts;
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i])
SrcDemandedElts.setBit(i / Scale);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
// Try calling SimplifyDemandedBits, converting demanded elts to the bits
// of the large element.
// TODO - bigendian once we have test coverage.
if (TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Ofs = (i % Scale) * EltSizeInBits;
SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
}
KnownBits Known;
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
TLO, Depth + 1))
return true;
}
// If the src element is zero/undef then all the output elements it covers
// will be too - only the demanded output elements are guaranteed to be
// correct.
for (unsigned i = 0; i != NumSrcElts; ++i) {
if (SrcDemandedElts[i]) {
if (SrcZero[i])
KnownZero.setBits(i * Scale, (i + 1) * Scale);
if (SrcUndef[i])
KnownUndef.setBits(i * Scale, (i + 1) * Scale);
}
}
}
// When bitcasting from a 'small element' src vector to a 'large element'
// vector, we demand all the smaller source elements covered by each larger
// demanded element of this vector.
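// e.g. for (bitcast <4 x i32> %v to <2 x i64>), demanding output element 1
// demands source elements 2 and 3.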
if ((NumSrcElts % NumElts) == 0) {
unsigned Scale = NumSrcElts / NumElts;
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i])
SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
// If all the src elements covering an output element are zero/undef, then
// the output element will be as well, assuming it was demanded.
for (unsigned i = 0; i != NumElts; ++i) {
if (DemandedElts[i]) {
if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
KnownZero.setBit(i);
if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
KnownUndef.setBit(i);
}
}
}
break;
}
case ISD::BUILD_VECTOR: {
// Check all elements and simplify any unused elements with UNDEF.
if (!DemandedElts.isAllOnesValue()) {
// Don't simplify BROADCASTS.
if (llvm::any_of(Op->op_values(),
[&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
bool Updated = false;
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i] && !Ops[i].isUndef()) {
Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
KnownUndef.setBit(i);
Updated = true;
}
}
if (Updated)
return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
}
}
for (unsigned i = 0; i != NumElts; ++i) {
SDValue SrcOp = Op.getOperand(i);
if (SrcOp.isUndef()) {
KnownUndef.setBit(i);
} else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
(isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
KnownZero.setBit(i);
}
}
break;
}
case ISD::CONCAT_VECTORS: {
EVT SubVT = Op.getOperand(0).getValueType();
unsigned NumSubVecs = Op.getNumOperands();
unsigned NumSubElts = SubVT.getVectorNumElements();
for (unsigned i = 0; i != NumSubVecs; ++i) {
SDValue SubOp = Op.getOperand(i);
APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
APInt SubUndef, SubZero;
if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
Depth + 1))
return true;
KnownUndef.insertBits(SubUndef, i * NumSubElts);
KnownZero.insertBits(SubZero, i * NumSubElts);
}
break;
}
case ISD::INSERT_SUBVECTOR: {
// Demand any elements from the subvector and the remainder from the src it
// is inserted into.
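// e.g. for (insert_subvector <8 x i32> %src, <4 x i32> %sub, 4), demanded
// elements 4-7 are routed to %sub and the remaining demanded elements to
// %src.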
SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
APInt SubUndef, SubZero;
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
Depth + 1))
return true;
// If none of the src operand elements are demanded, replace it with undef.
if (!DemandedSrcElts && !Src.isUndef())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
TLO.DAG.getUNDEF(VT), Sub,
Op.getOperand(2)));
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
TLO, Depth + 1))
return true;
KnownUndef.insertBits(SubUndef, Idx);
KnownZero.insertBits(SubZero, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedSrcElts.isAllOnesValue() ||
!DemandedSubElts.isAllOnesValue()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
Sub, DemandedSubElts, TLO.DAG, Depth + 1);
if (NewSrc || NewSub) {
NewSrc = NewSrc ? NewSrc : Src;
NewSub = NewSub ? NewSub : Sub;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
NewSub, Op.getOperand(2));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
if (Src.getValueType().isScalableVector())
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
KnownUndef = SrcUndef.extractBits(NumElts, Idx);
KnownZero = SrcZero.extractBits(NumElts, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedElts.isAllOnesValue()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSrc) {
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
Op.getOperand(1));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
// For a legal, constant insertion index, if we don't need this insertion
// then strip it, else remove it from the demanded elts.
if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
unsigned Idx = CIdx->getZExtValue();
if (!DemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
APInt DemandedVecElts(DemandedElts);
DemandedVecElts.clearBit(Idx);
if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
KnownUndef.setBitVal(Idx, Scl.isUndef());
KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
break;
}
APInt VecUndef, VecZero;
if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
Depth + 1))
return true;
// Without knowing the insertion index we can't set KnownUndef/KnownZero.
break;
}
case ISD::VSELECT: {
// Try to transform the select condition based on the current demanded
// elements.
// TODO: If a condition element is undef, we can choose from one arm of the
// select (and if one arm is undef, then we can propagate that to the
// result).
// TODO - add support for constant vselect masks (see IR version of this).
APInt UnusedUndef, UnusedZero;
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
UnusedZero, TLO, Depth + 1))
return true;
// See if we can simplify either vselect operand.
APInt DemandedLHS(DemandedElts);
APInt DemandedRHS(DemandedElts);
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
ZeroLHS, TLO, Depth + 1))
return true;
if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
ZeroRHS, TLO, Depth + 1))
return true;
KnownUndef = UndefLHS & UndefRHS;
KnownZero = ZeroLHS & ZeroRHS;
break;
}
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from the shuffle operands.
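// e.g. for a <4 x i32> shuffle with mask <0, 5, 2, 7>, demanded result
// elements 0 and 2 demand LHS elements 0 and 2, while result elements 1 and
// 3 demand RHS elements 1 and 3.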
APInt DemandedLHS(NumElts, 0);
APInt DemandedRHS(NumElts, 0);
for (unsigned i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (M < 0 || !DemandedElts[i])
continue;
assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
if (M < (int)NumElts)
DemandedLHS.setBit(M);
else
DemandedRHS.setBit(M - NumElts);
}
// See if we can simplify either shuffle operand.
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
ZeroLHS, TLO, Depth + 1))
return true;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
ZeroRHS, TLO, Depth + 1))
return true;
// Simplify mask using undef elements from LHS/RHS.
bool Updated = false;
bool IdentityLHS = true, IdentityRHS = true;
SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
for (unsigned i = 0; i != NumElts; ++i) {
int &M = NewMask[i];
if (M < 0)
continue;
if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
(M >= (int)NumElts && UndefRHS[M - NumElts])) {
Updated = true;
M = -1;
}
IdentityLHS &= (M < 0) || (M == (int)i);
IdentityRHS &= (M < 0) || ((M - NumElts) == i);
}
// Update legal shuffle masks based on demanded elements if it won't reduce
// to an identity mask, which could cause premature removal of the shuffle.
if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
SDValue LegalShuffle =
buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
NewMask, TLO.DAG);
if (LegalShuffle)
return TLO.CombineTo(Op, LegalShuffle);
}
// Propagate undef/zero elements from LHS/RHS.
for (unsigned i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (M < 0) {
KnownUndef.setBit(i);
} else if (M < (int)NumElts) {
if (UndefLHS[M])
KnownUndef.setBit(i);
if (ZeroLHS[M])
KnownZero.setBit(i);
} else {
if (UndefRHS[M - NumElts])
KnownUndef.setBit(i);
if (ZeroRHS[M - NumElts])
KnownZero.setBit(i);
}
}
break;
}
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
APInt SrcUndef, SrcZero;
SDValue Src = Op.getOperand(0);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
KnownZero = SrcZero.zextOrTrunc(NumElts);
KnownUndef = SrcUndef.zextOrTrunc(NumElts);
if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
// aext - if we just need the bottom element then we can bitcast.
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
}
if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
// zext(undef) upper bits are guaranteed to be zero.
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
KnownUndef.clearAllBits();
}
break;
}
// TODO: There are more binop opcodes that could be handled here - MIN,
// MAX, saturated math, etc.
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
Depth + 1))
return true;
APInt UndefLHS, ZeroLHS;
if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
Depth + 1))
return true;
KnownZero = ZeroLHS & ZeroRHS;
KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
if (!DemandedElts.isAllOnesValue())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
}
case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
case ISD::ROTL:
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
Depth + 1))
return true;
APInt UndefLHS, ZeroLHS;
if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
Depth + 1))
return true;
KnownZero = ZeroLHS;
KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
if (!DemandedElts.isAllOnesValue())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
}
case ISD::MUL:
case ISD::AND: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
TLO, Depth + 1))
return true;
// If either side has a zero element, then the result element is zero, even
// if the other is an UNDEF.
// TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
// and then handle 'and' nodes with the rest of the binop opcodes.
KnownZero |= SrcZero;
KnownUndef &= SrcUndef;
KnownUndef &= ~KnownZero;
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
if (!DemandedElts.isAllOnesValue())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
}
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
if (Op.getOpcode() == ISD::ZERO_EXTEND) {
// zext(undef) upper bits are guaranteed to be zero.
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
KnownUndef.clearAllBits();
}
break;
default: {
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
KnownZero, TLO, Depth))
return true;
} else {
KnownBits Known;
APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
TLO, Depth, AssumeSingleUse))
return true;
}
break;
}
}
assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
// Constant fold all undef cases.
// TODO: Handle zero cases as well.
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
return false;
}
/// Determine which of the bits specified in Mask are known to be either zero or
/// one and return them in the Known.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
Known.resetAll();
}
void TargetLowering::computeKnownBitsForTargetInstr(
GISelKnownBits &Analysis, Register R, KnownBits &Known,
const APInt &DemandedElts, const MachineRegisterInfo &MRI,
unsigned Depth) const {
Known.resetAll();
}
void TargetLowering::computeKnownBitsForFrameIndex(
const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
// The low bits are known zero if the pointer is aligned.
Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
}
Align TargetLowering::computeKnownAlignForTargetInstr(
GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
unsigned Depth) const {
return Align(1);
}
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
const APInt &,
const SelectionDAG &,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use ComputeNumSignBits if you don't know whether Op"
" is a target node!");
return 1;
}
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
const MachineRegisterInfo &MRI, unsigned Depth) const {
return 1;
}
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
TargetLoweringOpt &TLO, unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use SimplifyDemandedVectorElts if you don't know whether Op"
" is a target node!");
return false;
}
bool TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use SimplifyDemandedBits if you don't know whether Op"
" is a target node!");
computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
return false;
}
SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
assert(
(Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
" is a target node!");
return SDValue();
}
SDValue
TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
SDValue N1, MutableArrayRef<int> Mask,
SelectionDAG &DAG) const {
bool LegalMask = isShuffleMaskLegal(Mask, VT);
if (!LegalMask) {
std::swap(N0, N1);
ShuffleVectorSDNode::commuteMask(Mask);
LegalMask = isShuffleMaskLegal(Mask, VT);
}
if (!LegalMask)
return SDValue();
return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
}
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
return nullptr;
}
bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
bool PoisonOnly, unsigned Depth) const {
assert(
(Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
" is a target node!");
return false;
}
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use isKnownNeverNaN if you don't know whether Op"
" is a target node!");
return false;
}
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
bool TargetLowering::isConstTrueVal(const SDNode *N) const {
if (!N)
return false;
APInt CVal;
if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
CVal = CN->getAPIntValue();
} else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
auto *CN = BV->getConstantSplatNode();
if (!CN)
return false;
// If this is a truncating build vector, truncate the splat value.
// Otherwise, we may fail to match the expected values below.
unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
CVal = CN->getAPIntValue();
if (BVEltWidth < CVal.getBitWidth())
CVal = CVal.trunc(BVEltWidth);
} else {
return false;
}
switch (getBooleanContents(N->getValueType(0))) {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
return CVal.isOneValue();
case ZeroOrNegativeOneBooleanContent:
return CVal.isAllOnesValue();
}
llvm_unreachable("Invalid boolean contents");
}
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!N)
return false;
const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
if (!CN) {
const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
// Only interested in constant splats; we don't care about undef
// elements when identifying boolean constants, and getConstantSplatNode
// returns NULL if all ops are undef.
CN = BV->getConstantSplatNode();
if (!CN)
return false;
}
if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
return !CN->getAPIntValue()[0];
return CN->isNullValue();
}
bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
bool SExt) const {
if (VT == MVT::i1)
return N->isOne();
TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
switch (Cnt) {
case TargetLowering::ZeroOrOneBooleanContent:
// An extended value of 1 is always true, unless its original type is i1,
// in which case it will be sign extended to -1.
return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
case TargetLowering::UndefinedBooleanContent:
case TargetLowering::ZeroOrNegativeOneBooleanContent:
return N->isAllOnesValue() && SExt;
}
llvm_unreachable("Unexpected enumeration.");
}
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, const SDLoc &DL,
DAGCombinerInfo &DCI) const {
// Match these patterns in any of their permutations:
// (X & Y) == Y
// (X & Y) != Y
if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
std::swap(N0, N1);
EVT OpVT = N0.getValueType();
if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
(Cond != ISD::SETEQ && Cond != ISD::SETNE))
return SDValue();
SDValue X, Y;
if (N0.getOperand(0) == N1) {
X = N0.getOperand(1);
Y = N0.getOperand(0);
} else if (N0.getOperand(1) == N1) {
X = N0.getOperand(0);
Y = N0.getOperand(1);
} else {
return SDValue();
}
SelectionDAG &DAG = DCI.DAG;
SDValue Zero = DAG.getConstant(0, DL, OpVT);
if (DAG.isKnownToBeAPowerOfTwo(Y)) {
// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
// Note that where Y is variable and is known to have at most one bit set
// (for example, if it is Z & 1) we cannot do this; the expressions are not
// equivalent when Y == 0.
assert(OpVT.isInteger());
Cond = ISD::getSetCCInverse(Cond, OpVT);
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(Cond, N0.getSimpleValueType()))
return DAG.getSetCC(DL, VT, N0, Zero, Cond);
} else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
// If the target supports an 'and-not' or 'and-complement' logic operation,
// try to use that to make a comparison operation more efficient.
// But don't do this transform if the mask is a single bit because there are
// more efficient ways to deal with that case (for example, 'bt' on x86 or
// 'rlwinm' on PPC).
// Bail out if the compare operand that we want to turn into a zero is
// already a zero (otherwise, infinite loop).
auto *YConst = dyn_cast<ConstantSDNode>(Y);
if (YConst && YConst->isNullValue())
return SDValue();
// Transform this into: ~X & Y == 0.
SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
}
return SDValue();
}
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at the IR level may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x); that will be constant-folded to true/false.
/// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
/// We will unfold it into the natural trunc+sext pattern:
/// ((%x << C) a>> C) dstcond %x
/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
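/// For example, (setult (add i16 %x, 128), 256) unfolds to
/// (seteq (sra (shl %x, 8), 8), %x).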
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
const SDLoc &DL) const {
// We must be comparing with a constant.
ConstantSDNode *C1;
if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
return SDValue();
// N0 should be: add %x, (1 << (KeptBits-1))
if (N0->getOpcode() != ISD::ADD)
return SDValue();
// And we must be 'add'ing a constant.
ConstantSDNode *C01;
if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
return SDValue();
SDValue X = N0->getOperand(0);
EVT XVT = X.getValueType();
// Validate constants ...
APInt I1 = C1->getAPIntValue();
ISD::CondCode NewCond;
if (Cond == ISD::CondCode::SETULT) {
NewCond = ISD::CondCode::SETEQ;
} else if (Cond == ISD::CondCode::SETULE) {
NewCond = ISD::CondCode::SETEQ;
// But need to 'canonicalize' the constant.
I1 += 1;
} else if (Cond == ISD::CondCode::SETUGT) {
NewCond = ISD::CondCode::SETNE;
// But need to 'canonicalize' the constant.
I1 += 1;
} else if (Cond == ISD::CondCode::SETUGE) {
NewCond = ISD::CondCode::SETNE;
} else
return SDValue();
APInt I01 = C01->getAPIntValue();
auto checkConstants = [&I1, &I01]() -> bool {
// Both constants must be powers of two, and the one from the setcc must be
// bigger.
return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
};
if (checkConstants()) {
// Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
} else {
// What if we invert constants? (and the target predicate)
I1.negate();
I01.negate();
assert(XVT.isInteger());
NewCond = getSetCCInverse(NewCond, XVT);
if (!checkConstants())
return SDValue();
// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
}
// They are power-of-two, so which bit is set?
const unsigned KeptBits = I1.logBase2();
const unsigned KeptBitsMinusOne = I01.logBase2();
// Magic!
if (KeptBits != (KeptBitsMinusOne + 1))
return SDValue();
assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
// We don't want to do this in every single case.
SelectionDAG &DAG = DCI.DAG;
if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
XVT, KeptBits))
return SDValue();
const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
// Unfold into: ((%x << C) a>> C) cond %x
// Where 'cond' will be either 'eq' or 'ne'.
SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
return T2;
}
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL) const {
assert(isConstOrConstSplat(N1C) &&
isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
"Should be a comparison with 0.");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Valid only for [in]equality comparisons.");
unsigned NewShiftOpcode;
SDValue X, C, Y;
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Look for '(C l>>/<< Y)'.
auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
// The shift should be one-use.
if (!V.hasOneUse())
return false;
unsigned OldShiftOpcode = V.getOpcode();
switch (OldShiftOpcode) {
case ISD::SHL:
NewShiftOpcode = ISD::SRL;
break;
case ISD::SRL:
NewShiftOpcode = ISD::SHL;
break;
default:
return false; // must be a logical shift.
}
// We should be shifting a constant.
// FIXME: best to use isConstantOrConstantVector().
C = V.getOperand(0);
ConstantSDNode *CC =
isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
if (!CC)
return false;
Y = V.getOperand(1);
ConstantSDNode *XC =
isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
};
// The LHS of the comparison should be a one-use 'and'.
if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
X = N0.getOperand(0);
SDValue Mask = N0.getOperand(1);
// 'and' is commutative!
if (!Match(Mask)) {
std::swap(X, Mask);
if (!Match(Mask))
return SDValue();
}
EVT VT = X.getValueType();
// Produce:
// ((X 'OppositeShiftOpcode' Y) & C) Cond 0
SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
return T2;
}
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, const SDLoc &DL,
DAGCombinerInfo &DCI) const {
unsigned BOpcode = N0.getOpcode();
assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
"Unexpected binop");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
// (X + Y) == X --> Y == 0
// (X - Y) == X --> Y == 0
// (X ^ Y) == X --> Y == 0
SelectionDAG &DAG = DCI.DAG;
EVT OpVT = N0.getValueType();
SDValue X = N0.getOperand(0);
SDValue Y = N0.getOperand(1);
if (X == N1)
return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
if (Y != N1)
return SDValue();
// (X + Y) == Y --> X == 0
// (X ^ Y) == Y --> X == 0
if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
// The shift would not be valid if the operands are boolean (i1).
if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
return SDValue();
// (X - Y) == Y --> X == Y << 1
EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
!DCI.isBeforeLegalize());
SDValue One = DAG.getConstant(1, DL, ShiftVT);
SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(YShl1.getNode());
return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
SDValue N0, const APInt &C1,
ISD::CondCode Cond, const SDLoc &dl,
SelectionDAG &DAG) {
// Look through truncs that don't change the value of a ctpop.
// FIXME: Add vector support? Need to be careful with setcc result type below.
SDValue CTPOP = N0;
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
CTPOP = N0.getOperand(0);
if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
return SDValue();
EVT CTVT = CTPOP.getValueType();
SDValue CTOp = CTPOP.getOperand(0);
// If this is a vector CTPOP, keep the CTPOP if it is legal.
// TODO: Should we check if CTPOP is legal(or custom) for scalars?
if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
return SDValue();
// (ctpop x) u< 2 -> (x & x-1) == 0
// (ctpop x) u> 1 -> (x & x-1) != 0
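// Each x &= (x - 1) step below clears the lowest set bit, so after N steps
// the value is zero iff (ctpop x) u<= N.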
if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
return SDValue();
if (C1 == 0 && (Cond == ISD::SETULT))
return SDValue(); // This is handled elsewhere.
unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
SDValue Result = CTOp;
for (unsigned i = 0; i < Passes; i++) {
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
}
ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
}
// If ctpop is not supported, expand a power-of-2 comparison based on it.
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
// For scalars, keep CTPOP if it is legal or custom.
if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
return SDValue();
// This is based on X86's custom lowering for CTPOP which produces more
// instructions than the expansion here.
// (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
// (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
assert(CTVT.isInteger());
ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
}
return SDValue();
}
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, bool foldBooleans,
DAGCombinerInfo &DCI,
const SDLoc &dl) const {
SelectionDAG &DAG = DCI.DAG;
const DataLayout &Layout = DAG.getDataLayout();
EVT OpVT = N0.getValueType();
// Constant fold or commute setcc.
if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
return Fold;
// Ensure that the constant occurs on the RHS and fold constant comparisons.
// TODO: Handle non-splat vector constants. All undef causes trouble.
// FIXME: We can't yet fold constant scalable vector splats, so avoid an
// infinite loop here when we encounter one.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
if (isConstOrConstSplat(N0) &&
(!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
// If we have a subtract with the same 2 non-constant operands as this setcc
// -- but in reverse order -- then try to commute the operands of this setcc
// to match. A matching pair of setcc (cmp) and sub may be combined into 1
// instruction on some targets.
if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
!DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
if (auto *N1C = isConstOrConstSplat(N1)) {
const APInt &C1 = N1C->getAPIntValue();
// Optimize some CTPOP cases.
if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
return V;
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
isPowerOf2_32(N0.getScalarValueSizeInBits())) {
if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
if ((C1 == 0) == (Cond == ISD::SETEQ)) {
// (srl (ctlz x), 5) == 0 -> X != 0
// (srl (ctlz x), 5) != 1 -> X != 0
Cond = ISD::SETNE;
} else {
// (srl (ctlz x), 5) != 0 -> X == 0
// (srl (ctlz x), 5) == 1 -> X == 0
Cond = ISD::SETEQ;
}
SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
Cond);
}
}
}
}
// FIXME: Support vectors.
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
// (zext x) == C --> x == (trunc C)
// (sext x) == C --> x == (trunc C)
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
DCI.isBeforeLegalize() && N0->hasOneUse()) {
unsigned MinBits = N0.getValueSizeInBits();
SDValue PreExt;
bool Signed = false;
if (N0->getOpcode() == ISD::ZERO_EXTEND) {
// ZExt
MinBits = N0->getOperand(0).getValueSizeInBits();
PreExt = N0->getOperand(0);
} else if (N0->getOpcode() == ISD::AND) {
// DAGCombine turns costly ZExts into ANDs
if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
if ((C->getAPIntValue()+1).isPowerOf2()) {
MinBits = C->getAPIntValue().countTrailingOnes();
PreExt = N0->getOperand(0);
}
} else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
// SExt
MinBits = N0->getOperand(0).getValueSizeInBits();
PreExt = N0->getOperand(0);
Signed = true;
} else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
// ZEXTLOAD / SEXTLOAD
if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
MinBits = LN0->getMemoryVT().getSizeInBits();
PreExt = N0;
} else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
Signed = true;
MinBits = LN0->getMemoryVT().getSizeInBits();
PreExt = N0;
}
}
// Figure out how many bits we need to preserve this constant.
unsigned ReqdBits = Signed ?
C1.getBitWidth() - C1.getNumSignBits() + 1 :
C1.getActiveBits();
// Make sure we're not losing bits from the constant.
if (MinBits > 0 &&
MinBits < C1.getBitWidth() &&
MinBits >= ReqdBits) {
EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
// Will get folded away.
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
if (MinBits == 1 && C1 == 1)
// Invert the condition.
return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
}
// If truncating the setcc operands is not desirable, we can still
// simplify the expression in some cases:
// setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
// setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
// setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
// setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
// setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
// setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
SDValue TopSetCC = N0->getOperand(0);
unsigned N0Opc = N0->getOpcode();
bool SExt = (N0Opc == ISD::SIGN_EXTEND);
if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
TopSetCC.getOpcode() == ISD::SETCC &&
(N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
(isConstFalseVal(N1C) ||
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
(!N1C->isNullValue() && Cond == ISD::SETNE);
if (!Inverse)
return TopSetCC;
ISD::CondCode InvCond = ISD::getSetCCInverse(
cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
TopSetCC.getOperand(0).getValueType());
return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
TopSetCC.getOperand(1),
InvCond);
}
}
}
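// For example, with an 8-bit source: (setcc (zext i8 %x to i32), 42, eq) has
// MinBits = 8 and ReqdBits = activeBits(42) = 6, so it becomes
// (setcc %x, i8 42, eq); with MinBits == 1 and C1 == 1 the compare is instead
// inverted against 0.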
// If the LHS is '(and load, const)', the RHS is 0, the test is for
// equality or unsigned, and all 1 bits of the const are in the same
// partial word, see if we can shorten the load.
if (DCI.isBeforeLegalize() &&
!ISD::isSignedIntSetCC(Cond) &&
N0.getOpcode() == ISD::AND && C1 == 0 &&
N0.getNode()->hasOneUse() &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(0).getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
APInt bestMask;
unsigned bestWidth = 0, bestOffset = 0;
if (Lod->isSimple() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueSizeInBits();
unsigned maskWidth = origWidth;
// We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
// 8 bits, but have to be careful...
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
origWidth = Lod->getMemoryVT().getSizeInBits();
const APInt &Mask = N0.getConstantOperandAPInt(1);
for (unsigned width = origWidth / 2; width>=8; width /= 2) {
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if (Mask.isSubsetOf(newMask)) {
if (Layout.isLittleEndian())
bestOffset = (uint64_t)offset * (width/8);
else
bestOffset = (origWidth/width - offset - 1) * (width/8);
bestMask = Mask.lshr(offset * (width/8) * 8);
bestWidth = width;
break;
}
newMask <<= width;
}
}
}
if (bestWidth) {
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
if (newVT.isRound() &&
shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
Ptr =
DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
SDValue NewLoad =
DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
Lod->getPointerInfo().getWithOffset(bestOffset),
Lod->getOriginalAlign());
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
dl, newVT)),
DAG.getConstant(0LL, dl, newVT), Cond);
}
}
}
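// For example, on a little-endian target, (and (load i32 %p), 0x00FF0000) == 0
// keeps narrowing until bestWidth = 8: the mask covers only the third byte, so
// bestOffset = 2 and bestMask = 0xFF, and the test becomes
// (and (load i8, %p + 2), 0xFF) == 0.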
// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
unsigned InSize = N0.getOperand(0).getValueSizeInBits();
// If the comparison constant has bits in the upper part, the
// zero-extended value could never match.
if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
C1.getBitWidth() - InSize))) {
switch (Cond) {
case ISD::SETUGT:
case ISD::SETUGE:
case ISD::SETEQ:
return DAG.getConstant(0, dl, VT);
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETNE:
return DAG.getConstant(1, dl, VT);
case ISD::SETGT:
case ISD::SETGE:
// True if the sign bit of C1 is set.
return DAG.getConstant(C1.isNegative(), dl, VT);
case ISD::SETLT:
case ISD::SETLE:
// True if the sign bit of C1 isn't set.
return DAG.getConstant(C1.isNonNegative(), dl, VT);
default:
break;
}
}
// Otherwise, we can perform the comparison with the low bits.
switch (Cond) {
case ISD::SETEQ:
case ISD::SETNE:
case ISD::SETUGT:
case ISD::SETUGE:
case ISD::SETULT:
case ISD::SETULE: {
EVT newVT = N0.getOperand(0).getValueType();
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
NewConst, Cond);
return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
}
break;
}
default:
break; // TODO: Be more careful with signed comparisons.
}
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
!isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
OpVT)) {
EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
EVT ExtDstTy = N0.getValueType();
unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
// If the constant doesn't fit into the number of bits for the source of
// the sign extension, it is impossible for both sides to be equal.
if (C1.getMinSignedBits() > ExtSrcTyBits)
return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
assert(ExtDstTy == N0.getOperand(0).getValueType() &&
ExtDstTy != ExtSrcTy && "Unexpected types!");
APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
DAG.getConstant(Imm, dl, ExtDstTy));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
// Otherwise, make this a use of a zext.
return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
} else if ((N1C->isNullValue() || N1C->isOne()) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
(N0.getValueType() == MVT::i1 ||
getBooleanContents(N0.getOperand(0).getValueType()) ==
ZeroOrOneBooleanContent)) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
if (TrueWhenTrue)
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
if ((N0.getOpcode() == ISD::XOR ||
(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isOneConstant(N0.getOperand(1))) {
// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
// can only do this if the top bits are known zero.
unsigned BitWidth = N0.getValueSizeInBits();
if (DAG.MaskedValueIsZero(N0,
APInt::getHighBitsSet(BitWidth,
BitWidth-1))) {
// Okay, get the un-inverted input value.
SDValue Val;
if (N0.getOpcode() == ISD::XOR) {
Val = N0.getOperand(0);
} else {
assert(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR);
// ((X^1)&1)^1 -> X & 1
Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
N0.getOperand(0).getOperand(0),
N0.getOperand(1));
}
return DAG.getSetCC(dl, VT, Val, N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
} else if (N1C->isOne()) {
SDValue Op0 = N0;
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
if ((Op0.getOpcode() == ISD::XOR) &&
Op0.getOperand(0).getOpcode() == ISD::SETCC &&
Op0.getOperand(1).getOpcode() == ISD::SETCC) {
SDValue XorLHS = Op0.getOperand(0);
SDValue XorRHS = Op0.getOperand(1);
// Ensure that the input setccs return an i1 type or 0/1 value.
if (Op0.getValueType() == MVT::i1 ||
(getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
ZeroOrOneBooleanContent &&
getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
ZeroOrOneBooleanContent)) {
// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
}
}
if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
DAG.getConstant(1, dl, VT));
else if (Op0.getValueType().bitsLT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
DAG.getConstant(1, dl, VT));
return DAG.getSetCC(dl, VT, Op0,
DAG.getConstant(0, dl, Op0.getValueType()),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
if (Op0.getOpcode() == ISD::AssertZext &&
cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
return DAG.getSetCC(dl, VT, Op0,
DAG.getConstant(0, dl, Op0.getValueType()),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
}
// Given:
// icmp eq/ne (urem %x, %y), 0
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
// icmp eq/ne %x, 0
if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
}
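// For example, if known bits prove %x is either 0 or a single power of two
// while %y has at least two bits set, then %y cannot divide a nonzero %x
// (the only divisors of 2^k are powers of two), so the remainder is zero
// exactly when %x itself is zero.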
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
}
// These simplifications apply to splat vectors as well.
// TODO: Handle more splat vector cases.
if (auto *N1C = isConstOrConstSplat(N1)) {
const APInt &C1 = N1C->getAPIntValue();
APInt MinVal, MaxVal;
unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
if (ISD::isSignedIntSetCC(Cond)) {
MinVal = APInt::getSignedMinValue(OperandBitSize);
MaxVal = APInt::getSignedMaxValue(OperandBitSize);
} else {
MinVal = APInt::getMinValue(OperandBitSize);
MaxVal = APInt::getMaxValue(OperandBitSize);
}
// Canonicalize GE/LE comparisons to use GT/LT comparisons.
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
// X >= MIN --> true
if (C1 == MinVal)
return DAG.getBoolConstant(true, dl, VT, OpVT);
if (!VT.isVector()) { // TODO: Support this for vectors.
// X >= C0 --> X > (C0 - 1)
APInt C = C1 - 1;
ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
if ((DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
(!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
isLegalICmpImmediate(C.getSExtValue())))) {
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C, dl, N1.getValueType()),
NewCC);
}
}
}
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
// X <= MAX --> true
if (C1 == MaxVal)
return DAG.getBoolConstant(true, dl, VT, OpVT);
// X <= C0 --> X < (C0 + 1)
if (!VT.isVector()) { // TODO: Support this for vectors.
APInt C = C1 + 1;
ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
if ((DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
(!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
isLegalICmpImmediate(C.getSExtValue())))) {
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C, dl, N1.getValueType()),
NewCC);
}
}
}
if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
if (C1 == MinVal)
return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
// TODO: Support this for vectors after legalize ops.
if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
// Canonicalize setlt X, Max --> setne X, Max
if (C1 == MaxVal)
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
// If we have setult X, 1, turn it into seteq X, 0
if (C1 == MinVal+1)
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(MinVal, dl, N0.getValueType()),
ISD::SETEQ);
}
}
if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
if (C1 == MaxVal)
return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
// TODO: Support this for vectors after legalize ops.
if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
// Canonicalize setgt X, Min --> setne X, Min
if (C1 == MinVal)
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
// If we have setugt X, Max-1, turn it into seteq X, Max
if (C1 == MaxVal-1)
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(MaxVal, dl, N0.getValueType()),
ISD::SETEQ);
}
}
if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
if (C1.isNullValue())
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
VT, N0, N1, Cond, DCI, dl))
return CC;
// For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
// For example, when high 32-bits of i64 X are known clear:
// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
bool CmpZero = N1C->getAPIntValue().isNullValue();
bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
// Match or(lo,shl(hi,bw/2)) pattern.
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
unsigned EltBits = V.getScalarValueSizeInBits();
if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
return false;
SDValue LHS = V.getOperand(0);
SDValue RHS = V.getOperand(1);
APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
// Unshifted element must have zero upper bits.
if (RHS.getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(RHS.getOperand(1)) &&
RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
DAG.MaskedValueIsZero(LHS, HiBits)) {
Lo = LHS;
Hi = RHS.getOperand(0);
return true;
}
if (LHS.getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
DAG.MaskedValueIsZero(RHS, HiBits)) {
Lo = RHS;
Hi = LHS.getOperand(0);
return true;
}
return false;
};
auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
unsigned EltBits = N0.getScalarValueSizeInBits();
unsigned HalfBits = EltBits / 2;
APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
SDValue NewN0 =
DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
};
SDValue Lo, Hi;
if (IsConcat(N0, Lo, Hi))
return MergeConcat(Lo, Hi);
if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
SDValue Lo0, Lo1, Hi0, Hi1;
if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
IsConcat(N0.getOperand(1), Lo1, Hi1)) {
return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
}
}
}
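// For example, for an i64 %x whose high 32 bits are known zero:
//   (%x | (%y << 32)) ==  0  -->  (%x | (%y & 0xFFFFFFFF)) == 0
//   (%x | (%y << 32)) == -1  -->  (%x & (%y & 0xFFFFFFFF)) == 0xFFFFFFFF
// i.e. the shift disappears and only a half-width mask remains.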
}
// If we have "setcc X, C0", check to see if we can shrink the immediate
// by changing cc.
// TODO: Support this for vectors after legalize ops.
if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
// SETUGT X, SINTMAX -> SETLT X, 0
// SETUGE X, SINTMIN -> SETLT X, 0
if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
(Cond == ISD::SETUGE && C1.isMinSignedValue()))
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(0, dl, N1.getValueType()),
ISD::SETLT);
// SETULT X, SINTMIN -> SETGT X, -1
// SETULE X, SINTMAX -> SETGT X, -1
if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
(Cond == ISD::SETULE && C1.isMaxSignedValue()))
return DAG.getSetCC(dl, VT, N0,
DAG.getAllOnesConstant(dl, N1.getValueType()),
ISD::SETGT);
}
}
// Back to non-vector simplifications.
// TODO: Can we do these for vector splats?
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const APInt &C1 = N1C->getAPIntValue();
EVT ShValTy = N0.getValueType();
// Fold bit comparisons when we can. This will result in an
// incorrect value when boolean false is negative one, unless
// the bitsize is 1 in which case the false value is the same
// in practice regardless of the representation.
if ((VT.getSizeInBits() == 1 ||
getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
(VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
EVT ShiftTy =
getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
unsigned ShCt = AndRHS->getAPIntValue().logBase2();
if (AndRHS->getAPIntValue().isPowerOf2() &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShCt, dl, ShiftTy)));
}
} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
// (X & 8) == 8 --> (X & 8) >> 3
// Perform the xform if C1 is a single bit.
unsigned ShCt = C1.logBase2();
if (C1.isPowerOf2() &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShCt, dl, ShiftTy)));
}
}
}
}
if (C1.getMinSignedBits() <= 64 &&
!isLegalICmpImmediate(C1.getSExtValue())) {
EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
// (X & -256) == 256 -> (X >> 8) == 1
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift =
DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
DAG.getConstant(ShiftBits, dl, ShiftTy));
SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
}
}
}
} else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
Cond == ISD::SETULE || Cond == ISD::SETUGT) {
bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
// X < 0x100000000 -> (X >> 32) < 1
// X >= 0x100000000 -> (X >> 32) >= 1
// X <= 0x0ffffffff -> (X >> 32) < 1
// X > 0x0ffffffff -> (X >> 32) >= 1
unsigned ShiftBits;
APInt NewC = C1;
ISD::CondCode NewCond = Cond;
if (AdjOne) {
ShiftBits = C1.countTrailingOnes();
NewC = NewC + 1;
NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
} else {
ShiftBits = C1.countTrailingZeros();
}
NewC.lshrInPlace(ShiftBits);
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
isLegalICmpImmediate(NewC.getSExtValue()) &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShiftBits, dl, ShiftTy));
SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
}
}
}
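// For example, if 256 is not a legal compare immediate on the target:
//   (X & 0xFFFFFF00) == 256  -->  (X >> 8) == 1    (ShiftBits = 8)
//   X u< 0x100000000 (i64)   -->  (X >> 32) u< 1   (ShiftBits = 32)
// so only the small immediate 1 needs to be materialized.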
}
if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
auto *CFP = cast<ConstantFPSDNode>(N1);
assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
// constant if knowing that the operand is non-nan is enough. We prefer to
// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
// materialize 0.0.
if (Cond == ISD::SETO || Cond == ISD::SETUO)
return DAG.getSetCC(dl, VT, N0, N0, Cond);
// setcc (fneg x), C -> setcc swap(pred) x, -C
if (N0.getOpcode() == ISD::FNEG) {
ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
}
}
// If the condition is not legal, see if we can find an equivalent one
// which is legal.
if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
// If the comparison was an awkward floating-point == or != and one of
// the comparison operands is infinity or negative infinity, convert the
// condition to a less-awkward <= or >=.
if (CFP->getValueAPF().isInfinity()) {
bool IsNegInf = CFP->getValueAPF().isNegative();
ISD::CondCode NewCond = ISD::SETCC_INVALID;
switch (Cond) {
case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
default: break;
}
if (NewCond != ISD::SETCC_INVALID &&
isCondCodeLegal(NewCond, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
}
}
if (N0 == N1) {
// The sext(setcc()) => setcc() optimization relies on the appropriate
// constant being emitted.
assert(!N0.getValueType().isInteger() &&
"Integer types should be handled by FoldSetCC");
bool EqTrue = ISD::isTrueWhenEqual(Cond);
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
if (UOF == unsigned(EqTrue))
return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
if (NewCond != Cond &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(NewCond, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getValueType().isInteger()) {
if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
N0.getOpcode() == ISD::XOR) {
// Simplify (X+Y) == (X+Z) --> Y == Z
if (N0.getOpcode() == N1.getOpcode()) {
if (N0.getOperand(0) == N1.getOperand(0))
return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
if (N0.getOperand(1) == N1.getOperand(1))
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
if (isCommutativeBinOp(N0.getOpcode())) {
// If X op Y == Y op X, try other combinations.
if (N0.getOperand(0) == N1.getOperand(1))
return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
Cond);
if (N0.getOperand(1) == N1.getOperand(0))
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
Cond);
}
}
// If RHS is a legal immediate value for a compare instruction, we need
// to be careful about increasing register pressure needlessly.
bool LegalRHSImm = false;
if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
return DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(RHSC->getAPIntValue()-
LHSR->getAPIntValue(),
dl, N0.getValueType()), Cond);
}
// Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
if (N0.getOpcode() == ISD::XOR)
// If we know that all of the inverted bits are zero, don't bother
// performing the inversion.
if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
return
DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(LHSR->getAPIntValue() ^
RHSC->getAPIntValue(),
dl, N0.getValueType()),
Cond);
}
// Turn (C1-X) == C2 --> X == C1-C2
if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
return
DAG.getSetCC(dl, VT, N0.getOperand(1),
DAG.getConstant(SUBC->getAPIntValue() -
RHSC->getAPIntValue(),
dl, N0.getValueType()),
Cond);
}
}
// Could RHSC fold directly into a compare?
if (RHSC->getValueType(0).getSizeInBits() <= 64)
LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
}
// (X+Y) == X --> Y == 0 and similar folds.
// Don't do this if X is an immediate that can fold into a cmp
// instruction and X+Y has other uses. It could be an induction variable
// chain, and the transform would increase register pressure.
if (!LegalRHSImm || N0.hasOneUse())
if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
return V;
}
if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
N1.getOpcode() == ISD::XOR)
if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
return V;
if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
return V;
}
// Fold remainder of division by a constant.
if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
if (N0.getOpcode() == ISD::UREM) {
if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
return Folded;
} else if (N0.getOpcode() == ISD::SREM) {
if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
return Folded;
}
}
}
// Fold away ALL boolean setcc's.
if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
SDValue Temp;
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETEQ: // X == Y -> ~(X^Y)
Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
N0 = DAG.getNOT(dl, Temp, OpVT);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETNE: // X != Y --> (X^Y)
N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
break;
case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
Temp = DAG.getNOT(dl, N0, OpVT);
N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
Temp = DAG.getNOT(dl, N1, OpVT);
N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
Temp = DAG.getNOT(dl, N0, OpVT);
N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
Temp = DAG.getNOT(dl, N1, OpVT);
N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
break;
}
if (VT.getScalarType() != MVT::i1) {
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(N0.getNode());
// FIXME: If running after legalize, we probably can't do this.
ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
N0 = DAG.getNode(ExtendCode, dl, VT, N0);
}
return N0;
}
// Could not fold it.
return SDValue();
}
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
int64_t &Offset) const {
SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
GA = GASD->getGlobal();
Offset += GASD->getOffset();
return true;
}
if (N->getOpcode() == ISD::ADD) {
SDValue N1 = N->getOperand(0);
SDValue N2 = N->getOperand(1);
if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
Offset += V->getSExtValue();
return true;
}
} else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
Offset += V->getSExtValue();
return true;
}
}
}
return false;
}
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
// Default implementation: no optimization.
return SDValue();
}
//===----------------------------------------------------------------------===//
// Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//
TargetLowering::ConstraintType
TargetLowering::getConstraintType(StringRef Constraint) const {
unsigned S = Constraint.size();
if (S == 1) {
switch (Constraint[0]) {
default: break;
case 'r':
return C_RegisterClass;
case 'm': // memory
case 'o': // offsetable
case 'V': // not offsetable
return C_Memory;
case 'n': // Simple Integer
case 'E': // Floating Point Constant
case 'F': // Floating Point Constant
return C_Immediate;
case 'i': // Simple Integer or Relocatable Constant
case 's': // Relocatable Constant
case 'p': // Address.
case 'X': // Allow ANY value.
case 'I': // Target registers.
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case '<':
case '>':
return C_Other;
}
}
if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
return C_Memory;
return C_Register;
}
return C_Unknown;
}
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
if (ConstraintVT.isInteger())
return "r";
if (ConstraintVT.isFloatingPoint())
return "f"; // works for many targets
return nullptr;
}
SDValue TargetLowering::LowerAsmOutputForConstraint(
SDValue &Chain, SDValue &Flag, const SDLoc &DL,
const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
return SDValue();
}
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
if (Constraint.length() > 1) return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'X': // Allows any operand; labels (basic block) use this.
if (Op.getOpcode() == ISD::BasicBlock ||
Op.getOpcode() == ISD::TargetBlockAddress) {
Ops.push_back(Op);
return;
}
LLVM_FALLTHROUGH;
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 's': { // Relocatable Constant
GlobalAddressSDNode *GA;
ConstantSDNode *C;
BlockAddressSDNode *BA;
uint64_t Offset = 0;
// Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
// etc., since getelementptr is variadic. We can't use
// SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
// while in this case the GA may be furthest from the root node which is
// likely an ISD::ADD.
while (1) {
if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
GA->getValueType(0),
Offset + GA->getOffset()));
return;
}
if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
// gcc prints these as sign extended. Sign extend value to 64 bits
// now; without this it would get ZExt'd later in
// ScheduleDAGSDNodes::EmitNode, which is very generic.
bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
BooleanContent BCont = getBooleanContents(MVT::i64);
ISD::NodeType ExtOpc =
IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
int64_t ExtVal =
ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
Ops.push_back(
DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
return;
}
if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && ConstraintLetter != 'n') {
Ops.push_back(DAG.getTargetBlockAddress(
BA->getBlockAddress(), BA->getValueType(0),
Offset + BA->getOffset(), BA->getTargetFlags()));
return;
}
const unsigned OpCode = Op.getOpcode();
if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
Op = Op.getOperand(1);
// Subtraction is not commutative.
else if (OpCode == ISD::ADD &&
(C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
Op = Op.getOperand(0);
else
return;
Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
continue;
}
return;
}
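// For example, Op = (add (add GA, 8), 4) with constraint 'i' walks this loop:
// the constants accumulate into Offset (4, then 12) until the bare
// GlobalAddressSDNode is reached, at which point a TargetGlobalAddress with
// offset GA->getOffset() + 12 is emitted.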
break;
}
}
}
std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
StringRef Constraint,
MVT VT) const {
if (Constraint.empty() || Constraint[0] != '{')
return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
std::pair<unsigned, const TargetRegisterClass *> R =
std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
// Figure out which register class contains this reg.
for (const TargetRegisterClass *RC : RI->regclasses()) {
// If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
if (!isLegalRC(*RI, *RC))
continue;
for (const MCPhysReg &PR : *RC) {
if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
std::pair<unsigned, const TargetRegisterClass *> S =
std::make_pair(PR, RC);
// If this register class has the requested value type, return it,
// otherwise keep searching and return the first class found
// if no other is found which explicitly has the requested type.
if (RI->isTypeLegalForClass(*RC, VT))
return S;
if (!R.second)
R = S;
}
}
}
return R;
}
//===----------------------------------------------------------------------===//
// Constraint Selection.
/// Return true if this is an input operand that is a matching constraint like
/// "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
assert(!ConstraintCode.empty() && "No known constraint!");
return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
assert(!ConstraintCode.empty() && "No known constraint!");
return atoi(ConstraintCode.c_str());
}
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
const TargetRegisterInfo *TRI,
const CallBase &Call) const {
/// Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
unsigned maCount = 0; // Largest number of multiple alternative constraints.
// Do a prepass over the constraints, canonicalizing them, and building up the
// ConstraintOperands list.
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
ConstraintOperands.emplace_back(std::move(CI));
AsmOperandInfo &OpInfo = ConstraintOperands.back();
// Update multiple alternative constraint count.
if (OpInfo.multipleAlternatives.size() > maCount)
maCount = OpInfo.multipleAlternatives.size();
OpInfo.ConstraintVT = MVT::Other;
// Compute the value type for each operand.
switch (OpInfo.Type) {
case InlineAsm::isOutput:
// Indirect outputs just consume an argument.
if (OpInfo.isIndirect) {
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
break;
}
// The return value of the call is this value. As such, there is no
// corresponding argument.
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
OpInfo.ConstraintVT =
getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
OpInfo.ConstraintVT =
getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
}
++ResNo;
break;
case InlineAsm::isInput:
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
break;
case InlineAsm::isClobber:
// Nothing to do.
break;
}
if (OpInfo.CallOperandVal) {
llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
if (OpInfo.isIndirect) {
llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
if (!PtrTy)
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
}
// Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
if (StructType *STy = dyn_cast<StructType>(OpTy))
if (STy->getNumElements() == 1)
OpTy = STy->getElementType(0);
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
unsigned BitSize = DL.getTypeSizeInBits(OpTy);
switch (BitSize) {
default: break;
case 1:
case 8:
case 16:
case 32:
case 64:
case 128:
OpInfo.ConstraintVT =
MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
break;
}
} else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
} else {
OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
}
}
}
// If we have multiple alternative constraints, select the best alternative.
if (!ConstraintOperands.empty()) {
if (maCount) {
unsigned bestMAIndex = 0;
int bestWeight = -1;
// weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
int weight = -1;
unsigned maIndex;
// Compute the sums of the weights for each alternative, keeping track
// of the best (highest weight) one so far.
for (maIndex = 0; maIndex < maCount; ++maIndex) {
int weightSum = 0;
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
cIndex != eIndex; ++cIndex) {
AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
if (OpInfo.Type == InlineAsm::isClobber)
continue;
// If this is an output operand with a matching input operand,
// look up the matching input. If their types mismatch, e.g. one
// is an integer, the other is floating point, or their sizes are
// different, flag it as an maCantMatch.
if (OpInfo.hasMatchingInput()) {
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(OpInfo.ConstraintVT.getSizeInBits() !=
Input.ConstraintVT.getSizeInBits())) {
weightSum = -1; // Can't match.
break;
}
}
}
weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
if (weight == -1) {
weightSum = -1;
break;
}
weightSum += weight;
}
// Update best.
if (weightSum > bestWeight) {
bestWeight = weightSum;
bestMAIndex = maIndex;
}
}
// Now select chosen alternative in each constraint.
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
cIndex != eIndex; ++cIndex) {
AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
if (cInfo.Type == InlineAsm::isClobber)
continue;
cInfo.selectAlternative(bestMAIndex);
}
}
}
// Check and hook up tied operands, choose constraint code to use.
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
cIndex != eIndex; ++cIndex) {
AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
// error.
if (OpInfo.hasMatchingInput()) {
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
std::pair<unsigned, const TargetRegisterClass *> InputRC =
getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
report_fatal_error("Unsupported asm: input constraint"
" with a matching output constraint of"
" incompatible type!");
}
}
}
}
return ConstraintOperands;
}
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
case TargetLowering::C_Register:
return 1;
case TargetLowering::C_RegisterClass:
return 2;
case TargetLowering::C_Memory:
return 3;
}
llvm_unreachable("Invalid constraint type");
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getMultipleConstraintMatchWeight(
AsmOperandInfo &info, int maIndex) const {
InlineAsm::ConstraintCodeVector *rCodes;
if (maIndex >= (int)info.multipleAlternatives.size())
rCodes = &info.Codes;
else
rCodes = &info.multipleAlternatives[maIndex].Codes;
ConstraintWeight BestWeight = CW_Invalid;
// Loop over the options, keeping track of the most general one.
for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
ConstraintWeight weight =
getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
if (weight > BestWeight)
BestWeight = weight;
}
return BestWeight;
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
// Look at the constraint type.
switch (*constraint) {
case 'i': // immediate integer.
case 'n': // immediate integer with a known value.
if (isa<ConstantInt>(CallOperandVal))
weight = CW_Constant;
break;
case 's': // non-explicit integral immediate.
if (isa<GlobalValue>(CallOperandVal))
weight = CW_Constant;
break;
case 'E': // immediate float if host format.
case 'F': // immediate float.
if (isa<ConstantFP>(CallOperandVal))
weight = CW_Constant;
break;
case '<': // memory operand with autodecrement.
case '>': // memory operand with autoincrement.
case 'm': // memory operand.
case 'o': // offsettable memory operand
case 'V': // non-offsettable memory operand
weight = CW_Memory;
break;
case 'r': // general register.
case 'g': // general register, memory operand or immediate integer.
// note: Clang converts "g" to "imr".
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_Register;
break;
case 'X': // any operand.
default:
weight = CW_Default;
break;
}
return weight;
}
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
/// Other -> immediates and magic values
/// Register -> one specific register
/// RegisterClass -> a group of regs
/// Memory -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it. The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory. Because of
/// this the heuristic we use is:
///
/// 1) If there is an 'other' constraint, and if the operand is valid for
/// that constraint, use it. This makes us take advantage of 'i'
/// constraints when available.
/// 2) Otherwise, pick the most general constraint present. This prefers
/// 'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
const TargetLowering &TLI,
SDValue Op, SelectionDAG *DAG) {
assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
unsigned BestIdx = 0;
TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
int BestGenerality = -1;
// Loop over the options, keeping track of the most general one.
for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
// Indirect 'other' or 'immediate' constraints are not allowed.
if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
CType == TargetLowering::C_Register ||
CType == TargetLowering::C_RegisterClass))
continue;
// If this is an 'other' or 'immediate' constraint, see if the operand is
// valid for it. For example, on X86 we might have an 'rI' constraint. If
// the operand is an integer in the range [0..31] we want to use I (saving a
// load of a register), otherwise we must use 'r'.
if ((CType == TargetLowering::C_Other ||
CType == TargetLowering::C_Immediate) && Op.getNode()) {
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;
TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
ResultOps, *DAG);
if (!ResultOps.empty()) {
BestType = CType;
BestIdx = i;
break;
}
}
// Things with matching constraints can only be registers, per gcc
// documentation. This mainly affects "g" constraints.
if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
continue;
// This constraint letter is more general than the previous one, use it.
int Generality = getConstraintGenerality(CType);
if (Generality > BestGenerality) {
BestType = CType;
BestIdx = i;
BestGenerality = Generality;
}
}
OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
OpInfo.ConstraintType = BestType;
}
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
SDValue Op,
SelectionDAG *DAG) const {
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
// Single-letter constraints ('r') are very common.
if (OpInfo.Codes.size() == 1) {
OpInfo.ConstraintCode = OpInfo.Codes[0];
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
} else {
ChooseConstraint(OpInfo, *this, Op, DAG);
}
// 'X' matches anything.
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
// Labels and constants are handled elsewhere ('X' is the only thing
// that matches labels). For Functions, the type here is the type of
// the result, which is not what we want to look at; leave them alone.
Value *v = OpInfo.CallOperandVal;
if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
OpInfo.CallOperandVal = v;
return;
}
if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
return;
// Otherwise, try to resolve it to something we know about by looking at
// the actual operand type.
if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
OpInfo.ConstraintCode = Repl;
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
}
}
}
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
bool UseSRA = false;
SmallVector<SDValue, 16> Shifts, Factors;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
if (C->isNullValue())
return false;
APInt Divisor = C->getAPIntValue();
unsigned Shift = Divisor.countTrailingZeros();
if (Shift) {
Divisor.ashrInPlace(Shift);
UseSRA = true;
}
// Calculate the multiplicative inverse, using Newton's method.
APInt t;
APInt Factor = Divisor;
while ((t = Divisor * Factor) != 1)
Factor *= APInt(Divisor.getBitWidth(), 2) - t;
Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
Factors.push_back(DAG.getConstant(Factor, dl, SVT));
return true;
};
// Collect all magic values from the build vector.
if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
return SDValue();
SDValue Shift, Factor;
if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
Shift = DAG.getBuildVector(ShVT, dl, Shifts);
Factor = DAG.getBuildVector(VT, dl, Factors);
} else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(Shifts.size() == 1 && Factors.size() == 1 &&
"Expected matchUnaryPredicate to return one element for scalable "
"vectors");
Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
Factor = DAG.getSplatVector(VT, dl, Factors[0]);
} else {
assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
Shift = Shifts[0];
Factor = Factors[0];
}
SDValue Res = Op0;
// Shift the value upfront if it is even, so the LSB is one.
if (UseSRA) {
// TODO: For UDIV use SRL instead of SRA.
SDNodeFlags Flags;
Flags.setExact(true);
Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
Created.push_back(Res.getNode());
}
return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
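// A standalone sketch (not part of TargetLowering) of the pattern built above,
// assuming 32-bit lanes and a divisor of 6: strip the divisor's trailing zero
// with an exact arithmetic shift, then multiply by the odd factor's
// multiplicative inverse modulo 2^32, found with the same Newton iteration as
// BuildSDIVPattern.
#include <cassert>
#include <cstdint>

// Returns X / 6 for any X that is an exact multiple of 6.
static int32_t exactSDivBy6(int32_t X) {
  const uint32_t Divisor = 6;
  const unsigned Shift = __builtin_ctz(Divisor); // 1: the power-of-two part.
  const uint32_t Odd = Divisor >> Shift;         // 3: the odd factor.
  // Newton's method: each step doubles the number of correct low bits.
  uint32_t Factor = Odd;
  while (Odd * Factor != 1)
    Factor *= 2 - Odd * Factor;                  // ends at 0xAAAAAAAB for Odd == 3
  // sra exact by Shift, then multiply by the inverse (unsigned, wrapping).
  return (int32_t)((uint32_t)(X >> Shift) * Factor);
}

int main() {
  for (int32_t X = -6000; X <= 6000; X += 6)
    assert(exactSDivBy6(X) == X / 6);
  return 0;
}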
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N, 0); // Lower SDIV as SDIV
return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
unsigned EltBits = VT.getScalarSizeInBits();
EVT MulVT;
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT)) {
// Limit this to simple scalars for now.
if (VT.isVector() || !VT.isSimple())
return SDValue();
// If this type will be promoted to a large enough type with a legal
// multiply operation, we can go ahead and do this transform.
if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
return SDValue();
MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
if (MulVT.getSizeInBits() < (2 * EltBits) ||
!isOperationLegal(ISD::MUL, MulVT))
return SDValue();
}
// If the sdiv has an 'exact' bit we can use a simpler lowering.
if (N->getFlags().hasExact())
return BuildExactSDIV(*this, N, dl, DAG, Created);
SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
if (C->isNullValue())
return false;
const APInt &Divisor = C->getAPIntValue();
APInt::ms magics = Divisor.magic();
int NumeratorFactor = 0;
int ShiftMask = -1;
if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
// If d is +1/-1, we just multiply the numerator by +1/-1.
NumeratorFactor = Divisor.getSExtValue();
magics.m = 0;
magics.s = 0;
ShiftMask = 0;
} else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
// If d > 0 and m < 0, add the numerator.
NumeratorFactor = 1;
} else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
// If d < 0 and m > 0, subtract the numerator.
NumeratorFactor = -1;
}
MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
return true;
};
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Collect the shifts / magic values from each element.
if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
return SDValue();
SDValue MagicFactor, Factor, Shift, ShiftMask;
if (N1.getOpcode() == ISD::BUILD_VECTOR) {
MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
Factor = DAG.getBuildVector(VT, dl, Factors);
Shift = DAG.getBuildVector(ShVT, dl, Shifts);
ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
Shifts.size() == 1 && ShiftMasks.size() == 1 &&
"Expected matchUnaryPredicate to return one element for scalable "
"vectors");
MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
Factor = DAG.getSplatVector(VT, dl, Factors[0]);
Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
} else {
assert(isa<ConstantSDNode>(N1) && "Expected a constant");
MagicFactor = MagicFactors[0];
Factor = Factors[0];
Shift = Shifts[0];
ShiftMask = ShiftMasks[0];
}
// Multiply the numerator (operand 0) by the magic value.
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHS = [&](SDValue X, SDValue Y) {
// If the type isn't legal, use a wider mul of the type calculated
// earlier.
if (!isTypeLegal(VT)) {
X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
DAG.getShiftAmountConstant(EltBits, MulVT, dl));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
}
if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
SDValue LoHi =
DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
}
return SDValue();
};
SDValue Q = GetMULHS(N0, MagicFactor);
if (!Q)
return SDValue();
Created.push_back(Q.getNode());
// (Optionally) Add/subtract the numerator using Factor.
Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
Created.push_back(Factor.getNode());
Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
Created.push_back(Q.getNode());
// Shift right algebraic by shift value.
Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
Created.push_back(Q.getNode());
// Extract the sign bit, mask it and add it to the quotient.
SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
Created.push_back(T.getNode());
T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
Created.push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
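// A standalone sketch (not part of TargetLowering) of the signed magic-number
// sequence built above, assuming an i32 divide by 7. The constants are the
// standard Hacker's Delight values (magic 0x92492493, shift 2); since the
// divisor is positive and the magic value is negative, the numerator is added
// back, and ShiftMask is all-ones so the sign bit of the quotient is added.
#include <cassert>
#include <cstdint>

static int32_t sdivBy7(int32_t N) {
  const int32_t MagicFactor = (int32_t)0x92492493;          // magics.m
  int32_t Q = (int32_t)(((int64_t)N * MagicFactor) >> 32);  // mulhs N, magic
  Q += N;                                                   // Factor == +1
  Q >>= 2;                                                  // sra by magics.s
  Q += (int32_t)((uint32_t)Q >> 31);                        // add the sign bit
  return Q;
}

int main() {
  for (int32_t N = -1000; N <= 1000; ++N)
    assert(sdivBy7(N) == N / 7);
  return 0;
}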
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
unsigned EltBits = VT.getScalarSizeInBits();
EVT MulVT;
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT)) {
// Limit this to simple scalars for now.
if (VT.isVector() || !VT.isSimple())
return SDValue();
// If this type will be promoted to a large enough type with a legal
// multiply operation, we can go ahead and do this transform.
if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
return SDValue();
MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
if (MulVT.getSizeInBits() < (2 * EltBits) ||
!isOperationLegal(ISD::MUL, MulVT))
return SDValue();
}
bool UseNPQ = false;
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isNullValue())
return false;
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
const APInt& Divisor = C->getAPIntValue();
APInt::mu magics = Divisor.magicu();
unsigned PreShift = 0, PostShift = 0;
// If the divisor is even, we can avoid using the expensive fixup by
// shifting the divided value upfront.
if (magics.a != 0 && !Divisor[0]) {
PreShift = Divisor.countTrailingZeros();
// Get magic number for the shifted divisor.
magics = Divisor.lshr(PreShift).magicu(PreShift);
assert(magics.a == 0 && "Should use cheap fixup now");
}
APInt Magic = magics.m;
bool SelNPQ;
if (magics.a == 0 || Divisor.isOneValue()) {
assert(magics.s < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
PostShift = magics.s;
SelNPQ = false;
} else {
PostShift = magics.s - 1;
SelNPQ = true;
}
PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
NPQFactors.push_back(
DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
: APInt::getNullValue(EltBits),
dl, SVT));
PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
UseNPQ |= SelNPQ;
return true;
};
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Collect the shifts/magic values from each element.
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
return SDValue();
SDValue PreShift, PostShift, MagicFactor, NPQFactor;
if (N1.getOpcode() == ISD::BUILD_VECTOR) {
PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
NPQFactors.size() == 1 && PostShifts.size() == 1 &&
"Expected matchUnaryPredicate to return one for scalable vectors");
PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
} else {
assert(isa<ConstantSDNode>(N1) && "Expected a constant");
PreShift = PreShifts[0];
MagicFactor = MagicFactors[0];
PostShift = PostShifts[0];
}
SDValue Q = N0;
Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
Created.push_back(Q.getNode());
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHU = [&](SDValue X, SDValue Y) {
// If the type isn't legal, use a wider mul of the type calculated
// earlier.
if (!isTypeLegal(VT)) {
X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
DAG.getShiftAmountConstant(EltBits, MulVT, dl));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
}
if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
SDValue LoHi =
DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
}
return SDValue(); // No mulhu or equivalent
};
// Multiply the numerator (operand 0) by the magic value.
Q = GetMULHU(Q, MagicFactor);
if (!Q)
return SDValue();
Created.push_back(Q.getNode());
if (UseNPQ) {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
Created.push_back(NPQ.getNode());
// For vectors we might have a mix of non-NPQ/NPQ paths, so use
// MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
if (VT.isVector())
NPQ = GetMULHU(NPQ, NPQFactor);
else
NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
Created.push_back(NPQ.getNode());
Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
Created.push_back(Q.getNode());
}
Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
Created.push_back(Q.getNode());
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue One = DAG.getConstant(1, dl, VT);
SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
/// If all values in Values that *don't* match the predicate are the same
/// 'splat' value, then replace all values with that splat value.
/// Else, if AlternativeReplacement was provided, then replace all values that
/// do match predicate with AlternativeReplacement value.
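/// For example (illustrative, with Cn denoting a constant node): given
/// Values = {C5, C0, C5, C0} and Predicate = isNullConstant, every element
/// becomes C5. Given Values = {C5, C0, C7, C0} there is no consistent splat,
/// so the zeros are replaced with AlternativeReplacement if one was provided,
/// and left untouched otherwise.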
static void
turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
std::function<bool(SDValue)> Predicate,
SDValue AlternativeReplacement = SDValue()) {
SDValue Replacement;
// Is there a value for which the Predicate does *NOT* match? What is it?
auto SplatValue = llvm::find_if_not(Values, Predicate);
if (SplatValue != Values.end()) {
// Does Values consist only of SplatValue's and values matching Predicate?
if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
return Value == *SplatValue || Predicate(Value);
})) // Then we shall replace values matching predicate with SplatValue.
Replacement = *SplatValue;
}
if (!Replacement) {
// Oops, we did not find the "baseline" splat value.
if (!AlternativeReplacement)
return; // Nothing to do.
// Let's replace with provided value then.
Replacement = AlternativeReplacement;
}
std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
}
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode,
ISD::CondCode Cond,
DAGCombinerInfo &DCI,
const SDLoc &DL) const {
SmallVector<SDNode *, 5> Built;
if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
DCI, DL, Built)) {
for (SDNode *N : Built)
DCI.AddToWorklist(N);
return Folded;
}
return SDValue();
}
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL,
SmallVectorImpl<SDNode *> &Created) const {
// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
// - D must be constant, with D = D0 * 2^K where D0 is odd
// - P is the multiplicative inverse of D0 modulo 2^W
// - Q = floor(((2^W) - 1) / D)
// where W is the width of the common type of N and D.
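// Worked example (illustrative, assuming 32-bit types): for D = 6 we get
// D0 = 3, K = 1, P = 0xAAAAAAAB (the inverse of 3 modulo 2^32) and
// Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA, so (seteq (urem N, 6), 0) becomes
// (setule (rotr (mul N, 0xAAAAAAAB), 1), 0x2AAAAAAA).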
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Only applicable for (in)equality comparisons.");
SelectionDAG &DAG = DCI.DAG;
EVT VT = REMNode.getValueType();
EVT SVT = VT.getScalarType();
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
EVT ShSVT = ShVT.getScalarType();
// If MUL is unavailable, we cannot proceed in any case.
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
bool ComparingWithAllZeros = true;
bool AllComparisonsWithNonZerosAreTautological = true;
bool HadTautologicalLanes = false;
bool AllLanesAreTautological = true;
bool HadEvenDivisor = false;
bool AllDivisorsArePowerOfTwo = true;
bool HadTautologicalInvertedLanes = false;
SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
if (CDiv->isNullValue())
return false;
const APInt &D = CDiv->getAPIntValue();
const APInt &Cmp = CCmp->getAPIntValue();
ComparingWithAllZeros &= Cmp.isNullValue();
// `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
// if C2 is not less than C1, the comparison is always false.
// But we will only be able to produce the comparison that will give the
// opposite tautological answer. So this lane would need to be fixed up.
bool TautologicalInvertedLane = D.ule(Cmp);
HadTautologicalInvertedLanes |= TautologicalInvertedLane;
// If all lanes are tautological (either all divisors are ones, or the
// divisor is not greater than the constant we are comparing with),
// we will prefer to avoid the fold.
bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
HadTautologicalLanes |= TautologicalLane;
AllLanesAreTautological &= TautologicalLane;
// If we are comparing with non-zero, we'll need to subtract said
// comparison value from the LHS. But there is no point in doing that if
// every lane where we are comparing with non-zero is tautological.
if (!Cmp.isNullValue())
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
// D is even if it has trailing zeros.
HadEvenDivisor |= (K != 0);
// D is a power-of-two if D0 is one.
// If all divisors are power-of-two, we will prefer to avoid the fold.
AllDivisorsArePowerOfTwo &= D0.isOneValue();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
unsigned W = D.getBitWidth();
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
APInt Q, R;
APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
// If we are comparing with zero, then that comparison constant is okay,
// else it may need to be one less than that.
if (Cmp.ugt(R))
Q -= 1;
assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the lane is tautological the result can be constant-folded.
if (TautologicalLane) {
// Set the P and K amounts to bogus values so we can try to splat them.
P = 0;
K = -1;
// And ensure that the comparison constant is tautological;
// it will always compare true/false.
Q = -1;
}
PAmts.push_back(DAG.getConstant(P, DL, SVT));
KAmts.push_back(
DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
QAmts.push_back(DAG.getConstant(Q, DL, SVT));
return true;
};
SDValue N = REMNode.getOperand(0);
SDValue D = REMNode.getOperand(1);
// Collect the values from each element.
if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
return SDValue();
// If all lanes are tautological, the result can be constant-folded.
if (AllLanesAreTautological)
return SDValue();
// If this is a urem by a power-of-two, avoid the fold since it can be
// best implemented as a bit test.
if (AllDivisorsArePowerOfTwo)
return SDValue();
SDValue PVal, KVal, QVal;
if (D.getOpcode() == ISD::BUILD_VECTOR) {
if (HadTautologicalLanes) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep the '0's.
turnVectorIntoSplatVector(PAmts, isNullConstant);
// Try to turn KAmts into a splat, since we don't care about the values
// that are currently '-1'. If we can't, change them to '0's.
turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
DAG.getConstant(0, DL, ShSVT));
}
PVal = DAG.getBuildVector(VT, DL, PAmts);
KVal = DAG.getBuildVector(ShVT, DL, KAmts);
QVal = DAG.getBuildVector(VT, DL, QAmts);
} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
"Expected matchBinaryPredicate to return one element for "
"SPLAT_VECTORs");
PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
} else {
PVal = PAmts[0];
KVal = KAmts[0];
QVal = QAmts[0];
}
if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
return SDValue(); // FIXME: Could/should use `ISD::ADD`?
assert(CompTargetNode.getValueType() == N.getValueType() &&
"Expecting that the types on LHS and RHS of comparisons match.");
N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
}
// (mul N, P)
SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
Created.push_back(Op0.getNode());
// Rotate right only if any divisor was even. We avoid rotates for all-odd
// divisors as a performance improvement, since rotating by 0 is a no-op.
if (HadEvenDivisor) {
// We need ROTR to do this.
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
// UREM: (rotr (mul N, P), K)
Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
Created.push_back(Op0.getNode());
}
// UREM: (setule/setugt (rotr (mul N, P), K), Q)
SDValue NewCC =
DAG.getSetCC(DL, SETCCVT, Op0, QVal,
((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
if (!HadTautologicalInvertedLanes)
return NewCC;
// If any lanes previously compared always-false, the NewCC will give an
// always-true result for them, so we need to fix up those lanes.
// Or the other way around for the inequality predicate.
assert(VT.isVector() && "Can/should only get here for vectors.");
Created.push_back(NewCC.getNode());
// `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
// if C2 is not less than C1, the comparison is always false.
// But we have produced the comparison that will give the
// opposite tautological answer. So these lanes would need to be fixed up.
SDValue TautologicalInvertedChannels =
DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
Created.push_back(TautologicalInvertedChannels.getNode());
// NOTE: we avoid letting illegal types through even if we're before legalize
// ops – legalization has a hard time producing good code for this.
if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
// If we have a vector select, let's replace the comparison results in the
// affected lanes with the correct tautological result.
SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
DL, SETCCVT, SETCCVT);
return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
Replacement, NewCC);
}
// Else, we can just invert the comparison result in the appropriate lanes.
//
// NOTE: see the note above the VSELECT check.
if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
TautologicalInvertedChannels);
return SDValue(); // Don't know how to lower.
}
/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode,
ISD::CondCode Cond,
DAGCombinerInfo &DCI,
const SDLoc &DL) const {
SmallVector<SDNode *, 7> Built;
if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
DCI, DL, Built)) {
assert(Built.size() <= 7 && "Max size prediction failed.");
for (SDNode *N : Built)
DCI.AddToWorklist(N);
return Folded;
}
return SDValue();
}
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL,
SmallVectorImpl<SDNode *> &Created) const {
// Fold:
// (seteq/ne (srem N, D), 0)
// To:
// (setule/ugt (rotr (add (mul N, P), A), K), Q)
//
// - D must be constant, with D = D0 * 2^K where D0 is odd
// - P is the multiplicative inverse of D0 modulo 2^W
// - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
// - Q = floor((2 * A) / (2^K))
// where W is the width of the common type of N and D.
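// Worked example (illustrative, assuming 32-bit types): for D = 3 we get
// D0 = 3, K = 0, P = 0xAAAAAAAB, A = floor((2^31 - 1) / 3) = 0x2AAAAAAA and
// Q = 2 * A = 0x55555554, so (seteq (srem N, 3), 0) becomes
// (setule (add (mul N, 0xAAAAAAAB), 0x2AAAAAAA), 0x55555554), with no rotate
// since K = 0.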
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Only applicable for (in)equality comparisons.");
SelectionDAG &DAG = DCI.DAG;
EVT VT = REMNode.getValueType();
EVT SVT = VT.getScalarType();
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
EVT ShSVT = ShVT.getScalarType();
// If we are after ops legalization, and MUL is unavailable, we cannot
// proceed.
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
// TODO: Could support comparing with non-zero too.
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
if (!CompTarget || !CompTarget->isNullValue())
return SDValue();
bool HadIntMinDivisor = false;
bool HadOneDivisor = false;
bool AllDivisorsAreOnes = true;
bool HadEvenDivisor = false;
bool NeedToApplyOffset = false;
bool AllDivisorsArePowerOfTwo = true;
SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
auto BuildSREMPattern = [&](ConstantSDNode *C) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
if (C->isNullValue())
return false;
// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
// WARNING: this fold is only valid for positive divisors!
APInt D = C->getAPIntValue();
if (D.isNegative())
D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
HadIntMinDivisor |= D.isMinSignedValue();
// If all divisors are ones, we will prefer to avoid the fold.
HadOneDivisor |= D.isOneValue();
AllDivisorsAreOnes &= D.isOneValue();
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
if (!D.isMinSignedValue()) {
// D is even if it has trailing zeros; unless it's INT_MIN, in which case
// we don't care about this lane in this fold, we'll special-handle it.
HadEvenDivisor |= (K != 0);
}
// D is a power-of-two if D0 is one. This includes INT_MIN.
// If all divisors are power-of-two, we will prefer to avoid the fold.
AllDivisorsArePowerOfTwo &= D0.isOneValue();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
unsigned W = D.getBitWidth();
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
APInt A = APInt::getSignedMaxValue(W).udiv(D0);
A.clearLowBits(K);
if (!D.isMinSignedValue()) {
// If the divisor is INT_MIN, then we don't care about this lane in this
// fold; we'll special-handle it.
NeedToApplyOffset |= A != 0;
}
// Q = floor((2 * A) / (2^K))
APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
"We are expecting that A is always less than all-ones for SVT");
assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the divisor is 1 the result can be constant-folded. Likewise, we
// don't care about INT_MIN lanes; those can be set to undef if appropriate.
if (D.isOneValue()) {
// Set P, A and K to bogus values so we can try to splat them.
P = 0;
A = -1;
K = -1;
// x ?% 1 == 0 <--> true <--> x u<= -1
Q = -1;
}
PAmts.push_back(DAG.getConstant(P, DL, SVT));
AAmts.push_back(DAG.getConstant(A, DL, SVT));
KAmts.push_back(
DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
QAmts.push_back(DAG.getConstant(Q, DL, SVT));
return true;
};
SDValue N = REMNode.getOperand(0);
SDValue D = REMNode.getOperand(1);
// Collect the values from each element.
if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
return SDValue();
// If this is a srem by one, avoid the fold since it can be constant-folded.
if (AllDivisorsAreOnes)
return SDValue();
// If this is a srem by a power-of-two (including INT_MIN), avoid the fold
// since it can be best implemented as a bit test.
if (AllDivisorsArePowerOfTwo)
return SDValue();
SDValue PVal, AVal, KVal, QVal;
if (D.getOpcode() == ISD::BUILD_VECTOR) {
if (HadOneDivisor) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep the '0's.
turnVectorIntoSplatVector(PAmts, isNullConstant);
// Try to turn AAmts into a splat, since we don't care about the
// values that are currently '-1'. If we can't, change them to '0's.
turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
DAG.getConstant(0, DL, SVT));
// Try to turn KAmts into a splat, since we don't care about the values
// that are currently '-1'. If we can't, change them to '0's.
turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
DAG.getConstant(0, DL, ShSVT));
}
PVal = DAG.getBuildVector(VT, DL, PAmts);
AVal = DAG.getBuildVector(VT, DL, AAmts);
KVal = DAG.getBuildVector(ShVT, DL, KAmts);
QVal = DAG.getBuildVector(VT, DL, QAmts);
} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
QAmts.size() == 1 &&
"Expected matchUnaryPredicate to return one element for scalable "
"vectors");
PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
} else {
assert(isa<ConstantSDNode>(D) && "Expected a constant");
PVal = PAmts[0];
AVal = AAmts[0];
KVal = KAmts[0];
QVal = QAmts[0];
}
// (mul N, P)
SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
Created.push_back(Op0.getNode());
if (NeedToApplyOffset) {
// We need ADD to do this.
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
return SDValue();
// (add (mul N, P), A)
Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
Created.push_back(Op0.getNode());
}
// Rotate right only if any divisor was even. We avoid rotates for all-odd
// divisors as a performance improvement, since rotating by 0 is a no-op.
if (HadEvenDivisor) {
// We need ROTR to do this.
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
// SREM: (rotr (add (mul N, P), A), K)
Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
Created.push_back(Op0.getNode());
}
// SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
SDValue Fold =
DAG.getSetCC(DL, SETCCVT, Op0, QVal,
((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
// If we didn't have lanes with INT_MIN divisor, then we're done.
if (!HadIntMinDivisor)
return Fold;
// That fold is only valid for positive divisors, which effectively means
// it is invalid for INT_MIN divisors. So if we have such a lane,
// we must fix up the results for said lanes.
assert(VT.isVector() && "Can/should only get here for vectors.");
// NOTE: we avoid letting illegal types through even if we're before legalize
// ops – legalization has a hard time producing good code for the code that
// follows.
if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
!isOperationLegalOrCustom(ISD::AND, VT) ||
!isOperationLegalOrCustom(Cond, VT) ||
!isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
return SDValue();
Created.push_back(Fold.getNode());
SDValue IntMin = DAG.getConstant(
APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
SDValue IntMax = DAG.getConstant(
APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
SDValue Zero =
DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
Created.push_back(DivisorIsIntMin.getNode());
// (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
Created.push_back(Masked.getNode());
SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
Created.push_back(MaskedIsZero.getNode());
// To produce the final result we need to blend two vectors: 'Fold' and
// 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
// from 'Fold', else we pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
// constant-folded, the select can get lowered to a shuffle with a constant
// mask.
SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
MaskedIsZero, Fold);
return Blended;
}
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
DAG.getContext()->emitError("argument to '__builtin_return_address' must "
"be a constant integer");
return true;
}
return false;
}
SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
// Test for denormal inputs to avoid producing a wrong estimate.
if (Mode.Input == DenormalMode::IEEE) {
// This is specifically a check for the handling of denormal inputs,
// not the result.
// Test = fabs(X) < SmallestNormal
const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
}
// Test = X == 0.0
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOps, bool OptForSize,
NegatibleCost &Cost,
unsigned Depth) const {
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) {
Cost = NegatibleCost::Cheaper;
return Op.getOperand(0);
}
// Don't recurse exponentially.
if (Depth > SelectionDAG::MaxRecursionDepth)
return SDValue();
// Pre-increment recursion depth for use in recursive calls.
++Depth;
const SDNodeFlags Flags = Op->getFlags();
const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = Op.getValueType();
unsigned Opcode = Op.getOpcode();
// Don't allow anything with multiple uses unless we know it is free.
if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
isFPExtFree(VT, Op.getOperand(0).getValueType());
if (!IsFreeExtend)
return SDValue();
}
auto RemoveDeadNode = [&](SDValue N) {
if (N && N.getNode()->use_empty())
DAG.RemoveDeadNode(N.getNode());
};
SDLoc DL(Op);
// Because getNegatedExpression can delete nodes we need a handle to keep
// temporary nodes alive in case the recursion manages to create an identical
// node.
std::list<HandleSDNode> Handles;
switch (Opcode) {
case ISD::ConstantFP: {
// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.
bool IsOpLegal =
isOperationLegal(ISD::ConstantFP, VT) ||
isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
OptForSize);
if (LegalOps && !IsOpLegal)
break;
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
SDValue CFP = DAG.getConstantFP(V, DL, VT);
// If we already have the use of the negated floating constant, it is free
// to negate it even if it has multiple uses.
if (!Op.hasOneUse() && CFP.use_empty())
break;
Cost = NegatibleCost::Neutral;
return CFP;
}
case ISD::BUILD_VECTOR: {
// Only permit BUILD_VECTOR of constants.
if (llvm::any_of(Op->op_values(), [&](SDValue N) {
return !N.isUndef() && !isa<ConstantFPSDNode>(N);
}))
break;
bool IsOpLegal =
(isOperationLegal(ISD::ConstantFP, VT) &&
isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
llvm::all_of(Op->op_values(), [&](SDValue N) {
return N.isUndef() ||
isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
OptForSize);
});
if (LegalOps && !IsOpLegal)
break;
SmallVector<SDValue, 4> Ops;
for (SDValue C : Op->op_values()) {
if (C.isUndef()) {
Ops.push_back(C);
continue;
}
APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
V.changeSign();
Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
}
Cost = NegatibleCost::Neutral;
return DAG.getBuildVector(VT, DL, Ops);
}
case ISD::FADD: {
if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
break;
// After operation legalization, it might not be legal to create new FSUBs.
if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
break;
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
// fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// Prevent this node from being deleted by the next call.
if (NegX)
Handles.emplace_back(NegX);
// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
// We're done with the handles.
Handles.clear();
// Negate the X if its cost is less than or equal to the cost of Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
if (NegY != N)
RemoveDeadNode(NegY);
return N;
}
// Negate the Y if it is not expensive.
if (NegY) {
Cost = CostY;
SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
if (NegX != N)
RemoveDeadNode(NegX);
return N;
}
break;
}
case ISD::FSUB: {
// We can't turn -(A-B) into B-A when we honor signed zeros.
if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
break;
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
// fold (fneg (fsub 0, Y)) -> Y
if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
if (C->isZero()) {
Cost = NegatibleCost::Cheaper;
return Y;
}
// fold (fneg (fsub X, Y)) -> (fsub Y, X)
Cost = NegatibleCost::Neutral;
return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
}
case ISD::FMUL:
case ISD::FDIV: {
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// Prevent this node from being deleted by the next call.
if (NegX)
Handles.emplace_back(NegX);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
// We're done with the handles.
Handles.clear();
// Negate the X if its cost is less than or equal to the cost of Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
if (NegY != N)
RemoveDeadNode(NegY);
return N;
}
// Ignore X * 2.0 because that is expected to be canonicalized to X + X.
if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
break;
// Negate the Y if it is not expensive.
if (NegY) {
Cost = CostY;
SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
if (NegX != N)
RemoveDeadNode(NegX);
return N;
}
break;
}
case ISD::FMA:
case ISD::FMAD: {
if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
break;
SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
NegatibleCost CostZ = NegatibleCost::Expensive;
SDValue NegZ =
getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
// Give up if we fail to negate Z.
if (!NegZ)
break;
// Prevent this node from being deleted by the next two calls.
Handles.emplace_back(NegZ);
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// Prevent this node from being deleted by the next call.
if (NegX)
Handles.emplace_back(NegX);
// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
// We're done with the handles.
Handles.clear();
// Negate the X if its cost is less than or equal to the cost of Y.
if (NegX && (CostX <= CostY)) {
Cost = std::min(CostX, CostZ);
SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
if (NegY != N)
RemoveDeadNode(NegY);
return N;
}
// Negate the Y if it is not expensive.
if (NegY) {
Cost = std::min(CostY, CostZ);
SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
if (NegX != N)
RemoveDeadNode(NegX);
return N;
}
break;
}
case ISD::FP_EXTEND:
case ISD::FSIN:
if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
OptForSize, Cost, Depth))
return DAG.getNode(Opcode, DL, VT, NegV);
break;
case ISD::FP_ROUND:
if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
OptForSize, Cost, Depth))
return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
break;
}
return SDValue();
}
//===----------------------------------------------------------------------===//
// Legalization Utilities
//===----------------------------------------------------------------------===//
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
SDValue LHS, SDValue RHS,
SmallVectorImpl<SDValue> &Result,
EVT HiLoVT, SelectionDAG &DAG,
MulExpansionKind Kind, SDValue LL,
SDValue LH, SDValue RL, SDValue RH) const {
assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
Opcode == ISD::SMUL_LOHI);
bool HasMULHS = (Kind == MulExpansionKind::Always) ||
isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
bool HasMULHU = (Kind == MulExpansionKind::Always) ||
isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
return false;
unsigned OuterBitSize = VT.getScalarSizeInBits();
unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
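// In outline, the expansion below follows the schoolbook decomposition:
// writing b = 2^InnerBitSize, LHS = LH * b + LL and RHS = RH * b + RL,
//   LHS * RHS = LL*RL + (LL*RH + LH*RL) * b + LH*RH * b^2,
// which is computed with half-width multiplies plus carry propagation.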
// LL, LH, RL, and RH must be either all NULL or all set to a value.
assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
(!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
bool Signed) -> bool {
if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
Hi = SDValue(Lo.getNode(), 1);
return true;
}
if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
return true;
}
return false;
};
SDValue Lo, Hi;
if (!LL.getNode() && !RL.getNode() &&
isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
}
if (!LL.getNode())
return false;
APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
if (DAG.MaskedValueIsZero(LHS, HighMask) &&
DAG.MaskedValueIsZero(RHS, HighMask)) {
// The inputs are both zero-extended.
if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
Result.push_back(Lo);
Result.push_back(Hi);
if (Opcode != ISD::MUL) {
SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
Result.push_back(Zero);
Result.push_back(Zero);
}
return true;
}
}
if (!VT.isVector() && Opcode == ISD::MUL &&
DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
// The input values are both sign-extended.
// TODO non-MUL case?
if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
Result.push_back(Lo);
Result.push_back(Hi);
return true;
}
}
unsigned ShiftAmount = OuterBitSize - InnerBitSize;
EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
// FIXME getShiftAmountTy does not always return a sensible result when VT
// is an illegal type, and so the type may be too small to fit the shift
// amount. Override it with i32. The shift will have to be legalized.
ShiftAmountTy = MVT::i32;
}
SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
if (!LH.getNode() && !RH.getNode() &&
isOperationLegalOrCustom(ISD::SRL, VT) &&
isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
}
if (!LH.getNode())
return false;
if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
return false;
Result.push_back(Lo);
if (Opcode == ISD::MUL) {
RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
Result.push_back(Hi);
return true;
}
// Compute the full width result.
auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
};
SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
return false;
// This is effectively the add part of a multiply-add of half-sized operands,
// so it cannot overflow.
Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
return false;
SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
isOperationLegalOrCustom(ISD::ADDE, VT));
if (UseGlue)
Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
Merge(Lo, Hi));
else
Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
SDValue Carry = Next.getValue(1);
Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
return false;
if (UseGlue)
Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
Carry);
else
Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
Zero, Carry);
Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
if (Opcode == ISD::SMUL_LOHI) {
SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
}
Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
return true;
}
bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
SelectionDAG &DAG, MulExpansionKind Kind,
SDValue LL, SDValue LH, SDValue RL,
SDValue RH) const {
SmallVector<SDValue, 2> Result;
bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
N->getOperand(0), N->getOperand(1), Result, HiLoVT,
DAG, Kind, LL, LH, RL, RH);
if (Ok) {
assert(Result.size() == 2);
Lo = Result[0];
Hi = Result[1];
}
return Ok;
}
// Check that (every element of) Z is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
return ISD::matchUnaryPredicate(
Z,
[=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
true);
}
bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
return false;
SDValue X = Node->getOperand(0);
SDValue Y = Node->getOperand(1);
SDValue Z = Node->getOperand(2);
unsigned BW = VT.getScalarSizeInBits();
bool IsFSHL = Node->getOpcode() == ISD::FSHL;
SDLoc DL(SDValue(Node, 0));
EVT ShVT = Z.getValueType();
// If a funnel shift in the other direction is better supported, use it.
unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
if (isNonZeroModBitWidthOrUndef(Z, BW)) {
// fshl X, Y, Z -> fshr X, Y, -Z
// fshr X, Y, Z -> fshl X, Y, -Z
SDValue Zero = DAG.getConstant(0, DL, ShVT);
Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
} else {
// fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
// fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
SDValue One = DAG.getConstant(1, DL, ShVT);
if (IsFSHL) {
Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
X = DAG.getNode(ISD::SRL, DL, VT, X, One);
} else {
X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
}
Z = DAG.getNOT(DL, Z, ShVT);
}
Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
return true;
}
SDValue ShX, ShY;
SDValue ShAmt, InvShAmt;
if (isNonZeroModBitWidthOrUndef(Z, BW)) {
// fshl: X << C | Y >> (BW - C)
// fshr: X << (BW - C) | Y >> C
// where C = Z % BW is not zero
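// For example (illustrative, BW = 8): fshl(0xAB, 0xCD, 4) is the high byte
// of (0xABCD << 4), i.e. 0xBC, and indeed
// ((0xAB << 4) | (0xCD >> 4)) & 0xFF == 0xBC.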
SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
} else {
// fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
// fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
if (isPowerOf2_32(BW)) {
// Z % BW -> Z & (BW - 1)
ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
} else {
SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
}
SDValue One = DAG.getConstant(1, DL, ShVT);
if (IsFSHL) {
ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
} else {
SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
}
}
Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
return true;
}
// TODO: Merge with expandFunnelShift.
bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
SDValue &Result, SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool IsLeft = Node->getOpcode() == ISD::ROTL;
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDLoc DL(SDValue(Node, 0));
EVT ShVT = Op1.getValueType();
SDValue Zero = DAG.getConstant(0, DL, ShVT);
// If a rotate in the other direction is supported, use it.
unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
return true;
}
if (!AllowVectorOps && VT.isVector() &&
(!isOperationLegalOrCustom(ISD::SHL, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
return false;
unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
SDValue ShVal;
SDValue HsVal;
if (isPowerOf2_32(EltSizeInBits)) {
// (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
// (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
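// For example (illustrative, w = 8): rotl(0xB4, 3) =
// ((0xB4 << 3) | (0xB4 >> 5)) & 0xFF = 0xA0 | 0x05 = 0xA5.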
SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
} else {
// (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
// (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
SDValue One = DAG.getConstant(1, DL, ShVT);
HsVal =
DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
}
Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
return true;
}
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
SelectionDAG &DAG) const {
assert(Node->getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Node->getValueType(0);
unsigned VTBits = VT.getScalarSizeInBits();
assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
SDValue ShOpLo = Node->getOperand(0);
SDValue ShOpHi = Node->getOperand(1);
SDValue ShAmt = Node->getOperand(2);
EVT ShAmtVT = ShAmt.getValueType();
EVT ShAmtCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
SDLoc dl(Node);
// ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
// ISD::SRL/SRA nodes do not. Insert an AND to be safe; it's usually optimized
// away during isel.
SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
DAG.getConstant(VTBits - 1, dl, ShAmtVT));
SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, dl, ShAmtVT))
: DAG.getConstant(0, dl, VT);
SDValue Tmp2, Tmp3;
if (IsSHL) {
Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
// If the shift amount is larger than or equal to the width of a part, we
// don't use the result from the FSHL/FSHR. Insert a test and select the
// appropriate values for large shift amounts.
SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
DAG.getConstant(VTBits, dl, ShAmtVT));
SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
if (IsSHL) {
Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
} else {
Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
}
}
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
SDLoc dl(SDValue(Node, 0));
// FIXME: Only f32 to i64 conversions are supported.
if (SrcVT != MVT::f32 || DstVT != MVT::i64)
return false;
if (Node->isStrictFPOpcode())
// When a NaN is converted to an integer a trap is allowed. We can't
// use this expansion here because it would eliminate that trap. Other
// traps are also allowed and cannot be eliminated. See
// IEEE 754-2008 sec 5.8.
return false;
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
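// Worked example (illustrative): for Src = 5.5f, Bits = 0x40B00000, so
// ExponentBits = 129, Exponent = 2 and R = mantissa | implicit bit =
// 0x00B00000; since Exponent <= ExponentLoBit the select below takes the
// SRL arm, R >> (23 - 2) = 5, and with Sign = 0 the final result is 5.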
unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
EVT IntVT = SrcVT.changeTypeToInteger();
EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
SDValue Bias = DAG.getConstant(127, dl, IntVT);
SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
SDValue ExponentBits = DAG.getNode(
ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
DAG.getConstant(0x00800000, dl, IntVT));
R = DAG.getZExtOrTrunc(R, dl, DstVT);
R = DAG.getSelectCC(
dl, Exponent, ExponentLoBit,
DAG.getNode(ISD::SHL, dl, DstVT, R,
DAG.getZExtOrTrunc(
DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
dl, IntShVT)),
DAG.getNode(ISD::SRL, dl, DstVT, R,
DAG.getZExtOrTrunc(
DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
dl, IntShVT)),
ISD::SETGT);
SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
return true;
}
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
SDValue &Chain,
SelectionDAG &DAG) const {
SDLoc dl(SDValue(Node, 0));
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
EVT DstSetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
// Only expand vector types if we have the appropriate vector bit operations.
unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
ISD::FP_TO_SINT;
if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
return false;
// If the maximum float value is smaller than the signed integer range,
// the destination signmask can't be represented by the float, so we can
// just use FP_TO_SINT directly.
const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
if (Node->isStrictFPOpcode()) {
Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
{ Node->getOperand(0), Src });
Chain = Result.getValue(1);
} else
Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
return true;
}
// Don't expand it if there isn't a cheap FSUB instruction.
if (!isOperationLegalOrCustom(
Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
return false;
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
SDValue Sel;
if (Node->isStrictFPOpcode()) {
Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
Node->getOperand(0), /*IsSignaling*/ true);
Chain = Sel.getValue(1);
} else {
Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
}
bool Strict = Node->isStrictFPOpcode() ||
shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
if (Strict) {
// Expand based on the maximum range of FP_TO_SINT: if the value exceeds the
// signmask, then offset it (the result of which should be fully representable).
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Src - FltOfs) ^ IntOfs
// TODO: Should any fast-math-flags be set for the FSUB?
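// Worked example (illustrative, f32 -> i32): for Src = 3.0e9f, Sel is false,
// so FltOfs = 2^31 and IntOfs = 0x80000000; fp_to_sint(3.0e9 - 2^31) gives
// 852516352, and XOR-ing with 0x80000000 yields 3000000000 as expected.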
SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
DAG.getConstantFP(0.0, dl, SrcVT), Cst);
Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
DAG.getConstant(0, dl, DstVT),
DAG.getConstant(SignMask, dl, DstVT));
SDValue SInt;
if (Node->isStrictFPOpcode()) {
SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
{ Chain, Src, FltOfs });
SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
{ Val.getValue(1), Val });
Chain = SInt.getValue(1);
} else {
SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
}
Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
} else {
// Expand based on maximum range of FP_TO_SINT:
// True = fp_to_sint(Src)
// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
// Result = select (Src < 0x8000000000000000), True, False
SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
// TODO: Should any fast-math-flags be set for the FSUB?
SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
DAG.getConstant(SignMask, dl, DstVT));
Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
}
return true;
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue &Chain,
SelectionDAG &DAG) const {
// This transform is not correct for converting 0 when the rounding mode is
// set to round toward negative infinity, which will produce -0.0. So disable
// it under strictfp.
if (Node->isStrictFPOpcode())
return false;
SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
return false;
// Only expand vector types if we have the appropriate vector bit operations.
if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
!isOperationLegalOrCustom(ISD::FADD, DstVT) ||
!isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
return false;
SDLoc dl(SDValue(Node, 0));
EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
// Implementation of unsigned i64 to f64 following the algorithm in
// __floatundidf in compiler-rt. This implementation performs rounding
// correctly in all rounding modes with the exception of converting 0
// when rounding toward negative infinity. In that case the fsub will produce
// -0.0. This will be added to +0.0 and produce -0.0, which is incorrect.
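// In brief (illustrative): with Src = Hi * 2^32 + Lo, bitcasting
// (0x4330000000000000 | Lo) gives the double 2^52 + Lo, and bitcasting
// (0x4530000000000000 | Hi) gives 2^84 + Hi * 2^32; subtracting the constant
// 2^84 + 2^52 from the latter and adding the former reconstructs
// Hi * 2^32 + Lo, with the final fadd doing the rounding.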
SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
SDValue HiSub =
DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
return true;
}
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
SelectionDAG &DAG) const {
SDLoc dl(Node);
unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
EVT VT = Node->getValueType(0);
if (VT.isScalableVector())
report_fatal_error(
"Expanding fminnum/fmaxnum for scalable vectors is undefined.");
if (isOperationLegalOrCustom(NewOp, VT)) {
SDValue Quiet0 = Node->getOperand(0);
SDValue Quiet1 = Node->getOperand(1);
if (!Node->getFlags().hasNoNaNs()) {
// Insert canonicalizes if we might need to quiet the operands to get
// correct sNaN behavior.
if (!DAG.isKnownNeverSNaN(Quiet0)) {
Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
Node->getFlags());
}
if (!DAG.isKnownNeverSNaN(Quiet1)) {
Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
Node->getFlags());
}
}
return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
}
// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
// instead if there are no NaNs.
if (Node->getFlags().hasNoNaNs()) {
unsigned IEEE2018Op =
Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
Node->getOperand(1), Node->getFlags());
}
}
// If none of the above worked, but there are no NaNs, then expand to
// a compare/select sequence. This is required for correctness since
// InstCombine might have canonicalized a fcmp+select sequence to a
// FMINNUM/FMAXNUM node. If we were to fall through to the default
// expansion to libcall, we might introduce a link-time dependency
// on libm into a file that originally did not have one.
if (Node->getFlags().hasNoNaNs()) {
ISD::CondCode Pred =
Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
SDValue Op1 = Node->getOperand(0);
SDValue Op2 = Node->getOperand(1);
SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
// Copy FMF flags, but always set the no-signed-zeros flag
// as this is implied by the FMINNUM/FMAXNUM semantics.
SDNodeFlags Flags = Node->getFlags();
Flags.setNoSignedZeros(true);
SelCC->setFlags(Flags);
return SelCC;
}
return SDValue();
}
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Op = Node->getOperand(0);
unsigned Len = VT.getScalarSizeInBits();
assert(VT.isInteger() && "CTPOP not implemented for this type.");
// TODO: Add support for irregular type lengths.
if (!(Len <= 128 && Len % 8 == 0))
return false;
// Only expand vector types if we have the appropriate vector bit operations.
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
(Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
return false;
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
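// Worked example (illustrative, Len = 8, v = 0x6E): the three steps below
// produce 0x59 (per-pair counts), 0x23 (per-nibble counts) and finally 0x05,
// which is popcount(0x6E); for Len == 8 the multiply/shift step is skipped.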
SDValue Mask55 =
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
SDValue Mask33 =
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
SDValue Mask0F =
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
SDValue Mask01 =
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
// v = v - ((v >> 1) & 0x55555555...)
Op = DAG.getNode(ISD::SUB, dl, VT, Op,
DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRL, dl, VT, Op,
DAG.getConstant(1, dl, ShVT)),
Mask55));
// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRL, dl, VT, Op,
DAG.getConstant(2, dl, ShVT)),
Mask33));
// v = (v + (v >> 4)) & 0x0F0F0F0F...
Op = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::ADD, dl, VT, Op,
DAG.getNode(ISD::SRL, dl, VT, Op,
DAG.getConstant(4, dl, ShVT))),
Mask0F);
// v = (v * 0x01010101...) >> (Len - 8)
if (Len > 8)
Op =
DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
DAG.getConstant(Len - 8, dl, ShVT));
Result = Op;
return true;
}
bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Op = Node->getOperand(0);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
isOperationLegalOrCustom(ISD::CTLZ, VT)) {
Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
return true;
}
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
!isOperationLegalOrCustom(ISD::CTPOP, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
return false;
// for now, we do this:
// x = x | (x >> 1);
// x = x | (x >> 2);
// ...
// x = x | (x >>16);
// x = x | (x >>32); // for 64-bit input
// return popcount(~x);
//
// Ref: "Hacker's Delight" by Henry Warren
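// For example, for an 8-bit x = 0b00010100 (three leading zeros):
//   after x |= x >> 1  ->  0b00011110
//   after x |= x >> 2  ->  0b00011111
//   after x |= x >> 4  ->  0b00011111
//   popcount(~x) = popcount(0b11100000) = 3.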
for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
Op = DAG.getNode(ISD::OR, dl, VT, Op,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
}
Op = DAG.getNOT(dl, Op, VT);
Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
return true;
}
bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
SDValue Op = Node->getOperand(0);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
isOperationLegalOrCustom(ISD::CTTZ, VT)) {
Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
return true;
}
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
!isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return false;
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
// { return bitwidth - nlz(~x & (x - 1)); }
// Ref: "Hacker's Delight" by Henry Warren
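// For example, for an 8-bit x = 0b10110100 (two trailing zeros):
//   x - 1      = 0b10110011
//   ~x         = 0b01001011
//   ~x & (x-1) = 0b00000011, whose popcount is 2.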
SDValue Tmp = DAG.getNode(
ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
Result =
DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
return true;
}
Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
return true;
}
bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
SelectionDAG &DAG, bool IsNegative) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Op = N->getOperand(0);
// abs(x) -> smax(x,sub(0,x))
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMAX, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
return true;
}
// abs(x) -> umin(x,sub(0,x))
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::UMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
return true;
}
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
return true;
}
// Only expand vector types if we have the appropriate vector operations.
if (VT.isVector() &&
(!isOperationLegalOrCustom(ISD::SRA, VT) ||
(!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
(IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return false;
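// Fall back to the classic sign-mask expansion: Shift = X >> (BitWidth - 1)
// is 0 for non-negative X and all-ones for negative X, so (X + Shift) ^ Shift
// computes |X|. E.g. for 8-bit X = -5 (0xFB): Shift = 0xFF, X + Shift = 0xFA,
// and 0xFA ^ 0xFF = 0x05.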
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
if (!IsNegative) {
SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
} else {
// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
return true;
}
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
if (!VT.isSimple())
return SDValue();
EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().getScalarType().SimpleTy) {
default:
return SDValue();
case MVT::i16:
// Use a rotate by 8. This can be further expanded if necessary.
return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
case MVT::i32:
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
DAG.getConstant(0xFF0000, dl, VT));
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
case MVT::i64:
Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
DAG.getConstant(255ULL<<48, dl, VT));
Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
DAG.getConstant(255ULL<<40, dl, VT));
Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
DAG.getConstant(255ULL<<32, dl, VT));
Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
DAG.getConstant(255ULL<<24, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
DAG.getConstant(255ULL<<16, dl, VT));
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
DAG.getConstant(255ULL<<8 , dl, VT));
Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
}
}
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
unsigned Sz = VT.getScalarSizeInBits();
SDValue Tmp, Tmp2, Tmp3;
// If we can, perform BSWAP first and then mask+swap the i4 pairs, then the
// i2 pairs, and finally the i1 pairs.
// TODO: We can easily support i4/i2 legal types if any target ever does.
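// For example, reversing the 8-bit value 0xB1 (0b10110001) proceeds as
//   swap i4: 0xB1 -> 0x1B
//   swap i2: 0x1B -> 0x4E
//   swap i1: 0x4E -> 0x8D (0b10001101), the bit-reversed result.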
if (Sz >= 8 && isPowerOf2_32(Sz)) {
// Create the masks - repeating the pattern every byte.
APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
// BSWAP if the type is wider than a single byte.
Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
// swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
// swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
// swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
return Tmp;
}
Tmp = DAG.getConstant(0, dl, VT);
for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
if (I < J)
Tmp2 =
DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
else
Tmp2 =
DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
APInt Shift(Sz, 1);
Shift <<= J;
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
}
return Tmp;
}
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SelectionDAG &DAG) const {
SDLoc SL(LD);
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
EVT SrcVT = LD->getMemoryVT();
EVT DstVT = LD->getValueType(0);
ISD::LoadExtType ExtType = LD->getExtensionType();
if (SrcVT.isScalableVector())
report_fatal_error("Cannot scalarize scalable vector loads");
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
EVT DstEltVT = DstVT.getScalarType();
// A vector must always be stored in memory as-is, i.e. without any padding
// between the elements, since various code depends on it, e.g. in the
// handling of a bitcast of a vector type to int, which may be done with a
// vector store followed by an integer load. A vector that does not have
// elements that are byte-sized must therefore be stored as an integer
// built out of the extracted vector elements.
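// For example, a v4i2 load is done as a single i8 integer load, and element
// Idx is then recovered as trunc((Load >> (Idx * 2)) & 0x3) on a
// little-endian target (the shift index is mirrored on big-endian).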
if (!SrcEltVT.isByteSized()) {
unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
unsigned NumSrcBits = SrcVT.getSizeInBits();
EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
unsigned SrcEltBits = SrcEltVT.getSizeInBits();
SDValue SrcEltBitMask = DAG.getConstant(
APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
// Load the whole vector and avoid masking off the top bits as it makes
// the codegen worse.
SDValue Load =
DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
SmallVector<SDValue, 8> Vals;
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
unsigned ShiftIntoIdx =
(DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
SDValue ShiftAmount =
DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
LoadVT, SL, /*LegalTypes=*/false);
SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
SDValue Elt =
DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
if (ExtType != ISD::NON_EXTLOAD) {
unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
}
Vals.push_back(Scalar);
}
SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
return std::make_pair(Value, Load.getValue(1));
}
unsigned Stride = SrcEltVT.getSizeInBits() / 8;
assert(SrcEltVT.isByteSized());
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue ScalarLoad =
DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcEltVT, LD->getOriginalAlign(),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
return std::make_pair(Value, NewChain);
}
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SelectionDAG &DAG) const {
SDLoc SL(ST);
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
if (StVT.isScalableVector())
report_fatal_error("Cannot scalarize scalable vector stores");
// The type of the data we want to save
EVT RegVT = Value.getValueType();
EVT RegSclVT = RegVT.getScalarType();
// The type of data as saved in memory.
EVT MemSclVT = StVT.getScalarType();
unsigned NumElem = StVT.getVectorNumElements();
// A vector must always be stored in memory as-is, i.e. without any padding
// between the elements, since various code depends on it, e.g. in the
// handling of a bitcast of a vector type to int, which may be done with a
// vector store followed by an integer load. A vector that does not have
// elements that are byte-sized must therefore be stored as an integer
// built out of the extracted vector elements.
if (!MemSclVT.isByteSized()) {
unsigned NumBits = StVT.getSizeInBits();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
DAG.getVectorIdxConstant(Idx, SL));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
unsigned ShiftIntoIdx =
(DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
SDValue ShiftAmount =
DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
SDValue ShiftedElt =
DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
}
return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
ST->getAAInfo());
}
// Store Stride in bytes
unsigned Stride = MemSclVT.getSizeInBits() / 8;
assert(Stride && "Zero stride!");
// Extract each of the elements from the original vector and save them into
// memory individually.
SmallVector<SDValue, 8> Stores;
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
DAG.getVectorIdxConstant(Idx, SL));
SDValue Ptr =
DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(
Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
ST->getAAInfo());
Stores.push_back(Store);
}
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
assert(LD->getAddressingMode() == ISD::UNINDEXED &&
"unaligned indexed loads not implemented!");
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0);
EVT LoadedVT = LD->getMemoryVT();
SDLoc dl(LD);
auto &MF = DAG.getMachineFunction();
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
LoadedVT.isVector()) {
// Scalarize the load and let the individual components be handled.
return scalarizeVectorLoad(LD, DAG);
}
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (LoadedVT != VT)
Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
ISD::ANY_EXTEND, dl, VT, Result);
return std::make_pair(Result, newLoad.getValue(1));
}
// Copy the value to an (aligned) stack slot using (unaligned) integer
// loads and stores, then do a (aligned) load from the stack slot.
MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
unsigned LoadedBytes = LoadedVT.getStoreSize();
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
// Make sure the stack slot is also aligned for the register type.
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
SmallVector<SDValue, 8> Stores;
SDValue StackPtr = StackBase;
unsigned Offset = 0;
EVT PtrVT = Ptr.getValueType();
EVT StackPtrVT = StackPtr.getValueType();
SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
// Do all but one copy using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the original location.
SDValue Load = DAG.getLoad(
RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(
Load.getValue(1), dl, Load, StackPtr,
MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
// Increment the pointers.
Offset += RegBytes;
Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
}
// The last copy may be partial. Do an extending load.
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
8 * (LoadedBytes - Offset));
SDValue Load =
DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(Offset), MemVT,
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
Stores.push_back(DAG.getTruncStore(
Load.getValue(1), dl, Load, StackPtr,
MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
// Finally, perform the original load only redirected to the stack slot.
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
LoadedVT);
// Callers expect a MERGE_VALUES node.
return std::make_pair(Load, TF);
}
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
"Unaligned load of unsupported type.");
// Compute the new VT that is half the size of the old one. This is an
// integer MVT.
unsigned NumBits = LoadedVT.getSizeInBits();
EVT NewLoadedVT;
NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
NumBits >>= 1;
Align Alignment = LD->getOriginalAlign();
unsigned IncrementSize = NumBits / 8;
ISD::LoadExtType HiExtType = LD->getExtensionType();
// If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
if (HiExtType == ISD::NON_EXTLOAD)
HiExtType = ISD::ZEXTLOAD;
// Load the value in two parts
SDValue Lo, Hi;
if (DAG.getDataLayout().isLittleEndian()) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
} else {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
}
// Aggregate the two parts.
SDValue ShiftAmount =
DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
DAG.getDataLayout()));
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
return std::make_pair(Result, TF);
}
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
SelectionDAG &DAG) const {
assert(ST->getAddressingMode() == ISD::UNINDEXED &&
"unaligned indexed stores not implemented!");
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
Align Alignment = ST->getOriginalAlign();
auto &MF = DAG.getMachineFunction();
EVT StoreMemVT = ST->getMemoryVT();
SDLoc dl(ST);
if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
if (isTypeLegal(intVT)) {
if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
StoreMemVT.isVector()) {
// Scalarize the store and let the individual components be handled.
SDValue Result = scalarizeVectorStore(ST, DAG);
return Result;
}
// Expand to a bitconvert of the value to the integer type of the
// same size, then a (misaligned) int store.
// FIXME: Does not handle truncating floating point stores!
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
Alignment, ST->getMemOperand()->getFlags());
return Result;
}
// Do an (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
MVT RegVT = getRegisterType(
*DAG.getContext(),
EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
EVT PtrVT = Ptr.getValueType();
unsigned StoredBytes = StoreMemVT.getStoreSize();
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
// Make sure the stack slot is also aligned for the register type.
SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Perform the original store, only redirected to the stack slot.
SDValue Store = DAG.getTruncStore(
Chain, dl, Val, StackPtr,
MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
EVT StackPtrVT = StackPtr.getValueType();
SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
SmallVector<SDValue, 8> Stores;
unsigned Offset = 0;
// Do all but one copy using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the stack slot.
SDValue Load = DAG.getLoad(
RegVT, dl, Store, StackPtr,
MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
ST->getOriginalAlign(),
ST->getMemOperand()->getFlags()));
// Increment the pointers.
Offset += RegBytes;
StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
}
// The last store may be partial. Do a truncating store. On big-endian
// machines this requires an extending load from the stack slot to ensure
// that the bits are in the right place.
EVT LoadMemVT =
EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
// Load from the stack slot.
SDValue Load = DAG.getExtLoad(
ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
Stores.push_back(
DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
ST->getOriginalAlign(),
ST->getMemOperand()->getFlags(), ST->getAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
return Result;
}
assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
"Unaligned store of unknown type.");
// Get the half-size VT
EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
unsigned NumBits = NewStoredVT.getFixedSizeInBits();
unsigned IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
SDValue ShiftAmount = DAG.getConstant(
NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
SDValue Lo = Val;
SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
// Store the two parts
SDValue Store1, Store2;
Store1 = DAG.getTruncStore(Chain, dl,
DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
ST->getMemOperand()->getFlags());
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Store2 = DAG.getTruncStore(
Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
ST->getMemOperand()->getFlags(), ST->getAAInfo());
SDValue Result =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
return Result;
}
SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
const SDLoc &DL, EVT DataVT,
SelectionDAG &DAG,
bool IsCompressedMemory) const {
SDValue Increment;
EVT AddrVT = Addr.getValueType();
EVT MaskVT = Mask.getValueType();
assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
"Incompatible types of Data and Mask");
if (IsCompressedMemory) {
if (DataVT.isScalableVector())
report_fatal_error(
"Cannot currently handle compressed memory with scalable vectors");
// Increment the pointer according to the number of '1's in the mask.
EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
if (MaskIntVT.getSizeInBits() < 32) {
MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
MaskIntVT = MVT::i32;
}
// Count '1's with POPCNT.
Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
// Scale is an element size in bytes.
SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
AddrVT);
Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
} else if (DataVT.isScalableVector()) {
Increment = DAG.getVScale(DL, AddrVT,
APInt(AddrVT.getFixedSizeInBits(),
DataVT.getStoreSize().getKnownMinSize()));
} else
Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
EVT VecVT, const SDLoc &dl,
unsigned NumSubElts) {
if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
return Idx;
EVT IdxVT = Idx.getValueType();
unsigned NElts = VecVT.getVectorMinNumElements();
if (VecVT.isScalableVector()) {
// If this is a constant index and we know that the value plus the number of
// elements in the subvector minus one is less than the minimum number of
// elements, then it's safe to return Idx.
if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
return Idx;
SDValue VS =
DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
DAG.getConstant(NumSubElts, dl, IdxVT));
return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
}
if (isPowerOf2_32(NElts) && NumSubElts == 1) {
APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
DAG.getConstant(Imm, dl, IdxVT));
}
unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
DAG.getConstant(MaxIndex, dl, IdxVT));
}
SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
SDValue VecPtr, EVT VecVT,
SDValue Index) const {
return getVectorSubVecPointer(
DAG, VecPtr, VecVT,
EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
Index);
}
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
SDValue VecPtr, EVT VecVT,
EVT SubVecVT,
SDValue Index) const {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
EVT EltVT = VecVT.getVectorElementType();
// Calculate the element offset and add it to the pointer.
unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
"Converting bits to bytes lost precision");
// Scalable vectors don't need clamping, as these are checked at compile time.
if (SubVecVT.isFixedLengthVector()) {
assert(SubVecVT.getVectorElementType() == EltVT &&
"Sub-vector must be a fixed vector with matching element type");
Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
SubVecVT.getVectorNumElements());
}
EVT IdxVT = Index.getValueType();
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
// Access to the address of TLS variable xyz is lowered to a function call:
// __emutls_get_address( address of global variable named "__emutls_v.xyz" )
EVT PtrVT = getPointerTy(DAG.getDataLayout());
PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
SDLoc dl(GA);
ArgListTy Args;
ArgListEntry Entry;
std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
StringRef EmuTlsVarName(NameString);
GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
assert(EmuTlsVar && "Cannot find EmuTlsVar ");
Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
Entry.Ty = VoidPtrType;
Args.push_back(Entry);
SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// TLSADDR will be codegen'ed as a call. Inform MFI that the function has
// calls. At least for X86 targets; maybe good for other targets too?
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true); // Is this only for X86 target?
MFI.setHasCalls(true);
assert((GA->getOffset() == 0) &&
"Emulated TLS must have zero offset in GlobalAddressSDNode");
return CallResult.first;
}
SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
SelectionDAG &DAG) const {
assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
if (!isCtlzFast())
return SDValue();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (C->isNullValue() && CC == ISD::SETEQ) {
EVT VT = Op.getOperand(0).getValueType();
SDValue Zext = Op.getOperand(0);
if (VT.bitsLT(MVT::i32)) {
VT = MVT::i32;
Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
}
unsigned Log2b = Log2_32(VT.getSizeInBits());
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
DAG.getConstant(Log2b, dl, MVT::i32));
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
}
}
return SDValue();
}
// Convert redundant addressing modes (e.g. scaling is redundant
// when accessing bytes).
ISD::MemIndexType
TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
SDValue Offsets) const {
bool IsScaledIndex =
(IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
bool IsSignedIndex =
(IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
// Scaling is unimportant for bytes, canonicalize to unscaled.
if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
IsScaledIndex = false;
IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
}
return IndexType;
}
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
EVT VT = Op0.getValueType();
unsigned Opcode = Node->getOpcode();
SDLoc DL(Node);
// umin(x,y) -> sub(x,usubsat(x,y))
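// E.g. umin(3, 7) = 3 - usubsat(3, 7) = 3 - 0 = 3, and
// umin(7, 3) = 7 - usubsat(7, 3) = 7 - 4 = 3.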
if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::USUBSAT, VT)) {
return DAG.getNode(ISD::SUB, DL, VT, Op0,
DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
}
// umax(x,y) -> add(x,usubsat(y,x))
if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
isOperationLegal(ISD::USUBSAT, VT)) {
return DAG.getNode(ISD::ADD, DL, VT, Op0,
DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
}
// Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
ISD::CondCode CC;
switch (Opcode) {
default: llvm_unreachable("How did we get here?");
case ISD::SMAX: CC = ISD::SETGT; break;
case ISD::SMIN: CC = ISD::SETLT; break;
case ISD::UMAX: CC = ISD::SETUGT; break;
case ISD::UMIN: CC = ISD::SETULT; break;
}
// FIXME: Should really try to split the vector in case it's legal on a
// subvector.
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
return DAG.UnrollVectorOp(Node);
SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
unsigned Opcode = Node->getOpcode();
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
SDLoc dl(Node);
assert(VT == RHS.getValueType() && "Expected operands to be the same type");
assert(VT.isInteger() && "Expected operands to be integers");
// usub.sat(a, b) -> umax(a, b) - b
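// E.g. usub.sat(3, 7) = umax(3, 7) - 7 = 0 and usub.sat(9, 7) = 9 - 7 = 2.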
if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
}
// uadd.sat(a, b) -> umin(a, ~b) + b
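// E.g. for i8, uadd.sat(200, 100) = umin(200, ~100) + 100
//                                 = umin(200, 155) + 100 = 255 (saturated).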
if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
}
unsigned OverflowOp;
switch (Opcode) {
case ISD::SADDSAT:
OverflowOp = ISD::SADDO;
break;
case ISD::UADDSAT:
OverflowOp = ISD::UADDO;
break;
case ISD::SSUBSAT:
OverflowOp = ISD::SSUBO;
break;
case ISD::USUBSAT:
OverflowOp = ISD::USUBO;
break;
default:
llvm_unreachable("Expected method to receive signed or unsigned saturation "
"addition or subtraction node.");
}
// FIXME: Should really try to split the vector in case it's legal on a
// subvector.
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
return DAG.UnrollVectorOp(Node);
unsigned BitWidth = LHS.getScalarValueSizeInBits();
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue SumDiff = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
if (Opcode == ISD::UADDSAT) {
if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
// (LHS + RHS) | OverflowMask
SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
}
// Overflow ? 0xffff.... : (LHS + RHS)
return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
}
if (Opcode == ISD::USUBSAT) {
if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
// (LHS - RHS) & ~OverflowMask
SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
}
// Overflow ? 0 : (LHS - RHS)
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
}
// SatMax -> Overflow && SumDiff < 0
// SatMin -> Overflow && SumDiff >= 0
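// E.g. for i8, 100 + 100 overflows and wraps to SumDiff = -56 (< 0), so the
// saturated result is SatMax = 127; (-100) + (-100) wraps to +56 (>= 0), so
// the result is SatMin = -128.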
APInt MinVal = APInt::getSignedMinValue(BitWidth);
APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
unsigned Opcode = Node->getOpcode();
bool IsSigned = Opcode == ISD::SSHLSAT;
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
SDLoc dl(Node);
assert((Node->getOpcode() == ISD::SSHLSAT ||
Node->getOpcode() == ISD::USHLSAT) &&
"Expected a SHLSAT opcode");
assert(VT == RHS.getValueType() && "Expected operands to be the same type");
assert(VT.isInteger() && "Expected operands to be integers");
// If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
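// E.g. for i8, ushl.sat(0x40, 2): 0x40 << 2 wraps to 0x00, and 0x00 >> 2 is
// 0x00 != 0x40, so the result saturates to 0xFF.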
unsigned BW = VT.getScalarSizeInBits();
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
SDValue Orig =
DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
SDValue SatVal;
if (IsSigned) {
SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
SatMin, SatMax, ISD::SETLT);
} else {
SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
}
Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);
return Result;
}
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
assert((Node->getOpcode() == ISD::SMULFIX ||
Node->getOpcode() == ISD::UMULFIX ||
Node->getOpcode() == ISD::SMULFIXSAT ||
Node->getOpcode() == ISD::UMULFIXSAT) &&
"Expected a fixed point multiplication opcode");
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
unsigned Scale = Node->getConstantOperandVal(2);
bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
Node->getOpcode() == ISD::UMULFIXSAT);
bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
Node->getOpcode() == ISD::SMULFIXSAT);
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
unsigned VTSize = VT.getScalarSizeInBits();
if (!Scale) {
// [us]mul.fix(a, b, 0) -> mul(a, b)
if (!Saturating) {
if (isOperationLegalOrCustom(ISD::MUL, VT))
return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
} else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
SDValue Result =
DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
SDValue Zero = DAG.getConstant(0, dl, VT);
APInt MinVal = APInt::getSignedMinValue(VTSize);
APInt MaxVal = APInt::getSignedMaxValue(VTSize);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ // Xor the inputs, if resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
return DAG.getSelect(dl, VT, Overflow, Result, Product);
} else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
SDValue Result =
DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
APInt MaxVal = APInt::getMaxValue(VTSize);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
}
}
assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
"Expected scale to be less than the number of bits if signed or at "
"most the number of bits if unsigned.");
assert(LHS.getValueType() == RHS.getValueType() &&
"Expected both operands to be the same type");
// Get the upper and lower bits of the result.
SDValue Lo, Hi;
unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
if (isOperationLegalOrCustom(LoHiOp, VT)) {
SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
Lo = Result.getValue(0);
Hi = Result.getValue(1);
} else if (isOperationLegalOrCustom(HiOp, VT)) {
Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
} else if (VT.isVector()) {
return SDValue();
} else {
report_fatal_error("Unable to expand fixed point multiplication.");
}
if (Scale == VTSize)
// Result is just the top half since we'd be shifting by the width of the
// operand. Overflow impossible so this works for both UMULFIX and
// UMULFIXSAT.
return Hi;
// The result will need to be shifted right by the scale since both operands
// are scaled. The result is given to us in 2 halves, so we only want part of
// both in the result.
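// E.g. for an 8-bit type with Scale = 4 (Q4.4), 1.5 * 2.5 is 0x18 * 0x28;
// the full 16-bit product is 0x03C0, and funnel-shifting {Hi = 0x03,
// Lo = 0xC0} right by 4 yields 0x3C, i.e. 3.75 in Q4.4.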
EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
DAG.getConstant(Scale, dl, ShiftTy));
if (!Saturating)
return Result;
if (!Signed) {
// Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
// widened multiplication) aren't all zeroes.
// Saturate to max if ((Hi >> Scale) != 0),
// which is the same as if (Hi > ((1 << Scale) - 1))
APInt MaxVal = APInt::getMaxValue(VTSize);
SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
dl, VT);
Result = DAG.getSelectCC(dl, Hi, LowMask,
DAG.getConstant(MaxVal, dl, VT), Result,
ISD::SETUGT);
return Result;
}
// Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
// widened multiplication) aren't all ones or all zeroes.
SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
if (Scale == 0) {
SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
DAG.getConstant(VTSize - 1, dl, ShiftTy));
SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
// Saturate to SatMin if the wide product is negative, and to SatMax if the
// wide product is positive ...
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
ISD::SETLT);
// ... but only if we overflowed.
return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
}
// We handled Scale==0 above, so all the bits to examine are in Hi.
// Saturate to max if ((Hi >> (Scale - 1)) > 0),
// which is the same as if (Hi > (1 << (Scale - 1)) - 1)
SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
dl, VT);
Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
// Saturate to min if ((Hi >> (Scale - 1)) < -1),
// which is the same as if (Hi < (-1 << (Scale - 1)))
SDValue HighMask =
DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
dl, VT);
Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
return Result;
}
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
SDValue LHS, SDValue RHS,
unsigned Scale, SelectionDAG &DAG) const {
assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
"Expected a fixed point division opcode");
EVT VT = LHS.getValueType();
bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// If there is enough room in the type to upscale the LHS or downscale the
// RHS before the division, we can perform it in this type without having to
// resize. For signed operations, the LHS headroom is the number of
// redundant sign bits, and for unsigned ones it is the number of zeroes.
// The headroom for the RHS is the number of trailing zeroes.
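// E.g. for an 8-bit udiv.fix with Scale = 4 (Q4.4), 3.0 / 1.5 is 0x30 / 0x18.
// Here LHSLead = 2 and RHSTrail = 3, so we can shift the LHS up by 2 and the
// RHS down by 2: 0xC0 / 0x06 = 0x20, i.e. 2.0 in Q4.4.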
unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
: DAG.computeKnownBits(LHS).countMinLeadingZeros();
unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
// For signed saturating operations, we need to be able to detect true integer
// division overflow; that is, when you have MIN / -EPS. However, this
// is undefined behavior and if we emit divisions that could take such
// values it may cause undesired behavior (arithmetic exceptions on x86, for
// example).
// Avoid this by requiring an extra bit so that we never get this case.
// FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
// signed saturating division, we need to emit a whopping 32-bit division.
if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
return SDValue();
unsigned LHSShift = std::min(LHSLead, Scale);
unsigned RHSShift = Scale - LHSShift;
// At this point, we know that if we shift the LHS up by LHSShift and the
// RHS down by RHSShift, we can emit a regular division with a final scaling
// factor of Scale.
EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
if (LHSShift)
LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
DAG.getConstant(LHSShift, dl, ShiftTy));
if (RHSShift)
RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
DAG.getConstant(RHSShift, dl, ShiftTy));
SDValue Quot;
if (Signed) {
// For signed operations, if the resulting quotient is negative and the
// remainder is nonzero, subtract 1 from the quotient to round towards
// negative infinity.
SDValue Rem;
// FIXME: Ideally we would always produce an SDIVREM here, but if the
// type isn't legal, SDIVREM cannot be expanded. There is no reason why
// we couldn't just form a libcall, but the type legalizer doesn't do it.
if (isTypeLegal(VT) &&
isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
Quot = DAG.getNode(ISD::SDIVREM, dl,
DAG.getVTList(VT, VT),
LHS, RHS);
Rem = Quot.getValue(1);
Quot = Quot.getValue(0);
} else {
Quot = DAG.getNode(ISD::SDIV, dl, VT,
LHS, RHS);
Rem = DAG.getNode(ISD::SREM, dl, VT,
LHS, RHS);
}
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
DAG.getConstant(1, dl, VT));
Quot = DAG.getSelect(dl, VT,
DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
Sub1, Quot);
} else
Quot = DAG.getNode(ISD::UDIV, dl, VT,
LHS, RHS);
return Quot;
}
void TargetLowering::expandUADDSUBO(
SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
bool IsAdd = Node->getOpcode() == ISD::UADDO;
// If ADD/SUBCARRY is legal, use that instead.
unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
{ LHS, RHS, CarryIn });
Result = SDValue(NodeCarry.getNode(), 0);
Overflow = SDValue(NodeCarry.getNode(), 1);
return;
}
Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
LHS.getValueType(), LHS, RHS);
EVT ResultType = Node->getValueType(1);
EVT SetCCType = getSetCCResultType(
DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}
void TargetLowering::expandSADDSUBO(
SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
bool IsAdd = Node->getOpcode() == ISD::SADDO;
Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
LHS.getValueType(), LHS, RHS);
EVT ResultType = Node->getValueType(1);
EVT OType = getSetCCResultType(
DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
return;
}
SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
// For an addition, the result should be less than one of the operands (LHS)
// if and only if the other operand (RHS) is negative, otherwise there will
// be overflow.
// For a subtraction, the result should be less than one of the operands
// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
// otherwise there will be overflow.
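// E.g. for an i8 saddo, 100 + 50 wraps to -106: the result is less than the
// LHS while the RHS is non-negative, so the two setcc results differ and the
// XOR below reports overflow.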
SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
SDValue ConditionRHS =
DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
Overflow = DAG.getBoolExtOrTrunc(
DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
ResultType, ResultType);
}
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
SDValue &Overflow, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
bool isSigned = Node->getOpcode() == ISD::SMULO;
// For power-of-two multiplications we can use a simpler shift expansion.
if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
const APInt &C = RHSC->getAPIntValue();
// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
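// E.g. for an i8 umulo by 4 (S = 2): X = 0x50 gives X << 2 = 0x40 after
// wrapping, and (0x40 >> 2) = 0x10 != 0x50, so overflow is reported.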
if (C.isPowerOf2()) {
// smulo(x, signed_min) is the same as umulo(x, signed_min).
bool UseArithShift = isSigned && !C.isMinSignedValue();
EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
Overflow = DAG.getSetCC(dl, SetCCVT,
DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
dl, VT, Result, ShiftAmt),
LHS, ISD::SETNE);
return true;
}
}
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
if (VT.isVector())
WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
VT.getVectorNumElements());
SDValue BottomHalf;
SDValue TopHalf;
static const unsigned Ops[2][3] =
{ { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
{ ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
} else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
RHS);
TopHalf = BottomHalf.getValue(1);
} else if (isTypeLegal(WideVT)) {
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
getShiftAmountTy(WideVT, DAG.getDataLayout()));
TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
} else {
if (VT.isVector())
return false;
// We can fall back to a libcall with an illegal type for the MUL if we
// have a libcall big enough.
// Also, we can fall back to a division in some cases, but that's a big
// performance hit in the general case.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (WideVT == MVT::i16)
LC = RTLIB::MUL_I16;
else if (WideVT == MVT::i32)
LC = RTLIB::MUL_I32;
else if (WideVT == MVT::i64)
LC = RTLIB::MUL_I64;
else if (WideVT == MVT::i128)
LC = RTLIB::MUL_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
SDValue HiLHS;
SDValue HiRHS;
if (isSigned) {
// The high part is obtained by SRA'ing all but one of the bits of the low
// part.
unsigned LoSize = VT.getFixedSizeInBits();
HiLHS =
DAG.getNode(ISD::SRA, dl, VT, LHS,
DAG.getConstant(LoSize - 1, dl,
getPointerTy(DAG.getDataLayout())));
HiRHS =
DAG.getNode(ISD::SRA, dl, VT, RHS,
DAG.getConstant(LoSize - 1, dl,
getPointerTy(DAG.getDataLayout())));
} else {
HiLHS = DAG.getConstant(0, dl, VT);
HiRHS = DAG.getConstant(0, dl, VT);
}
// Here we're passing the 2 arguments explicitly as 4 arguments that are
// pre-lowered to the correct types. This all depends upon WideVT not being a
// legal type for the architecture and thus having to be split into two
// arguments.
SDValue Ret;
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(isSigned);
CallOptions.setIsPostTypeLegalization(true);
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
// Halves of WideVT are packed into registers in different order
// depending on platform endianness. This is usually handled by
// the C calling convention, but we can't defer to it in
// the legalizer.
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
} else {
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
}
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
"Ret value is a collection of constituent nodes holding result.");
if (DAG.getDataLayout().isLittleEndian()) {
// Same as above.
BottomHalf = Ret.getOperand(0);
TopHalf = Ret.getOperand(1);
} else {
BottomHalf = Ret.getOperand(1);
TopHalf = Ret.getOperand(0);
}
}
Result = BottomHalf;
if (isSigned) {
SDValue ShiftAmt = DAG.getConstant(
VT.getScalarSizeInBits() - 1, dl,
getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
} else {
Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
DAG.getConstant(0, dl, VT), ISD::SETNE);
}
// Truncate the result if SetCC returns a larger type than needed.
EVT RType = Node->getValueType(1);
if (RType.bitsLT(Overflow.getValueType()))
Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
"Unexpected result type for S/UMULO legalization");
return true;
}
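// A rough worked example of the overflow check above, purely illustrative
// and assuming 8-bit operands: for SMULO with LHS = 100 and RHS = 3 the full
// product is 300 = 0x12C, so BottomHalf is 0x2C and TopHalf is 0x01. The
// sign bits of BottomHalf (SRA by 7) give 0x00, which differs from TopHalf,
// so Overflow is set, as expected since 300 does not fit in a signed i8.
// For 10 * 3 = 30 the TopHalf equals the sign extension of BottomHalf and
// no overflow is reported.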
SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
SDValue Op = Node->getOperand(0);
EVT VT = Op.getValueType();
if (VT.isScalableVector())
report_fatal_error(
"Expanding reductions for scalable vectors is undefined.");
// Try to use a shuffle reduction for power of two vectors.
if (VT.isPow2VectorType()) {
while (VT.getVectorNumElements() > 1) {
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
break;
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
VT = HalfVT;
}
}
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
SDValue Res = Ops[0];
for (unsigned i = 1; i < NumElts; i++)
Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
// Result type may be wider than element type.
if (EltVT != Node->getValueType(0))
Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
return Res;
}
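// Illustrative sketch of the shuffle path above, not part of the lowering
// itself: a vecreduce.add over <8 x i32> is first split into two <4 x i32>
// halves that are added, then into two <2 x i32> halves, and whatever
// remains once the halved binary op stops being legal is extracted and
// folded with scalar adds, giving roughly log2(N) vector ops plus a short
// scalar tail.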
SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue AccOp = Node->getOperand(0);
SDValue VecOp = Node->getOperand(1);
SDNodeFlags Flags = Node->getFlags();
EVT VT = VecOp.getValueType();
EVT EltVT = VT.getVectorElementType();
if (VT.isScalableVector())
report_fatal_error(
"Expanding reductions for scalable vectors is undefined.");
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
SDValue Res = AccOp;
for (unsigned i = 0; i < NumElts; i++)
Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
return Res;
}
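// Roughly, for an ordered (sequential) FADD reduction over <4 x float> with
// accumulator Acc, the expansion above computes
//   (((Acc + v[0]) + v[1]) + v[2]) + v[3]
// strictly left to right, which preserves the FP evaluation order that the
// shuffle/tree expansion used for the unordered reductions above would not.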
bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
SDLoc dl(Node);
bool isSigned = Node->getOpcode() == ISD::SREM;
unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
SDValue Dividend = Node->getOperand(0);
SDValue Divisor = Node->getOperand(1);
if (isOperationLegalOrCustom(DivRemOpc, VT)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
return true;
}
if (isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
return true;
}
return false;
}
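// Worked example of the X-X/Y*Y fallback above, purely illustrative: for
// X = 17 and Y = 5 the DIV path computes 17/5 = 3, then 3*5 = 15, and
// 17 - 15 = 2, which matches 17 % 5. For SREM the same identity holds with
// truncating signed division, e.g. -17 srem 5 = -17 - (-3)*5 = -2.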
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
SelectionDAG &DAG) const {
bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
SDLoc dl(SDValue(Node, 0));
SDValue Src = Node->getOperand(0);
// DstVT is the result type, while SatVT is the size to which we saturate
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
unsigned SatWidth = SatVT.getScalarSizeInBits();
unsigned DstWidth = DstVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth &&
"Expected saturation width smaller than result width");
// Determine minimum and maximum integer values and their corresponding
// floating-point values.
APInt MinInt, MaxInt;
if (IsSigned) {
MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
} else {
MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
}
// We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
// libcall emission cannot handle this. Large result types will fail.
if (SrcVT == MVT::f16) {
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
SrcVT = Src.getValueType();
}
APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
APFloat::opStatus MinStatus =
MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
APFloat::opStatus MaxStatus =
MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
!(MaxStatus & APFloat::opStatus::opInexact);
SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
// If the integer bounds are exactly representable as floats and min/max are
// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
// of comparisons and selects.
bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
isOperationLegal(ISD::FMAXNUM, SrcVT);
if (AreExactFloatBounds && MinMaxLegal) {
SDValue Clamped = Src;
// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
// Clamp by MaxFloat from above. NaN cannot occur.
Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
dl, DstVT, Clamped);
// In the unsigned case we're done, because we mapped NaN to MinFloat,
// which will cast to zero.
if (!IsSigned)
return FpToInt;
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
ISD::CondCode::SETUO);
}
SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
// Result of direct conversion. The assumption here is that the operation is
// non-trapping and it's fine to apply it to an out-of-range value if we
// select it away later.
SDValue FpToInt =
DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
SDValue Select = FpToInt;
// If Src ULT MinFloat, select MinInt. In particular, this also selects
// MinInt if Src is NaN.
Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
ISD::CondCode::SETULT);
// If Src OGT MaxFloat, select MaxInt.
Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
ISD::CondCode::SETOGT);
// In the unsigned case we are done, because we mapped NaN to MinInt, which
// is already zero.
if (!IsSigned)
return Select;
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
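// Rough illustration of the saturating behaviour implemented above, for a
// signed i8 saturation width and a float source: 300.0 clamps to 127,
// -1000.0 clamps to -128, in-range values convert normally, and NaN ends up
// as 0 via the final SETUO select (in the unsigned variant NaN instead maps
// to the minimum, which is already 0).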
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
SelectionDAG &DAG) const {
assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
assert(Node->getValueType(0).isScalableVector() &&
"Fixed length vector types expected to use SHUFFLE_VECTOR!");
EVT VT = Node->getValueType(0);
SDValue V1 = Node->getOperand(0);
SDValue V2 = Node->getOperand(1);
int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
SDLoc DL(Node);
// Expand through memory as follows:
// Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
// Store V1, Ptr
// Store V2, Ptr + sizeof(V1)
// If (Imm < 0)
// TrailingElts = -Imm
// Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
// else
// Ptr = Ptr + (Imm * sizeof(VT.Elt))
// Res = Load Ptr
Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
VT.getVectorElementCount() * 2);
SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
EVT PtrVT = StackPtr.getValueType();
auto &MF = DAG.getMachineFunction();
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
// Store the lo part of CONCAT_VECTORS(V1, V2)
SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
// Store the hi part of CONCAT_VECTORS(V1, V2)
SDValue OffsetToV2 = DAG.getVScale(
DL, PtrVT,
APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
if (Imm >= 0) {
// Load back the required element. getVectorElementPointer takes care of
// clamping the index if it's out-of-bounds.
StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
// Load the spliced result
return DAG.getLoad(VT, DL, StoreV2, StackPtr,
MachinePointerInfo::getUnknownStack(MF));
}
uint64_t TrailingElts = -Imm;
// NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
SDValue TrailingBytes =
DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
if (TrailingElts > VT.getVectorMinNumElements()) {
SDValue VLBytes = DAG.getVScale(
DL, PtrVT,
APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
}
// Calculate the start address of the spliced result.
StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
// Load the spliced result
return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
MachinePointerInfo::getUnknownStack(MF));
}
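// Sketch of the memory-based expansion above for a scalable
// <vscale x 4 x i32> splice with Imm = 1: both inputs are stored back to
// back in a stack slot sized for their concatenation, and the result is a
// single load starting at element 1 of V1, yielding all but the first
// element of V1 followed by the first element of V2. With a negative Imm
// the load instead starts |Imm| elements before the end of V1.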
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC, bool &NeedInvert,
const SDLoc &dl, SDValue &Chain,
bool IsSignaling) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default:
llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
case TargetLowering::Expand: {
ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
std::swap(LHS, RHS);
CC = DAG.getCondCode(InvCC);
return true;
}
// Swapping operands didn't work. Try inverting the condition.
bool NeedSwap = false;
InvCC = getSetCCInverse(CCCode, OpVT);
if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
// If inverting the condition is not enough, try swapping operands
// on top of it.
InvCC = ISD::getSetCCSwappedOperands(InvCC);
NeedSwap = true;
}
if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
CC = DAG.getCondCode(InvCC);
NeedInvert = true;
if (NeedSwap)
std::swap(LHS, RHS);
return true;
}
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default:
llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETUO:
if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
CC1 = ISD::SETUNE;
CC2 = ISD::SETUNE;
Opc = ISD::OR;
break;
}
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
NeedInvert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETO is expanded, SETOEQ must be legal!");
CC1 = ISD::SETOEQ;
CC2 = ISD::SETOEQ;
Opc = ISD::AND;
break;
case ISD::SETONE:
case ISD::SETUEQ:
// If the SETUO or SETO CC isn't legal, we might be able to use
// SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
// of SETOGT/SETOLT to be legal, the other can be emulated by swapping
// the operands.
CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
if (!TLI.isCondCodeLegal(CC2, OpVT) &&
(TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
CC1 = ISD::SETOGT;
CC2 = ISD::SETOLT;
Opc = ISD::OR;
NeedInvert = ((unsigned)CCCode & 0x8U);
break;
}
LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUNE:
case ISD::SETUGT:
case ISD::SETUGE:
case ISD::SETULT:
case ISD::SETULE:
// If we are floating point, assign and break, otherwise fall through.
if (!OpVT.isInteger()) {
// We can use the 4th bit to tell if we are the unordered
// or ordered version of the opcode.
CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
break;
}
// Fallthrough if we are unsigned integer.
LLVM_FALLTHROUGH;
case ISD::SETLE:
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETLT:
case ISD::SETNE:
case ISD::SETEQ:
// If all combinations of inverting the condition and swapping operands
// didn't work then we have no means to expand the condition.
llvm_unreachable("Don't know how to expand this condition!");
}
SDValue SetCC1, SetCC2;
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
// If we aren't the ordered or unordered operation,
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
} else {
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
}
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
SetCC2.getValue(1));
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
return true;
}
}
return false;
}
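// For reference, two of the expansions the code above can produce,
// illustrative only: SETO becomes (LHS SETOEQ LHS) AND (RHS SETOEQ RHS),
// i.e. "neither operand is NaN"; SETUEQ, when SETUO is not legal, becomes
// the inverted OR of (LHS SETOGT RHS) and (LHS SETOLT RHS), since
// "unordered or equal" is the complement of "ordered and not equal".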
diff --git a/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
index 97d6f8cd8075..efdbc49cdf47 100644
--- a/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
@@ -1,605 +1,621 @@
//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the LLVM module linker.
//
//===----------------------------------------------------------------------===//
#include "LinkDiagnosticInfo.h"
#include "llvm-c/Linker.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Support/Error.h"
using namespace llvm;
namespace {
/// This is an implementation class for the LinkModules function, which is the
/// entrypoint for this file.
class ModuleLinker {
IRMover &Mover;
std::unique_ptr<Module> SrcM;
SetVector<GlobalValue *> ValuesToLink;
/// For symbol clashes, prefer those from Src.
unsigned Flags;
/// List of global value names that should be internalized.
StringSet<> Internalize;
/// Function that will perform the actual internalization. The reason for a
/// callback is that the linker cannot call internalizeModule without
/// creating a circular dependency between IPO and the linker.
std::function<void(Module &, const StringSet<> &)> InternalizeCallback;
/// Used as the callback for lazy linking.
/// The mover has just hit GV and we have to decide if it, and other members
/// of the same comdat, should be linked. Every member to be linked is passed
/// to Add.
void addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add);
bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
const GlobalValue &Src);
/// Should we have mover and linker error diag info?
bool emitError(const Twine &Message) {
SrcM->getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
return true;
}
bool getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar);
bool computeResultingSelectionKind(StringRef ComdatName,
Comdat::SelectionKind Src,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
bool &LinkFromSrc);
std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>>
ComdatsChosen;
bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
bool &LinkFromSrc);
// Keep track of the lazily linked global members of each comdat in source.
DenseMap<const Comdat *, std::vector<GlobalValue *>> LazyComdatMembers;
/// Given a global in the source module, return the global in the
/// destination module that is being linked to, if any.
GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
Module &DstM = Mover.getModule();
// If the source has no name it can't link. If it has local linkage,
// there is no name match-up going on.
if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
return nullptr;
// Otherwise see if we have a match in the destination module's symtab.
GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
if (!DGV)
return nullptr;
// If we found a global with the same name in the dest module, but it has
// internal linkage, we are really not doing any linkage here.
if (DGV->hasLocalLinkage())
return nullptr;
// Otherwise, we do in fact link to the destination global.
return DGV;
}
/// Drop GV if it is a member of a comdat that we are dropping.
/// This can happen with COFF's largest selection kind.
void dropReplacedComdat(GlobalValue &GV,
const DenseSet<const Comdat *> &ReplacedDstComdats);
bool linkIfNeeded(GlobalValue &GV);
public:
ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
std::function<void(Module &, const StringSet<> &)>
InternalizeCallback = {})
: Mover(Mover), SrcM(std::move(SrcM)), Flags(Flags),
InternalizeCallback(std::move(InternalizeCallback)) {}
bool run();
};
}
static GlobalValue::VisibilityTypes
getMinVisibility(GlobalValue::VisibilityTypes A,
GlobalValue::VisibilityTypes B) {
if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility)
return GlobalValue::HiddenVisibility;
if (A == GlobalValue::ProtectedVisibility ||
B == GlobalValue::ProtectedVisibility)
return GlobalValue::ProtectedVisibility;
return GlobalValue::DefaultVisibility;
}
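// In other words, visibility is ordered hidden < protected < default and
// the most restrictive of the two inputs survives: protected merged with
// hidden yields hidden, default merged with protected yields protected, and
// only default merged with default stays default.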
bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar) {
const GlobalValue *GVal = M.getNamedValue(ComdatName);
if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
GVal = GA->getBaseObject();
if (!GVal)
// We cannot resolve the size of the aliasee yet.
return emitError("Linking COMDATs named '" + ComdatName +
"': COMDAT key involves incomputable alias size.");
}
GVar = dyn_cast_or_null<GlobalVariable>(GVal);
if (!GVar)
return emitError(
"Linking COMDATs named '" + ComdatName +
"': GlobalVariable required for data dependent selection!");
return false;
}
bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
Comdat::SelectionKind Src,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
bool &LinkFromSrc) {
Module &DstM = Mover.getModule();
// The ability to mix Comdat::SelectionKind::Any with
// Comdat::SelectionKind::Largest is a behavior that comes from COFF.
bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any ||
Dst == Comdat::SelectionKind::Largest;
bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any ||
Src == Comdat::SelectionKind::Largest;
if (DstAnyOrLargest && SrcAnyOrLargest) {
if (Dst == Comdat::SelectionKind::Largest ||
Src == Comdat::SelectionKind::Largest)
Result = Comdat::SelectionKind::Largest;
else
Result = Comdat::SelectionKind::Any;
} else if (Src == Dst) {
Result = Dst;
} else {
return emitError("Linking COMDATs named '" + ComdatName +
"': invalid selection kinds!");
}
switch (Result) {
case Comdat::SelectionKind::Any:
// Go with Dst.
LinkFromSrc = false;
break;
- case Comdat::SelectionKind::NoDeduplicate:
- return emitError("Linking COMDATs named '" + ComdatName +
- "': nodeduplicate has been violated!");
+ case Comdat::SelectionKind::NoDeduplicate: {
+ const GlobalVariable *DstGV;
+ const GlobalVariable *SrcGV;
+ if (getComdatLeader(DstM, ComdatName, DstGV) ||
+ getComdatLeader(*SrcM, ComdatName, SrcGV))
+ return true;
+
+ if (SrcGV->isWeakForLinker()) {
+ // Go with Dst.
+ LinkFromSrc = false;
+ } else if (DstGV->isWeakForLinker()) {
+ // Go with Src.
+ LinkFromSrc = true;
+ } else {
+ return emitError("Linking COMDATs named '" + ComdatName +
+ "': nodeduplicate has been violated!");
+ }
+ break;
+ }
case Comdat::SelectionKind::ExactMatch:
case Comdat::SelectionKind::Largest:
case Comdat::SelectionKind::SameSize: {
const GlobalVariable *DstGV;
const GlobalVariable *SrcGV;
if (getComdatLeader(DstM, ComdatName, DstGV) ||
getComdatLeader(*SrcM, ComdatName, SrcGV))
return true;
const DataLayout &DstDL = DstM.getDataLayout();
const DataLayout &SrcDL = SrcM->getDataLayout();
uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getValueType());
uint64_t SrcSize = SrcDL.getTypeAllocSize(SrcGV->getValueType());
if (Result == Comdat::SelectionKind::ExactMatch) {
if (SrcGV->getInitializer() != DstGV->getInitializer())
return emitError("Linking COMDATs named '" + ComdatName +
"': ExactMatch violated!");
LinkFromSrc = false;
} else if (Result == Comdat::SelectionKind::Largest) {
LinkFromSrc = SrcSize > DstSize;
} else if (Result == Comdat::SelectionKind::SameSize) {
if (SrcSize != DstSize)
return emitError("Linking COMDATs named '" + ComdatName +
"': SameSize violated!");
LinkFromSrc = false;
} else {
llvm_unreachable("unknown selection kind");
}
break;
}
}
return false;
}
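// A concrete example of the size-based kinds handled above: when both
// modules define a COMDAT with the Largest selection kind, the leader
// variable of each side is looked up and the copy whose value type has the
// larger alloc size wins (a tie keeps the destination copy); SameSize
// instead requires the two sizes to match exactly and otherwise reports an
// error.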
bool ModuleLinker::getComdatResult(const Comdat *SrcC,
Comdat::SelectionKind &Result,
bool &LinkFromSrc) {
Module &DstM = Mover.getModule();
Comdat::SelectionKind SSK = SrcC->getSelectionKind();
StringRef ComdatName = SrcC->getName();
Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName);
if (DstCI == ComdatSymTab.end()) {
// Use the comdat if it is only available in one of the modules.
LinkFromSrc = true;
Result = SSK;
return false;
}
const Comdat *DstC = &DstCI->second;
Comdat::SelectionKind DSK = DstC->getSelectionKind();
return computeResultingSelectionKind(ComdatName, SSK, DSK, Result,
LinkFromSrc);
}
bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
const GlobalValue &Dest,
const GlobalValue &Src) {
// Should we unconditionally use the Src?
if (shouldOverrideFromSrc()) {
LinkFromSrc = true;
return false;
}
// We always have to add Src if it has appending linkage.
if (Src.hasAppendingLinkage() || Dest.hasAppendingLinkage()) {
LinkFromSrc = true;
return false;
}
bool SrcIsDeclaration = Src.isDeclarationForLinker();
bool DestIsDeclaration = Dest.isDeclarationForLinker();
if (SrcIsDeclaration) {
// If Src is external or if both Src & Dest are external, just link the
// external globals; we aren't adding anything.
if (Src.hasDLLImportStorageClass()) {
// If one of the GVs is marked as DLLImport, the result should be dllimport'ed.
LinkFromSrc = DestIsDeclaration;
return false;
}
// If the Dest is weak, use the source linkage.
if (Dest.hasExternalWeakLinkage()) {
LinkFromSrc = true;
return false;
}
// Link an available_externally over a declaration.
LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration();
return false;
}
if (DestIsDeclaration) {
// If Dest is external but Src is not:
LinkFromSrc = true;
return false;
}
if (Src.hasCommonLinkage()) {
if (Dest.hasLinkOnceLinkage() || Dest.hasWeakLinkage()) {
LinkFromSrc = true;
return false;
}
if (!Dest.hasCommonLinkage()) {
LinkFromSrc = false;
return false;
}
const DataLayout &DL = Dest.getParent()->getDataLayout();
uint64_t DestSize = DL.getTypeAllocSize(Dest.getValueType());
uint64_t SrcSize = DL.getTypeAllocSize(Src.getValueType());
LinkFromSrc = SrcSize > DestSize;
return false;
}
if (Src.isWeakForLinker()) {
assert(!Dest.hasExternalWeakLinkage());
assert(!Dest.hasAvailableExternallyLinkage());
if (Dest.hasLinkOnceLinkage() && Src.hasWeakLinkage()) {
LinkFromSrc = true;
return false;
}
LinkFromSrc = false;
return false;
}
if (Dest.isWeakForLinker()) {
assert(Src.hasExternalLinkage());
LinkFromSrc = true;
return false;
}
assert(!Src.hasExternalWeakLinkage());
assert(!Dest.hasExternalWeakLinkage());
assert(Dest.hasExternalLinkage() && Src.hasExternalLinkage() &&
"Unexpected linkage type!");
return emitError("Linking globals named '" + Src.getName() +
"': symbol multiply defined!");
}
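// A short illustration of the precedence implemented above: a weak
// definition in Src never overrides a strong definition already in Dest;
// when both symbols are common, the larger of the two is kept; a
// declaration in Dest is always replaced by a definition from Src; and two
// plain external definitions of the same name are reported as a multiply
// defined symbol.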
bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
GlobalValue *DGV = getLinkedToGlobal(&GV);
if (shouldLinkOnlyNeeded()) {
// Always import variables with appending linkage.
if (!GV.hasAppendingLinkage()) {
// Don't import globals unless they are referenced by the destination
// module.
if (!DGV)
return false;
// Don't import globals that are already defined in the destination module
if (!DGV->isDeclaration())
return false;
}
}
if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) {
auto *DGVar = dyn_cast<GlobalVariable>(DGV);
auto *SGVar = dyn_cast<GlobalVariable>(&GV);
if (DGVar && SGVar) {
if (DGVar->isDeclaration() && SGVar->isDeclaration() &&
(!DGVar->isConstant() || !SGVar->isConstant())) {
DGVar->setConstant(false);
SGVar->setConstant(false);
}
if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) {
MaybeAlign Align(
std::max(DGVar->getAlignment(), SGVar->getAlignment()));
SGVar->setAlignment(Align);
DGVar->setAlignment(Align);
}
}
GlobalValue::VisibilityTypes Visibility =
getMinVisibility(DGV->getVisibility(), GV.getVisibility());
DGV->setVisibility(Visibility);
GV.setVisibility(Visibility);
GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::getMinUnnamedAddr(
DGV->getUnnamedAddr(), GV.getUnnamedAddr());
DGV->setUnnamedAddr(UnnamedAddr);
GV.setUnnamedAddr(UnnamedAddr);
}
if (!DGV && !shouldOverrideFromSrc() &&
(GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() ||
GV.hasAvailableExternallyLinkage()))
return false;
if (GV.isDeclaration())
return false;
if (const Comdat *SC = GV.getComdat()) {
bool LinkFromSrc;
Comdat::SelectionKind SK;
std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
if (!LinkFromSrc)
return false;
}
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV))
return true;
if (LinkFromSrc)
ValuesToLink.insert(&GV);
return false;
}
void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) {
// Add these to the internalize list
if (!GV.hasLinkOnceLinkage() && !GV.hasAvailableExternallyLinkage() &&
!shouldLinkOnlyNeeded())
return;
if (InternalizeCallback)
Internalize.insert(GV.getName());
Add(GV);
const Comdat *SC = GV.getComdat();
if (!SC)
return;
for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
GlobalValue *DGV = getLinkedToGlobal(GV2);
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
return;
if (!LinkFromSrc)
continue;
if (InternalizeCallback)
Internalize.insert(GV2->getName());
Add(*GV2);
}
}
void ModuleLinker::dropReplacedComdat(
GlobalValue &GV, const DenseSet<const Comdat *> &ReplacedDstComdats) {
Comdat *C = GV.getComdat();
if (!C)
return;
if (!ReplacedDstComdats.count(C))
return;
if (GV.use_empty()) {
GV.eraseFromParent();
return;
}
if (auto *F = dyn_cast<Function>(&GV)) {
F->deleteBody();
} else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
Var->setInitializer(nullptr);
} else {
auto &Alias = cast<GlobalAlias>(GV);
Module &M = *Alias.getParent();
GlobalValue *Declaration;
if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
} else {
Declaration =
new GlobalVariable(M, Alias.getValueType(), /*isConstant*/ false,
GlobalValue::ExternalLinkage,
/*Initializer*/ nullptr);
}
Declaration->takeName(&Alias);
Alias.replaceAllUsesWith(Declaration);
Alias.eraseFromParent();
}
}
bool ModuleLinker::run() {
Module &DstM = Mover.getModule();
DenseSet<const Comdat *> ReplacedDstComdats;
for (const auto &SMEC : SrcM->getComdatSymbolTable()) {
const Comdat &C = SMEC.getValue();
if (ComdatsChosen.count(&C))
continue;
Comdat::SelectionKind SK;
bool LinkFromSrc;
if (getComdatResult(&C, SK, LinkFromSrc))
return true;
ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
if (!LinkFromSrc)
continue;
Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(C.getName());
if (DstCI == ComdatSymTab.end())
continue;
// The source comdat is replacing the dest one.
const Comdat *DstC = &DstCI->second;
ReplacedDstComdats.insert(DstC);
}
// Aliases have to go first, since we are not able to find their comdats
// otherwise.
for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) {
GlobalAlias &GV = *I++;
dropReplacedComdat(GV, ReplacedDstComdats);
}
for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) {
GlobalVariable &GV = *I++;
dropReplacedComdat(GV, ReplacedDstComdats);
}
for (auto I = DstM.begin(), E = DstM.end(); I != E;) {
Function &GV = *I++;
dropReplacedComdat(GV, ReplacedDstComdats);
}
for (GlobalVariable &GV : SrcM->globals())
if (GV.hasLinkOnceLinkage())
if (const Comdat *SC = GV.getComdat())
LazyComdatMembers[SC].push_back(&GV);
for (Function &SF : *SrcM)
if (SF.hasLinkOnceLinkage())
if (const Comdat *SC = SF.getComdat())
LazyComdatMembers[SC].push_back(&SF);
for (GlobalAlias &GA : SrcM->aliases())
if (GA.hasLinkOnceLinkage())
if (const Comdat *SC = GA.getComdat())
LazyComdatMembers[SC].push_back(&GA);
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
for (GlobalVariable &GV : SrcM->globals())
if (linkIfNeeded(GV))
return true;
for (Function &SF : *SrcM)
if (linkIfNeeded(SF))
return true;
for (GlobalAlias &GA : SrcM->aliases())
if (linkIfNeeded(GA))
return true;
for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
GlobalValue *GV = ValuesToLink[I];
const Comdat *SC = GV->getComdat();
if (!SC)
continue;
for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
GlobalValue *DGV = getLinkedToGlobal(GV2);
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
return true;
if (LinkFromSrc)
ValuesToLink.insert(GV2);
}
}
if (InternalizeCallback) {
for (GlobalValue *GV : ValuesToLink)
Internalize.insert(GV->getName());
}
// FIXME: Propagate Errors through to the caller instead of emitting
// diagnostics.
bool HasErrors = false;
if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(),
[this](GlobalValue &GV, IRMover::ValueAdder Add) {
addLazyFor(GV, Add);
},
/* IsPerformingImport */ false)) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
HasErrors = true;
});
}
if (HasErrors)
return true;
if (InternalizeCallback)
InternalizeCallback(DstM, Internalize);
return false;
}
Linker::Linker(Module &M) : Mover(M) {}
bool Linker::linkInModule(
std::unique_ptr<Module> Src, unsigned Flags,
std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
ModuleLinker ModLinker(Mover, std::move(Src), Flags,
std::move(InternalizeCallback));
return ModLinker.run();
}
//===----------------------------------------------------------------------===//
// LinkModules entrypoint.
//===----------------------------------------------------------------------===//
/// This function links two modules together, with the resulting Dest module
/// modified to be the composite of the two input modules. If an error occurs,
/// true is returned and ErrorMsg (if not null) is set to indicate the problem.
/// Upon failure, the Dest module could be in a modified state, and shouldn't be
/// relied on to be consistent.
bool Linker::linkModules(
Module &Dest, std::unique_ptr<Module> Src, unsigned Flags,
std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
Linker L(Dest);
return L.linkInModule(std::move(Src), Flags, std::move(InternalizeCallback));
}
//===----------------------------------------------------------------------===//
// C API.
//===----------------------------------------------------------------------===//
LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) {
Module *D = unwrap(Dest);
std::unique_ptr<Module> M(unwrap(Src));
return Linker::linkModules(*D, std::move(M));
}
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
index f52dbc604a9f..21c06e2dec26 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
@@ -1,3229 +1,3232 @@
//===- Parsing, selection, and construction of pass pipelines -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file provides the implementation of the PassBuilder based on our
/// static pass registry as well as related functionality. It also provides
/// helpers to aid in analyzing, debugging, and testing passes and pass
/// pipelines.
///
//===----------------------------------------------------------------------===//
#include "llvm/Passes/PassBuilder.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/AliasAnalysisEvaluator.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/DDG.h"
#include "llvm/Analysis/DDGPrinter.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/InstCount.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/MemDerefPrinter.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ModuleDebugInfoPrinter.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PhiValues.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PrintPasses.h"
#include "llvm/IR/SafepointIRVerifier.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Regex.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/Coroutines/CoroCleanup.h"
#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "llvm/Transforms/Coroutines/CoroElide.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/Annotation2Metadata.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/IPO/BlockExtractor.h"
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
#include "llvm/Transforms/IPO/ConstantMerge.h"
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/Transforms/IPO/GlobalSplit.h"
#include "llvm/Transforms/IPO/HotColdSplitting.h"
#include "llvm/Transforms/IPO/IROutliner.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/Transforms/IPO/SampleProfile.h"
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/Transforms/IPO/StripSymbols.h"
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
#include "llvm/Transforms/Instrumentation/CGProfile.h"
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
#include "llvm/Transforms/Scalar/BDCE.h"
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
#include "llvm/Transforms/Scalar/ConstantHoisting.h"
#include "llvm/Transforms/Scalar/ConstraintElimination.h"
#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GuardWidening.h"
#include "llvm/Transforms/Scalar/IVUsersPrinter.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h"
#include "llvm/Transforms/Scalar/LoopBoundSplit.h"
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/Transforms/Scalar/LoopFlatten.h"
#include "llvm/Transforms/Scalar/LoopFuse.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopInterchange.h"
#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopPredication.h"
#include "llvm/Transforms/Scalar/LoopReroll.h"
#include "llvm/Transforms/Scalar/LoopRotation.h"
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Scalar/MergeICmps.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
#include "llvm/Transforms/Scalar/Reassociate.h"
#include "llvm/Transforms/Scalar/Reg2Mem.h"
#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/HelloWorld.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/InstructionNamer.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/LowerInvoke.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/MetaRenamer.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
#include "llvm/Transforms/Utils/StripGCRelocates.h"
#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/Transforms/Vectorize/VectorCombine.h"
using namespace llvm;
static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
"enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
"Heuristics-based inliner version."),
clEnumValN(InliningAdvisorMode::Development, "development",
"Use development mode (runtime-loadable model)."),
clEnumValN(InliningAdvisorMode::Release, "release",
"Use release mode (AOT-compiled model).")));
static cl::opt<bool> EnableSyntheticCounts(
"enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
cl::desc("Run synthetic function entry count generation "
"pass"));
static const Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
/// Flag to enable inline deferral during PGO.
static cl::opt<bool>
EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
cl::Hidden,
cl::desc("Enable inline deferral during PGO"));
static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
cl::Hidden, cl::ZeroOrMore,
cl::desc("Enable memory profiler"));
static cl::opt<bool> PerformMandatoryInliningsFirst(
"mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
cl::desc("Perform mandatory inlinings module-wide, before performing "
"inlining."));
static cl::opt<bool> EnableO3NonTrivialUnswitching(
"enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
PipelineTuningOptions::PipelineTuningOptions() {
LoopInterleaving = true;
LoopVectorization = true;
SLPVectorization = false;
LoopUnrolling = true;
ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
CallGraphProfile = true;
MergeFunctions = false;
}
namespace llvm {
extern cl::opt<unsigned> MaxDevirtIterations;
extern cl::opt<bool> EnableConstraintElimination;
extern cl::opt<bool> EnableFunctionSpecialization;
extern cl::opt<bool> EnableGVNHoist;
extern cl::opt<bool> EnableGVNSink;
extern cl::opt<bool> EnableHotColdSplit;
extern cl::opt<bool> EnableIROutliner;
extern cl::opt<bool> EnableOrderFileInstrumentation;
extern cl::opt<bool> EnableCHR;
extern cl::opt<bool> EnableLoopInterchange;
extern cl::opt<bool> EnableUnrollAndJam;
extern cl::opt<bool> EnableLoopFlatten;
extern cl::opt<bool> EnableDFAJumpThreading;
extern cl::opt<bool> RunNewGVN;
extern cl::opt<bool> RunPartialInlining;
extern cl::opt<bool> ExtraVectorizerPasses;
extern cl::opt<bool> FlattenedProfileUsed;
extern cl::opt<AttributorRunOption> AttributorRun;
extern cl::opt<bool> EnableKnowledgeRetention;
extern cl::opt<bool> EnableMatrix;
extern cl::opt<bool> DisablePreInliner;
extern cl::opt<int> PreInlineThreshold;
} // namespace llvm
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {
/*SpeedLevel*/ 0,
/*SizeLevel*/ 0};
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {
/*SpeedLevel*/ 1,
/*SizeLevel*/ 0};
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {
/*SpeedLevel*/ 2,
/*SizeLevel*/ 0};
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {
/*SpeedLevel*/ 3,
/*SizeLevel*/ 0};
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {
/*SpeedLevel*/ 2,
/*SizeLevel*/ 1};
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {
/*SpeedLevel*/ 2,
/*SizeLevel*/ 2};
namespace {
// The following passes/analyses have custom names, otherwise their name will
// include `(anonymous namespace)`. These are special since they are only for
// testing purposes and don't live in a header file.
/// No-op module pass which does nothing.
struct NoOpModulePass : PassInfoMixin<NoOpModulePass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpModulePass"; }
};
/// No-op module analysis.
class NoOpModuleAnalysis : public AnalysisInfoMixin<NoOpModuleAnalysis> {
friend AnalysisInfoMixin<NoOpModuleAnalysis>;
static AnalysisKey Key;
public:
struct Result {};
Result run(Module &, ModuleAnalysisManager &) { return Result(); }
static StringRef name() { return "NoOpModuleAnalysis"; }
};
/// No-op CGSCC pass which does nothing.
struct NoOpCGSCCPass : PassInfoMixin<NoOpCGSCCPass> {
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &,
LazyCallGraph &, CGSCCUpdateResult &UR) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpCGSCCPass"; }
};
/// No-op CGSCC analysis.
class NoOpCGSCCAnalysis : public AnalysisInfoMixin<NoOpCGSCCAnalysis> {
friend AnalysisInfoMixin<NoOpCGSCCAnalysis>;
static AnalysisKey Key;
public:
struct Result {};
Result run(LazyCallGraph::SCC &, CGSCCAnalysisManager &, LazyCallGraph &G) {
return Result();
}
static StringRef name() { return "NoOpCGSCCAnalysis"; }
};
/// No-op function pass which does nothing.
struct NoOpFunctionPass : PassInfoMixin<NoOpFunctionPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpFunctionPass"; }
};
/// No-op function analysis.
class NoOpFunctionAnalysis : public AnalysisInfoMixin<NoOpFunctionAnalysis> {
friend AnalysisInfoMixin<NoOpFunctionAnalysis>;
static AnalysisKey Key;
public:
struct Result {};
Result run(Function &, FunctionAnalysisManager &) { return Result(); }
static StringRef name() { return "NoOpFunctionAnalysis"; }
};
/// No-op loop pass which does nothing.
struct NoOpLoopPass : PassInfoMixin<NoOpLoopPass> {
PreservedAnalyses run(Loop &L, LoopAnalysisManager &,
LoopStandardAnalysisResults &, LPMUpdater &) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpLoopPass"; }
};
/// No-op loop analysis.
class NoOpLoopAnalysis : public AnalysisInfoMixin<NoOpLoopAnalysis> {
friend AnalysisInfoMixin<NoOpLoopAnalysis>;
static AnalysisKey Key;
public:
struct Result {};
Result run(Loop &, LoopAnalysisManager &, LoopStandardAnalysisResults &) {
return Result();
}
static StringRef name() { return "NoOpLoopAnalysis"; }
};
AnalysisKey NoOpModuleAnalysis::Key;
AnalysisKey NoOpCGSCCAnalysis::Key;
AnalysisKey NoOpFunctionAnalysis::Key;
AnalysisKey NoOpLoopAnalysis::Key;
/// Whether or not we should populate a PassInstrumentationCallbacks'
/// class-to-pass-name map.
///
/// This is for optimization purposes so we don't populate it if we never use
/// it. This should be updated if new pass instrumentation wants to use the map.
/// We currently only use this for --print-before/after.
bool shouldPopulateClassToPassNames() {
return !printBeforePasses().empty() || !printAfterPasses().empty();
}
} // namespace
PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
Optional<PGOOptions> PGOOpt,
PassInstrumentationCallbacks *PIC)
: TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) {
if (TM)
TM->registerPassBuilderCallbacks(*this);
if (PIC && shouldPopulateClassToPassNames()) {
#define MODULE_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define FUNCTION_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
PIC->addClassToPassName(CLASS, NAME);
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define LOOP_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
PIC->addClassToPassName(CLASS, NAME);
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define CGSCC_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#include "PassRegistry.def"
}
}
void PassBuilder::invokePeepholeEPCallbacks(
FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
for (auto &C : PeepholeEPCallbacks)
C(FPM, Level);
}
void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
MAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
for (auto &C : ModuleAnalysisRegistrationCallbacks)
C(MAM);
}
void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) {
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
CGAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
for (auto &C : CGSCCAnalysisRegistrationCallbacks)
C(CGAM);
}
void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) {
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
FAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
for (auto &C : FunctionAnalysisRegistrationCallbacks)
C(FAM);
}
void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
LAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
for (auto &C : LoopAnalysisRegistrationCallbacks)
C(LAM);
}
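// The registration helpers above rely on the usual X-macro pattern:
// PassRegistry.def supplies the NAME/CREATE_PASS pairs and each *_ANALYSIS
// macro expands into a registerPass call. As a representative example (the
// exact entries live in PassRegistry.def), an entry such as
//   FUNCTION_ANALYSIS("loops", LoopAnalysis())
// would expand inside registerFunctionAnalyses to
//   FAM.registerPass([&] { return LoopAnalysis(); });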
// Helper to add AnnotationRemarksPass.
static void addAnnotationRemarksPass(ModulePassManager &MPM) {
FunctionPassManager FPM;
FPM.addPass(AnnotationRemarksPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
// Helper to check if the current compilation phase is preparing for LTO
static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
Phase == ThinOrFullLTOPhase::FullLTOPreLink;
}
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
FunctionPassManager
PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
FunctionPassManager FPM;
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROA());
// Catch trivial redundancies
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
// Hoisting of scalars and load expressions.
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
FPM.addPass(LibCallsShrinkWrapPass());
invokePeepholeEPCallbacks(FPM, Level);
FPM.addPass(SimplifyCFGPass());
// Form canonically associated expression trees, and simplify the trees using
// basic mathematical properties. For example, this will form (nearly)
// minimal multiplication trees.
FPM.addPass(ReassociatePass());
// Add the primary loop simplification pipeline.
// FIXME: Currently this is split into two loop pass pipelines because we run
// some function passes in between them. These can and should be removed
// and/or replaced by scheduling the loop pass equivalents in the correct
// positions. But those equivalent passes aren't powerful enough yet.
// Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
// used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
// fully replace `SimplifyCFGPass`, and the closest to the other we have is
// `LoopInstSimplify`.
LoopPassManager LPM1, LPM2;
// Simplify the loop body. We do this initially to clean up after other loop
// passes run, either when iterating on a loop or on inner loops with
// implications on the outer loop.
LPM1.addPass(LoopInstSimplifyPass());
LPM1.addPass(LoopSimplifyCFGPass());
// Try to remove as much code from the loop header as possible,
// to reduce the amount of IR that will have to be duplicated.
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(SimpleLoopUnswitchPass());
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(IndVarSimplifyPass());
for (auto &C : LateLoopOptimizationsEPCallbacks)
C(LPM2, Level);
LPM2.addPass(LoopDeletionPass());
if (EnableLoopInterchange)
LPM2.addPass(LoopInterchangePass());
// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
// because it changes the IR in ways that make profile annotation in the
// backend compile inaccurate. The normal unroller doesn't pay attention to
// forced full unroll attributes, so we need to make sure the full unroll
// pass is still allowed to pay attention to them.
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
PGOOpt->Action != PGOOptions::SampleUse)
LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll));
for (auto &C : LoopOptimizerEndEPCallbacks)
C(LPM2, Level);
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
EnableMSSALoopDependency,
/*UseBlockFrequencyInfo=*/true));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
if (EnableLoopFlatten)
FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false));
// Delete small array after loop unroll.
FPM.addPass(SROA());
// Specially optimize memory movement as it doesn't look like dataflow in SSA.
FPM.addPass(MemCpyOptPass());
// Sparse conditional constant propagation.
// FIXME: It isn't clear why we do this *after* loop passes rather than
// before...
FPM.addPass(SCCPPass());
// Delete dead bit computations (instcombine runs after to fold away the dead
// computations, and then ADCE will run later to exploit any new DCE
// opportunities that creates).
FPM.addPass(BDCEPass());
// Run instcombine after redundancy and dead bit elimination to exploit
// opportunities opened up by them.
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
FPM.addPass(CoroElidePass());
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
// Finally, do an expensive DCE pass to catch all the dead code exposed by
// the simplifications and basic cleanup after all the simplifications.
// TODO: Investigate if this is too expensive.
FPM.addPass(ADCEPass());
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
return FPM;
}
FunctionPassManager
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
// The O1 pipeline has a separate pipeline creation function to simplify
// construction readability.
if (Level.getSpeedupLevel() == 1)
return buildO1FunctionSimplificationPipeline(Level, Phase);
FunctionPassManager FPM;
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROA());
// Catch trivial redundancies
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
if (EnableKnowledgeRetention)
FPM.addPass(AssumeSimplifyPass());
// Hoisting of scalars and load expressions.
if (EnableGVNHoist)
FPM.addPass(GVNHoistPass());
// Global value numbering based sinking.
if (EnableGVNSink) {
FPM.addPass(GVNSinkPass());
FPM.addPass(SimplifyCFGPass());
}
if (EnableConstraintElimination)
FPM.addPass(ConstraintEliminationPass());
// Speculative execution if the target has divergent branches; otherwise nop.
FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
// Optimize based on known information about branches, and cleanup afterward.
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(SimplifyCFGPass());
if (Level == OptimizationLevel::O3)
FPM.addPass(AggressiveInstCombinePass());
FPM.addPass(InstCombinePass());
if (!Level.isOptimizingForSize())
FPM.addPass(LibCallsShrinkWrapPass());
invokePeepholeEPCallbacks(FPM, Level);
// For PGO use pipeline, try to optimize memory intrinsics such as memcpy
// using the size value profile. Don't perform this when optimizing for size.
if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
!Level.isOptimizingForSize())
FPM.addPass(PGOMemOPSizeOpt());
FPM.addPass(TailCallElimPass());
FPM.addPass(SimplifyCFGPass());
// Form canonically associated expression trees, and simplify the trees using
// basic mathematical properties. For example, this will form (nearly)
// minimal multiplication trees.
FPM.addPass(ReassociatePass());
// Add the primary loop simplification pipeline.
// FIXME: Currently this is split into two loop pass pipelines because we run
// some function passes in between them. These can and should be removed
// and/or replaced by scheduling the loop pass equivalents in the correct
// positions. But those equivalent passes aren't powerful enough yet.
// Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
// used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
// fully replace `SimplifyCFGPass`, and the closest to the other we have is
// `LoopInstSimplify`.
LoopPassManager LPM1, LPM2;
// Simplify the loop body. We do this initially to clean up after other loop
// passes run, either when iterating on a loop or on inner loops with
// implications on the outer loop.
LPM1.addPass(LoopInstSimplifyPass());
LPM1.addPass(LoopSimplifyCFGPass());
// Try to remove as much code from the loop header as possible,
// to reduce the amount of IR that will have to be duplicated.
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
// Disable header duplication in loop rotation at -Oz.
LPM1.addPass(
LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(
SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
EnableO3NonTrivialUnswitching));
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(IndVarSimplifyPass());
for (auto &C : LateLoopOptimizationsEPCallbacks)
C(LPM2, Level);
LPM2.addPass(LoopDeletionPass());
if (EnableLoopInterchange)
LPM2.addPass(LoopInterchangePass());
// Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
// because it changes the IR in a way that makes profile annotation in the
// backend compile inaccurate. The normal unroller doesn't pay attention to
// forced full-unroll attributes, so we need to make sure the full unroll pass
// is allowed to pay attention to them.
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
PGOOpt->Action != PGOOptions::SampleUse)
LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll));
for (auto &C : LoopOptimizerEndEPCallbacks)
C(LPM2, Level);
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
EnableMSSALoopDependency,
/*UseBlockFrequencyInfo=*/true));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
if (EnableLoopFlatten)
FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
// The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
// LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false));
// Delete small array after loop unroll.
FPM.addPass(SROA());
// Eliminate redundancies.
FPM.addPass(MergedLoadStoreMotionPass());
if (RunNewGVN)
FPM.addPass(NewGVNPass());
else
FPM.addPass(GVN());
// Sparse conditional constant propagation.
// FIXME: It isn't clear why we do this *after* loop passes rather than
// before...
FPM.addPass(SCCPPass());
// Delete dead bit computations (instcombine runs after to fold away the dead
// computations, and then ADCE will run later to exploit any new DCE
// opportunities that creates).
FPM.addPass(BDCEPass());
// Run instcombine after redundancy and dead bit elimination to exploit
// opportunities opened up by them.
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
// Re-consider control flow based optimizations after redundancy elimination,
// redo DCE, etc.
if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
FPM.addPass(DFAJumpThreadingPass());
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
// Finally, do an expensive DCE pass to catch all the dead code exposed by
// the simplifications and basic cleanup after all the simplifications.
// TODO: Investigate if this is too expensive.
FPM.addPass(ADCEPass());
// Specially optimize memory movement as it doesn't look like dataflow in SSA.
FPM.addPass(MemCpyOptPass());
FPM.addPass(DSEPass());
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
FPM.addPass(CoroElidePass());
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
FPM.addPass(SimplifyCFGPass(
SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
(PGOOpt->Action == PGOOptions::IRUse ||
PGOOpt->Action == PGOOptions::SampleUse))
FPM.addPass(ControlHeightReductionPass());
return FPM;
}
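// Illustrative sketch (not part of the original file): with analysis managers
// set up as in the earlier sketch, this simplification pipeline can be built
// and run on a single function by a client; F below is a placeholder
// llvm::Function:
//
//   FunctionPassManager SimplifyFPM = PB.buildFunctionSimplificationPipeline(
//       PassBuilder::OptimizationLevel::O2, ThinOrFullLTOPhase::None);
//   PreservedAnalyses PA = SimplifyFPM.run(F, FAM);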
void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
MPM.addPass(CanonicalizeAliasesPass());
MPM.addPass(NameAnonGlobalPass());
}
void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
PassBuilder::OptimizationLevel Level,
bool RunProfileGen, bool IsCS,
std::string ProfileFile,
std::string ProfileRemappingFile) {
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
if (!IsCS && !DisablePreInliner) {
InlineParams IP;
IP.DefaultThreshold = PreInlineThreshold;
// FIXME: The hint threshold has the same value used by the regular inliner
// when not optimizing for size. This should probably be lowered after
// performance testing.
// FIXME: This comment is cargo-culted from the old pass manager; revisit.
IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
ModuleInlinerWrapperPass MIWP(IP);
CGSCCPassManager &CGPipeline = MIWP.getPM();
FunctionPassManager FPM;
FPM.addPass(SROA());
FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
FPM.addPass(InstCombinePass()); // Combine silly sequences.
invokePeepholeEPCallbacks(FPM, Level);
CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
MPM.addPass(std::move(MIWP));
// Delete anything that is now dead to make sure that we don't instrument
// dead code. Instrumentation can end up keeping dead code around and
// dramatically increase code size.
MPM.addPass(GlobalDCEPass());
}
if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
return;
}
// Perform PGO instrumentation.
MPM.addPass(PGOInstrumentationGen(IsCS));
FunctionPassManager FPM;
// Disable header duplication in loop rotation at -Oz.
FPM.addPass(createFunctionToLoopPassAdaptor(
LoopRotatePass(Level != OptimizationLevel::Oz), EnableMSSALoopDependency,
/*UseBlockFrequencyInfo=*/false));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
// Add the profile lowering pass.
InstrProfOptions Options;
if (!ProfileFile.empty())
Options.InstrProfileOutput = ProfileFile;
// Do counter promotion at Level greater than O0.
Options.DoCounterPromotion = true;
Options.UseBFIInPromotion = IsCS;
MPM.addPass(InstrProfiling(Options, IsCS));
}
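// Illustrative note (not part of the original file): the same instrumentation
// steps can also be requested individually through the textual pipeline,
// assuming the registry names "pgo-instr-gen" and "instrprof" from
// PassRegistry.def:
//
//   ModulePassManager PGOMPM;
//   if (Error Err = PB.parsePassPipeline(PGOMPM, "pgo-instr-gen,instrprof"))
//     errs() << toString(std::move(Err)) << "\n";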
void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
bool RunProfileGen, bool IsCS,
std::string ProfileFile,
std::string ProfileRemappingFile) {
if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
return;
}
// Perform PGO instrumentation.
MPM.addPass(PGOInstrumentationGen(IsCS));
// Add the profile lowering pass.
InstrProfOptions Options;
if (!ProfileFile.empty())
Options.InstrProfileOutput = ProfileFile;
// Do not do counter promotion at O0.
Options.DoCounterPromotion = false;
Options.UseBFIInPromotion = IsCS;
MPM.addPass(InstrProfiling(Options, IsCS));
}
static InlineParams
getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
}
ModuleInlinerWrapperPass
PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
InlineParams IP = getInlineParamsFromOptLevel(Level);
if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
PGOOpt->Action == PGOOptions::SampleUse)
IP.HotCallSiteThreshold = 0;
if (PGOOpt)
IP.EnableDeferral = EnablePGOInlineDeferral;
ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
UseInlineAdvisor, MaxDevirtIterations);
// Require the GlobalsAA analysis for the module so we can query it within
// the CGSCC pipeline.
MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
// Invalidate AAManager so it can be recreated and pick up the newly available
// GlobalsAA.
MIWP.addModulePass(
createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
// Require the ProfileSummaryAnalysis for the module so we can query it within
// the inliner pass.
MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
// Now begin the main postorder CGSCC pipeline.
// FIXME: The current CGSCC pipeline has its origins in the legacy pass
// manager and trying to emulate its precise behavior. Much of this doesn't
// make a lot of sense and we should revisit the core CGSCC structure.
CGSCCPassManager &MainCGPipeline = MIWP.getPM();
// Note: historically, the PruneEH pass was run first to deduce nounwind and
// generally clean up exception handling overhead. It isn't clear this is
// valuable as the inliner doesn't currently care whether it is inlining an
// invoke or a call.
if (AttributorRun & AttributorRunOption::CGSCC)
MainCGPipeline.addPass(AttributorCGSCCPass());
// Now deduce any function attributes based on the current code.
MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
// When at O3 add argument promotion to the pass pipeline.
// FIXME: It isn't at all clear why this should be limited to O3.
if (Level == OptimizationLevel::O3)
MainCGPipeline.addPass(ArgumentPromotionPass());
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
// there are no OpenMP runtime calls present in the module.
if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
MainCGPipeline.addPass(OpenMPOptCGSCCPass());
for (auto &C : CGSCCOptimizerLateEPCallbacks)
C(MainCGPipeline, Level);
// Lastly, add the core function simplification pipeline nested inside the
// CGSCC walk.
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
buildFunctionSimplificationPipeline(Level, Phase)));
MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
return MIWP;
}
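// Illustrative sketch (not part of the original file): a client that wants a
// standalone inliner with tweaked thresholds can construct the wrapper pass
// directly; the threshold values below are placeholders, not recommendations:
//
//   InlineParams IP = getInlineParams(/*OptLevel=*/3, /*SizeOptLevel=*/0);
//   IP.HintThreshold = 325;  // placeholder tuning value
//   MPM.addPass(ModuleInlinerWrapperPass(IP));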
ModulePassManager
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
ModulePassManager MPM;
// Place pseudo probe instrumentation as the first pass of the pipeline to
// minimize the impact of optimization changes.
if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
MPM.addPass(SampleProfileProbePass(TM));
bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
// In ThinLTO mode, when a flattened profile is used, all the available
// profile information will be annotated in the PreLink phase, so there is
// no need to load the profile again in PostLink.
bool LoadSampleProfile =
HasSampleProfile &&
!(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
// During the ThinLTO backend phase we perform early indirect call promotion
// here, before globalopt. Otherwise imported available_externally functions
// look unreferenced and are removed. If we are going to load the sample
// profile then defer until later.
// TODO: See if we can move later and consolidate with the location where
// we perform ICP when we are loading a sample profile.
// TODO: We pass HasSampleProfile (whether there was a sample profile file
// passed to the compile) to the SamplePGO flag of ICP. This is used to
// determine whether the new direct calls are annotated with prof metadata.
// Ideally this should be determined from whether the IR is annotated with
// sample profile, and not whether a sample profile was provided on the
// command line. E.g. for flattened profiles where we will not be reloading
// the sample profile in the ThinLTO backend, we ideally shouldn't have to
// provide the sample profile file.
if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
// Do basic inference of function attributes from known properties of system
// libraries and other oracles.
MPM.addPass(InferFunctionAttrsPass());
// Create an early function pass manager to cleanup the output of the
// frontend.
FunctionPassManager EarlyFPM;
// Lower llvm.expect to metadata before attempting transforms.
// Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
EarlyFPM.addPass(LowerExpectIntrinsicPass());
EarlyFPM.addPass(SimplifyCFGPass());
EarlyFPM.addPass(SROA());
EarlyFPM.addPass(EarlyCSEPass());
EarlyFPM.addPass(CoroEarlyPass());
if (Level == OptimizationLevel::O3)
EarlyFPM.addPass(CallSiteSplittingPass());
// In the SamplePGO ThinLTO backend, we need instcombine before profile
// annotation to convert bitcasts to direct calls so that they can be inlined
// during the profile annotation preparation step.
// More details about SamplePGO design can be found in:
// https://research.google.com/pubs/pub45290.html
// FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
if (LoadSampleProfile)
EarlyFPM.addPass(InstCombinePass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
if (LoadSampleProfile) {
// Annotate sample profile right after early FPM to ensure freshness of
// the debug info.
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile, Phase));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
// Do not invoke ICP in the LTOPreLink phase as it makes it hard
// for the profile annotation to be accurate in the LTO backend.
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
Phase != ThinOrFullLTOPhase::FullLTOPreLink)
// We perform early indirect call promotion here, before globalopt.
// This is important for the ThinLTO backend phase because otherwise
// imported available_externally functions look unreferenced and are
// removed.
MPM.addPass(
PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
}
// Try to perform OpenMP specific optimizations on the module. This is a
// (quick!) no-op if there are no OpenMP runtime calls present in the module.
if (Level != OptimizationLevel::O0)
MPM.addPass(OpenMPOptPass());
if (AttributorRun & AttributorRunOption::MODULE)
MPM.addPass(AttributorPass());
// Lower type metadata and the type.test intrinsic in the ThinLTO
// post link pipeline after ICP. This is to enable usage of the type
// tests in ICP sequences.
if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
for (auto &C : PipelineEarlySimplificationEPCallbacks)
C(MPM, Level);
// Specialize functions with IPSCCP.
if (EnableFunctionSpecialization)
MPM.addPass(FunctionSpecializationPass());
// Interprocedural constant propagation now that basic cleanup has occurred
// and prior to optimizing globals.
// FIXME: This position in the pipeline hasn't been carefully considered in
// years, it should be re-analyzed.
MPM.addPass(IPSCCPPass());
// Attach metadata to indirect call sites indicating the set of functions
// they may target at run-time. This should follow IPSCCP.
MPM.addPass(CalledValuePropagationPass());
// Optimize globals to try and fold them into constants.
MPM.addPass(GlobalOptPass());
// Promote any localized globals to SSA registers.
// FIXME: Should this instead be a run of SROA?
// FIXME: We should probably run instcombine and simplifycfg afterward to
// delete control flows that are dead once globals have been folded to
// constants.
MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
// Remove any dead arguments exposed by cleanups and constant folding
// globals.
MPM.addPass(DeadArgumentEliminationPass());
// Create a small function pass pipeline to cleanup after all the global
// optimizations.
FunctionPassManager GlobalCleanupPM;
GlobalCleanupPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
GlobalCleanupPM.addPass(SimplifyCFGPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
// Add all the requested passes for instrumentation PGO, if requested.
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
(PGOOpt->Action == PGOOptions::IRInstr ||
PGOOpt->Action == PGOOptions::IRUse)) {
addPGOInstrPasses(MPM, Level,
/* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
/* IsCS */ false, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
PGOOpt->CSAction == PGOOptions::CSIRInstr)
MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
// Synthesize function entry counts for non-PGO compilation.
if (EnableSyntheticCounts && !PGOOpt)
MPM.addPass(SyntheticCountsPropagation());
MPM.addPass(buildInlinerPipeline(Level, Phase));
if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
MPM.addPass(ModuleMemProfilerPass());
}
return MPM;
}
/// TODO: Should LTO cause any differences to this set of passes?
void PassBuilder::addVectorPasses(OptimizationLevel Level,
FunctionPassManager &FPM, bool IsFullLTO) {
FPM.addPass(LoopVectorizePass(
LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
if (IsFullLTO) {
// The vectorizer may have significantly shortened a loop body; unroll
// again. Unroll small loops to hide loop backedge latency and saturate any
// parallel execution resources of an out-of-order processor. We also then
// need to clean up redundancies and loop invariant code.
// FIXME: It would be really good to use a loop-integrated instruction
// combiner for cleanup here so that the unrolling and LICM can be pipelined
// across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling)
FPM.addPass(createFunctionToLoopPassAdaptor(
LoopUnrollAndJamPass(Level.getSpeedupLevel())));
FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll)));
FPM.addPass(WarnMissedTransformationsPass());
}
if (!IsFullLTO) {
// Eliminate loads by forwarding stores from the previous iteration to loads
// of the current iteration.
FPM.addPass(LoopLoadEliminationPass());
}
// Cleanup after the loop optimization passes.
FPM.addPass(InstCombinePass());
if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
// At higher optimization levels, try to clean up any runtime overlap and
// alignment checks inserted by the vectorizer. We want to track correlated
// runtime checks for two inner loops in the same outer loop, fold any
// common computations, hoist loop-invariant aspects out of any outer loop,
// and unswitch the runtime checks if possible. Once hoisted, we may have
// dead (or speculatable) control flows or more combining opportunities.
FPM.addPass(EarlyCSEPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(InstCombinePass());
LoopPassManager LPM;
LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
OptimizationLevel::O3));
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM), EnableMSSALoopDependency,
/*UseBlockFrequencyInfo=*/true));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
}
// Now that we've formed fast-to-execute loop structures, we do further
// optimizations. These are run afterward, as they might block complex
// analyses and transforms such as those needed for loop vectorization.
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
// GVN, loop transforms, and others have already run, so it's now better to
// convert to more optimized IR using more aggressive simplify CFG options.
// The extra sinking transform can create larger basic blocks, so do this
// before SLP vectorization.
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
.forwardSwitchCondToPhi(true)
.convertSwitchToLookupTable(true)
.needCanonicalLoops(false)
.hoistCommonInsts(true)
.sinkCommonInsts(true)));
if (IsFullLTO) {
FPM.addPass(SCCPPass());
FPM.addPass(InstCombinePass());
FPM.addPass(BDCEPass());
}
// Optimize parallel scalar instruction chains into SIMD instructions.
if (PTO.SLPVectorization) {
FPM.addPass(SLPVectorizerPass());
if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
FPM.addPass(EarlyCSEPass());
}
}
// Enhance/cleanup vector code.
FPM.addPass(VectorCombinePass());
if (!IsFullLTO) {
FPM.addPass(InstCombinePass());
// Unroll small loops to hide loop backedge latency and saturate any
// parallel execution resources of an out-of-order processor. We also then
// need to clean up redundancies and loop invariant code.
// FIXME: It would be really good to use a loop-integrated instruction
// combiner for cleanup here so that the unrolling and LICM can be pipelined
// across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling) {
FPM.addPass(createFunctionToLoopPassAdaptor(
LoopUnrollAndJamPass(Level.getSpeedupLevel())));
}
FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll)));
FPM.addPass(WarnMissedTransformationsPass());
FPM.addPass(InstCombinePass());
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
}
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information; try to re-derive it here.
FPM.addPass(AlignmentFromAssumptionsPass());
if (IsFullLTO)
FPM.addPass(InstCombinePass());
}
ModulePassManager
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
bool LTOPreLink) {
ModulePassManager MPM;
// Optimize globals now that the module is fully simplified.
MPM.addPass(GlobalOptPass());
MPM.addPass(GlobalDCEPass());
// Run partial inlining pass to partially inline functions that have
// large bodies.
if (RunPartialInlining)
MPM.addPass(PartialInlinerPass());
// Remove avail extern fns and global definitions since we aren't compiling
// an object file for later LTO. For LTO we want to preserve these so they
// are eligible for inlining at link-time. Note if they are unreferenced they
// will be removed by GlobalDCE later, so this only impacts referenced
// available externally globals. Eventually they will be suppressed during
// codegen, but eliminating here enables more opportunity for GlobalDCE as it
// may make globals referenced by available external functions dead and saves
// running remaining passes on the eliminated functions. These should be
// preserved during prelinking for link-time inlining decisions.
if (!LTOPreLink)
MPM.addPass(EliminateAvailableExternallyPass());
if (EnableOrderFileInstrumentation)
MPM.addPass(InstrOrderFilePass());
// Do RPO function attribute inference across the module to forward-propagate
// attributes where applicable.
// FIXME: Is this really an optimization rather than a canonicalization?
MPM.addPass(ReversePostOrderFunctionAttrsPass());
// Do a post-inline PGO instrumentation and use pass. This is a
// context-sensitive PGO pass. We don't want to do this in the LTOPreLink
// phase, as cross-module inlining has not been done yet. The
// context-sensitive instrumentation is run after all the inlining is done.
if (!LTOPreLink && PGOOpt) {
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
/* IsCS */ true, PGOOpt->CSProfileGenFile,
PGOOpt->ProfileRemappingFile);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
/* IsCS */ true, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
}
// Re-require GlobalsAA here prior to function passes. This is particularly
// useful as the above will have inlined, DCE'ed, and function-attr
// propagated everything. We should at this point have a reasonably minimal
// and richly annotated call graph. By computing aliasing and mod/ref
// information for all local globals here, the late loop passes and notably
// the vectorizer will be able to use them to help recognize vectorizable
// memory operations.
MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
FunctionPassManager OptimizePM;
OptimizePM.addPass(Float2IntPass());
OptimizePM.addPass(LowerConstantIntrinsicsPass());
if (EnableMatrix) {
OptimizePM.addPass(LowerMatrixIntrinsicsPass());
OptimizePM.addPass(EarlyCSEPass());
}
// FIXME: We need to run some loop optimizations to re-rotate loops after
// simplifycfg and others undo their rotation.
// Optimize the loop execution. These passes operate on entire loop nests
// rather than on each loop in an inside-out manner, and so they are actually
// function passes.
for (auto &C : VectorizerStartEPCallbacks)
C(OptimizePM, Level);
// First rotate loops that may have been un-rotated by prior passes.
// Disable header duplication at -Oz.
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink),
EnableMSSALoopDependency,
/*UseBlockFrequencyInfo=*/false));
// Distribute loops to allow partial vectorization, i.e. isolate dependences
// into a separate loop that would otherwise inhibit vectorization. This is
// currently only performed for loops marked with the metadata
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
OptimizePM.addPass(LoopDistributePass());
// Populates the VFABI attribute with the scalar-to-vector mappings
// from the TargetLibraryInfo.
OptimizePM.addPass(InjectTLIMappings());
addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
// Split out cold code. Splitting is done late to avoid hiding context from
// other optimizations and inadvertently regressing performance. The tradeoff
// is that this has a higher code size cost than splitting early.
if (EnableHotColdSplit && !LTOPreLink)
MPM.addPass(HotColdSplittingPass());
// Search the code for similar regions of code. If enough similar regions can
// be found where extracting the regions into their own function will decrease
// the size of the program, we extract the regions and deduplicate the
// structurally similar regions.
if (EnableIROutliner)
MPM.addPass(IROutlinerPass());
// Merge functions if requested.
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
// canonicalization pass that enables other optimizations. As a result,
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
// result too early.
OptimizePM.addPass(LoopSinkPass());
// And finally clean up LCSSA form before generating code.
OptimizePM.addPass(InstSimplifyPass());
// This hoists/decomposes div/rem ops. It should run after other sink/hoist
// passes to avoid re-sinking, but before SimplifyCFG because it can allow
// flattening of blocks.
OptimizePM.addPass(DivRemPairsPass());
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
OptimizePM.addPass(SimplifyCFGPass());
OptimizePM.addPass(CoroCleanupPass());
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
for (auto &C : OptimizerLastEPCallbacks)
C(MPM, Level);
if (PTO.CallGraphProfile)
MPM.addPass(CGProfilePass());
// Now we need to do some global optimization transforms.
// FIXME: It would seem like these should come first in the optimization
// pipeline and maybe be the bottom of the canonicalization pipeline? Weird
// ordering here.
MPM.addPass(GlobalDCEPass());
MPM.addPass(ConstantMergePass());
// TODO: The relative lookup table converter pass caused an issue when full
// LTO is enabled. See https://reviews.llvm.org/D94355 for more details.
// Until the issue is fixed, disable this pass during the pre-linking phase.
if (!LTOPreLink)
MPM.addPass(RelLookupTableConverterPass());
return MPM;
}
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
bool LTOPreLink) {
assert(Level != OptimizationLevel::O0 &&
"Must request optimizations for the default pipeline!");
ModulePassManager MPM;
// Convert @llvm.global.annotations to !annotation metadata.
MPM.addPass(Annotation2MetadataPass());
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
// Apply module pipeline start EP callback.
for (auto &C : PipelineStartEPCallbacks)
C(MPM, Level);
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(
Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
: ThinOrFullLTOPhase::None));
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
MPM.addPass(PseudoProbeUpdatePass());
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
if (LTOPreLink)
addRequiredLTOPreLinkPasses(MPM);
return MPM;
}
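// Illustrative sketch (not part of the original file): for non-O0 levels this
// is the pipeline behind the textual alias "default<O2>" (and friends), so a
// client can request it either programmatically or via a pipeline string;
// M here is a placeholder llvm::Module:
//
//   ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(PassBuilder::OptimizationLevel::O2);
//   MPM.run(M, MAM);
//
//   ModulePassManager MPM2;
//   if (Error Err = PB.parsePassPipeline(MPM2, "default<O2>"))
//     errs() << toString(std::move(Err)) << "\n";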
ModulePassManager
PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
assert(Level != OptimizationLevel::O0 &&
"Must request optimizations for the default pipeline!");
ModulePassManager MPM;
// Convert @llvm.global.annotations to !annotation metadata.
MPM.addPass(Annotation2MetadataPass());
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
// Apply module pipeline start EP callback.
for (auto &C : PipelineStartEPCallbacks)
C(MPM, Level);
// If we are planning to perform ThinLTO later, we don't bloat the code with
// unrolling/vectorization/... now. Just simplify the module as much as we
// can.
MPM.addPass(buildModuleSimplificationPipeline(
Level, ThinOrFullLTOPhase::ThinLTOPreLink));
// Run partial inlining pass to partially inline functions that have
// large bodies.
// FIXME: It isn't clear whether this is really the right place to run this
// in ThinLTO. Because there is another canonicalization and simplification
// phase that will run after the thin link, running this here ends up with
// less information than will be available later and it may grow functions in
// ways that aren't beneficial.
if (RunPartialInlining)
MPM.addPass(PartialInlinerPass());
// Reduce the size of the IR as much as possible.
MPM.addPass(GlobalOptPass());
// Module simplification splits coroutines, but does not fully clean up
// coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
// on these, we schedule the cleanup here.
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
MPM.addPass(PseudoProbeUpdatePass());
// Handle OptimizerLastEPCallbacks added by clang on PreLink. The actual
// optimization is going to be done in the PostLink stage, but clang can't
// add callbacks there in the case of in-process ThinLTO called by the linker.
for (auto &C : OptimizerLastEPCallbacks)
C(MPM, Level);
if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
MPM.addPass(PseudoProbeUpdatePass());
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
addRequiredLTOPreLinkPasses(MPM);
return MPM;
}
ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
ModulePassManager MPM;
// Convert @llvm.global.annotations to !annotation metadata.
MPM.addPass(Annotation2MetadataPass());
if (ImportSummary) {
// These passes import type identifier resolutions for whole-program
// devirtualization and CFI. They must run early because other passes may
// disturb the specific instruction patterns that these passes look for,
// creating dependencies on resolutions that may not appear in the summary.
//
// For example, GVN may transform the pattern assume(type.test) appearing in
// two basic blocks into assume(phi(type.test, type.test)), which would
// transform a dependency on a WPD resolution into a dependency on a type
// identifier resolution for CFI.
//
// Also, WPD has access to more precise information than ICP and can
// devirtualize more effectively, so it should operate on the IR first.
//
// The WPD and LowerTypeTest passes need to run at -O0 to lower type
// metadata and intrinsics.
MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
}
if (Level == OptimizationLevel::O0) {
// Run a second time to clean up any type tests left behind by WPD for use
// in ICP.
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
// Drop available_externally and unreferenced globals. This is necessary
// with ThinLTO in order to avoid leaving undefined references to dead
// globals in the object file.
MPM.addPass(EliminateAvailableExternallyPass());
MPM.addPass(GlobalDCEPass());
return MPM;
}
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(
Level, ThinOrFullLTOPhase::ThinLTOPostLink));
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level));
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
return MPM;
}
ModulePassManager
PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
assert(Level != OptimizationLevel::O0 &&
"Must request optimizations for the default pipeline!");
// FIXME: We should use a customized pre-link pipeline!
return buildPerModuleDefaultPipeline(Level,
/* LTOPreLink */ true);
}
ModulePassManager
PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
ModuleSummaryIndex *ExportSummary) {
ModulePassManager MPM;
// Convert @llvm.global.annotations to !annotation metadata.
MPM.addPass(Annotation2MetadataPass());
// Create a function that performs CFI checks for cross-DSO calls with targets
// in the current module.
MPM.addPass(CrossDSOCFIPass());
if (Level == OptimizationLevel::O0) {
// The WPD and LowerTypeTest passes need to run at -O0 to lower type
// metadata and intrinsics.
MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
// Run a second time to clean up any type tests left behind by WPD for use
// in ICP.
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
return MPM;
}
if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
// Load sample profile before running the LTO optimization pipeline.
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile,
ThinOrFullLTOPhase::FullLTOPostLink));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
}
// Remove unused virtual tables to improve the quality of code generated by
// whole-program devirtualization and bitset lowering.
MPM.addPass(GlobalDCEPass());
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
// Do basic inference of function attributes from known properties of system
// libraries and other oracles.
MPM.addPass(InferFunctionAttrsPass());
if (Level.getSpeedupLevel() > 1) {
FunctionPassManager EarlyFPM;
EarlyFPM.addPass(CallSiteSplittingPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
// Indirect call promotion. This should promote all the targets that are
// left by the earlier promotion pass that promotes intra-module targets.
// This two-step promotion is to save compile time. For LTO, it should
// produce the same result as if we only do promotion here.
MPM.addPass(PGOIndirectCallPromotion(
true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
if (EnableFunctionSpecialization)
MPM.addPass(FunctionSpecializationPass());
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
MPM.addPass(IPSCCPPass());
// Attach metadata to indirect call sites indicating the set of functions
// they may target at run-time. This should follow IPSCCP.
MPM.addPass(CalledValuePropagationPass());
}
// Now deduce any function attributes based on the current code.
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
PostOrderFunctionAttrsPass()));
// Do RPO function attribute inference across the module to forward-propagate
// attributes where applicable.
// FIXME: Is this really an optimization rather than a canonicalization?
MPM.addPass(ReversePostOrderFunctionAttrsPass());
// Use in-range annotations on GEP indices to split globals where beneficial.
MPM.addPass(GlobalSplitPass());
// Run whole-program optimization of virtual calls when the list of callees
// is fixed.
MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
// Stop here at -O1.
if (Level == OptimizationLevel::O1) {
// The LowerTypeTestsPass needs to run to lower type metadata and the
// type.test intrinsics. The pass does nothing if CFI is disabled.
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
// Run a second time to clean up any type tests left behind by WPD for use
// in ICP (which is performed earlier than this in the regular LTO
// pipeline).
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
return MPM;
}
// Optimize globals to try and fold them into constants.
MPM.addPass(GlobalOptPass());
// Promote any localized globals to SSA registers.
MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
// Linking modules together can lead to duplicate global constants; only
// keep one copy of each constant.
MPM.addPass(ConstantMergePass());
// Remove unused arguments from functions.
MPM.addPass(DeadArgumentEliminationPass());
// Reduce the code after globalopt and ipsccp. Both can open up significant
// simplification opportunities, and both can propagate functions through
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
FunctionPassManager PeepholeFPM;
if (Level == OptimizationLevel::O3)
PeepholeFPM.addPass(AggressiveInstCombinePass());
PeepholeFPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(PeepholeFPM, Level);
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM)));
// Note: historically, the PruneEH pass was run first to deduce nounwind and
// generally clean up exception handling overhead. It isn't clear this is
// valuable as the inliner doesn't currently care whether it is inlining an
// invoke or a call.
// Run the inliner now.
MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level)));
// Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass());
// Garbage collect dead functions.
- // FIXME: Add ArgumentPromotion pass after once it's ported.
MPM.addPass(GlobalDCEPass());
+ // If we didn't decide to inline a function, check to see if we can
+ // transform it to pass arguments by value instead of by reference.
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
+
FunctionPassManager FPM;
// The IPO Passes may leave cruft around. Clean up after them.
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
// Do a post inline PGO instrumentation and use pass. This is a context
// sensitive PGO pass.
if (PGOOpt) {
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
/* IsCS */ true, PGOOpt->CSProfileGenFile,
PGOOpt->ProfileRemappingFile);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
/* IsCS */ true, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
}
// Break up allocas
FPM.addPass(SROA());
// LTO provides additional opportunities for tailcall elimination due to
// link-time inlining and visibility of the nocapture attribute.
FPM.addPass(TailCallElimPass());
// Run a few AA-driven optimizations here and now to clean up the code.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
MPM.addPass(
createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
// Require the GlobalsAA analysis for the module so we can query it within
// MainFPM.
MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
// Invalidate AAManager so it can be recreated and pick up the newly available
// GlobalsAA.
MPM.addPass(
createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
FunctionPassManager MainFPM;
MainFPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
if (RunNewGVN)
MainFPM.addPass(NewGVNPass());
else
MainFPM.addPass(GVN());
// Remove dead memcpy()'s.
MainFPM.addPass(MemCpyOptPass());
// Nuke dead stores.
MainFPM.addPass(DSEPass());
MainFPM.addPass(MergedLoadStoreMotionPass());
// More loops are countable; try to optimize them.
if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
MainFPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
if (EnableConstraintElimination)
MainFPM.addPass(ConstraintEliminationPass());
LoopPassManager LPM;
LPM.addPass(IndVarSimplifyPass());
LPM.addPass(LoopDeletionPass());
// FIXME: Add loop interchange.
// Unroll small loops and perform peeling.
LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll));
// The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
MainFPM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
MainFPM.addPass(LoopDistributePass());
addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
invokePeepholeEPCallbacks(MainFPM, Level);
MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
// Lower type metadata and the type.test intrinsic. This pass supports
// clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
// to be run at link time if CFI is enabled. This pass does nothing if
// CFI is disabled.
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
// Run a second time to clean up any type tests left behind by WPD for use
// in ICP (which is performed earlier than this in the regular LTO pipeline).
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
// Enable splitting late in the FullLTO post-link pipeline. This is done in
// the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
if (EnableHotColdSplit)
MPM.addPass(HotColdSplittingPass());
// Add late LTO optimization passes.
// Delete basic blocks, which optimization passes may have killed.
MPM.addPass(createModuleToFunctionPassAdaptor(
SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
// Drop bodies of available externally objects to improve GlobalDCE.
MPM.addPass(EliminateAvailableExternallyPass());
// Now that we have optimized the program, discard unreachable functions.
MPM.addPass(GlobalDCEPass());
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
return MPM;
}
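// Illustrative sketch (not part of the original file): a linker plugin or LTO
// driver would typically invoke this pipeline on the merged module; the names
// CombinedModule and ExportSummary below are placeholders:
//
//   ModulePassManager LTOMPM = PB.buildLTODefaultPipeline(
//       PassBuilder::OptimizationLevel::O2, ExportSummary);
//   LTOMPM.run(CombinedModule, MAM);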
ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
bool LTOPreLink) {
assert(Level == OptimizationLevel::O0 &&
"buildO0DefaultPipeline should only be used with O0");
ModulePassManager MPM;
if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
PGOOpt->Action == PGOOptions::IRUse))
addPGOInstrPassesForO0(
MPM,
/* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
/* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
for (auto &C : PipelineStartEPCallbacks)
C(MPM, Level);
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
for (auto &C : PipelineEarlySimplificationEPCallbacks)
C(MPM, Level);
// Build a minimal pipeline based on the semantics required by LLVM,
// which is just that always inlining occurs. Further, disable generating
// lifetime intrinsics to avoid enabling further optimizations during
// code generation.
MPM.addPass(AlwaysInlinerPass(
/*InsertLifetimeIntrinsics=*/false));
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());
if (EnableMatrix)
MPM.addPass(
createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
if (!CGSCCOptimizerLateEPCallbacks.empty()) {
CGSCCPassManager CGPM;
for (auto &C : CGSCCOptimizerLateEPCallbacks)
C(CGPM, Level);
if (!CGPM.isEmpty())
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
}
if (!LateLoopOptimizationsEPCallbacks.empty()) {
LoopPassManager LPM;
for (auto &C : LateLoopOptimizationsEPCallbacks)
C(LPM, Level);
if (!LPM.isEmpty()) {
MPM.addPass(createModuleToFunctionPassAdaptor(
createFunctionToLoopPassAdaptor(std::move(LPM))));
}
}
if (!LoopOptimizerEndEPCallbacks.empty()) {
LoopPassManager LPM;
for (auto &C : LoopOptimizerEndEPCallbacks)
C(LPM, Level);
if (!LPM.isEmpty()) {
MPM.addPass(createModuleToFunctionPassAdaptor(
createFunctionToLoopPassAdaptor(std::move(LPM))));
}
}
if (!ScalarOptimizerLateEPCallbacks.empty()) {
FunctionPassManager FPM;
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
if (!FPM.isEmpty())
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
if (!VectorizerStartEPCallbacks.empty()) {
FunctionPassManager FPM;
for (auto &C : VectorizerStartEPCallbacks)
C(FPM, Level);
if (!FPM.isEmpty())
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
CGSCCPassManager CGPM;
CGPM.addPass(CoroSplitPass());
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
for (auto &C : OptimizerLastEPCallbacks)
C(MPM, Level);
if (LTOPreLink)
addRequiredLTOPreLinkPasses(MPM);
return MPM;
}
AAManager PassBuilder::buildDefaultAAPipeline() {
AAManager AA;
// The order in which these are registered determines their priority when
// being queried.
// First we register the basic alias analysis that provides the majority of
// per-function local AA logic. This is a stateless, on-demand local set of
// AA techniques.
AA.registerFunctionAnalysis<BasicAA>();
// Next we query fast, specialized alias analyses that wrap IR-embedded
// information about aliasing.
AA.registerFunctionAnalysis<ScopedNoAliasAA>();
AA.registerFunctionAnalysis<TypeBasedAA>();
// Add support for querying global aliasing information when available.
// Because the `AAManager` is a function analysis and `GlobalsAA` is a module
// analysis, all that the `AAManager` can do is query for any *cached*
// results from `GlobalsAA` through a readonly proxy.
AA.registerModuleAnalysis<GlobalsAA>();
// Add target-specific alias analyses.
if (TM)
TM->registerDefaultAliasAnalyses(AA);
return AA;
}
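// Illustrative sketch (not part of the original file): to make this the AA
// pipeline that is actually used, a client registers it with the function
// analysis manager before calling registerFunctionAnalyses, since the first
// registration of AAManager wins:
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
//   PB.registerFunctionAnalyses(FAM);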
static Optional<int> parseRepeatPassName(StringRef Name) {
if (!Name.consume_front("repeat<") || !Name.consume_back(">"))
return None;
int Count;
if (Name.getAsInteger(0, Count) || Count <= 0)
return None;
return Count;
}
static Optional<int> parseDevirtPassName(StringRef Name) {
if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
return None;
int Count;
if (Name.getAsInteger(0, Count) || Count < 0)
return None;
return Count;
}
static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
if (!Name.consume_front(PassName))
return false;
// normal pass name w/o parameters == default parameters
if (Name.empty())
return true;
return Name.startswith("<") && Name.endswith(">");
}
namespace {
/// This performs customized parsing of a pass name with parameters.
///
/// We do not need parametrization of passes in a textual pipeline very often,
/// yet on rare occasions the ability to specify parameters right there can be
/// useful.
///
/// \p Name - parameterized specification of a pass from a textual pipeline
/// is a string of the form:
/// PassName '<' parameter-list '>'
///
/// The parameter list is parsed by the \p Parser callable argument.
/// It takes a StringRef of parameters and returns either a StringError or a
/// parameter list in the form of a custom parameters type, all wrapped into
/// the Expected<> template class.
///
template <typename ParametersParseCallableT>
auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
StringRef PassName) -> decltype(Parser(StringRef{})) {
using ParametersT = typename decltype(Parser(StringRef{}))::value_type;
StringRef Params = Name;
if (!Params.consume_front(PassName)) {
assert(false &&
"unable to strip pass name from parametrized pass specification");
}
if (!Params.empty() &&
(!Params.consume_front("<") || !Params.consume_back(">"))) {
assert(false && "invalid format for parametrized pass name");
}
Expected<ParametersT> Result = Parser(Params);
assert((Result || Result.template errorIsA<StringError>()) &&
"Pass parameter parser can only return StringErrors.");
return Result;
}
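// Illustrative example (not part of the original file): with this scheme a
// textual pipeline can carry pass parameters directly, using the names
// accepted by the parsers below, e.g.:
//
//   ModulePassManager MPM;
//   if (Error Err = PB.parsePassPipeline(
//           MPM, "function(simplifycfg<hoist-common-insts;no-keep-loops>)"))
//     errs() << toString(std::move(Err)) << "\n";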
/// Parser of parameters for LoopUnroll pass.
Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
LoopUnrollOptions UnrollOpts;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
int OptLevel = StringSwitch<int>(ParamName)
.Case("O0", 0)
.Case("O1", 1)
.Case("O2", 2)
.Case("O3", 3)
.Default(-1);
if (OptLevel >= 0) {
UnrollOpts.setOptLevel(OptLevel);
continue;
}
if (ParamName.consume_front("full-unroll-max=")) {
int Count;
if (ParamName.getAsInteger(0, Count))
return make_error<StringError>(
formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
UnrollOpts.setFullUnrollMaxCount(Count);
continue;
}
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "partial") {
UnrollOpts.setPartial(Enable);
} else if (ParamName == "peeling") {
UnrollOpts.setPeeling(Enable);
} else if (ParamName == "profile-peeling") {
UnrollOpts.setProfileBasedPeeling(Enable);
} else if (ParamName == "runtime") {
UnrollOpts.setRuntime(Enable);
} else if (ParamName == "upperbound") {
UnrollOpts.setUpperBound(Enable);
} else {
return make_error<StringError>(
formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
}
}
return UnrollOpts;
}
Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
MemorySanitizerOptions Result;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
if (ParamName == "recover") {
Result.Recover = true;
} else if (ParamName == "kernel") {
Result.Kernel = true;
} else if (ParamName.consume_front("track-origins=")) {
if (ParamName.getAsInteger(0, Result.TrackOrigins))
return make_error<StringError>(
formatv("invalid argument to MemorySanitizer pass track-origins "
"parameter: '{0}' ",
ParamName)
.str(),
inconvertibleErrorCode());
} else {
return make_error<StringError>(
formatv("invalid MemorySanitizer pass parameter '{0}' ", ParamName)
.str(),
inconvertibleErrorCode());
}
}
return Result;
}
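// Example: "msan<recover;track-origins=2>" (registry name "msan" assumed)
// enables error recovery and origin tracking at level 2.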
/// Parser of parameters for SimplifyCFG pass.
Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
SimplifyCFGOptions Result;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "forward-switch-cond") {
Result.forwardSwitchCondToPhi(Enable);
} else if (ParamName == "switch-to-lookup") {
Result.convertSwitchToLookupTable(Enable);
} else if (ParamName == "keep-loops") {
Result.needCanonicalLoops(Enable);
} else if (ParamName == "hoist-common-insts") {
Result.hoistCommonInsts(Enable);
} else if (ParamName == "sink-common-insts") {
Result.sinkCommonInsts(Enable);
} else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
APInt BonusInstThreshold;
if (ParamName.getAsInteger(0, BonusInstThreshold))
return make_error<StringError>(
formatv("invalid argument to SimplifyCFG pass bonus-threshold "
"parameter: '{0}' ",
ParamName).str(),
inconvertibleErrorCode());
Result.bonusInstThreshold(BonusInstThreshold.getSExtValue());
} else {
return make_error<StringError>(
formatv("invalid SimplifyCFG pass parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
}
}
return Result;
}
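// Example: "simplifycfg<bonus-inst-threshold=2;no-keep-loops>" (registry name
// "simplifycfg" assumed) sets the bonus instruction threshold to 2 and stops
// requiring canonical loop form to be preserved.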
/// Parser of parameters for LoopVectorize pass.
Expected<LoopVectorizeOptions> parseLoopVectorizeOptions(StringRef Params) {
LoopVectorizeOptions Opts;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "interleave-forced-only") {
Opts.setInterleaveOnlyWhenForced(Enable);
} else if (ParamName == "vectorize-forced-only") {
Opts.setVectorizeOnlyWhenForced(Enable);
} else {
return make_error<StringError>(
formatv("invalid LoopVectorize parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
}
}
return Opts;
}
Expected<std::pair<bool, bool>> parseLoopUnswitchOptions(StringRef Params) {
std::pair<bool, bool> Result = {false, true};
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "nontrivial") {
Result.first = Enable;
} else if (ParamName == "trivial") {
Result.second = Enable;
} else {
return make_error<StringError>(
formatv("invalid LoopUnswitch pass parameter '{0}' ", ParamName)
.str(),
inconvertibleErrorCode());
}
}
return Result;
}
Expected<bool> parseMergedLoadStoreMotionOptions(StringRef Params) {
bool Result = false;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "split-footer-bb") {
Result = Enable;
} else {
return make_error<StringError>(
formatv("invalid MergedLoadStoreMotion pass parameter '{0}' ",
ParamName)
.str(),
inconvertibleErrorCode());
}
}
return Result;
}
Expected<GVNOptions> parseGVNOptions(StringRef Params) {
GVNOptions Result;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "pre") {
Result.setPRE(Enable);
} else if (ParamName == "load-pre") {
Result.setLoadPRE(Enable);
} else if (ParamName == "split-backedge-load-pre") {
Result.setLoadPRESplitBackedge(Enable);
} else if (ParamName == "memdep") {
Result.setMemDep(Enable);
} else {
return make_error<StringError>(
formatv("invalid GVN pass parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
}
}
return Result;
}
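// Example: "gvn<no-load-pre;no-memdep>" (registry name "gvn" assumed) keeps
// scalar PRE enabled but turns off load PRE and MemDep-based dependence
// queries.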
Expected<StackLifetime::LivenessType>
parseStackLifetimeOptions(StringRef Params) {
StackLifetime::LivenessType Result = StackLifetime::LivenessType::May;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
if (ParamName == "may") {
Result = StackLifetime::LivenessType::May;
} else if (ParamName == "must") {
Result = StackLifetime::LivenessType::Must;
} else {
return make_error<StringError>(
formatv("invalid StackLifetime parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
}
}
return Result;
}
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
/// alias.
static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) {
return Name.startswith("default") || Name.startswith("thinlto") ||
Name.startswith("lto");
}
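// Aliases matched by this prefix check (and later validated against
// DefaultAliasRegex) look like, e.g.:
//   "default<O2>", "thinlto-pre-link<Os>", "thinlto<O3>",
//   "lto-pre-link<O1>", "lto<O3>"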
/// Tests whether registered callbacks will accept a given pass name.
///
/// When parsing a pipeline text, the type of the outermost pipeline may be
/// omitted, in which case the type is automatically determined from the first
/// pass name in the text. This may be a name that is handled through one of the
/// callbacks. We check this through the ordinary parsing callbacks by setting
/// up a dummy PassManager in order to not force the client to also handle this
/// type of query.
template <typename PassManagerT, typename CallbacksT>
static bool callbacksAcceptPassName(StringRef Name, CallbacksT &Callbacks) {
if (!Callbacks.empty()) {
PassManagerT DummyPM;
for (auto &CB : Callbacks)
if (CB(Name, DummyPM, {}))
return true;
}
return false;
}
template <typename CallbacksT>
static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
// Manually handle aliases for pre-configured pipeline fragments.
if (startsWithDefaultPipelineAliasPrefix(Name))
return DefaultAliasRegex.match(Name);
// Explicitly handle pass manager names.
if (Name == "module")
return true;
if (Name == "cgscc")
return true;
if (Name == "function")
return true;
// Explicitly handle custom-parsed pass names.
if (parseRepeatPassName(Name))
return true;
#define MODULE_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
#include "PassRegistry.def"
return callbacksAcceptPassName<ModulePassManager>(Name, Callbacks);
}
template <typename CallbacksT>
static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "cgscc")
return true;
if (Name == "function")
return true;
// Explicitly handle custom-parsed pass names.
if (parseRepeatPassName(Name))
return true;
if (parseDevirtPassName(Name))
return true;
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
#include "PassRegistry.def"
return callbacksAcceptPassName<CGSCCPassManager>(Name, Callbacks);
}
template <typename CallbacksT>
static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "function")
return true;
if (Name == "loop" || Name == "loop-mssa")
return true;
// Explicitly handle custom-parsed pass names.
if (parseRepeatPassName(Name))
return true;
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) \
return true;
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
#include "PassRegistry.def"
return callbacksAcceptPassName<FunctionPassManager>(Name, Callbacks);
}
template <typename CallbacksT>
static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "loop" || Name == "loop-mssa")
return true;
// Explicitly handle custom-parsed pass names.
if (parseRepeatPassName(Name))
return true;
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) \
return true;
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
#include "PassRegistry.def"
return callbacksAcceptPassName<LoopPassManager>(Name, Callbacks);
}
Optional<std::vector<PassBuilder::PipelineElement>>
PassBuilder::parsePipelineText(StringRef Text) {
std::vector<PipelineElement> ResultPipeline;
SmallVector<std::vector<PipelineElement> *, 4> PipelineStack = {
&ResultPipeline};
for (;;) {
std::vector<PipelineElement> &Pipeline = *PipelineStack.back();
size_t Pos = Text.find_first_of(",()");
Pipeline.push_back({Text.substr(0, Pos), {}});
// If we have a single terminating name, we're done.
if (Pos == Text.npos)
break;
char Sep = Text[Pos];
Text = Text.substr(Pos + 1);
if (Sep == ',')
// Just a name ending in a comma, continue.
continue;
if (Sep == '(') {
// Push the inner pipeline onto the stack to continue processing.
PipelineStack.push_back(&Pipeline.back().InnerPipeline);
continue;
}
assert(Sep == ')' && "Bogus separator!");
// When handling close parentheses, we greedily consume them to avoid
// empty strings in the pipeline.
do {
// If we try to pop the outer pipeline we have unbalanced parentheses.
if (PipelineStack.size() == 1)
return None;
PipelineStack.pop_back();
} while (Text.consume_front(")"));
// Check if we've finished parsing.
if (Text.empty())
break;
// Otherwise, the end of an inner pipeline always has to be followed by
// a comma, and then we can continue.
if (!Text.consume_front(","))
return None;
}
if (PipelineStack.size() > 1)
// Unbalanced parentheses.
return None;
assert(PipelineStack.back() == &ResultPipeline &&
"Wrong pipeline at the bottom of the stack!");
return {std::move(ResultPipeline)};
}
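// Illustrative decomposition (pass names are placeholders): the text
//   "cgscc(function(instcombine),inline)"
// yields one top-level element "cgscc" whose InnerPipeline contains
// "function" (itself wrapping "instcombine") followed by "inline".
// Unbalanced parentheses, or a ')' followed by anything other than ',' or the
// end of the text, cause None to be returned.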
Error PassBuilder::parseModulePass(ModulePassManager &MPM,
const PipelineElement &E) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
// First handle complex passes like the pass managers which carry pipelines.
if (!InnerPipeline.empty()) {
if (Name == "module") {
ModulePassManager NestedMPM;
if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline))
return Err;
MPM.addPass(std::move(NestedMPM));
return Error::success();
}
if (Name == "cgscc") {
CGSCCPassManager CGPM;
if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline))
return Err;
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
return Error::success();
}
if (Name == "function") {
FunctionPassManager FPM;
if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
ModulePassManager NestedMPM;
if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline))
return Err;
MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM)));
return Error::success();
}
for (auto &C : ModulePipelineParsingCallbacks)
if (C(Name, MPM, InnerPipeline))
return Error::success();
// Normal passes can't have pipelines.
return make_error<StringError>(
formatv("invalid use of '{0}' pass as module pipeline", Name).str(),
inconvertibleErrorCode());
}
// Manually handle aliases for pre-configured pipeline fragments.
if (startsWithDefaultPipelineAliasPrefix(Name)) {
SmallVector<StringRef, 3> Matches;
if (!DefaultAliasRegex.match(Name, &Matches))
return make_error<StringError>(
formatv("unknown default pipeline alias '{0}'", Name).str(),
inconvertibleErrorCode());
assert(Matches.size() == 3 && "Must capture two matched strings!");
OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
.Case("O0", OptimizationLevel::O0)
.Case("O1", OptimizationLevel::O1)
.Case("O2", OptimizationLevel::O2)
.Case("O3", OptimizationLevel::O3)
.Case("Os", OptimizationLevel::Os)
.Case("Oz", OptimizationLevel::Oz);
if (L == OptimizationLevel::O0 && Matches[1] != "thinlto" &&
Matches[1] != "lto") {
MPM.addPass(buildO0DefaultPipeline(L, Matches[1] == "thinlto-pre-link" ||
Matches[1] == "lto-pre-link"));
return Error::success();
}
// This is consistent with old pass manager invoked via opt, but
// inconsistent with clang. Clang doesn't enable loop vectorization
// but does enable slp vectorization at Oz.
PTO.LoopVectorization =
L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
PTO.SLPVectorization =
L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
if (Matches[1] == "default") {
MPM.addPass(buildPerModuleDefaultPipeline(L));
} else if (Matches[1] == "thinlto-pre-link") {
MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L));
} else if (Matches[1] == "thinlto") {
MPM.addPass(buildThinLTODefaultPipeline(L, nullptr));
} else if (Matches[1] == "lto-pre-link") {
MPM.addPass(buildLTOPreLinkDefaultPipeline(L));
} else {
assert(Matches[1] == "lto" && "Not one of the matched options!");
MPM.addPass(buildLTODefaultPipeline(L, nullptr));
}
return Error::success();
}
// Finally expand the basic registered passes from the .inc file.
#define MODULE_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(CREATE_PASS); \
return Error::success(); \
}
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
MPM.addPass( \
RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, Module>()); \
return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
MPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \
return Error::success(); \
}
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS)); \
return Error::success(); \
}
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \
return Error::success(); \
}
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor( \
createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
MPM.addPass( \
createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
CREATE_PASS(Params.get()), false, false))); \
return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : ModulePipelineParsingCallbacks)
if (C(Name, MPM, InnerPipeline))
return Error::success();
return make_error<StringError>(
formatv("unknown module pass '{0}'", Name).str(),
inconvertibleErrorCode());
}
Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
const PipelineElement &E) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
// First handle complex passes like the pass managers which carry pipelines.
if (!InnerPipeline.empty()) {
if (Name == "cgscc") {
CGSCCPassManager NestedCGPM;
if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
CGPM.addPass(std::move(NestedCGPM));
return Error::success();
}
if (Name == "function") {
FunctionPassManager FPM;
if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
CGSCCPassManager NestedCGPM;
if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline))
return Err;
CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM)));
return Error::success();
}
if (auto MaxRepetitions = parseDevirtPassName(Name)) {
CGSCCPassManager NestedCGPM;
if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline))
return Err;
CGPM.addPass(
createDevirtSCCRepeatedPass(std::move(NestedCGPM), *MaxRepetitions));
return Error::success();
}
for (auto &C : CGSCCPipelineParsingCallbacks)
if (C(Name, CGPM, InnerPipeline))
return Error::success();
// Normal passes can't have pipelines.
return make_error<StringError>(
formatv("invalid use of '{0}' pass as cgscc pipeline", Name).str(),
inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(CREATE_PASS); \
return Error::success(); \
}
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
CGPM.addPass(RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, \
LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, \
CGSCCUpdateResult &>()); \
return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
CGPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS)); \
return Error::success(); \
}
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \
return Error::success(); \
}
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
CGPM.addPass( \
createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
CREATE_PASS(Params.get()), false, false))); \
return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : CGSCCPipelineParsingCallbacks)
if (C(Name, CGPM, InnerPipeline))
return Error::success();
return make_error<StringError>(
formatv("unknown cgscc pass '{0}'", Name).str(),
inconvertibleErrorCode());
}
Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
const PipelineElement &E) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
// First handle complex passes like the pass managers which carry pipelines.
if (!InnerPipeline.empty()) {
if (Name == "function") {
FunctionPassManager NestedFPM;
if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
FPM.addPass(std::move(NestedFPM));
return Error::success();
}
if (Name == "loop" || Name == "loop-mssa") {
LoopPassManager LPM;
if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
bool UseMemorySSA = (Name == "loop-mssa");
bool UseBFI = llvm::any_of(
InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; });
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
UseBFI));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
FunctionPassManager NestedFPM;
if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline))
return Err;
FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM)));
return Error::success();
}
for (auto &C : FunctionPipelineParsingCallbacks)
if (C(Name, FPM, InnerPipeline))
return Error::success();
// Normal passes can't have pipelines.
return make_error<StringError>(
formatv("invalid use of '{0}' pass as function pipeline", Name).str(),
inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
FPM.addPass(CREATE_PASS); \
return Error::success(); \
}
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
FPM.addPass(CREATE_PASS(Params.get())); \
return Error::success(); \
}
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
FPM.addPass( \
RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, Function>()); \
return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
FPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
// FIXME: UseMemorySSA is set to false. Maybe we could do things like:
// bool UseMemorySSA = !("canon-freeze" || "loop-predication" ||
// "guard-widening");
// The risk is that it may become obsolete if we're not careful.
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), \
false, false)); \
return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : FunctionPipelineParsingCallbacks)
if (C(Name, FPM, InnerPipeline))
return Error::success();
return make_error<StringError>(
formatv("unknown function pass '{0}'", Name).str(),
inconvertibleErrorCode());
}
Error PassBuilder::parseLoopPass(LoopPassManager &LPM,
const PipelineElement &E) {
StringRef Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
// First handle complex passes like the pass managers which carry pipelines.
if (!InnerPipeline.empty()) {
if (Name == "loop") {
LoopPassManager NestedLPM;
if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
LPM.addPass(std::move(NestedLPM));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
LoopPassManager NestedLPM;
if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline))
return Err;
LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM)));
return Error::success();
}
for (auto &C : LoopPipelineParsingCallbacks)
if (C(Name, LPM, InnerPipeline))
return Error::success();
// Normal passes can't have pipelines.
return make_error<StringError>(
formatv("invalid use of '{0}' pass as loop pipeline", Name).str(),
inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
LPM.addPass(CREATE_PASS); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
LPM.addPass(CREATE_PASS(Params.get())); \
return Error::success(); \
}
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
LPM.addPass(RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, Loop, \
LoopAnalysisManager, LoopStandardAnalysisResults &, \
LPMUpdater &>()); \
return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
LPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : LoopPipelineParsingCallbacks)
if (C(Name, LPM, InnerPipeline))
return Error::success();
return make_error<StringError>(formatv("unknown loop pass '{0}'", Name).str(),
inconvertibleErrorCode());
}
bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) {
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (Name == NAME) { \
AA.registerModuleAnalysis< \
std::remove_reference<decltype(CREATE_PASS)>::type>(); \
return true; \
}
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (Name == NAME) { \
AA.registerFunctionAnalysis< \
std::remove_reference<decltype(CREATE_PASS)>::type>(); \
return true; \
}
#include "PassRegistry.def"
for (auto &C : AAParsingCallbacks)
if (C(Name, AA))
return true;
return false;
}
Error PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM,
ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
if (auto Err = parseLoopPass(LPM, Element))
return Err;
}
return Error::success();
}
Error PassBuilder::parseFunctionPassPipeline(
FunctionPassManager &FPM, ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
if (auto Err = parseFunctionPass(FPM, Element))
return Err;
}
return Error::success();
}
Error PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
if (auto Err = parseCGSCCPass(CGPM, Element))
return Err;
}
return Error::success();
}
void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
FunctionAnalysisManager &FAM,
CGSCCAnalysisManager &CGAM,
ModuleAnalysisManager &MAM) {
MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
MAM.registerPass([&] { return CGSCCAnalysisManagerModuleProxy(CGAM); });
CGAM.registerPass([&] { return ModuleAnalysisManagerCGSCCProxy(MAM); });
FAM.registerPass([&] { return CGSCCAnalysisManagerFunctionProxy(CGAM); });
FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); });
FAM.registerPass([&] { return LoopAnalysisManagerFunctionProxy(LAM); });
LAM.registerPass([&] { return FunctionAnalysisManagerLoopProxy(FAM); });
}
Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
if (auto Err = parseModulePass(MPM, Element))
return Err;
}
return Error::success();
}
// Primary pass pipeline description parsing routine for a \c ModulePassManager
// FIXME: Should this routine accept a TargetMachine or require the caller to
// pre-populate the analysis managers with target-specific stuff?
Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
formatv("invalid pipeline '{0}'", PipelineText).str(),
inconvertibleErrorCode());
// If the first name isn't at the module layer, wrap the pipeline up
// automatically.
StringRef FirstName = Pipeline->front().Name;
if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) {
if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) {
Pipeline = {{"cgscc", std::move(*Pipeline)}};
} else if (isFunctionPassName(FirstName,
FunctionPipelineParsingCallbacks)) {
Pipeline = {{"function", std::move(*Pipeline)}};
} else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) {
Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}};
} else {
for (auto &C : TopLevelPipelineParsingCallbacks)
if (C(MPM, *Pipeline))
return Error::success();
// Unknown pass or pipeline name!
auto &InnerPipeline = Pipeline->front().InnerPipeline;
return make_error<StringError>(
formatv("unknown {0} name '{1}'",
(InnerPipeline.empty() ? "pass" : "pipeline"), FirstName)
.str(),
inconvertibleErrorCode());
}
}
if (auto Err = parseModulePassPipeline(MPM, *Pipeline))
return Err;
return Error::success();
}
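// Illustrative driver for this entry point, a minimal sketch assuming the
// usual PassBuilder analysis-registration helpers (registerModuleAnalyses()
// and friends) from upstream LLVM:
//
//   PassBuilder PB;
//   LoopAnalysisManager LAM;
//   FunctionAnalysisManager FAM;
//   CGSCCAnalysisManager CGAM;
//   ModuleAnalysisManager MAM;
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//
//   ModulePassManager MPM;
//   if (Error Err = PB.parsePassPipeline(MPM, "default<O2>"))
//     errs() << toString(std::move(Err)) << "\n";
//   else
//     MPM.run(M, MAM);   // M is the llvm::Module being optimized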
// Primary pass pipeline description parsing routine for a \c CGSCCPassManager
Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
formatv("invalid pipeline '{0}'", PipelineText).str(),
inconvertibleErrorCode());
StringRef FirstName = Pipeline->front().Name;
if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks))
return make_error<StringError>(
formatv("unknown cgscc pass '{0}' in pipeline '{1}'", FirstName,
PipelineText)
.str(),
inconvertibleErrorCode());
if (auto Err = parseCGSCCPassPipeline(CGPM, *Pipeline))
return Err;
return Error::success();
}
// Primary pass pipeline description parsing routine for a \c
// FunctionPassManager
Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM,
StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
formatv("invalid pipeline '{0}'", PipelineText).str(),
inconvertibleErrorCode());
StringRef FirstName = Pipeline->front().Name;
if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks))
return make_error<StringError>(
formatv("unknown function pass '{0}' in pipeline '{1}'", FirstName,
PipelineText)
.str(),
inconvertibleErrorCode());
if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline))
return Err;
return Error::success();
}
// Primary pass pipeline description parsing routine for a \c LoopPassManager
Error PassBuilder::parsePassPipeline(LoopPassManager &LPM,
StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
formatv("invalid pipeline '{0}'", PipelineText).str(),
inconvertibleErrorCode());
if (auto Err = parseLoopPassPipeline(LPM, *Pipeline))
return Err;
return Error::success();
}
Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
// If the pipeline just consists of the word 'default' just replace the AA
// manager with our default one.
if (PipelineText == "default") {
AA = buildDefaultAAPipeline();
return Error::success();
}
while (!PipelineText.empty()) {
StringRef Name;
std::tie(Name, PipelineText) = PipelineText.split(',');
if (!parseAAPassName(AA, Name))
return make_error<StringError>(
formatv("unknown alias analysis name '{0}'", Name).str(),
inconvertibleErrorCode());
}
return Error::success();
}
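// Example: parseAAPipeline(AA, "basic-aa,scoped-noalias-aa,tbaa") registers
// each analysis in turn (names assumed to match the *_ALIAS_ANALYSIS entries
// in PassRegistry.def), while "default" installs buildDefaultAAPipeline().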
bool PassBuilder::isAAPassName(StringRef PassName) {
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#include "PassRegistry.def"
return false;
}
bool PassBuilder::isAnalysisPassName(StringRef PassName) {
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#include "PassRegistry.def"
return false;
}
static void printPassName(StringRef PassName, raw_ostream &OS) {
OS << " " << PassName << "\n";
}
static void printPassName(StringRef PassName, StringRef Params,
raw_ostream &OS) {
OS << " " << PassName << "<" << Params << ">\n";
}
void PassBuilder::printPassNames(raw_ostream &OS) {
// TODO: print pass descriptions when they are available
OS << "Module passes:\n";
#define MODULE_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Module analyses:\n";
#define MODULE_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Module alias analyses:\n";
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "CGSCC passes:\n";
#define CGSCC_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "CGSCC analyses:\n";
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Function passes:\n";
#define FUNCTION_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Function passes with params:\n";
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
printPassName(NAME, PARAMS, OS);
#include "PassRegistry.def"
OS << "Function analyses:\n";
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Function alias analyses:\n";
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Loop passes:\n";
#define LOOP_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Loop passes with params:\n";
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
printPassName(NAME, PARAMS, OS);
#include "PassRegistry.def"
OS << "Loop analyses:\n";
#define LOOP_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
}
void PassBuilder::registerParseTopLevelPipelineCallback(
const std::function<bool(ModulePassManager &, ArrayRef<PipelineElement>)>
&C) {
TopLevelPipelineParsingCallbacks.push_back(C);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b27a02b8c182..60c00f47859b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1,18751 +1,18753 @@
//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64ExpandImm.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.
cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
"aarch64-elf-ldtls-generation", cl::Hidden,
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
static cl::opt<bool>
EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
cl::desc("Enable AArch64 logical imm instruction "
"optimization"),
cl::init(true));
// Temporary option added for the purpose of testing functionality added
// to DAGCombiner.cpp in D92230. It is expected that this can be removed
// in the future, once both implementations are based on MGATHER rather
// than the GLD1 nodes added for the SVE gather load intrinsics.
static cl::opt<bool>
EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
cl::desc("Combine extends of AArch64 masked "
"gather intrinsics"),
cl::init(true));
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
static inline EVT getPackedSVEVectorVT(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for vector");
case MVT::i8:
return MVT::nxv16i8;
case MVT::i16:
return MVT::nxv8i16;
case MVT::i32:
return MVT::nxv4i32;
case MVT::i64:
return MVT::nxv2i64;
case MVT::f16:
return MVT::nxv8f16;
case MVT::f32:
return MVT::nxv4f32;
case MVT::f64:
return MVT::nxv2f64;
case MVT::bf16:
return MVT::nxv8bf16;
}
}
// NOTE: Currently there's only a need to return integer vector types. If this
// changes then just add an extra "type" parameter.
static inline EVT getPackedSVEVectorVT(ElementCount EC) {
switch (EC.getKnownMinValue()) {
default:
llvm_unreachable("unexpected element count for vector");
case 16:
return MVT::nxv16i8;
case 8:
return MVT::nxv8i16;
case 4:
return MVT::nxv4i32;
case 2:
return MVT::nxv2i64;
}
}
static inline EVT getPromotedVTForPredicate(EVT VT) {
assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
"Expected scalable predicate vector type!");
switch (VT.getVectorMinNumElements()) {
default:
llvm_unreachable("unexpected element count for vector");
case 2:
return MVT::nxv2i64;
case 4:
return MVT::nxv4i32;
case 8:
return MVT::nxv8i16;
case 16:
return MVT::nxv16i8;
}
}
/// Returns true if VT's elements occupy the lowest bit positions of its
/// associated register class without any intervening space.
///
/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
/// same register class, but only nxv8f16 can be treated as a packed vector.
static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal vector type!");
return VT.isFixedLengthVector() ||
VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
}
// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
// predicate and end with a passthru value matching the result type.
static bool isMergePassthruOpcode(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
case AArch64ISD::BSWAP_MERGE_PASSTHRU:
case AArch64ISD::CTLZ_MERGE_PASSTHRU:
case AArch64ISD::CTPOP_MERGE_PASSTHRU:
case AArch64ISD::DUP_MERGE_PASSTHRU:
case AArch64ISD::ABS_MERGE_PASSTHRU:
case AArch64ISD::NEG_MERGE_PASSTHRU:
case AArch64ISD::FNEG_MERGE_PASSTHRU:
case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::FCEIL_MERGE_PASSTHRU:
case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
case AArch64ISD::FRINT_MERGE_PASSTHRU:
case AArch64ISD::FROUND_MERGE_PASSTHRU:
case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
case AArch64ISD::FSQRT_MERGE_PASSTHRU:
case AArch64ISD::FRECPX_MERGE_PASSTHRU:
case AArch64ISD::FABS_MERGE_PASSTHRU:
return true;
}
}
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
// When comparing vectors the result sets the different elements in the
// vector to all-one or all-zero.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Set up the register classes.
addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
if (Subtarget->hasLS64()) {
addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
setOperationAction(ISD::STORE, MVT::i64x8, Custom);
}
if (Subtarget->hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
}
if (Subtarget->hasNEON()) {
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
// Someone set us up the NEON.
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
if (Subtarget->hasBF16())
addDRTypeForNEON(MVT::v4bf16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
if (Subtarget->hasBF16())
addQRTypeForNEON(MVT::v8bf16);
}
if (Subtarget->hasSVE()) {
// Add legal sve predicate types
addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
// Add legal sve data types
addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
if (Subtarget->hasBF16()) {
addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
}
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
}
for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
for (auto VT :
{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
for (auto VT :
{ MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
MVT::nxv2f64 }) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
}
}
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget->getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f80, Expand);
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// Custom lowering hooks are needed for XOR
// to fold it into CSINC/CSINV.
setOperationAction(ISD::XOR, MVT::i32, Custom);
setOperationAction(ISD::XOR, MVT::i64, Custom);
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
setOperationAction(ISD::FDIV, MVT::f128, LibCall);
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
// Lowering for many of the conversions is actually specified by the non-f128
// type. The LowerXXX function will be trivial when f128 isn't involved.
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
// Variable arguments.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// Variable-sized objects.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
// BlockAddress
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
// Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
setOperationAction(ISD::ADDC, MVT::i64, Custom);
setOperationAction(ISD::ADDE, MVT::i64, Custom);
setOperationAction(ISD::SUBC, MVT::i64, Custom);
setOperationAction(ISD::SUBE, MVT::i64, Custom);
// AArch64 lacks both left-rotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
// AArch64 doesn't have i32 MULH{S|U}.
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
// AArch64 doesn't have {U|S}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ABS, MVT::i64, Custom);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Custom lower Add/Sub/Mul with overflow.
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::SADDO, MVT::i64, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i64, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::UMULO, MVT::i32, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
if (Subtarget->hasFullFP16())
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::v4f16, Expand);
setOperationAction(ISD::FREM, MVT::v8f16, Expand);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
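// Without the FullFP16 extension the only scalar f16 operations available are
// conversions to and from f32/f64, so scalar f16 arithmetic is promoted to
// f32 and the f16 vector forms are expanded.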
if (!Subtarget->hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
setOperationAction(ISD::SETCC, MVT::f16, Promote);
setOperationAction(ISD::BR_CC, MVT::f16, Promote);
setOperationAction(ISD::FADD, MVT::f16, Promote);
setOperationAction(ISD::FSUB, MVT::f16, Promote);
setOperationAction(ISD::FMUL, MVT::f16, Promote);
setOperationAction(ISD::FDIV, MVT::f16, Promote);
setOperationAction(ISD::FMA, MVT::f16, Promote);
setOperationAction(ISD::FNEG, MVT::f16, Promote);
setOperationAction(ISD::FABS, MVT::f16, Promote);
setOperationAction(ISD::FCEIL, MVT::f16, Promote);
setOperationAction(ISD::FSQRT, MVT::f16, Promote);
setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::FRINT, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// Promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
setOperationAction(ISD::FABS, MVT::v8f16, Expand);
setOperationAction(ISD::FADD, MVT::v8f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
setOperationAction(ISD::FMA, MVT::v8f16, Expand);
setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
}
// AArch64 has implementations of a lot of rounding-like FP operations.
for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
setOperationAction(ISD::FMINIMUM, Ty, Legal);
setOperationAction(ISD::FMAXIMUM, Ty, Legal);
setOperationAction(ISD::LROUND, Ty, Legal);
setOperationAction(ISD::LLROUND, Ty, Legal);
setOperationAction(ISD::LRINT, Ty, Legal);
setOperationAction(ISD::LLRINT, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
setOperationAction(ISD::FCEIL, MVT::f16, Legal);
setOperationAction(ISD::FRINT, MVT::f16, Legal);
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
// Generate outline atomics library calls only if LSE was not specified for
// the subtarget.
if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
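// The macros below register the runtime entry-point names for the outline
// atomic helpers, e.g. LCALLNAMES(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas, 4)
// produces __aarch64_cas4_relax/_acq/_rel/_acq_rel; the numeric suffix is the
// access size in bytes and the trailing suffix the memory ordering.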
#define LCALLNAMES(A, B, N) \
setLibcallName(A##N##_RELAX, #B #N "_relax"); \
setLibcallName(A##N##_ACQ, #B #N "_acq"); \
setLibcallName(A##N##_REL, #B #N "_rel"); \
setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B) \
LCALLNAMES(A, B, 1) \
LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B) \
LCALLNAMES(A, B, 1) \
LCALLNAMES(A, B, 2) \
LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
#undef LCALLNAMES
#undef LCALLNAME4
#undef LCALLNAME5
}
// 128-bit loads and stores can be done without expanding
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
// 256-bit non-temporal stores can be lowered to STNP. Do this as part of the
// custom lowering, as there are no unpaired non-temporal stores and
// legalization will break up 256-bit inputs.
setOperationAction(ISD::STORE, MVT::v32i8, Custom);
setOperationAction(ISD::STORE, MVT::v16i16, Custom);
setOperationAction(ISD::STORE, MVT::v16f16, Custom);
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v8f32, Custom);
setOperationAction(ISD::STORE, MVT::v4f64, Custom);
setOperationAction(ISD::STORE, MVT::v4i64, Custom);
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
if (Subtarget->hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
// Issue __sincos_stret if available.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
} else {
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
if (Subtarget->getTargetTriple().isOSMSVCRT()) {
// MSVCRT doesn't have powi; fall back to pow
setLibcallName(RTLIB::POWI_F32, nullptr);
setLibcallName(RTLIB::POWI_F64, nullptr);
}
// Make floating-point constants legal for the large code model, so they don't
// become loads from the constant pool.
if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
}
// AArch64 does not have floating-point extending loads, i1 sign-extending
// loads, floating-point truncating stores, or v2i32->v2i16 truncating stores.
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
}
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f80, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedLoadAction(im, MVT::i64, Legal);
setIndexedLoadAction(im, MVT::f64, Legal);
setIndexedLoadAction(im, MVT::f32, Legal);
setIndexedLoadAction(im, MVT::f16, Legal);
setIndexedLoadAction(im, MVT::bf16, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i64, Legal);
setIndexedStoreAction(im, MVT::f64, Legal);
setIndexedStoreAction(im, MVT::f32, Legal);
setIndexedStoreAction(im, MVT::f16, Legal);
setIndexedStoreAction(im, MVT::bf16, Legal);
}
// Trap.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// We combine OR nodes for bitfield operations.
setTargetDAGCombine(ISD::OR);
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV.
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
// TODO: Do the same for FP_TO_*INT_SAT.
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
// Try to combine setcc with csel.
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::VECTOR_SPLICE);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::STORE);
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::VECREDUCE_ADD);
setTargetDAGCombine(ISD::STEP_VECTOR);
setTargetDAGCombine(ISD::GlobalAddress);
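// The Max*PerMem* thresholds below bound how many inline load/store
// operations the generic lowering may emit when expanding
// memset/memcpy/memmove/memcmp before falling back to a library call.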
// In case of strict alignment, avoid an excessive number of byte-wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset = Subtarget->requiresStrictAlign()
? MaxStoresPerMemsetOptSize : 32;
MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
? MaxStoresPerMemcpyOptSize : 16;
MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
? MaxLoadsPerMemcmpOptSize : 8;
setStackPointerRegisterToSaveRestore(AArch64::SP);
setSchedulingPreference(Sched::Hybrid);
EnableExtLdPromotion = true;
// Set required alignment.
setMinFunctionAlignment(Align(4));
// Set preferred alignments.
setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
// Only change the limit for entries in a jump table if specified by
// the subtarget, but not at the command line.
unsigned MaxJT = STI.getMaximumJumpTableSize();
if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
setMaximumJumpTableSize(MaxJT);
setHasExtractBitsInsn(true);
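// AArch64 has bitfield extract instructions (SBFX/UBFX); advertising this
// presumably keeps generic combines from rewriting shift-and-mask sequences
// into forms those instructions cannot match.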
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget->hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
setOperationAction(ISD::FABS, MVT::v1f64, Expand);
setOperationAction(ISD::FADD, MVT::v1f64, Expand);
setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
setOperationAction(ISD::FMA, MVT::v1f64, Expand);
setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
setOperationAction(ISD::FREM, MVT::v1f64, Expand);
setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// AArch64 doesn't have direct vector->f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
// Likewise, there is no direct i32 -> f16 vector conversion instruction; set
// it to Custom so the conversion happens in two steps: v4i32 -> v4f32 -> v4f16.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
} else {
// When AArch64 doesn't have FullFP16 support, promote the input
// to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
}
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quad-vector types to detect MULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Saturates
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
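// Absolute-difference operations map directly onto the NEON SABD/UABD
// instructions for these types.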
for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
MVT::v4i32}) {
setOperationAction(ISD::ABDS, VT, Legal);
setOperationAction(ISD::ABDU, VT, Legal);
}
// Vector reductions
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
}
}
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
setOperationAction(ISD::MULHS, VT, Legal);
setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
}
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// AArch64 has implementations of a lot of rounding-like FP operations.
for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
}
if (Subtarget->hasSVE())
setOperationAction(ISD::VSCALE, MVT::i32, Custom);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
}
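// SVE: most operations on legal scalable vector types are custom-lowered onto
// predicated AArch64ISD nodes (e.g. AArch64ISD::MUL_PRED).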
if (Subtarget->hasSVE()) {
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
// Illegal unpacked integer vector types.
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
// Legalize unpacked bitcasts to REINTERPRET_CAST.
for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
setOperationAction(ISD::BITCAST, VT, Custom);
for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
}
}
// NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
// Avoid marking truncating FP stores as legal to prevent the
// DAGCombiner from creating unsupported truncating stores.
setTruncStoreAction(VT, InnerVT, Expand);
// SVE does not have floating-point extending loads.
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// SVE supports truncating stores of 64 and 128-bit vectors
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
MVT::nxv4f32, MVT::nxv2f64}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
}
setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
// A 64-bit vector result can come from an input wider than a NEON register.
for (auto VT : {MVT::v8i8, MVT::v4i16})
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
// A 128-bit vector result implies an input wider than a NEON register.
for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(ISD::TRUNCATE, VT, Custom);
for (auto VT : {MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::FP_ROUND, VT, Custom);
// These operations are not supported on NEON but SVE can do them.
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
// Int operations with no NEON support.
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
}
// FP operations with no NEON support.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
MVT::v1f64, MVT::v2f64})
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
// Use SVE for vectors with more than 2 elements.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
}
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
}
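// On subtargets where a conditional select is costlier than a well-predicted
// branch, allow the generic code to turn predictable selects into branches.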
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
assert(VT.isVector() && "VT should be a vector type");
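// Floating-point vector loads and stores are promoted to the same-width
// integer vector type, so only the integer load/store patterns are needed.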
if (VT.isFloatingPoint()) {
MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
}
// Mark vector float intrinsics as expand.
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
}
// But we do support custom-lowering for FCOPYSIGN.
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
for (MVT InnerVT : MVT::all_valuetypes())
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
// CNT supports only B element sizes; wider element types are custom-lowered
// using CNT followed by UADDLP to widen the byte counts.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
if (!VT.isFloatingPoint())
setOperationAction(ISD::ABS, VT, Legal);
// [SU][MIN|MAX] are available for all NEON types apart from i64.
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
if (VT.isFloatingPoint() &&
VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
setOperationAction(Opcode, VT, Legal);
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
}
}
}
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
if (VT.isFloatingPoint()) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
}
// Mark integer truncating stores as having custom lowering
if (VT.isInteger()) {
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
InnerVT = InnerVT.changeVectorElementType(
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
}
}
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
}
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR64RegClass);
addTypeForNEON(VT);
}
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR128RegClass);
addTypeForNEON(VT);
}
EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
LLVMContext &C, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
if (VT.isScalableVector())
return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
const APInt &Demanded,
TargetLowering::TargetLoweringOpt &TLO,
unsigned NewOpc) {
uint64_t OldImm = Imm, NewImm, Enc;
uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
// Return if the immediate is already all zeros, all ones, a bimm32 or a
// bimm64.
if (Imm == 0 || Imm == Mask ||
AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
return false;
unsigned EltSize = Size;
uint64_t DemandedBits = Demanded.getZExtValue();
// Clear bits that are not demanded.
Imm &= DemandedBits;
while (true) {
// The goal here is to set the non-demanded bits in a way that minimizes
// the number of transitions between 0 and 1. To achieve this, we set each
// non-demanded bit to the value of the preceding demanded bit.
// For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
// non-demanded bit), we copy bit0 (1) to the least significant 'x',
// bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
// The final result is 0b11000011.
uint64_t NonDemandedBits = ~DemandedBits;
uint64_t InvertedImm = ~Imm & DemandedBits;
uint64_t RotatedImm =
((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
NonDemandedBits;
uint64_t Sum = RotatedImm + NonDemandedBits;
bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
uint64_t Ones = (Sum + Carry) & NonDemandedBits;
NewImm = (Imm | Ones) & Mask;
// If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
// or all-ones or all-zeros, in which case we can stop searching. Otherwise,
// we halve the element size and continue the search.
if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
break;
// We cannot shrink the element size any further if it is 2 bits.
if (EltSize == 2)
return false;
EltSize /= 2;
Mask >>= EltSize;
uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
// Return if there is a mismatch in any of the demanded bits of Imm and Hi.
if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
return false;
// Merge the upper and lower halves of Imm and DemandedBits.
Imm |= Hi;
DemandedBits |= DemandedBitsHi;
}
++NumOptimizedImms;
// Replicate the element across the register width.
while (EltSize < Size) {
NewImm |= NewImm << EltSize;
EltSize *= 2;
}
(void)OldImm;
assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
"demanded bits should never be altered");
assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
// Create the new constant immediate node.
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue New;
// If the new constant immediate is all-zeros or all-ones, let the target
// independent DAG combine optimize this node.
if (NewImm == 0 || NewImm == OrigMask) {
New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
TLO.DAG.getConstant(NewImm, DL, VT));
// Otherwise, create a machine node so that target independent DAG combine
// doesn't undo this optimization.
} else {
Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
New = SDValue(
TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
}
return TLO.CombineTo(Op, New);
}
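// Try to replace the immediate of a logical operation with a simpler value by
// exploiting bits the caller does not demand, so that it encodes as an
// AArch64 bitmask immediate (see optimizeLogicalImm above).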
bool AArch64TargetLowering::targetShrinkDemandedConstant(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
// Delay this optimization until as late as possible.
if (!TLO.LegalOps)
return false;
if (!EnableOptimizeLogicalImm)
return false;
EVT VT = Op.getValueType();
if (VT.isVector())
return false;
unsigned Size = VT.getSizeInBits();
assert((Size == 32 || Size == 64) &&
"i32 or i64 is expected after legalization.");
// Exit early if we demand all bits.
if (DemandedBits.countPopulation() == Size)
return false;
unsigned NewOpc;
switch (Op.getOpcode()) {
default:
return false;
case ISD::AND:
NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
break;
case ISD::OR:
NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
break;
case ISD::XOR:
NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
break;
}
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!C)
return false;
uint64_t Imm = C->getZExtValue();
return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
}
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them in Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case AArch64ISD::CSEL: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::commonBits(Known, Known2);
break;
}
case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {
if (!Subtarget->isTargetILP32())
break;
// In ILP32 mode all valid pointers are in the low 4GB of the address space.
Known.Zero = APInt::getHighBitsSet(64, 32);
break;
}
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default: return;
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
unsigned BitWidth = Known.getBitWidth();
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
}
break;
}
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID: {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
// Figure out the datatype of the vector operand. The UMINV instruction
// will zero-extend the result, so we can mark all bits above the element
// width as known zero. 32-bit or larger elements don't need this, as those
// are legal types and will be handled by isel directly.
MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
unsigned BitWidth = Known.getBitWidth();
if (VT == MVT::v8i8 || VT == MVT::v16i8) {
assert(BitWidth >= 8 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
Known.Zero |= Mask;
} else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
assert(BitWidth >= 16 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
Known.Zero |= Mask;
}
break;
} break;
}
}
}
}
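// Scalar shift amounts are always represented as i64, regardless of the type
// being shifted.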
MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
EVT) const {
return MVT::i64;
}
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
VT == MVT::v2i64;
}
return true;
}
// Same as above but handling LLTs instead.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() ||
Ty.getSizeInBytes() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
Ty == LLT::fixed_vector(2, 64);
}
return true;
}
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return AArch64::createFastISel(funcInfo, libInfo);
}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
#define MAKE_CASE(V) \
case V: \
return #V;
switch ((AArch64ISD::NodeType)Opcode) {
case AArch64ISD::FIRST_NUMBER:
break;
MAKE_CASE(AArch64ISD::CALL)
MAKE_CASE(AArch64ISD::ADRP)
MAKE_CASE(AArch64ISD::ADR)
MAKE_CASE(AArch64ISD::ADDlow)
MAKE_CASE(AArch64ISD::LOADgot)
MAKE_CASE(AArch64ISD::RET_FLAG)
MAKE_CASE(AArch64ISD::BRCOND)
MAKE_CASE(AArch64ISD::CSEL)
MAKE_CASE(AArch64ISD::CSINV)
MAKE_CASE(AArch64ISD::CSNEG)
MAKE_CASE(AArch64ISD::CSINC)
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ADD_PRED)
MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::MULHS_PRED)
MAKE_CASE(AArch64ISD::MULHU_PRED)
MAKE_CASE(AArch64ISD::SDIV_PRED)
MAKE_CASE(AArch64ISD::SHL_PRED)
MAKE_CASE(AArch64ISD::SMAX_PRED)
MAKE_CASE(AArch64ISD::SMIN_PRED)
MAKE_CASE(AArch64ISD::SRA_PRED)
MAKE_CASE(AArch64ISD::SRL_PRED)
MAKE_CASE(AArch64ISD::SUB_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)
MAKE_CASE(AArch64ISD::UMAX_PRED)
MAKE_CASE(AArch64ISD::UMIN_PRED)
MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ADC)
MAKE_CASE(AArch64ISD::SBC)
MAKE_CASE(AArch64ISD::ADDS)
MAKE_CASE(AArch64ISD::SUBS)
MAKE_CASE(AArch64ISD::ADCS)
MAKE_CASE(AArch64ISD::SBCS)
MAKE_CASE(AArch64ISD::ANDS)
MAKE_CASE(AArch64ISD::CCMP)
MAKE_CASE(AArch64ISD::CCMN)
MAKE_CASE(AArch64ISD::FCCMP)
MAKE_CASE(AArch64ISD::FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMPE)
MAKE_CASE(AArch64ISD::DUP)
MAKE_CASE(AArch64ISD::DUPLANE8)
MAKE_CASE(AArch64ISD::DUPLANE16)
MAKE_CASE(AArch64ISD::DUPLANE32)
MAKE_CASE(AArch64ISD::DUPLANE64)
MAKE_CASE(AArch64ISD::MOVI)
MAKE_CASE(AArch64ISD::MOVIshift)
MAKE_CASE(AArch64ISD::MOVIedit)
MAKE_CASE(AArch64ISD::MOVImsl)
MAKE_CASE(AArch64ISD::FMOV)
MAKE_CASE(AArch64ISD::MVNIshift)
MAKE_CASE(AArch64ISD::MVNImsl)
MAKE_CASE(AArch64ISD::BICi)
MAKE_CASE(AArch64ISD::ORRi)
MAKE_CASE(AArch64ISD::BSP)
MAKE_CASE(AArch64ISD::EXTR)
MAKE_CASE(AArch64ISD::ZIP1)
MAKE_CASE(AArch64ISD::ZIP2)
MAKE_CASE(AArch64ISD::UZP1)
MAKE_CASE(AArch64ISD::UZP2)
MAKE_CASE(AArch64ISD::TRN1)
MAKE_CASE(AArch64ISD::TRN2)
MAKE_CASE(AArch64ISD::REV16)
MAKE_CASE(AArch64ISD::REV32)
MAKE_CASE(AArch64ISD::REV64)
MAKE_CASE(AArch64ISD::EXT)
MAKE_CASE(AArch64ISD::SPLICE)
MAKE_CASE(AArch64ISD::VSHL)
MAKE_CASE(AArch64ISD::VLSHR)
MAKE_CASE(AArch64ISD::VASHR)
MAKE_CASE(AArch64ISD::VSLI)
MAKE_CASE(AArch64ISD::VSRI)
MAKE_CASE(AArch64ISD::CMEQ)
MAKE_CASE(AArch64ISD::CMGE)
MAKE_CASE(AArch64ISD::CMGT)
MAKE_CASE(AArch64ISD::CMHI)
MAKE_CASE(AArch64ISD::CMHS)
MAKE_CASE(AArch64ISD::FCMEQ)
MAKE_CASE(AArch64ISD::FCMGE)
MAKE_CASE(AArch64ISD::FCMGT)
MAKE_CASE(AArch64ISD::CMEQz)
MAKE_CASE(AArch64ISD::CMGEz)
MAKE_CASE(AArch64ISD::CMGTz)
MAKE_CASE(AArch64ISD::CMLEz)
MAKE_CASE(AArch64ISD::CMLTz)
MAKE_CASE(AArch64ISD::FCMEQz)
MAKE_CASE(AArch64ISD::FCMGEz)
MAKE_CASE(AArch64ISD::FCMGTz)
MAKE_CASE(AArch64ISD::FCMLEz)
MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::SRHADD)
MAKE_CASE(AArch64ISD::URHADD)
MAKE_CASE(AArch64ISD::SHADD)
MAKE_CASE(AArch64ISD::UHADD)
MAKE_CASE(AArch64ISD::SDOT)
MAKE_CASE(AArch64ISD::UDOT)
MAKE_CASE(AArch64ISD::SMINV)
MAKE_CASE(AArch64ISD::UMINV)
MAKE_CASE(AArch64ISD::SMAXV)
MAKE_CASE(AArch64ISD::UMAXV)
MAKE_CASE(AArch64ISD::SADDV_PRED)
MAKE_CASE(AArch64ISD::UADDV_PRED)
MAKE_CASE(AArch64ISD::SMAXV_PRED)
MAKE_CASE(AArch64ISD::UMAXV_PRED)
MAKE_CASE(AArch64ISD::SMINV_PRED)
MAKE_CASE(AArch64ISD::UMINV_PRED)
MAKE_CASE(AArch64ISD::ORV_PRED)
MAKE_CASE(AArch64ISD::EORV_PRED)
MAKE_CASE(AArch64ISD::ANDV_PRED)
MAKE_CASE(AArch64ISD::CLASTA_N)
MAKE_CASE(AArch64ISD::CLASTB_N)
MAKE_CASE(AArch64ISD::LASTA)
MAKE_CASE(AArch64ISD::LASTB)
MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
MAKE_CASE(AArch64ISD::LS64_BUILD)
MAKE_CASE(AArch64ISD::LS64_EXTRACT)
MAKE_CASE(AArch64ISD::TBL)
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
MAKE_CASE(AArch64ISD::FADDV_PRED)
MAKE_CASE(AArch64ISD::FDIV_PRED)
MAKE_CASE(AArch64ISD::FMA_PRED)
MAKE_CASE(AArch64ISD::FMAX_PRED)
MAKE_CASE(AArch64ISD::FMAXV_PRED)
MAKE_CASE(AArch64ISD::FMAXNM_PRED)
MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
MAKE_CASE(AArch64ISD::FMIN_PRED)
MAKE_CASE(AArch64ISD::FMINV_PRED)
MAKE_CASE(AArch64ISD::FMINNM_PRED)
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
MAKE_CASE(AArch64ISD::FMUL_PRED)
MAKE_CASE(AArch64ISD::FSUB_PRED)
MAKE_CASE(AArch64ISD::BIC)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
MAKE_CASE(AArch64ISD::CBNZ)
MAKE_CASE(AArch64ISD::TBZ)
MAKE_CASE(AArch64ISD::TBNZ)
MAKE_CASE(AArch64ISD::TC_RETURN)
MAKE_CASE(AArch64ISD::PREFETCH)
MAKE_CASE(AArch64ISD::SITOF)
MAKE_CASE(AArch64ISD::UITOF)
MAKE_CASE(AArch64ISD::NVCAST)
MAKE_CASE(AArch64ISD::MRS)
MAKE_CASE(AArch64ISD::SQSHL_I)
MAKE_CASE(AArch64ISD::UQSHL_I)
MAKE_CASE(AArch64ISD::SRSHR_I)
MAKE_CASE(AArch64ISD::URSHR_I)
MAKE_CASE(AArch64ISD::SQSHLU_I)
MAKE_CASE(AArch64ISD::WrapperLarge)
MAKE_CASE(AArch64ISD::LD2post)
MAKE_CASE(AArch64ISD::LD3post)
MAKE_CASE(AArch64ISD::LD4post)
MAKE_CASE(AArch64ISD::ST2post)
MAKE_CASE(AArch64ISD::ST3post)
MAKE_CASE(AArch64ISD::ST4post)
MAKE_CASE(AArch64ISD::LD1x2post)
MAKE_CASE(AArch64ISD::LD1x3post)
MAKE_CASE(AArch64ISD::LD1x4post)
MAKE_CASE(AArch64ISD::ST1x2post)
MAKE_CASE(AArch64ISD::ST1x3post)
MAKE_CASE(AArch64ISD::ST1x4post)
MAKE_CASE(AArch64ISD::LD1DUPpost)
MAKE_CASE(AArch64ISD::LD2DUPpost)
MAKE_CASE(AArch64ISD::LD3DUPpost)
MAKE_CASE(AArch64ISD::LD4DUPpost)
MAKE_CASE(AArch64ISD::LD1LANEpost)
MAKE_CASE(AArch64ISD::LD2LANEpost)
MAKE_CASE(AArch64ISD::LD3LANEpost)
MAKE_CASE(AArch64ISD::LD4LANEpost)
MAKE_CASE(AArch64ISD::ST2LANEpost)
MAKE_CASE(AArch64ISD::ST3LANEpost)
MAKE_CASE(AArch64ISD::ST4LANEpost)
MAKE_CASE(AArch64ISD::SMULL)
MAKE_CASE(AArch64ISD::UMULL)
MAKE_CASE(AArch64ISD::FRECPE)
MAKE_CASE(AArch64ISD::FRECPS)
MAKE_CASE(AArch64ISD::FRSQRTE)
MAKE_CASE(AArch64ISD::FRSQRTS)
MAKE_CASE(AArch64ISD::STG)
MAKE_CASE(AArch64ISD::STZG)
MAKE_CASE(AArch64ISD::ST2G)
MAKE_CASE(AArch64ISD::STZ2G)
MAKE_CASE(AArch64ISD::SUNPKHI)
MAKE_CASE(AArch64ISD::SUNPKLO)
MAKE_CASE(AArch64ISD::UUNPKHI)
MAKE_CASE(AArch64ISD::UUNPKLO)
MAKE_CASE(AArch64ISD::INSR)
MAKE_CASE(AArch64ISD::PTEST)
MAKE_CASE(AArch64ISD::PTRUE)
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ST1_PRED)
MAKE_CASE(AArch64ISD::SST1_PRED)
MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
MAKE_CASE(AArch64ISD::LDP)
MAKE_CASE(AArch64ISD::STP)
MAKE_CASE(AArch64ISD::STNP)
MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
}
#undef MAKE_CASE
return nullptr;
}
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *MBB) const {
// We materialise the F128CSEL pseudo-instruction as some control flow and a
// phi node:
// OrigBB:
// [... previous instrs leading to comparison ...]
// b.ne TrueBB
// b EndBB
// TrueBB:
// ; Fallthrough
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI.getDebugLoc();
MachineFunction::iterator It = ++MBB->getIterator();
Register DestReg = MI.getOperand(0).getReg();
Register IfTrueReg = MI.getOperand(1).getReg();
Register IfFalseReg = MI.getOperand(2).getReg();
unsigned CondCode = MI.getOperand(3).getImm();
bool NZCVKilled = MI.getOperand(4).isKill();
MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, TrueBB);
MF->insert(It, EndBB);
// Transfer rest of current basic-block to EndBB
EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
MBB->end());
EndBB->transferSuccessorsAndUpdatePHIs(MBB);
BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
MBB->addSuccessor(TrueBB);
MBB->addSuccessor(EndBB);
// TrueBB falls through to the end.
TrueBB->addSuccessor(EndBB);
if (!NZCVKilled) {
TrueBB->addLiveIn(AArch64::NZCV);
EndBB->addLiveIn(AArch64::NZCV);
}
BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
.addReg(IfTrueReg)
.addMBB(TrueBB)
.addReg(IfFalseReg)
.addMBB(MBB);
MI.eraseFromParent();
return EndBB;
}
MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
MachineInstr &MI, MachineBasicBlock *BB) const {
assert(!isAsynchronousEHPersonality(classifyEHPersonality(
BB->getParent()->getFunction().getPersonalityFn())) &&
"SEH does not use catchret!");
return BB;
}
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
default:
#ifndef NDEBUG
MI.dump();
#endif
llvm_unreachable("Unexpected instruction for custom inserter!");
case AArch64::F128CSEL:
return EmitF128CSEL(MI, BB);
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
case TargetOpcode::STATEPOINT:
return emitPatchPoint(MI, BB);
case AArch64::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
}
}
//===----------------------------------------------------------------------===//
// AArch64 Lowering private implementation.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
// Forward declarations of SVE fixed length lowering helpers
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG);
/// isZerosVector - Check whether SDNode N is a zero-filled vector.
static bool isZerosVector(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (ISD::isConstantSplatVectorAllZeros(N))
return true;
if (N->getOpcode() != AArch64ISD::DUP)
return false;
auto Opnd0 = N->getOperand(0);
auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
}
/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
/// CC
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unknown condition code!");
case ISD::SETNE:
return AArch64CC::NE;
case ISD::SETEQ:
return AArch64CC::EQ;
case ISD::SETGT:
return AArch64CC::GT;
case ISD::SETGE:
return AArch64CC::GE;
case ISD::SETLT:
return AArch64CC::LT;
case ISD::SETLE:
return AArch64CC::LE;
case ISD::SETUGT:
return AArch64CC::HI;
case ISD::SETUGE:
return AArch64CC::HS;
case ISD::SETULT:
return AArch64CC::LO;
case ISD::SETULE:
return AArch64CC::LS;
}
}
/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static void changeFPCCToAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (CC) {
default:
llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ:
CondCode = AArch64CC::EQ;
break;
case ISD::SETGT:
case ISD::SETOGT:
CondCode = AArch64CC::GT;
break;
case ISD::SETGE:
case ISD::SETOGE:
CondCode = AArch64CC::GE;
break;
case ISD::SETOLT:
CondCode = AArch64CC::MI;
break;
case ISD::SETOLE:
CondCode = AArch64CC::LS;
break;
case ISD::SETONE:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GT;
break;
case ISD::SETO:
CondCode = AArch64CC::VC;
break;
case ISD::SETUO:
CondCode = AArch64CC::VS;
break;
case ISD::SETUEQ:
CondCode = AArch64CC::EQ;
CondCode2 = AArch64CC::VS;
break;
case ISD::SETUGT:
CondCode = AArch64CC::HI;
break;
case ISD::SETUGE:
CondCode = AArch64CC::PL;
break;
case ISD::SETLT:
case ISD::SETULT:
CondCode = AArch64CC::LT;
break;
case ISD::SETLE:
case ISD::SETULE:
CondCode = AArch64CC::LE;
break;
case ISD::SETNE:
case ISD::SETUNE:
CondCode = AArch64CC::NE;
break;
}
}
/// Convert a DAG fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (CC) {
default:
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
assert(CondCode2 == AArch64CC::AL);
break;
case ISD::SETONE:
// (a one b)
// == ((a olt b) || (a ogt b))
// == ((a ord b) && (a une b))
CondCode = AArch64CC::VC;
CondCode2 = AArch64CC::NE;
break;
case ISD::SETUEQ:
// (a ueq b)
// == ((a uno b) || (a oeq b))
// == ((a ule b) && (a uge b))
CondCode = AArch64CC::PL;
CondCode2 = AArch64CC::LE;
break;
}
}
/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
/// CC usable with the vector instructions. Fewer operations are available
/// without a real NZCV register, so we have to use less efficient combinations
/// to get the same effect.
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2,
bool &Invert) {
Invert = false;
switch (CC) {
default:
// Mostly the scalar mappings work fine.
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
break;
case ISD::SETUO:
Invert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GE;
break;
case ISD::SETUEQ:
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETUGT:
case ISD::SETUGE:
// All of the compare-mask comparisons are ordered, but we can switch
// between the two by a double inversion. E.g. ULE == !OGT.
Invert = true;
changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
CondCode, CondCode2);
break;
}
}
static bool isLegalArithImmed(uint64_t C) {
// Matches AArch64DAGToDAGISel::SelectArithImmed().
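// Legal immediates are a 12-bit value, optionally shifted left by 12 bits:
// e.g. 0xFFF and 0xFFF000 encode directly, while 0x1001 does not.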
bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
LLVM_DEBUG(dbgs() << "Is imm " << C
<< " legal: " << (IsLegal ? "yes\n" : "no\n"));
return IsLegal;
}
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
// can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
// everything is fine. If not then the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
//
// So, finally, the only LLVM-native comparisons that don't mention C and V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
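// For example, with op2 == 0, SUBS computes op1 - 0 and sets C (no borrow),
// while ADDS computes op1 + 0 and clears C, so unsigned conditions disagree.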
static bool isCMN(SDValue Op, ISD::CondCode CC) {
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE);
}
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
SelectionDAG &DAG, SDValue Chain,
bool IsSignaling) {
EVT VT = LHS.getValueType();
assert(VT != MVT::f128);
assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
unsigned Opcode =
IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
}
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
if (VT.isFloatingPoint()) {
assert(VT != MVT::f128);
if (VT == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
VT = MVT::f32;
}
return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
}
// The CMP instruction is just an alias for SUBS, and representing it as
// SUBS means that it's possible to get CSE with subtract operations.
// A later phase can perform the optimization of setting the destination
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
if (isCMN(RHS, CC)) {
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
} else if (isCMN(LHS, CC)) {
// As we are looking for EQ/NE compares, the operands can be commuted; can
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
LHS = LHS.getOperand(1);
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
if (LHS.getOpcode() == ISD::AND) {
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
// of the signed comparisons.
const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
DAG.getVTList(VT, MVT_CC),
LHS.getOperand(0),
LHS.getOperand(1));
// Replace all users of (and X, Y) with newly generated (ands X, Y)
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
return ANDSNode.getValue(1);
} else if (LHS.getOpcode() == AArch64ISD::ANDS) {
// Use result of ANDS
return LHS.getValue(1);
}
}
return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
.getValue(1);
}
/// \defgroup AArch64CCMP CMP;CCMP matching
///
/// These functions deal with the formation of CMP;CCMP;... sequences.
/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
/// a comparison. They set the NZCV flags to a predefined value if their
/// predicate is false. This allows us to express arbitrary conjunctions, for
/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
/// expressed as:
/// cmp A
/// ccmp B, inv(CB), CA
/// check for CB flags
///
/// This naturally lets us implement chains of AND operations with SETCC
/// operands. And we can even implement some other situations by transforming
/// them:
/// - We can implement (NEG SETCC) i.e. negating a single comparison by
/// negating the flags used in a CCMP/FCCMP operations.
/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
/// by negating the flags we test for afterwards. i.e.
/// NEG (CMP CCMP CCMP ...) can be implemented.
/// - Note that we can only ever negate all previously processed results.
/// What we can not implement by flipping the flags to test is a negation
/// of two sub-trees (because the negation affects all sub-trees emitted so
/// far, so the 2nd sub-tree we emit would also affect the first).
/// With those tools we can implement some OR operations:
/// - (OR (SETCC A) (SETCC B)) can be implemented via:
/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
/// - After transforming OR to NEG/AND combinations we may be able to use NEG
/// elimination rules from earlier to implement the whole thing as a
/// CCMP/FCCMP chain.
///
/// As a complete example:
/// or (or (setCA (cmp A)) (setCB (cmp B)))
/// (and (setCC (cmp C)) (setCD (cmp D)))"
/// can be reassociated to:
/// or (and (setCC (cmp C)) (setCD (cmp D)))
/// (or (setCA (cmp A)) (setCB (cmp B)))
/// can be transformed to:
/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
/// (and (not (setCA (cmp A))) (not (setCB (cmp B)))))"
/// which can be implemented as:
/// cmp C
/// ccmp D, inv(CD), CC
/// ccmp A, CA, inv(CD)
/// ccmp B, CB, inv(CA)
/// check for CB flags
///
/// A counterexample is "or (and A B) (and C D)" which translates to
/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
/// can only implement one of the inner (not) operations, but not both!
/// @{
/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDValue CCOp,
AArch64CC::CondCode Predicate,
AArch64CC::CondCode OutCC,
const SDLoc &DL, SelectionDAG &DAG) {
unsigned Opcode = 0;
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
if (LHS.getValueType().isFloatingPoint()) {
assert(LHS.getValueType() != MVT::f128);
if (LHS.getValueType() == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
}
Opcode = AArch64ISD::FCCMP;
} else if (RHS.getOpcode() == ISD::SUB) {
SDValue SubOp0 = RHS.getOperand(0);
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
// See emitComparison() on why we can only do this for SETEQ and SETNE.
Opcode = AArch64ISD::CCMN;
RHS = RHS.getOperand(1);
}
}
if (Opcode == 0)
Opcode = AArch64ISD::CCMP;
SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
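// If the predicate fails, the conditional compare writes NZCV directly;
// choose flag values that satisfy the inverted output condition, so OutCC
// reads as false in that case.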
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
}
/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
/// expressed as a conjunction. See \ref AArch64CCMP.
/// \param CanNegate Set to true if we can negate the whole sub-tree just by
/// changing the conditions on the SETCC tests.
/// (this means we can call emitConjunctionRec() with
/// Negate==true on this sub-tree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
/// cannot do the negation naturally. We are required to
/// emit the subtree first in this case.
/// \param WillNegate Is true if we are called when the result of this
/// subexpression must be negated. This happens when the
/// outer expression is an OR. We can use this fact to know
/// that we have a double negation (or (or ...) ...) that
/// can be implemented for free.
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
bool &MustBeFirst, bool WillNegate,
unsigned Depth = 0) {
if (!Val.hasOneUse())
return false;
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
if (Val->getOperand(0).getValueType() == MVT::f128)
return false;
CanNegate = true;
MustBeFirst = false;
return true;
}
// Protect against exponential runtime and stack overflow.
if (Depth > 6)
return false;
if (Opcode == ISD::AND || Opcode == ISD::OR) {
bool IsOR = Opcode == ISD::OR;
SDValue O0 = Val->getOperand(0);
SDValue O1 = Val->getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
return false;
bool CanNegateR;
bool MustBeFirstR;
if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
return false;
if (MustBeFirstL && MustBeFirstR)
return false;
if (IsOR) {
// For an OR expression we need to be able to naturally negate at least
// one side or we cannot do the transformation at all.
if (!CanNegateL && !CanNegateR)
return false;
// If the result of the OR will be negated and we can naturally negate
// the leaves, then this sub-tree as a whole negates naturally.
CanNegate = WillNegate && CanNegateL && CanNegateR;
// If we cannot naturally negate the whole sub-tree, then this must be
// emitted first.
MustBeFirst = !CanNegate;
} else {
assert(Opcode == ISD::AND && "Must be OR or AND");
// We cannot naturally negate an AND operation.
CanNegate = false;
MustBeFirst = MustBeFirstL || MustBeFirstR;
}
return true;
}
return false;
}
/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
/// Tries to transform the given i1 producing node @p Val to a series of compare
/// and conditional compare operations. @returns an NZCV flags producing node
/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
/// the transformation was not possible.
/// \p Negate is true if we want this sub-tree to be negated just by changing
/// SETCC conditions.
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
AArch64CC::CondCode Predicate) {
// We're at a tree leaf, produce a conditional comparison operation.
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
SDValue LHS = Val->getOperand(0);
SDValue RHS = Val->getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
bool isInteger = LHS.getValueType().isInteger();
if (Negate)
CC = getSetCCInverse(CC, LHS.getValueType());
SDLoc DL(Val);
// Determine OutCC and handle FP special case.
if (isInteger) {
OutCC = changeIntCCToAArch64CC(CC);
} else {
assert(LHS.getValueType().isFloatingPoint());
AArch64CC::CondCode ExtraCC;
changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
// Some floating point conditions can't be tested with a single condition
// code. Construct an additional comparison in this case.
if (ExtraCC != AArch64CC::AL) {
SDValue ExtraCmp;
if (!CCOp.getNode())
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
else
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
ExtraCC, DL, DAG);
CCOp = ExtraCmp;
Predicate = ExtraCC;
}
}
// Produce a normal comparison if we are first in the chain
if (!CCOp)
return emitComparison(LHS, RHS, CC, DL, DAG);
// Otherwise produce a ccmp.
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
DAG);
}
assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
bool IsOR = Opcode == ISD::OR;
SDValue LHS = Val->getOperand(0);
bool CanNegateL;
bool MustBeFirstL;
bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
assert(ValidL && "Valid conjunction/disjunction tree");
(void)ValidL;
SDValue RHS = Val->getOperand(1);
bool CanNegateR;
bool MustBeFirstR;
bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
assert(ValidR && "Valid conjunction/disjunction tree");
(void)ValidR;
// Swap sub-tree that must come first to the right side.
if (MustBeFirstL) {
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
std::swap(LHS, RHS);
std::swap(CanNegateL, CanNegateR);
std::swap(MustBeFirstL, MustBeFirstR);
}
bool NegateR;
bool NegateAfterR;
bool NegateL;
bool NegateAfterAll;
if (Opcode == ISD::OR) {
// Swap the sub-tree that we can negate naturally to the left.
if (!CanNegateL) {
assert(CanNegateR && "at least one side must be negatable");
assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
assert(!Negate);
std::swap(LHS, RHS);
NegateR = false;
NegateAfterR = true;
} else {
// Negate the left sub-tree if possible, otherwise negate the result.
NegateR = CanNegateR;
NegateAfterR = !CanNegateR;
}
NegateL = true;
NegateAfterAll = !Negate;
} else {
assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
assert(!Negate && "Valid conjunction/disjunction tree");
NegateL = false;
NegateR = false;
NegateAfterR = false;
NegateAfterAll = false;
}
// Emit sub-trees.
AArch64CC::CondCode RHSCC;
SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
if (NegateAfterR)
RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
if (NegateAfterAll)
OutCC = AArch64CC::getInvertedCondCode(OutCC);
return CmpL;
}
/// Emit an expression as a conjunction (a series of CCMP/FCCMP ops).
/// In some cases this is even possible with OR operations in the expression.
/// See \ref AArch64CCMP.
/// \see emitConjunctionRec().
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC) {
bool DummyCanNegate;
bool DummyMustBeFirst;
if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
return SDValue();
return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
}
/// @}
/// Returns how profitable it is to fold a comparison's operand's shift and/or
/// extension operations.
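/// The result is a rough score: 2 when an extend plus a small shift can both
/// fold, 1 when a single extend or constant shift can fold, 0 otherwise.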
static unsigned getCmpOperandFoldingProfit(SDValue Op) {
auto isSupportedExtend = [&](SDValue V) {
if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
return true;
if (V.getOpcode() == ISD::AND)
if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
uint64_t Mask = MaskCst->getZExtValue();
return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
}
return false;
};
if (!Op.hasOneUse())
return 0;
if (isSupportedExtend(Op))
return 1;
unsigned Opc = Op.getOpcode();
if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
uint64_t Shift = ShiftCst->getZExtValue();
if (isSupportedExtend(Op.getOperand(0)))
return (Shift <= 4) ? 2 : 1;
EVT VT = Op.getValueType();
if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
return 1;
}
return 0;
}
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
if (!isLegalArithImmed(C)) {
// Constant does not fit, try adjusting it by one?
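// For example, "x < 0x1001" has an unencodable immediate but can become
// "x <= 0x1000", which encodes as a shifted 12-bit value.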
switch (CC) {
default:
break;
case ISD::SETLT:
case ISD::SETGE:
if ((VT == MVT::i32 && C != 0x80000000 &&
isLegalArithImmed((uint32_t)(C - 1))) ||
(VT == MVT::i64 && C != 0x80000000ULL &&
isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
if ((VT == MVT::i32 && C != 0 &&
isLegalArithImmed((uint32_t)(C - 1))) ||
(VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETLE:
case ISD::SETGT:
if ((VT == MVT::i32 && C != INT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
(VT == MVT::i64 && C != INT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
if ((VT == MVT::i32 && C != UINT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
(VT == MVT::i64 && C != UINT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
}
}
}
// Comparisons are canonicalized so that the RHS operand is simpler than the
// LHS one, the extreme case being when RHS is an immediate. However, AArch64
// can fold some shift+extend operations on the RHS operand, so swap the
// operands if that can be done.
//
// For example:
// lsl w13, w11, #1
// cmp w13, w12
// can be turned into:
// cmp w12, w11, lsl #1
if (!isa<ConstantSDNode>(RHS) ||
!isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
std::swap(LHS, RHS);
CC = ISD::getSetCCSwappedOperands(CC);
}
}
SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
// For the i8 operand, the largest immediate is 255, so this can be easily
// encoded in the compare instruction. For the i16 operand, however, the
// largest immediate cannot be encoded in the compare.
// Therefore, use a sign extending load and cmn to avoid materializing the
// -1 constant. For example,
// movz w1, #65535
// ldrh w0, [x0, #0]
// cmp w0, w1
// >
// ldrsh w0, [x0, #0]
// cmn w0, #1
// Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
// if and only if (sext LHS) == (sext RHS). The checks are in place to
// ensure both the LHS and RHS are truly zero extended and to make sure the
// transformation is profitable.
if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
LHS.getNode()->hasNUsesOfValue(1, 0)) {
int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
SDValue SExt =
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
DAG.getValueType(MVT::i16));
Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
RHS.getValueType()),
CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
}
if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
}
}
if (!Cmp) {
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
return Cmp;
}
static std::pair<SDValue, SDValue>
getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
"Unsupported value type");
SDValue Value, Overflow;
SDLoc DL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
unsigned Opc = 0;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown overflow instruction!");
case ISD::SADDO:
Opc = AArch64ISD::ADDS;
CC = AArch64CC::VS;
break;
case ISD::UADDO:
Opc = AArch64ISD::ADDS;
CC = AArch64CC::HS;
break;
case ISD::SSUBO:
Opc = AArch64ISD::SUBS;
CC = AArch64CC::VS;
break;
case ISD::USUBO:
Opc = AArch64ISD::SUBS;
CC = AArch64CC::LO;
break;
// Multiply needs a little bit of extra work.
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
// Extend to 64-bits, then perform a 64-bit multiply.
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
// Check that the result fits into a 32-bit integer.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
if (IsSigned) {
// cmp xreg, wreg, sxtw
SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
} else {
// tst xreg, #0xffffffff00000000
SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
Overflow =
DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
}
break;
}
assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
// For the 64 bit multiply
Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
if (IsSigned) {
SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
DAG.getConstant(63, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
.getValue(1);
} else {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs,
DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
}
} // switch (...)
if (Opc) {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
// Emit the AArch64 operation with overflow check.
Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
Overflow = Value.getValue(1);
}
return std::make_pair(Value, Overflow);
}
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToScalableOp(Op, DAG);
SDValue Sel = Op.getOperand(0);
SDValue Other = Op.getOperand(1);
SDLoc dl(Sel);
// If the operand is an overflow checking operation, invert the condition
// code and kill the Not operation. I.e., transform:
// (xor (overflow_op_bool, 1))
// -->
// (csel 1, 0, invert(cc), overflow_op_bool)
// ... which later gets transformed to just a cset instruction with an
// inverted condition code, rather than a cset + eor sequence.
if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
return SDValue();
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
AArch64CC::CondCode CC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
// If neither operand is a SELECT_CC, give up.
if (Sel.getOpcode() != ISD::SELECT_CC)
std::swap(Sel, Other);
if (Sel.getOpcode() != ISD::SELECT_CC)
return Op;
// The folding we want to perform is:
// (xor x, (select_cc a, b, cc, 0, -1) )
// -->
// (csel x, (xor x, -1), cc ...)
//
// The latter will get matched to a CSINV instruction.
ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
SDValue LHS = Sel.getOperand(0);
SDValue RHS = Sel.getOperand(1);
SDValue TVal = Sel.getOperand(2);
SDValue FVal = Sel.getOperand(3);
// FIXME: This could be generalized to non-integer comparisons.
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return Op;
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
// The values aren't constants, this isn't the pattern we're looking for.
if (!CFVal || !CTVal)
return Op;
// We can commute the SELECT_CC by inverting the condition. This
// might be needed to make this fit into a CSINV pattern.
if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
// If the constants line up, perform the transform!
if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
FVal = Other;
TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
DAG.getConstant(-1ULL, dl, Other.getValueType()));
return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
CCVal, Cmp);
}
return Op;
}
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Invalid code");
case ISD::ADDC:
Opc = AArch64ISD::ADDS;
break;
case ISD::SUBC:
Opc = AArch64ISD::SUBS;
break;
case ISD::ADDE:
Opc = AArch64ISD::ADCS;
ExtraOp = true;
break;
case ISD::SUBE:
Opc = AArch64ISD::SBCS;
ExtraOp = true;
break;
}
if (!ExtraOp)
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
Op.getOperand(2));
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
SDLoc dl(Op);
AArch64CC::CondCode CC;
// The actual operation that sets the overflow or carry flag.
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
// We use an inverted condition, because the conditional select is inverted
// too. This will allow it to be selected to a single instruction:
// CSINC Wd, WZR, WZR, invert(cond).
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
CCVal, Overflow);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
// Prefetch operands are:
// 1: Address to prefetch
// 2: bool isWrite
// 3: int locality (0 = no locality ... 3 = extreme locality)
// 4: bool isDataCache
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
bool IsStream = !Locality;
// When the locality number is set
if (Locality) {
// The front-end should have filtered out the out-of-range values
assert(Locality <= 3 && "Prefetch locality out-of-range");
// The locality degree is the inverse of the cache level: the PRFM encoding
// starts at 0 for L1, so flip the number.
Locality = 3 - Locality;
}
// Build the mask value encoding the expected behavior.
unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
(!IsData << 3) | // IsDataCache bit
(Locality << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
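// For example, a read prefetch of data with maximal locality (IsWrite=0,
// Locality=3, IsData=1) encodes as PrfOp 0, i.e. PLDL1KEEP.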
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthFPExtendToSVE(Op, DAG);
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
return SDValue();
}
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
if (useSVEForFixedLengthVectorVT(SrcVT))
return LowerFixedLengthFPRoundToSVE(Op, DAG);
if (SrcVT != MVT::f128) {
// Expand cases where the input is a vector bigger than NEON.
if (useSVEForFixedLengthVectorVT(SrcVT))
return SDValue();
// It's legal except when f128 is involved
return Op;
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();
if (VT.isScalableVector()) {
unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
? AArch64ISD::FCVTZU_MERGE_PASSTHRU
: AArch64ISD::FCVTZS_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthFPToIntToSVE(Op, DAG);
unsigned NumElts = InVT.getVectorNumElements();
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (InVT.getVectorElementType() == MVT::f16 &&
!Subtarget->hasFullFP16()) {
MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
SDLoc dl(Op);
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
}
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
SDLoc dl(Op);
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
}
if (VTSize > InVTSize) {
SDLoc dl(Op);
MVT ExtVT =
MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
// Type changing conversions are illegal.
return Op;
}
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
if (SrcVal.getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
SDLoc dl(Op);
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
}
if (SrcVal.getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
SelectionDAG &DAG) const {
// AArch64 FP-to-int conversions saturate to the destination register size, so
// we can lower common saturating conversions to simple instructions.
SDValue SrcVal = Op.getOperand(0);
EVT SrcVT = SrcVal.getValueType();
EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
uint64_t SatWidth = SatVT.getScalarSizeInBits();
uint64_t DstWidth = DstVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
// TODO: Support lowering of NEON and SVE conversions.
if (SrcVT.isVector())
return SDValue();
// TODO: Saturate to SatWidth explicitly.
if (SatWidth != DstWidth)
return SDValue();
// In the absence of FP16 support, promote f16 to f32, like LowerFP_TO_INT().
if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
Op.getOperand(1));
// Cases that we can emit directly.
if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
(SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
(DstVT == MVT::i64 || DstVT == MVT::i32))
return Op;
// For all other cases, fall back on the expanded form.
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
EVT VT = Op.getValueType();
SDLoc dl(Op);
SDValue In = Op.getOperand(0);
EVT InVT = In.getValueType();
unsigned Opc = Op.getOpcode();
bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
if (VT.isScalableVector()) {
if (InVT.getVectorElementType() == MVT::i1) {
// We can't directly extend an SVE predicate; extend it first.
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = getPromotedVTForPredicate(InVT);
In = DAG.getNode(CastOpc, dl, CastVT, In);
return DAG.getNode(Opc, dl, VT, In);
}
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthIntToFPToSVE(Op, DAG);
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
In = DAG.getNode(Opc, dl, CastVT, In);
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
if (VTSize > InVTSize) {
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = VT.changeVectorElementTypeToInteger();
In = DAG.getNode(CastOpc, dl, CastVT, In);
return DAG.getNode(Opc, dl, VT, In);
}
return Op;
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (Op.getValueType() == MVT::f16 &&
!Subtarget->hasFullFP16()) {
assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
SDLoc dl(Op);
return DAG.getNode(
ISD::FP_ROUND, dl, MVT::f16,
DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
DAG.getIntPtrConstant(0, dl));
}
// i128 conversions are libcalls.
if (SrcVal.getValueType() == MVT::i128)
return SDValue();
// Other conversions are legal, unless it's to the completely software-based
// fp128.
if (Op.getValueType() != MVT::f128)
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
SelectionDAG &DAG) const {
// For iOS, we want to call an alternative entry point: __sincos_stret,
// which returns the values in two S / D registers.
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
: RTLIB::SINCOS_STRET_F32;
const char *LibcallName = getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
StructType *RetTy = StructType::get(ArgTy, ArgTy);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
static MVT getSVEContainerType(EVT ContentTy);
SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT OpVT = Op.getValueType();
EVT ArgVT = Op.getOperand(0).getValueType();
if (useSVEForFixedLengthVectorVT(OpVT))
return LowerFixedLengthBitcastToSVE(Op, DAG);
if (OpVT.isScalableVector()) {
if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
"Expected int->fp bitcast!");
SDValue ExtResult =
DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
Op.getOperand(0));
return getSVESafeBitCast(OpVT, ExtResult, DAG);
}
return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
}
if (OpVT != MVT::f16 && OpVT != MVT::bf16)
return SDValue();
assert(ArgVT == MVT::i16);
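// Scalar i16 -> f16/bf16 bitcasts are lowered by widening to i32, moving the
// value into an f32 register, and extracting the h subregister.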
SDLoc DL(Op);
Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
return SDValue(
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
}
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
if (OrigVT.getSizeInBits() >= 64)
return OrigVT;
assert(OrigVT.isSimple() && "Expecting a simple value type");
MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
switch (OrigSimpleTy) {
default: llvm_unreachable("Unexpected Vector Type");
case MVT::v2i8:
case MVT::v2i16:
return MVT::v2i32;
case MVT::v4i8:
return MVT::v4i16;
}
}
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
const EVT &OrigTy,
const EVT &ExtTy,
unsigned ExtOpcode) {
// The vector originally had a size of OrigTy. It was then extended to ExtTy.
// We expect the ExtTy to be 128-bits total. If the OrigTy is less than
// 64-bits we need to insert a new extension so that it will be 64-bits.
assert(ExtTy.is128BitVector() && "Unexpected extension size");
if (OrigTy.getSizeInBits() >= 64)
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
EVT NewVT = getExtensionTo64Bits(OrigTy);
return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
bool isSigned) {
EVT VT = N->getValueType(0);
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Elt : N->op_values()) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
unsigned EltSize = VT.getScalarSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
if (!isIntN(HalfSize, C->getSExtValue()))
return false;
} else {
if (!isUIntN(HalfSize, C->getZExtValue()))
return false;
}
continue;
}
return false;
}
return true;
}
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
N->getOpcode());
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
SDLoc dl(N);
unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, true);
}
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, false);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
}
return false;
}
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
}
return false;
}
SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
// The rounding mode is in bits 23:22 of the FPCR.
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
// The formula we use to implement this is ((FPCR + (1 << 22)) >> 22) & 3,
// so that the shift + and get folded into a bitfield extract.
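// For example, RMode 0b00 (round to nearest) yields ((0 + 1) & 3) == 1, and
// RMode 0b11 (round toward zero) yields ((3 + 1) & 3) == 0, matching the
// FLT_ROUNDS encoding.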
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
SDValue FPCR_64 = DAG.getNode(
ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
{Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
Chain = FPCR_64.getValue(1);
SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
return DAG.getMergeValues({AND, Chain}, dl);
}
SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
SDValue RMValue = Op->getOperand(1);
// The rounding mode is in bits 23:22 of the FPCR.
// The llvm.set.rounding argument value to the rounding mode in FPCR mapping
// is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
// (((arg - 1) & 3) << 22).
//
// The argument of llvm.set.rounding must be within the segment [0, 3], so
// NearestTiesToAway (4) is not handled here. It is the responsibility of the
// code that generates llvm.set.rounding to ensure this condition.
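// For example, llvm.set.rounding(1) (to nearest) computes ((1 - 1) & 3) == 0,
// i.e. FPCR RMode 0b00.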
// Calculate new value of FPCR[23:22].
RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
DAG.getConstant(1, DL, MVT::i32));
RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
DAG.getConstant(0x3, DL, MVT::i32));
RMValue =
DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
// Get current value of FPCR.
SDValue Ops[] = {
Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
SDValue FPCR =
DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
Chain = FPCR.getValue(1);
FPCR = FPCR.getValue(0);
// Put the new rounding mode into FPCR[23:22].
const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
DAG.getConstant(RMMask, DL, MVT::i64));
FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
SDValue Ops2[] = {
Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
FPCR};
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
}
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// If SVE is available then i64 vector multiplications can also be made legal.
bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
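// For example, (mul (sext v2i32), (sext v2i32)) producing v2i64 becomes a
// single SMULL node here.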
assert(VT.is128BitVector() && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
bool isMLA = false;
bool isN0SExt = isSignExtended(N0, DAG);
bool isN1SExt = isSignExtended(N1, DAG);
if (isN0SExt && isN1SExt)
NewOpc = AArch64ISD::SMULL;
else {
bool isN0ZExt = isZeroExtended(N0, DAG);
bool isN1ZExt = isZeroExtended(N1, DAG);
if (isN0ZExt && isN1ZExt)
NewOpc = AArch64ISD::UMULL;
else if (isN1SExt || isN1ZExt) {
// Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
// into (s/zext A * s/zext C) + (s/zext B * s/zext C)
if (isN1SExt && isAddSubSExt(N0, DAG)) {
NewOpc = AArch64ISD::SMULL;
isMLA = true;
} else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
NewOpc = AArch64ISD::UMULL;
isMLA = true;
} else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
std::swap(N0, N1);
NewOpc = AArch64ISD::UMULL;
isMLA = true;
}
}
if (!NewOpc) {
if (VT == MVT::v2i64)
// Fall through to expand this. It is not legal.
return SDValue();
else
// Other vector multiplications are legal.
return Op;
}
}
// Legalize to a S/UMULL instruction
SDLoc DL(Op);
SDValue Op0;
SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
if (!isMLA) {
Op0 = skipExtensionForVectorMULL(N0, DAG);
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
}
// Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
// isel lowering, to take advantage of no-stall back-to-back s/umul + s/umla.
// This holds for CPUs with accumulate forwarding such as Cortex-A53/A57.
SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
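// Materialize an SVE PTRUE predicate of type VT using the given
// AArch64SVEPredPattern element-count pattern.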
static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
int Pattern) {
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
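// Widen a narrower SVE predicate to the full <n x 16 x i1> svbool type. The
// extra lanes introduced by the widening must be zero: if the input is a
// zeroing compare or a ptrue they already are, so the reinterpret alone
// suffices; otherwise they are cleared with an explicit ptrue mask.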
static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT OutVT = Op.getValueType();
SDValue InOp = Op.getOperand(1);
EVT InVT = InOp.getValueType();
// Return the operand if the cast isn't changing type,
// i.e. <n x 16 x i1> -> <n x 16 x i1>
if (InVT == OutVT)
return InOp;
SDValue Reinterpret =
DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
// If the argument converted to an svbool is a ptrue or a comparison, the
// lanes introduced by the widening are zero by construction.
switch (InOp.getOpcode()) {
case AArch64ISD::SETCC_MERGE_ZERO:
return Reinterpret;
case ISD::INTRINSIC_WO_CHAIN:
if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
return Reinterpret;
}
// Otherwise, zero the newly introduced lanes.
SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
SDValue MaskReinterpret =
DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
}
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::aarch64_neon_abs: {
EVT Ty = Op.getValueType();
if (Ty == MVT::i64) {
SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
Op.getOperand(1));
Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
} else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
} else {
report_fatal_error("Unexpected type for AArch64 NEON intrinic");
}
}
case Intrinsic::aarch64_neon_smax:
return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_umax:
return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_smin:
return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_umin:
return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sunpklo:
return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpkhi:
return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpklo:
return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_clasta_n:
return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_clastb_n:
return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_lasta:
return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_lastb:
return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_rev:
return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_tbl:
return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_trn1:
return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_trn2:
return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_uzp1:
return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_uzp2:
return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_zip1:
return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_zip2:
return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_splice:
return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_ptrue:
return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_clz:
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_cnt: {
SDValue Data = Op.getOperand(3);
// CTPOP only supports integer operands.
if (Data.getValueType().isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Data, Op.getOperand(1));
}
case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);
case Intrinsic::aarch64_sve_convert_from_svbool:
return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_convert_to_svbool:
return lowerConvertToSVBool(Op, DAG);
case Intrinsic::aarch64_sve_fneg:
return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintp:
return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintm:
return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinti:
return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintx:
return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinta:
return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintn:
return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintz:
return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_ucvtf:
return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_scvtf:
return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzu:
return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzs:
return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fsqrt:
return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frecpx:
return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_fabs:
return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_abs:
return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_neg:
return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_insr: {
SDValue Scalar = Op.getOperand(2);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
Op.getOperand(1), Scalar);
}
case Intrinsic::aarch64_sve_rbit:
return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_revb:
return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtb:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sxth:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtw:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtb:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxth:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtw:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
const auto *RegInfo = Subtarget->getRegisterInfo();
unsigned Reg = RegInfo->getLocalAddressRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
Op.getSimpleValueType());
}
case Intrinsic::eh_recoverfp: {
// FIXME: This needs to be implemented to correctly handle highly aligned
// stack objects. For now we simply return the incoming FP. Refer to D53541
// for more details.
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;
}
case Intrinsic::aarch64_neon_vsri:
case Intrinsic::aarch64_neon_vsli: {
EVT Ty = Op.getValueType();
if (!Ty.isVector())
report_fatal_error("Unexpected type for aarch64_neon_vsli");
assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
Op.getOperand(3));
}
case Intrinsic::aarch64_neon_srhadd:
case Intrinsic::aarch64_neon_urhadd:
case Intrinsic::aarch64_neon_shadd:
case Intrinsic::aarch64_neon_uhadd: {
bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_shadd);
bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_urhadd);
unsigned Opcode =
IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
: (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_sabd:
case Intrinsic::aarch64_neon_uabd: {
unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
: ISD::ABDS;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_uaddlp: {
unsigned Opcode = AArch64ISD::UADDLP;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
}
case Intrinsic::aarch64_neon_sdot:
case Intrinsic::aarch64_neon_udot:
case Intrinsic::aarch64_sve_sdot:
case Intrinsic::aarch64_sve_udot: {
unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
IntNo == Intrinsic::aarch64_sve_udot)
? AArch64ISD::UDOT
: AArch64ISD::SDOT;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
}
}
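// SVE gathers and scatters cannot use i8 or i16 vector indices directly, so
// request that such indices be extended to i32 first.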
bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
if (VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16) {
EltTy = MVT::i32;
return true;
}
return false;
}
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
if (VT.getVectorElementType() == MVT::i32 &&
VT.getVectorElementCount().getKnownMinValue() >= 4)
return true;
return false;
}
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector();
}
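// Map the (scaled, signed, extend) addressing properties of a masked gather
// to the corresponding GLD1* node. When no extension is needed the signed and
// unsigned forms share an opcode; the SXTW/UXTW variants are only required
// for 32-bit indices.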
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::GLD1_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::GLD1_UXTW_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::GLD1_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::GLD1_SXTW_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::GLD1_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::GLD1_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
};
auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
return AddrModes.find(Key)->second;
}
unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::SST1_PRED},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::SST1_UXTW_PRED},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::SST1_PRED},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::SST1_SXTW_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::SST1_SCALED_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::SST1_UXTW_SCALED_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::SST1_SCALED_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::SST1_SXTW_SCALED_PRED},
};
auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
return AddrModes.find(Key)->second;
}
unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("unimplemented opcode");
return Opcode;
case AArch64ISD::GLD1_MERGE_ZERO:
return AArch64ISD::GLD1S_MERGE_ZERO;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
}
}
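// Returns true if Index is the result of widening a 32-bit value, i.e. a
// sign_extend_inreg or an AND with a splatted 0xFFFFFFFF mask, in which case
// the extension can instead be folded into the gather/scatter addressing
// mode.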
bool getGatherScatterIndexIsExtended(SDValue Index) {
unsigned Opcode = Index.getOpcode();
if (Opcode == ISD::SIGN_EXTEND_INREG)
return true;
if (Opcode == ISD::AND) {
SDValue Splat = Index.getOperand(1);
if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
return false;
ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
return false;
return true;
}
return false;
}
// If the base pointer of a masked gather or scatter is null, we
// may be able to swap BasePtr & Index and use the vector + register
// or vector + immediate addressing mode, e.g.
// VECTOR + REGISTER:
// getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
// -> getelementptr %offset, <vscale x N x T> %indices
// VECTOR + IMMEDIATE:
// getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
// -> getelementptr #x, <vscale x N x T> %indices
void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
unsigned &Opcode, bool IsGather,
SelectionDAG &DAG) {
if (!isNullConstant(BasePtr))
return;
// FIXME: This will not match for fixed vector type codegen as the nodes in
// question will have fixed<->scalable conversions around them. This should be
// moved to a DAG combine or complex pattern so that it executes after all of
// the fixed vector inserts and extracts have been removed. This deficiency
// will result in a sub-optimal addressing mode being used, i.e. an ADD not
// being folded into the scatter/gather.
ConstantSDNode *Offset = nullptr;
if (Index.getOpcode() == ISD::ADD)
if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
if (isa<ConstantSDNode>(SplatVal))
Offset = cast<ConstantSDNode>(SplatVal);
else {
BasePtr = SplatVal;
Index = Index->getOperand(0);
return;
}
}
unsigned NewOp =
IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
if (!Offset) {
std::swap(BasePtr, Index);
Opcode = NewOp;
return;
}
uint64_t OffsetVal = Offset->getZExtValue();
unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
// Index is out of range for the immediate addressing mode
BasePtr = ConstOffset;
Index = Index->getOperand(0);
return;
}
// Immediate is in range
Opcode = NewOp;
BasePtr = Index->getOperand(0);
Index = ConstOffset;
}
SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
assert(MGT && "Can only custom lower gather load nodes");
bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
SDValue Index = MGT->getIndex();
SDValue Chain = MGT->getChain();
SDValue PassThru = MGT->getPassThru();
SDValue Mask = MGT->getMask();
SDValue BasePtr = MGT->getBasePtr();
ISD::LoadExtType ExtTy = MGT->getExtensionType();
ISD::MemIndexType IndexType = MGT->getIndexType();
bool IsScaled =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
bool IsSigned =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
bool IdxNeedsExtend =
getGatherScatterIndexIsExtended(Index) ||
Index.getSimpleValueType().getVectorElementType() == MVT::i32;
bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
EVT VT = PassThru.getSimpleValueType();
EVT IndexVT = Index.getSimpleValueType();
EVT MemVT = MGT->getMemoryVT();
SDValue InputVT = DAG.getValueType(MemVT);
if (VT.getVectorElementType() == MVT::bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
} else {
MemVT = getContainerForFixedLengthVector(DAG, MemVT);
IndexVT = MemVT.changeTypeToInteger();
}
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
Mask = DAG.getNode(
ISD::ZERO_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
}
if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
PassThru = SDValue();
if (VT.isFloatingPoint() && !IsFixedLength) {
// Handle FP data by using an integer gather and casting the result.
if (PassThru) {
EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
}
InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
}
SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
if (getGatherScatterIndexIsExtended(Index))
Index = Index.getOperand(0);
unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
/*isGather=*/true, DAG);
if (ResNeedsSignExtend)
Opcode = getSignExtendedGatherOpcode(Opcode);
if (IsFixedLength) {
if (Index.getSimpleValueType().isFixedLengthVector())
Index = convertToScalableVector(DAG, IndexVT, Index);
if (BasePtr.getSimpleValueType().isFixedLengthVector())
BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
}
SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
Chain = Result.getValue(1);
if (IsFixedLength) {
Result = convertFromScalableVector(
DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
Result);
Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
if (PassThru)
Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
} else {
if (PassThru)
Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
if (VT.isFloatingPoint())
Result = getSVESafeBitCast(VT, Result, DAG);
}
return DAG.getMergeValues({Result, Chain}, DL);
}
SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
assert(MSC && "Can only custom lower scatter store nodes");
bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
SDValue Index = MSC->getIndex();
SDValue Chain = MSC->getChain();
SDValue StoreVal = MSC->getValue();
SDValue Mask = MSC->getMask();
SDValue BasePtr = MSC->getBasePtr();
ISD::MemIndexType IndexType = MSC->getIndexType();
bool IsScaled =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
bool IsSigned =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
bool NeedsExtend =
getGatherScatterIndexIsExtended(Index) ||
Index.getSimpleValueType().getVectorElementType() == MVT::i32;
EVT VT = StoreVal.getSimpleValueType();
EVT IndexVT = Index.getSimpleValueType();
SDVTList VTs = DAG.getVTList(MVT::Other);
EVT MemVT = MSC->getMemoryVT();
SDValue InputVT = DAG.getValueType(MemVT);
if (VT.getVectorElementType() == MVT::bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
} else {
MemVT = getContainerForFixedLengthVector(DAG, MemVT);
IndexVT = MemVT.changeTypeToInteger();
}
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
StoreVal =
DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
StoreVal = DAG.getNode(
ISD::ANY_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
Mask = DAG.getNode(
ISD::ZERO_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
} else if (VT.isFloatingPoint()) {
// Handle FP data by casting the data so an integer scatter can be used.
EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
}
if (getGatherScatterIndexIsExtended(Index))
Index = Index.getOperand(0);
unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
/*isGather=*/false, DAG);
if (IsFixedLength) {
if (Index.getSimpleValueType().isFixedLengthVector())
Index = convertToScalableVector(DAG, IndexVT, Index);
if (BasePtr.getSimpleValueType().isFixedLengthVector())
BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
}
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
return DAG.getNode(Opcode, DL, VTs, Ops);
}
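// Masked loads are custom lowered so that a non-trivial passthru can be
// handled as a masked load of undef followed by a vector select; fixed-length
// vectors that use SVE take the fixed-length lowering path instead.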
SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a masked load node");
EVT VT = Op->getValueType(0);
if (useSVEForFixedLengthVectorVT(VT, true))
return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
SDValue PassThru = LoadNode->getPassThru();
SDValue Mask = LoadNode->getMask();
if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
return Op;
SDValue Load = DAG.getMaskedLoad(
VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
LoadNode->getExtensionType());
SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
EVT VT, EVT MemVT,
SelectionDAG &DAG) {
assert(VT.isVector() && "VT should be a vector type");
assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
SDValue Value = ST->getValue();
// We first extend the promoted v4i16 to v8i16, truncate it to v8i8, and
// extract the word lane which represents the v4i8 subvector. This optimizes
// the store to:
//
// xtn v0.8b, v0.8h
// str s0, [x0]
SDValue Undef = DAG.getUNDEF(MVT::i16);
SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
{Undef, Undef, Undef, Undef});
SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
Value, UndefVec);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Trunc, DAG.getConstant(0, DL, MVT::i64));
return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
ST->getBasePtr(), ST->getMemOperand());
}
// Custom lowering for stores, vector or scalar, normal or truncating. This
// includes truncating stores from v4i16 to v4i8, volatile stores of i128,
// i64x8 stores, and various fixed-length and non-temporal vector stores.
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc Dl(Op);
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
assert (StoreNode && "Can only custom lower store nodes");
SDValue Value = StoreNode->getValue();
EVT VT = Value.getValueType();
EVT MemVT = StoreNode->getMemoryVT();
if (VT.isVector()) {
if (useSVEForFixedLengthVectorVT(VT, true))
return LowerFixedLengthVectorStoreToSVE(Op, DAG);
unsigned AS = StoreNode->getAddressSpace();
Align Alignment = StoreNode->getAlign();
if (Alignment < MemVT.getStoreSize() &&
!allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
StoreNode->getMemOperand()->getFlags(),
nullptr)) {
return scalarizeVectorStore(StoreNode, DAG);
}
if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
MemVT == MVT::v4i8) {
return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
}
// 256-bit non-temporal stores can be lowered to STNP. Do this as part of
// the custom lowering, as there are no un-paired non-temporal stores and
// legalization will break up 256-bit inputs.
ElementCount EC = MemVT.getVectorElementCount();
if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
EC.isKnownEven() &&
((MemVT.getScalarSizeInBits() == 8u ||
MemVT.getScalarSizeInBits() == 16u ||
MemVT.getScalarSizeInBits() == 32u ||
MemVT.getScalarSizeInBits() == 64u))) {
SDValue Lo =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
SDValue Hi =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(),
DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
} else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
SDValue Lo =
DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
DAG.getConstant(0, Dl, MVT::i64));
SDValue Hi =
DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
DAG.getConstant(1, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
} else if (MemVT == MVT::i64x8) {
SDValue Value = StoreNode->getValue();
assert(Value->getValueType(0) == MVT::i64x8);
SDValue Chain = StoreNode->getChain();
SDValue Base = StoreNode->getBasePtr();
EVT PtrVT = Base.getValueType();
for (unsigned i = 0; i < 8; i++) {
SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
Value, DAG.getConstant(i, Dl, MVT::i32));
SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
DAG.getConstant(i * 8, Dl, PtrVT));
Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
StoreNode->getOriginalAlign());
}
return Chain;
}
return SDValue();
}
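// Custom lowering for loads: i64x8 values are assembled from eight scalar
// i64 loads, and extending loads of v4i8 are expanded through an f32 load
// followed by a vector extend.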
SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a load node");
if (LoadNode->getMemoryVT() == MVT::i64x8) {
SmallVector<SDValue, 8> Ops;
SDValue Base = LoadNode->getBasePtr();
SDValue Chain = LoadNode->getChain();
EVT PtrVT = Base.getValueType();
for (unsigned i = 0; i < 8; i++) {
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
DAG.getConstant(i * 8, DL, PtrVT));
SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
LoadNode->getPointerInfo(),
LoadNode->getOriginalAlign());
Ops.push_back(Part);
Chain = SDValue(Part.getNode(), 1);
}
SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
return DAG.getMergeValues({Loaded, Chain}, DL);
}
// Custom lowering for extending v4i8 vector loads.
EVT VT = Op->getValueType(0);
assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
if (LoadNode->getMemoryVT() != MVT::v4i8)
return SDValue();
unsigned ExtType;
if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
ExtType = ISD::SIGN_EXTEND;
else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
LoadNode->getExtensionType() == ISD::EXTLOAD)
ExtType = ISD::ZERO_EXTEND;
else
return SDValue();
SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
LoadNode->getBasePtr(), MachinePointerInfo());
SDValue Chain = Load.getValue(1);
SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
DAG.getConstant(0, DL, MVT::i64));
if (VT == MVT::v4i32)
Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
return DAG.getMergeValues({Ext, Chain}, DL);
}
// Generate SUBS and CSEL for integer abs.
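// For a scalar i32 this typically selects to something like:
//   cmp  w8, #0
//   cneg w0, w8, mi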
SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
SDLoc DL(Op);
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(0));
// Generate SUBS & CSEL.
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
Op.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
Cmp.getValue(1));
}
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
LLVM_DEBUG(Op.dump());
switch (Op.getOpcode()) {
default:
llvm_unreachable("unimplemented operand");
return SDValue();
case ISD::BITCAST:
return LowerBITCAST(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
return LowerGlobalTLSAddress(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
return LowerSETCC(Op, DAG);
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
case ISD::SELECT:
return LowerSELECT(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
case ISD::BR_JT:
return LowerBR_JT(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:
return LowerBlockAddress(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG);
case ISD::VACOPY:
return LowerVACOPY(Op, DAG);
case ISD::VAARG:
return LowerVAARG(Op, DAG);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE:
return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
return LowerXALUO(Op, DAG);
case ISD::FADD:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
case ISD::FSUB:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
case ISD::FMUL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
case ISD::FNEG:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
case ISD::FCEIL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
case ISD::FFLOOR:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
case ISD::FNEARBYINT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
case ISD::FRINT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
case ISD::FROUND:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
case ISD::FROUNDEVEN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
case ISD::FTRUNC:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
case ISD::FSQRT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
case ISD::FABS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
case ISD::SPONENTRY:
return LowerSPONENTRY(Op, DAG);
case ISD::RETURNADDR:
return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR:
return LowerADDROFRETURNADDR(Op, DAG);
case ISD::CONCAT_VECTORS:
return LowerCONCAT_VECTORS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SPLAT_VECTOR:
return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:
case ISD::UDIV:
return LowerDIV(Op, DAG);
case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
/*OverrideNEON=*/true);
case ISD::UMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
/*OverrideNEON=*/true);
case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
/*OverrideNEON=*/true);
case ISD::UMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
/*OverrideNEON=*/true);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
return LowerVectorSRA_SRL_SHL(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
return LowerShiftParts(Op, DAG);
case ISD::CTPOP:
return LowerCTPOP(Op, DAG);
case ISD::FCOPYSIGN:
return LowerFCOPYSIGN(Op, DAG);
case ISD::OR:
return LowerVectorOR(Op, DAG);
case ISD::XOR:
return LowerXOR(Op, DAG);
case ISD::PREFETCH:
return LowerPREFETCH(Op, DAG);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return LowerFP_TO_INT_SAT(Op, DAG);
case ISD::FSINCOS:
return LowerFSINCOS(Op, DAG);
case ISD::FLT_ROUNDS_:
return LowerFLT_ROUNDS_(Op, DAG);
case ISD::SET_ROUNDING:
return LowerSET_ROUNDING(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
case ISD::MULHS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
/*OverrideNEON=*/true);
case ISD::MULHU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
/*OverrideNEON=*/true);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::STORE:
return LowerSTORE(Op, DAG);
case ISD::MSTORE:
return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
case ISD::MGATHER:
return LowerMGATHER(Op, DAG);
case ISD::MSCATTER:
return LowerMSCATTER(Op, DAG);
case ISD::VECREDUCE_SEQ_FADD:
return LowerVECREDUCE_SEQ_FADD(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
return LowerVECREDUCE(Op, DAG);
case ISD::ATOMIC_LOAD_SUB:
return LowerATOMIC_LOAD_SUB(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VSCALE:
return LowerVSCALE(Op, DAG);
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
case ISD::SIGN_EXTEND_INREG: {
// Only custom lower when ExtraVT has a legal byte based element type.
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
EVT ExtraEltVT = ExtraVT.getVectorElementType();
if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
(ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
return SDValue();
return LowerToPredicatedOp(Op, DAG,
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
}
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::LOAD:
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
return LowerLOAD(Op, DAG);
case ISD::ADD:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
case ISD::AND:
return LowerToScalableOp(Op, DAG);
case ISD::SUB:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
case ISD::FMAXIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
case ISD::FMAXNUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
case ISD::FMINIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
case ISD::FMINNUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
case ISD::VSELECT:
return LowerFixedLengthVectorSelectToSVE(Op, DAG);
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::BITREVERSE:
return LowerBitreverse(Op, DAG);
case ISD::BSWAP:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
case ISD::CTLZ:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
/*OverrideNEON=*/true);
case ISD::CTTZ:
return LowerCTTZ(Op, DAG);
case ISD::VECTOR_SPLICE:
return LowerVECTOR_SPLICE(Op, DAG);
}
}
bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
return !Subtarget->useSVEForFixedLengthVectors();
}
bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
EVT VT, bool OverrideNEON) const {
if (!Subtarget->useSVEForFixedLengthVectors())
return false;
if (!VT.isFixedLengthVector())
return false;
// Don't use SVE for vectors we cannot scalarize if required.
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
// Fixed length predicates should be promoted to i8.
// NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
case MVT::i1:
default:
return false;
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f16:
case MVT::f32:
case MVT::f64:
break;
}
// All SVE implementations support NEON sized vectors.
if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
return true;
// Ensure NEON MVTs only belong to a single register class.
if (VT.getFixedSizeInBits() <= 128)
return false;
// Don't use SVE for types that don't fit.
if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
return false;
// TODO: Perhaps an artificial restriction, but worth having whilst getting
// the base fixed length SVE support in place.
if (!VT.isPow2VectorType())
return false;
return true;
}
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
return CC_AArch64_GHC;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
case CallingConv::CXX_FAST_TLS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
if (Subtarget->isTargetWindows() && IsVarArg)
return CC_AArch64_Win64_VarArg;
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
if (!IsVarArg)
return CC_AArch64_DarwinPCS;
return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
: CC_AArch64_DarwinPCS_VarArg;
case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
case CallingConv::CFGuard_Check:
return CC_AArch64_Win64_CFGuard_Check;
case CallingConv::AArch64_VectorCall:
case CallingConv::AArch64_SVE_VectorCall:
return CC_AArch64_AAPCS;
}
}
CCAssignFn *
AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
}
SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
// Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
// we use a special version of AnalyzeFormalArguments to pass in ValVT and
// LocVT.
unsigned NumArgs = Ins.size();
Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
if (Ins[i].isOrigArg()) {
std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
}
bool UseVarArgCC = false;
if (IsWin64)
UseVarArgCC = isVarArg;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
SmallVector<SDValue, 16> ArgValues;
unsigned ExtraArgLocs = 0;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
// non-compliant manner for larger structs.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
int Size = Ins[i].Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
// FIXME: This works on big-endian for composite byvals, which are the common
// case. It should work for fundamental types too.
unsigned FrameIdx =
MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
continue;
}
if (Ins[i].Flags.isSwiftAsync())
MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
SDValue ArgValue;
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &AArch64::GPR32RegClass;
else if (RegVT == MVT::i64)
RC = &AArch64::GPR64RegClass;
else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
RC = &AArch64::FPR16RegClass;
else if (RegVT == MVT::f32)
RC = &AArch64::FPR32RegClass;
else if (RegVT == MVT::f64 || RegVT.is64BitVector())
RC = &AArch64::FPR64RegClass;
else if (RegVT == MVT::f128 || RegVT.is128BitVector())
RC = &AArch64::FPR128RegClass;
else if (RegVT.isScalableVector() &&
RegVT.getVectorElementType() == MVT::i1)
RC = &AArch64::PPRRegClass;
else if (RegVT.isScalableVector())
RC = &AArch64::ZPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8, 16 or 32-bit value, it is really passed promoted
// to 64 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
break;
case CCValAssign::AExt:
case CCValAssign::SExt:
case CCValAssign::ZExt:
break;
case CCValAssign::AExtUpper:
ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
DAG.getConstant(32, DL, RegVT));
ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
break;
}
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
? VA.getLocVT().getSizeInBits()
: VA.getValVT().getSizeInBits()) / 8;
uint32_t BEAlign = 0;
if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
!Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8 - ArgSize;
int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
MVT MemVT = VA.getValVT();
switch (VA.getLocInfo()) {
default:
break;
case CCValAssign::Trunc:
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
MemVT = VA.getLocVT();
break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
case CCValAssign::ZExt:
ExtType = ISD::ZEXTLOAD;
break;
case CCValAssign::AExt:
ExtType = ISD::EXTLOAD;
break;
}
ArgValue = DAG.getExtLoad(
ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MemVT);
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
unsigned NumParts = 1;
if (Ins[i].Flags.isInConsecutiveRegs()) {
assert(!Ins[i].Flags.isInConsecutiveRegsLast());
while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
}
MVT PartLoad = VA.getValVT();
SDValue Ptr = ArgValue;
// Ensure we generate all loads for each tuple part, whilst updating the
// pointer after each load correctly using vscale.
while (NumParts > 0) {
ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
InVals.push_back(ArgValue);
NumParts--;
if (NumParts > 0) {
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
BytesIncrement, Flags);
ExtraArgLocs++;
i++;
}
}
} else {
if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
ArgValue, DAG.getValueType(MVT::i32));
InVals.push_back(ArgValue);
}
}
assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
// varargs
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
if (isVarArg) {
if (!Subtarget->isTargetDarwin() || IsWin64) {
// The AAPCS variadic function ABI is identical to the non-variadic
// one. As a result there may be more arguments in registers and we should
// save them for future reference.
// Win64 variadic functions also pass arguments in registers, but all float
// arguments are passed in integer registers.
saveVarArgRegisters(CCInfo, DAG, DL, Chain);
}
// This will point to the next argument passed via stack.
unsigned StackOffset = CCInfo.getNextStackOffset();
// We currently pass all varargs at 8-byte alignment, or 4 for ILP32
StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
if (MFI.hasMustTailInVarArgFunc()) {
SmallVector<MVT, 2> RegParmTypes;
RegParmTypes.push_back(MVT::i64);
RegParmTypes.push_back(MVT::f128);
// Compute the set of forwarded registers. The rest are scratch.
SmallVectorImpl<ForwardedRegister> &Forwards =
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
CC_AArch64_AAPCS);
// Conservatively forward X8, since it might be used for aggregate return.
if (!CCInfo.isAllocated(AArch64::X8)) {
unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
}
}
}
// On Windows, InReg pointers must be returned, so record the pointer in a
// virtual register at the start of the function so it can be returned in the
// epilogue.
if (IsWin64) {
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
if (Ins[I].Flags.isInReg()) {
assert(!FuncInfo->getSRetReturnReg());
MVT PtrTy = getPointerTy(DAG.getDataLayout());
Register Reg =
MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
break;
}
}
}
unsigned StackArgSize = CCInfo.getNextStackOffset();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
// This is a non-standard ABI so by fiat I say we're allowed to make full
// use of the stack area to be popped, which must be aligned to 16 bytes in
// any case:
StackArgSize = alignTo(StackArgSize, 16);
// If we're expected to restore the stack (e.g. fastcc) then we'll be adding
// a multiple of 16.
FuncInfo->setArgumentStackToRestore(StackArgSize);
// This realignment carries over to the available bytes below. Our own
// callers will guarantee the space is free by giving an aligned value to
// CALLSEQ_START.
}
// Even if we're not expected to free up the space, it's useful to know how
// much is there while considering tail calls (because we can reuse it).
FuncInfo->setBytesInStackArgArea(StackArgSize);
if (Subtarget->hasCustomCallingConv())
Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
return Chain;
}
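// Spill any unused varargs GPR (and, outside Win64, FPR) argument registers
// to the stack so that va_arg can find them. On Win64 only the GPRs are
// saved, since float varargs are passed in integer registers there.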
void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
SelectionDAG &DAG,
const SDLoc &DL,
SDValue &Chain) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
SmallVector<SDValue, 8> MemOps;
static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
if (GPRSaveSize != 0) {
if (IsWin64) {
GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
if (GPRSaveSize & 15)
// The extra size here, if triggered, will always be 8.
MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
} else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
SDValue Store = DAG.getStore(
Val.getValue(1), DL, Val, FIN,
IsWin64
? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
GPRIdx,
(i - FirstVariadicGPR) * 8)
: MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
MemOps.push_back(Store);
FIN =
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
FuncInfo->setVarArgsGPRSize(GPRSaveSize);
if (Subtarget->hasFPARMv8() && !IsWin64) {
static const MCPhysReg FPRArgRegs[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
if (FPRSaveSize != 0) {
FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
SDValue Store = DAG.getStore(
Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getConstant(16, DL, PtrVT));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
FuncInfo->setVarArgsFPRSize(FPRSaveSize);
}
if (!MemOps.empty()) {
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue AArch64TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
continue;
}
// Avoid copying a physreg twice since RegAllocFast is incompetent and only
// allows one use of a physreg per block.
SDValue Val = CopiedRegs.lookup(VA.getLocReg());
if (!Val) {
Val =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
CopiedRegs[VA.getLocReg()] = Val;
}
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
case CCValAssign::AExtUpper:
Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
DAG.getConstant(32, DL, VA.getLocVT()));
LLVM_FALLTHROUGH;
case CCValAssign::AExt:
LLVM_FALLTHROUGH;
case CCValAssign::ZExt:
Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
break;
}
InVals.push_back(Val);
}
return Chain;
}
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::C:
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::PreserveMost:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
case CallingConv::Fast:
return true;
default:
return false;
}
}
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
return false;
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
// Functions using the C or Fast calling convention that have an SVE signature
// preserve more registers and should assume the SVE_VectorCall CC.
// The check for matching callee-saved regs will determine whether it is
// eligible for TCO.
if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
CallerCC = CallingConv::AArch64_SVE_VectorCall;
bool CCMatch = CallerCC == CalleeCC;
// When using the Windows calling convention on a non-Windows OS, we want
// to back up and restore X18 in such functions; we can't do a tail call
// from those functions.
if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
CalleeCC != CallingConv::Win64)
return false;
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
for (Function::const_arg_iterator i = CallerF.arg_begin(),
e = CallerF.arg_end();
i != e; ++i) {
if (i->hasByValAttr())
return false;
// On Windows, "inreg" attributes signify non-aggregate indirect returns.
// In this case, it is necessary to save/restore X0 in the callee. Tail
// call opt interferes with this. So we disable tail call opt when the
// caller has an argument with "inreg" attribute.
// FIXME: Check whether the callee also has an "inreg" argument.
if (i->hasInRegAttr())
return false;
}
if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
return CCMatch;
// Externally-defined functions with weak linkage should not be
// tail-called on AArch64 when the OS does not support dynamic
// pre-emption of symbols, as the AAELF spec requires normal calls
// to undefined weak functions to be replaced with a NOP or jump to the
// next instruction. The behaviour of branch instructions in this
// situation (as used for tail calls) is implementation-defined, so we
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
// Now we search for cases where we can use a tail call without changing the
// ABI. Sibcall is used in some places (particularly gcc) to refer to this
// concept.
// I want anyone implementing a new calling convention to think long and hard
// about this assert.
assert((!isVarArg || CalleeCC == CallingConv::C) &&
"Unexpected variadic calling convention");
LLVMContext &C = *DAG.getContext();
if (isVarArg && !Outs.empty()) {
// At least two cases here: if caller is fastcc then we can't have any
// memory arguments (we'd be expected to clean up the stack afterwards). If
// caller is C then we could potentially use its argument area.
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
for (const CCValAssign &ArgLoc : ArgLocs)
if (!ArgLoc.isRegLoc())
return false;
}
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
CCAssignFnForCall(CalleeCC, isVarArg),
CCAssignFnForCall(CallerCC, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (Subtarget->hasCustomCallingConv()) {
TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
}
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// Nothing more to check if the callee is taking no arguments
if (Outs.empty())
return true;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
// If any of the arguments is passed indirectly, it must be SVE, so the
// 'getBytesInStackArgArea' is not sufficient to determine whether we need to
// allocate space on the stack. That is why we explicitly conclude here that
// such a call cannot be a tail call.
if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
assert((A.getLocInfo() != CCValAssign::Indirect ||
A.getValVT().isScalableVector()) &&
"Expected value to be scalable");
return A.getLocInfo() == CCValAssign::Indirect;
}))
return false;
// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
return true;
}
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
MachineFrameInfo &MFI,
int ClobberedFI) const {
SmallVector<SDValue, 8> ArgChains;
int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
// Include the original chain at the beginning of the list. When this is
// used by target LowerCall hooks, this helps legalize find the
// CALLSEQ_BEGIN node.
ArgChains.push_back(Chain);
// Add a chain value for each incoming stack-argument load that overlaps the
// clobbered frame object.
for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
UE = DAG.getEntryNode().getNode()->use_end();
U != UE; ++U)
if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
int64_t InLastByte = InFirstByte;
InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
(FirstByte <= InFirstByte && InFirstByte <= LastByte))
ArgChains.push_back(SDValue(L, 1));
}
// Build a tokenfactor for all the chains.
return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
}
bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
bool TailCallOpt) const {
return (CallCC == CallingConv::Fast && TailCallOpt) ||
CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
}
/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
/// and add input and output parameter nodes.
SDValue
AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
MachineFunction::CallSiteInfo CSInfo;
bool IsThisReturn = false;
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
bool IsSibCall = false;
bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
// Check callee args/returns for SVE registers and set calling convention
// accordingly.
if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
return Out.VT.isScalableVector();
});
bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
return In.VT.isScalableVector();
});
if (CalleeInSVE || CalleeOutSVE)
CallConv = CallingConv::AArch64_SVE_VectorCall;
}
if (IsTailCall) {
// Check if it's really possible to do a tail call.
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
// A sibling call is one where we're under the usual C ABI and not planning
// to change that but can still do a tail call:
if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
CallConv != CallingConv::SwiftTail)
IsSibCall = true;
if (IsTailCall)
++NumTailCalls;
}
if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
if (IsVarArg) {
// Handle fixed and variable vector arguments differently.
// Variable vector arguments always go into memory.
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Outs[i].VT;
if (!Outs[i].IsFixed && ArgVT.isScalableVector())
report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool UseVarArgCC = !Outs[i].IsFixed;
// On Windows, the fixed arguments in a vararg call are passed in GPRs
// too, so use the vararg CC to force them to integer registers.
if (IsCalleeWin64)
UseVarArgCC = true;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
} else {
// At this point, Outs[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
// Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
// we use a special version of AnalyzeCallOperands to pass in ValVT and
// LocVT.
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Outs[i].VT;
// Get type of the original argument.
EVT ActualVT = getValueType(DAG.getDataLayout(),
CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibCall) {
// Since we're not changing the ABI to make this a tail call, the memory
// operands are already available in the caller's incoming argument space.
NumBytes = 0;
}
// FPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
// by this amount for a tail call. In a sibling call it must be 0 because the
// caller will deallocate the entire stack and the callee still expects its
// arguments to begin at SP+0. Completely unused for non-tail calls.
int FPDiff = 0;
if (IsTailCall && !IsSibCall) {
unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
// Since callee will pop argument stack as a tail call, we must keep the
// popped size 16-byte aligned.
NumBytes = alignTo(NumBytes, 16);
// FPDiff will be negative if this tail call requires more space than we
// would automatically have in our incoming argument space. Positive if we
// can actually shrink the stack.
FPDiff = NumReusableBytes - NumBytes;
// Update the required reserved area if this is the tail call requiring the
// most argument stack space.
if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
FuncInfo->setTailCallReservedStack(-FPDiff);
// The stack pointer must be 16-byte aligned at all times it's used for a
// memory operation, which in practice means at *all* times and in
// particular across call boundaries. Therefore our own arguments started at
// a 16-byte aligned SP and the delta applied for the tail call should
// satisfy the same constraint.
assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
}
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
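// Keep a copy of SP around; addresses of outgoing stack arguments are
// computed relative to it.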
SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
getPointerTy(DAG.getDataLayout()));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallSet<unsigned, 8> RegsUsed;
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
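// For a vararg musttail call, forward the registers that were set aside for
// variadic forwarding on to the callee.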
if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
RegsToPass.emplace_back(F.PReg, Val);
}
}
// Walk the register/memloc assignments, inserting copies/loads.
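// ExtraArgLocs counts Outs entries that share a single indirect (SVE tuple)
// location below, keeping the ArgLocs index in step with i.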
unsigned ExtraArgLocs = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Promote the value if needed.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to 8-bits by the caller.
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
}
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExtUpper:
assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, VA.getLocVT()));
break;
case CCValAssign::BCvt:
Arg = DAG.getBitcast(VA.getLocVT(), Arg);
break;
case CCValAssign::Trunc:
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::FPExt:
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
uint64_t PartSize = StoreSize;
unsigned NumParts = 1;
if (Outs[i].Flags.isInConsecutiveRegs()) {
assert(!Outs[i].Flags.isInConsecutiveRegsLast());
while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
StoreSize *= NumParts;
}
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
MFI.setStackID(FI, TargetStackID::ScalableVector);
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
SDValue Ptr = DAG.getFrameIndex(
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
SDValue SpillSlot = Ptr;
// Ensure we generate all stores for each tuple part, whilst updating the
// pointer after each store correctly using vscale.
while (NumParts) {
Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
NumParts--;
if (NumParts > 0) {
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
MPI = MachinePointerInfo(MPI.getAddrSpace());
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
BytesIncrement, Flags);
ExtraArgLocs++;
i++;
}
}
Arg = SpillSlot;
break;
}
if (VA.isRegLoc()) {
if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
Outs[0].VT == MVT::i64) {
assert(VA.getLocVT() == MVT::i64 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
"unexpected use of 'returned'");
IsThisReturn = true;
}
if (RegsUsed.count(VA.getLocReg())) {
// If this register has already been used then we're trying to pack
// parts of an [N x i32] into an X-register. The extension type will
// take care of putting the two halves in the right place but we have to
// combine them.
SDValue &Bits =
llvm::find_if(RegsToPass,
[=](const std::pair<unsigned, SDValue> &Elt) {
return Elt.first == VA.getLocReg();
})
->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
// Call site info is used for function's parameter entry value
// tracking. For now we track only simple cases when parameter
// is transferred through whole register.
llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
return ArgReg.Reg == VA.getLocReg();
});
} else {
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
const TargetOptions &Options = DAG.getTarget().Options;
if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), i);
}
} else {
assert(VA.isMemLoc());
SDValue DstAddr;
MachinePointerInfo DstInfo;
// FIXME: This works on big-endian for composite byvals, which are the
// common case. It should also work for fundamental types too.
uint32_t BEAlign = 0;
unsigned OpSize;
if (VA.getLocInfo() == CCValAssign::Indirect ||
VA.getLocInfo() == CCValAssign::Trunc)
OpSize = VA.getLocVT().getFixedSizeInBits();
else
OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
: VA.getValVT().getSizeInBits();
OpSize = (OpSize + 7) / 8;
if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
!Flags.isInConsecutiveRegs()) {
if (OpSize < 8)
BEAlign = 8 - OpSize;
}
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
if (IsTailCall) {
Offset = Offset + FPDiff;
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
// clobbered.
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
} else {
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode,
Outs[i].Flags.getNonZeroByValAlign(),
/*isVol = */ false, /*AlwaysInline = */ false,
/*isTailCall = */ false, DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
// Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
// promoted to a legal register type i32, we should truncate Arg back to
// i1/i8/i16.
if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
VA.getValVT() == MVT::i16)
Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
MemOpChains.push_back(Store);
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (auto &RegToPass : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
RegToPass.second, InFlag);
InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
auto GV = G->getGlobal();
unsigned OpFlags =
Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
if (OpFlags & AArch64II::MO_GOT) {
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const GlobalValue *GV = G->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
}
} else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Subtarget->isTargetMachO()) {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
}
// We don't usually want to end the call-sequence here because we would tidy
// the frame up *after* the call. However, in the ABI-changing tail-call case
// we've carefully laid out the parameters so that when SP is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
}
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
// into the call.
for (auto &RegToPass : RegsToPass)
Ops.push_back(DAG.getRegister(RegToPass.first,
RegToPass.second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
Mask = TRI->getCallPreservedMask(MF, CallConv);
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(MF, &Mask);
if (TRI->isAnyArgRegReserved(MF))
TRI->emitReservedArgRegCallError(MF);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// If we're doing a tail call, use a TC_RETURN here rather than an
// actual call instruction.
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
unsigned CallOpc = AArch64ISD::CALL;
// Calls with operand bundle "clang.arc.attachedcall" are special. They should
// be expanded to the call, directly followed by a special marker sequence.
// Use the CALL_RVMARKER to do that.
if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
assert(!IsTailCall &&
"tail calls cannot be marked with clang.arc.attachedcall");
CallOpc = AArch64ISD::CALL_RVMARKER;
}
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
uint64_t CalleePopBytes =
DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
DAG.getIntPtrConstant(CalleePopBytes, DL, true),
InFlag, DL);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
InVals, IsThisReturn,
IsThisReturn ? OutVals[0] : SDValue());
}
bool AArch64TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
}
SDValue
AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
auto &MF = DAG.getMachineFunction();
auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC);
// Copy the result values into the output registers.
SDValue Flag;
SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
SmallSet<unsigned, 4> RegsUsed;
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to i8 by the producer of the
// value. This is strictly redundant on Darwin (which uses "zeroext
// i1"), but will be optimised out before ISel.
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
}
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
case CCValAssign::ZExt:
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::AExtUpper:
assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, VA.getLocVT()));
break;
}
if (RegsUsed.count(VA.getLocReg())) {
SDValue &Bits =
llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
return Elt.first == VA.getLocReg();
})->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
} else {
RetVals.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
}
}
SmallVector<SDValue, 4> RetOps(1, Chain);
for (auto &RetVal : RetVals) {
Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
// Windows AArch64 ABIs require that for returning structs by value we copy
// the sret argument into X0 for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into X0.
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
getPointerTy(MF.getDataLayout()));
unsigned RetValReg = AArch64::X0;
Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
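// Any callee-saved registers that this function preserves by copying to
// virtual registers (rather than spilling in the prologue) must be live-out
// of the return.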
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
for (; *I; ++I) {
if (AArch64::GPR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else if (AArch64::FPR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
}
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
}
SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
}
// (loadGOT sym)
template <class NodeTy>
SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes instead of using a wrapper node.
return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
}
// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
template <class NodeTy>
SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
const unsigned char MO_NC = AArch64II::MO_NC;
return DAG.getNode(
AArch64ISD::WrapperLarge, DL, Ty,
getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
}
// (addlow (adrp %hi(sym)) %lo(sym))
template <class NodeTy>
SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
SDValue Lo = getTargetNode(N, Ty, DAG,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
}
// (adr sym)
template <class NodeTy>
SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
}
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
if (OpFlags != AArch64II::MO_NO_FLAG)
assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
"unexpected offset in global node");
// This also catches the large code model case for Darwin, and tiny code
// model with GOT relocations.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
return getGOT(GN, DAG, OpFlags);
}
SDValue Result;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
Result = getAddrLarge(GN, DAG, OpFlags);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
Result = getAddrTiny(GN, DAG, OpFlags);
} else {
Result = getAddr(GN, DAG, OpFlags);
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(GN);
if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
/// Convert a TLS address reference into the correct sequence of loads
/// and calls to compute the variable's address (for Darwin, currently) and
/// return an SDValue containing the final node.
/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
/// + "extern __thread" declaration.
/// + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i64] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first xword, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "x0".
///
/// Since this descriptor may be in a different unit, in general even the
/// descriptor must be accessed via an indirect load. The "ideal" code sequence
/// is:
/// adrp x0, _var@TLVPPAGE
/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
/// ; the function pointer
/// blr x1 ; Uses descriptor address in x0
/// ; Address of _var is now in x0.
///
/// If the address of _var's descriptor *is* known to the linker, then it can
/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
/// a slight efficiency gain.
SDValue
AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() &&
"This function expects a Darwin target");
SDLoc DL(Op);
MVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue TLVPAddr =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
// The first entry in the descriptor is a function pointer that we must call
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
PtrMemVT, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
Align(PtrMemVT.getSizeInBits() / 8),
MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
Chain = FuncTLVGet.getValue(1);
// Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true);
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getTLSCallPreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
// returns the address of the variable in this thread.
Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
Chain =
DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
DAG.getRegisterMask(Mask), Chain.getValue(1));
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
}
/// Convert a thread-local variable reference into a sequence of instructions to
/// compute the variable's address for the local exec TLS model of ELF targets.
/// The sequence depends on the maximum TLS area size.
SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
SDValue ThreadBase,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue TPOff, Addr;
switch (DAG.getTarget().Options.TLSSize) {
default:
llvm_unreachable("Unexpected TLS size");
case 12: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_lo12:a
SDValue Var = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
Var,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 24: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_hi12:a
// add x0, x0, :tprel_lo12_nc:a
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 32: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g1:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
case 48: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g2:a
// movk x0, #:tprel_g1_nc:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
SDValue MiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(32, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
}
}
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
/// is a function pointer to carry out the resolution.
///
/// The sequence is:
/// adrp x0, :tlsdesc:var
/// ldr x1, [x0, #:tlsdesc_lo12:var]
/// add x0, x0, #:tlsdesc_lo12:var
/// .tlsdesccall var
/// blr x1
/// (TPIDR_EL0 offset now in x0)
///
/// The above sequence must be produced unscheduled, to enable the linker to
/// optimize/relax this sequence.
/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
/// above sequence, and expanded really late in the compilation flow, to ensure
/// the sequence is produced as per above.
SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain =
DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
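// Local-dynamic TLS code generation is off by default; treat such accesses
// as general-dynamic in that case.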
if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
if (Model == TLSModel::LocalDynamic)
Model = TLSModel::GeneralDynamic;
}
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Model != TLSModel::LocalExec)
report_fatal_error("ELF TLS only supported in small memory model or "
"in local exec TLS model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
// maximum TLS size is 4GiB.
// FIXME: add tiny and large code model support for TLS access models other
// than local exec. We currently generate the same code as small for tiny,
// which may be larger than needed.
SDValue TPOff;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
if (Model == TLSModel::LocalExec) {
return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
} else if (Model == TLSModel::LocalDynamic) {
// Local-dynamic accesses proceed in two phases. A general-dynamic TLS
// descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
// the beginning of the module's TLS region, followed by a DTPREL offset
// calculation.
// These accesses will need deduplicating if there's more than one.
AArch64FunctionInfo *MFI =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
AArch64II::MO_TLS);
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
} else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
SDValue SymAddr =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
SDValue
AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
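// On Windows, the TEB is reachable through register X18.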
SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
// Load the ThreadLocalStoragePointer from the TEB
// A pointer to the TLS array is located at offset 0x58 from the TEB.
SDValue TLSArray =
DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
Chain = TLSArray.getValue(1);
// Load the TLS index from the C runtime;
// This does the same as getAddr(), but without having a GlobalAddressSDNode.
// This also does the same as LOADgot, but using a generic i32 load,
// while LOADgot only loads i64.
SDValue TLSIndexHi =
DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
"_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
SDValue TLSIndex =
DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
Chain = TLSIndex.getValue(1);
// The pointer to the thread's TLS data area is at the TLS Index scaled by 8
// offset into the TLSArray.
TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
DAG.getConstant(3, DL, PtrVT));
SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
MachinePointerInfo());
Chain = TLS.getValue(1);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue TGAHi = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue TGALo = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
// Add the offset from the start of the .tls section (section base).
SDValue Addr =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
return Addr;
}
SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(GA, DAG);
if (Subtarget->isTargetDarwin())
return LowerDarwinGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetELF())
return LowerELFGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetWindows())
return LowerWindowsGlobalTLSAddress(Op, DAG);
llvm_unreachable("Unexpected platform trying to use TLS");
}
// Looks through \param Val to determine the bit that can be used to
// check the sign of the value. It returns the unextended value and
// the sign bit position.
std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
return {Val.getOperand(0),
cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
1};
if (Val.getOpcode() == ISD::SIGN_EXTEND)
return {Val.getOperand(0),
Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
return {Val, Val.getValueSizeInBits() - 1};
}
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
MachineFunction &MF = DAG.getMachineFunction();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
bool ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
// Handle f128 first, since lowering it will result in comparing the return
// value of a libcall against zero, which is just what the rest of LowerBR_CC
// is expecting to deal with.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
// instruction.
if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
return SDValue();
// The actual operation with overflow check.
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
if (CC == ISD::SETNE)
OFCC = getInvertedCondCode(OFCC);
SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Overflow);
}
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
// If the RHS of the comparison is zero, we can potentially fold this
// to a specialized branch.
const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
if (CC == ISD::SETEQ) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
// out of bounds, a late MI-layer pass rewrites branches.
// 403.gcc is an example that hits this case.
if (LHS.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
Dest);
}
return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
} else if (CC == ISD::SETNE) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
// out of bounds, a late MI-layer pass rewrites branches.
// 403.gcc is an example that hits this case.
if (LHS.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
Dest);
}
return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
} else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
uint64_t SignBitPos;
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
uint64_t SignBitPos;
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Cmp);
}
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue BR1 =
DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
if (CC2 != AArch64CC::AL) {
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
Cmp);
}
return BR1;
}
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue In1 = Op.getOperand(0);
SDValue In2 = Op.getOperand(1);
EVT SrcVT = In2.getValueType();
if (SrcVT.bitsLT(VT))
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT.bitsGT(VT))
In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
EVT VecVT;
uint64_t EltMask;
SDValue VecVal1, VecVal2;
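// setVecVal moves the scalar inputs into vector registers (by inserting them
// into an undef vector) or, for vector types, bitcasts them to the integer
// vector type chosen below.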
auto setVecVal = [&] (int Idx) {
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
DAG.getUNDEF(VecVT), In1);
VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
DAG.getUNDEF(VecVT), In2);
} else {
VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
}
};
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
EltMask = 0x80000000ULL;
setVecVal(AArch64::ssub);
} else if (VT == MVT::f64 || VT == MVT::v2f64) {
VecVT = MVT::v2i64;
// We want to materialize a mask with the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
EltMask = 0;
setVecVal(AArch64::dsub);
} else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
EltMask = 0x8000ULL;
setVecVal(AArch64::hsub);
} else {
llvm_unreachable("Invalid type for copysign!");
}
SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
// If we couldn't materialize the mask above, then the mask vector will be
// the zero vector, and we need to negate it here.
if (VT == MVT::f64 || VT == MVT::v2f64) {
BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
}
SDValue Sel =
DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
if (VT == MVT::f16)
return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
if (VT == MVT::f32)
return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
else if (VT == MVT::f64)
return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
else
return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
}
SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat))
return SDValue();
if (!Subtarget->hasNEON())
return SDValue();
// While there is no integer popcount instruction, it can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from
// the AdvSIMD registers are cheap.
// FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
// CNT V0.8B, V0.8B // 8xbyte pop-counts
// ADDV B0, V0.8B // sum 8xbyte pop-counts
// UMOV X0, V0.B[0] // copy byte result back to integer reg
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (VT == MVT::i32 || VT == MVT::i64) {
if (VT == MVT::i32)
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
return UaddLV;
} else if (VT == MVT::i128) {
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
"Unexpected type for custom ctpop lowering");
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
Val = DAG.getBitcast(VT8Bit, Val);
Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
// Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
unsigned EltSize = 8;
unsigned NumElts = VT.is64BitVector() ? 8 : 16;
while (EltSize != VT.getScalarSizeInBits()) {
EltSize *= 2;
NumElts /= 2;
MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
Val = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
}
return Val;
}
SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
SDLoc DL(Op);
SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
true);
SDLoc DL(Op);
SDValue REVB;
MVT VST;
switch (VT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("Invalid type for bitreverse!");
case MVT::v2i32: {
VST = MVT::v8i8;
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
break;
}
case MVT::v4i32: {
VST = MVT::v16i8;
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
break;
}
case MVT::v1i64: {
VST = MVT::v8i8;
REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
break;
}
case MVT::v2i64: {
VST = MVT::v16i8;
REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
break;
}
}
return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
DAG.getNode(ISD::BITREVERSE, DL, VST, REVB));
}
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVSETCC(Op, DAG);
bool IsStrict = Op->isStrictFPOpcode();
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Chain;
if (IsStrict)
Chain = Op.getOperand(0);
SDValue LHS = Op.getOperand(OpNo + 0);
SDValue RHS = Op.getOperand(OpNo + 1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
SDLoc dl(Op);
// We chose ZeroOrOneBooleanContents, so use zero and one.
EVT VT = Op.getValueType();
SDValue TVal = DAG.getConstant(1, dl, VT);
SDValue FVal = DAG.getConstant(0, dl, VT);
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets picked up by the next if statement.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
IsSignaling);
// If softenSetCCOperands returned a scalar, use it.
if (!RHS.getNode()) {
assert(LHS.getValueType() == Op.getValueType() &&
"Unexpected setcc expansion!");
return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
}
}
if (LHS.getValueType().isInteger()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(
LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
// If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
// and do the comparison.
SDValue Cmp;
if (IsStrict)
Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
else
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
SDValue Res;
if (CC2 == AArch64CC::AL) {
changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
} else {
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
// totally clean. Some of them require two CSELs to implement. As is in
// this case, we emit the first CSEL and then emit a second using the output
// of the first as the RHS. We're effectively OR'ing the two CC's together.
// FIXME: It would be nice if we could match the two CSELs to two CSINCs.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
}
SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
SDValue RHS, SDValue TVal,
SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const {
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
// Also handle f16, for which we need to do a f32 comparison.
if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
}
// Next, handle integers.
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
// Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
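// For instance (illustrative), in the i32 case this reads:
//   (select_cc setgt, x, -1, 1, -1)  -->  (or (sra x, 31), 1)
// i.e. "x >= 0 ? 1 : -1" becomes a single ASR plus an ORR with 1.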
if (CC == ISD::SETGT && RHSC && RHSC->isAllOnesValue() && CTVal && CFVal &&
CTVal->isOne() && CFVal->isAllOnesValue() &&
LHS.getValueType() == TVal.getValueType()) {
EVT VT = LHS.getValueType();
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, LHS,
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
}
unsigned Opcode = AArch64ISD::CSEL;
// If both the TVal and the FVal are constants, see if we can swap them in
// order to form a CSINV or CSINC out of them.
if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (TVal.getOpcode() == ISD::XOR) {
// If TVal is a NOT we want to swap TVal and FVal so that we can match
// with a CSINV rather than a CSEL.
if (isAllOnesConstant(TVal.getOperand(1))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (TVal.getOpcode() == ISD::SUB) {
// If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
// that we can match with a CSNEG rather than a CSEL.
if (isNullConstant(TVal.getOperand(0))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (CTVal && CFVal) {
const int64_t TrueVal = CTVal->getSExtValue();
const int64_t FalseVal = CFVal->getSExtValue();
bool Swap = false;
// If both TVal and FVal are constants, see if FVal is the
// inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
// instead of a CSEL in that case.
if (TrueVal == ~FalseVal) {
Opcode = AArch64ISD::CSINV;
} else if (FalseVal > std::numeric_limits<int64_t>::min() &&
TrueVal == -FalseVal) {
Opcode = AArch64ISD::CSNEG;
} else if (TVal.getValueType() == MVT::i32) {
// If our operands are only 32-bit wide, make sure we use 32-bit
// arithmetic for the check whether we can use CSINC. This ensures that
// the addition in the check will wrap around properly in case there is
// an overflow (which would not be the case if we do the check with
// 64-bit arithmetic).
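// For example (illustrative): TVal = INT32_MIN, FVal = INT32_MAX. As
// uint32_t, 0x7fffffff + 1 wraps to 0x80000000 and the CSINC check
// succeeds, whereas the sign-extended 64-bit values -2^31 and 2^31 - 1
// would not satisfy TrueVal == FalseVal + 1.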
const uint32_t TrueVal32 = CTVal->getZExtValue();
const uint32_t FalseVal32 = CFVal->getZExtValue();
if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
Opcode = AArch64ISD::CSINC;
if (TrueVal32 > FalseVal32) {
Swap = true;
}
}
// 64-bit check whether we can use CSINC.
} else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
Opcode = AArch64ISD::CSINC;
if (TrueVal > FalseVal) {
Swap = true;
}
}
// Swap TVal and FVal if necessary.
if (Swap) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
if (Opcode != AArch64ISD::CSEL) {
// Drop FVal since we can get its value by simply inverting/negating
// TVal.
FVal = TVal;
}
}
// Avoid materializing a constant when possible by reusing a known value in
// a register. However, don't perform this optimization if the known value
// is one, zero or negative one in the case of a CSEL. We can always
// materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
// FVal, respectively.
ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
!RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
// Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
// "a != C ? x : a" to avoid materializing C.
if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
TVal = LHS;
else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
FVal = LHS;
} else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
// Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
// avoid materializing C.
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
Opcode = AArch64ISD::CSINV;
TVal = LHS;
FVal = DAG.getConstant(0, dl, FVal.getValueType());
}
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
EVT VT = TVal.getValueType();
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two CSELs to implement.
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
if (DAG.getTarget().Options.UnsafeFPMath) {
// Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
// "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
if (RHSVal && RHSVal->isZero()) {
ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
TVal = LHS;
else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
CFVal && CFVal->isZero() &&
FVal.getValueType() == LHS.getValueType())
FVal = LHS;
}
}
// Emit first, and possibly only, CSEL.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
// If we need a second CSEL, emit it, using the output of the first as the
// RHS. We're effectively OR'ing the two CC's together.
if (CC2 != AArch64CC::AL) {
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
// Otherwise, return the output of the first CSEL.
return CS1;
}
SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
SelectionDAG &DAG) const {
EVT Ty = Op.getValueType();
auto Idx = Op.getConstantOperandAPInt(2);
if (Idx.sge(-1) && Idx.slt(Ty.getVectorMinNumElements()))
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue TVal = Op.getOperand(2);
SDValue FVal = Op.getOperand(3);
SDLoc DL(Op);
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SelectionDAG &DAG) const {
SDValue CCVal = Op->getOperand(0);
SDValue TVal = Op->getOperand(1);
SDValue FVal = Op->getOperand(2);
SDLoc DL(Op);
EVT Ty = Op.getValueType();
if (Ty.isScalableVector()) {
SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
if (useSVEForFixedLengthVectorVT(Ty)) {
// FIXME: Ideally this would be the same as above using i1 types, however
// for the moment we can't deal with fixed i1 vector types properly, so
// instead extend the predicate to a result type sized integer vector.
MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
// instruction.
if (ISD::isOverflowIntrOpRes(CCVal)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
return SDValue();
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
// Lower it the same way as we would lower a SELECT_CC node.
ISD::CondCode CC;
SDValue LHS, RHS;
if (CCVal.getOpcode() == ISD::SETCC) {
LHS = CCVal.getOperand(0);
RHS = CCVal.getOperand(1);
CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
} else {
LHS = CCVal;
RHS = DAG.getConstant(0, DL, CCVal.getValueType());
CC = ISD::SETNE;
}
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries are emitted as PC-relative offsets. No additional
// tweaking is necessary here; just get the address of the jump table.
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(JT, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(JT, DAG);
}
return getAddr(JT, DAG);
}
SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries are emitted as PC-relative offsets. No additional
// tweaking is necessary here; just get the address of the jump table.
SDLoc DL(Op);
SDValue JT = Op.getOperand(1);
SDValue Entry = Op.getOperand(2);
int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
SDValue(Dest, 0));
}
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
// Use the GOT for the large code model on iOS.
if (Subtarget->isTargetMachO()) {
return getGOT(CP, DAG);
}
return getAddrLarge(CP, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(CP, DAG);
} else {
return getAddr(CP, DAG);
}
}
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(BA, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(BA, DAG);
}
return getAddr(BA, DAG);
}
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
? FuncInfo->getVarArgsGPRIndex()
: FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SelectionDAG &DAG) const {
// The layout of the va_list struct is specified in the AArch64 Procedure Call
// Standard, section B.3.
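// For reference, a sketch of that layout (LP64 offsets; ILP32 in
// parentheses), matching the stores emitted below:
//   struct va_list {
//     void *__stack;    // offset 0
//     void *__gr_top;   // offset 8  (4)
//     void *__vr_top;   // offset 16 (8)
//     int   __gr_offs;  // offset 24 (12)
//     int   __vr_offs;  // offset 28 (16)
//   };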
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue VAList = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
unsigned Offset = 0;
SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
MachinePointerInfo(SV), Align(PtrSize)));
// void *__gr_top at offset 8 (4 on ILP32)
Offset += PtrSize;
int GPRSize = FuncInfo->getVarArgsGPRSize();
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
DAG.getConstant(GPRSize, DL, PtrVT));
GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, Offset),
Align(PtrSize)));
}
// void *__vr_top at offset 16 (8 on ILP32)
Offset += PtrSize;
int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
DAG.getConstant(FPRSize, DL, PtrVT));
VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, Offset),
Align(PtrSize)));
}
// int __gr_offs at offset 24 (12 on ILP32)
Offset += PtrSize;
SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
MemOps.push_back(
DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
// int __vr_offs at offset 28 (16 on ILP32)
Offset += 4;
SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
MemOps.push_back(
DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
return LowerWin64_VASTART(Op, DAG);
else if (Subtarget->isTargetDarwin())
return LowerDarwin_VASTART(Op, DAG);
else
return LowerAAPCS_VASTART(Op, DAG);
}
SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
SelectionDAG &DAG) const {
// AAPCS has three pointers and two ints (= 32 bytes), while Darwin has a
// single pointer.
SDLoc DL(Op);
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
unsigned VaListSize =
(Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
? PtrSize
: Subtarget->isTargetILP32() ? 20 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(VaListSize, DL, MVT::i32),
Align(PtrSize), false, false, false,
MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() &&
"automatic va_arg instruction only works on Darwin");
const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
MaybeAlign Align(Op.getConstantOperandVal(3));
unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrVT = getPointerTy(DAG.getDataLayout());
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
SDValue VAList =
DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
Chain = VAList.getValue(1);
VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
if (VT.isScalableVector())
report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
if (Align && *Align > MinSlotSize) {
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Align->value() - 1, DL, PtrVT));
VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
// Scalar integer and FP values smaller than 64 bits are implicitly extended
// up to 64 bits. At the very least, we have to increase the striding of the
// vaargs list to match this, and for FP values we need to introduce
// FP_ROUND nodes as well.
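// For example (summarising the code below): an i32 vaarg still advances
// the list by the full slot size (8 bytes on LP64), and an f32 vaarg is
// loaded as an f64 and then rounded back down with FP_ROUND.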
if (VT.isInteger() && !VT.isVector())
ArgSize = std::max(ArgSize, MinSlotSize);
bool NeedFPTrunc = false;
if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
ArgSize = 8;
NeedFPTrunc = true;
}
// Increment the pointer, VAList, to the next vaarg
SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(ArgSize, DL, PtrVT));
VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
// Store the incremented VAList to the legalized pointer
SDValue APStore =
DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
// Load the actual argument out of the pointer VAList
if (NeedFPTrunc) {
// Load the value as an f64.
SDValue WideFP =
DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
// Round the value down to an f32.
SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
DAG.getIntPtrConstant(1, DL));
SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
// Merge the rounded value with the chain output of the load.
return DAG.getMergeValues(Ops, DL);
}
return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
}
SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
while (Depth--)
FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
if (Subtarget->isTargetILP32())
FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
DAG.getValueType(VT));
return FrameAddr;
}
SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
EVT VT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
int FI = MFI.CreateFixedObject(4, 0, false);
return DAG.getFrameIndex(FI, VT);
}
#define GET_REGISTER_MATCHER
#include "AArch64GenAsmMatcher.inc"
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register AArch64TargetLowering::
getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
Register Reg = MatchRegisterName(RegName);
if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
if (!Subtarget->isXRegisterReserved(DwarfRegNum))
Reg = 0;
}
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
+ StringRef(RegName) + "\"."));
}
SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
}
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue ReturnAddress;
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
ReturnAddress = DAG.getLoad(
VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
} else {
// Return LR, which contains the return address. Mark it an implicit
// live-in.
unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
// The XPACLRI instruction assembles to a hint-space instruction before
// Armv8.3-A, so it can be used safely on any pre-Armv8.3-A architecture.
// On Armv8.3-A and onwards XPACI is available, so use that instead.
SDNode *St;
if (Subtarget->hasPAuth()) {
St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
} else {
// XPACLRI operates on LR therefore we must move the operand accordingly.
SDValue Chain =
DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
}
return SDValue(St, 0);
}
/// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which return two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
SelectionDAG &DAG) const {
SDValue Lo, Hi;
expandShiftParts(Op.getNode(), Lo, Hi, DAG);
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}
bool AArch64TargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// Offsets are folded in the DAG combine rather than here so that we can
// intelligently choose an offset based on the uses.
return false;
}
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool OptForSize) const {
bool IsLegal = false;
// We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases,
// and for the 16-bit case when the target has full fp16 support.
// FIXME: We should be able to handle f128 as well with a clever lowering.
const APInt ImmInt = Imm.bitcastToAPInt();
if (VT == MVT::f64)
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f16 && Subtarget->hasFullFP16())
IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
// TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
// generate that fmov.
// If we cannot materialize the value in the immediate field of an fmov,
// check if it can be encoded as the immediate operand of a logical
// instruction.
// The immediate value will be created with either MOVZ, MOVN, or ORR.
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
// The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
// however the mov+fmov sequence is always better because of the reduced
// cache pressure. The timings are still the same if you consider
// movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
// movw+movk is fused). So we limit this to at most 2 instructions.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
Insn);
unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
IsLegal = Insn.size() <= Limit;
}
LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
<< " imm value: "; Imm.dump(););
return IsLegal;
}
//===----------------------------------------------------------------------===//
// AArch64 Optimization Hooks
//===----------------------------------------------------------------------===//
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
SDValue Operand, SelectionDAG &DAG,
int &ExtraSteps) {
EVT VT = Operand.getValueType();
if (ST->hasNEON() &&
(VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
VT == MVT::f32 || VT == MVT::v1f32 ||
VT == MVT::v2f32 || VT == MVT::v4f32)) {
if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
// For the reciprocal estimates, convergence is quadratic, so the number
// of digits is doubled after each iteration. In ARMv8, the accuracy of
// the initial estimate is 2^-8. Thus the number of extra steps to refine
// the result for float (23 mantissa bits) is 2 and for double (52
// mantissa bits) is 3.
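// (As a quick check: k refinement steps give roughly 8 * 2^k accurate
// bits, so 2 steps -> 32 > 23 and 3 steps -> 64 > 52.)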
ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
SDValue
AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
SDValue
AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
SelectionDAG &DAG) const {
return Op;
}
SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps,
bool &UseOneConst,
bool Reciprocal) const {
if (Enabled == ReciprocalEstimate::Enabled ||
(Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
DAG, ExtraSteps)) {
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
SDNodeFlags Flags;
Flags.setAllowReassociation(true);
// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
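// The loop below maps onto that formula roughly as follows (a sketch of
// the mapping, not a change in behaviour):
//   Step     = E * E
//   Step     = FRSQRTS(X, Step)   ==  0.5 * (3 - X * E^2)
//   Estimate = E * Step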
for (int i = ExtraSteps; i > 0; --i) {
SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
Flags);
Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
if (!Reciprocal)
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
ExtraSteps = 0;
return Estimate;
}
return SDValue();
}
SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const {
if (Enabled == ReciprocalEstimate::Enabled)
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
DAG, ExtraSteps)) {
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
SDNodeFlags Flags;
Flags.setAllowReassociation(true);
// Newton reciprocal iteration: E * (2 - X * E)
// AArch64 reciprocal iteration instruction: (2 - M * N)
for (int i = ExtraSteps; i > 0; --i) {
SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
Estimate, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
ExtraSteps = 0;
return Estimate;
}
return SDValue();
}
//===----------------------------------------------------------------------===//
// AArch64 Inline Assembly Support
//===----------------------------------------------------------------------===//
// Table of Constraints
// TODO: This is the current set of constraints supported by ARM for the
// compiler, not all of them may make sense.
//
// r - A general register
// w - An FP/SIMD register of some size in the range v0-v31
// x - An FP/SIMD register of some size in the range v0-v15
// I - Constant that can be used with an ADD instruction
// J - Constant that can be used with a SUB instruction
// K - Constant that can be used with a 32-bit logical instruction
// L - Constant that can be used with a 64-bit logical instruction
// M - Constant that can be used as a 32-bit MOV immediate
// N - Constant that can be used as a 64-bit MOV immediate
// Q - A memory reference with base register and no offset
// S - A symbolic address
// Y - Floating point constant zero
// Z - Integer constant zero
//
// Note that general register operands will be output using their 64-bit x
// register name, whatever the size of the variable, unless the asm operand
// is prefixed by the %w modifier. Floating-point and SIMD register operands
// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
// %q modifier.
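// An illustrative example (operand names are hypothetical):
//   asm("mov %w0, %w1" : "=r"(Res) : "r"(Val));
// prints the 32-bit "w" register names, whereas plain %0/%1 would print
// the 64-bit "x" names regardless of the C type of Res and Val.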
const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// At this point, we have to lower this constraint to something else, so we
// lower it to an "r" or "w". However, by doing this we will force the result
// to be in register, while the X constraint is much more permissive.
//
// Although we are correct (we are free to emit anything, without
// constraints), we might break use cases that would expect us to be more
// efficient and emit something else.
if (!Subtarget->hasFPARMv8())
return "r";
if (ConstraintVT.isFloatingPoint())
return "w";
if (ConstraintVT.isVector() &&
(ConstraintVT.getSizeInBits() == 64 ||
ConstraintVT.getSizeInBits() == 128))
return "w";
return "r";
}
enum PredicateConstraint {
Upl,
Upa,
Invalid
};
static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
PredicateConstraint P = PredicateConstraint::Invalid;
if (Constraint == "Upa")
P = PredicateConstraint::Upa;
if (Constraint == "Upl")
P = PredicateConstraint::Upl;
return P;
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
break;
case 'x':
case 'w':
case 'y':
return C_RegisterClass;
// An address with a single base register. Due to the way we
// currently handle addresses it is the same as 'r'.
case 'Q':
return C_Memory;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'Y':
case 'Z':
return C_Immediate;
case 'z':
case 'S': // A symbolic address
return C_Other;
}
} else if (parsePredicateConstraint(Constraint) !=
PredicateConstraint::Invalid)
return C_RegisterClass;
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
AArch64TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'x':
case 'w':
case 'y':
if (type->isFloatingPointTy() || type->isVectorTy())
weight = CW_Register;
break;
case 'z':
weight = CW_Constant;
break;
case 'U':
if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
weight = CW_Register;
break;
}
return weight;
}
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
if (VT.isScalableVector())
return std::make_pair(0U, nullptr);
if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
if (VT.getFixedSizeInBits() == 64)
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
case 'w': {
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector()) {
if (VT.getVectorElementType() != MVT::i1)
return std::make_pair(0U, &AArch64::ZPRRegClass);
return std::make_pair(0U, nullptr);
}
uint64_t VTSize = VT.getFixedSizeInBits();
if (VTSize == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
if (VTSize == 32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
if (VTSize == 64)
return std::make_pair(0U, &AArch64::FPR64RegClass);
if (VTSize == 128)
return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
}
// The instructions that this constraint is designed for can
// only take 128-bit registers so just use that regclass.
case 'x':
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector())
return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::FPR128_loRegClass);
break;
case 'y':
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector())
return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
break;
}
} else {
PredicateConstraint PC = parsePredicateConstraint(Constraint);
if (PC != PredicateConstraint::Invalid) {
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
return std::make_pair(0U, nullptr);
bool restricted = (PC == PredicateConstraint::Upl);
return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
: std::make_pair(0U, &AArch64::PPRRegClass);
}
}
if (StringRef("{cc}").equals_insensitive(Constraint))
return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass *> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
unsigned Size = Constraint.size();
if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
int RegNo;
bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
if (!Failed && RegNo >= 0 && RegNo <= 31) {
// v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
// By default we'll emit v0-v31 for this unless there's a modifier where
// we'll emit the correct register as well.
if (VT != MVT::Other && VT.getSizeInBits() == 64) {
Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
Res.second = &AArch64::FPR64RegClass;
} else {
Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
Res.second = &AArch64::FPR128RegClass;
}
}
}
}
if (Res.second && !Subtarget->hasFPARMv8() &&
!AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
!AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
return std::make_pair(0U, nullptr);
return Res;
}
EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
llvm::Type *Ty,
bool AllowUnknown) const {
if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
return EVT(MVT::i64x8);
return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void AArch64TargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Currently only support length 1 constraints.
if (Constraint.length() != 1)
return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default:
break;
// This set of constraints deals with valid constants for various instructions.
// Validate and return a target constant for them if we can.
case 'z': {
// 'z' maps to xzr or wzr so it needs an input of 0.
if (!isNullConstant(Op))
return;
if (Op.getValueType() == MVT::i64)
Result = DAG.getRegister(AArch64::XZR, MVT::i64);
else
Result = DAG.getRegister(AArch64::WZR, MVT::i32);
break;
}
case 'S': {
// An absolute symbolic address or label reference.
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
GA->getValueType(0));
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(Op)) {
Result =
DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
} else
return;
break;
}
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return;
// Grab the value and do some validation.
uint64_t CVal = C->getZExtValue();
switch (ConstraintLetter) {
// The I constraint applies only to simple ADD or SUB immediate operands:
// i.e. 0 to 4095 with optional shift by 12
// The J constraint applies only to ADD or SUB immediates that would be
// valid when negated, i.e. if [an add pattern] were to be output as a SUB
// instruction [or vice versa], in other words -1 to -4095 with optional
// left shift by 12.
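// For example (illustrative): 0, 4095 and (4095 << 12) all satisfy 'I',
// while -1 through -4095 (optionally shifted left by 12) satisfy 'J'.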
case 'I':
if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
break;
return;
case 'J': {
uint64_t NVal = -C->getSExtValue();
if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
CVal = C->getSExtValue();
break;
}
return;
}
// The K and L constraints apply *only* to logical immediates, including
// what used to be the MOVI alias for ORR (though the MOVI alias has now
// been removed and MOV should be used). So these constraints have to
// distinguish between bit patterns that are valid 32-bit or 64-bit
// "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
// not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
// versa.
case 'K':
if (AArch64_AM::isLogicalImmediate(CVal, 32))
break;
return;
case 'L':
if (AArch64_AM::isLogicalImmediate(CVal, 64))
break;
return;
// The M and N constraints are a superset of K and L respectively, for use
// with the MOV (immediate) alias. As well as the logical immediates they
// also match 32 or 64-bit immediates that can be loaded either using a
// *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
// (M) or 64-bit 0x1234000000000000 (N) etc.
// As a note, some of this code is liberally stolen from the asm parser.
case 'M': {
if (!isUInt<32>(CVal))
return;
if (AArch64_AM::isLogicalImmediate(CVal, 32))
break;
if ((CVal & 0xFFFF) == CVal)
break;
if ((CVal & 0xFFFF0000ULL) == CVal)
break;
uint64_t NCVal = ~(uint32_t)CVal;
if ((NCVal & 0xFFFFULL) == NCVal)
break;
if ((NCVal & 0xFFFF0000ULL) == NCVal)
break;
return;
}
case 'N': {
if (AArch64_AM::isLogicalImmediate(CVal, 64))
break;
if ((CVal & 0xFFFFULL) == CVal)
break;
if ((CVal & 0xFFFF0000ULL) == CVal)
break;
if ((CVal & 0xFFFF00000000ULL) == CVal)
break;
if ((CVal & 0xFFFF000000000000ULL) == CVal)
break;
uint64_t NCVal = ~CVal;
if ((NCVal & 0xFFFFULL) == NCVal)
break;
if ((NCVal & 0xFFFF0000ULL) == NCVal)
break;
if ((NCVal & 0xFFFF00000000ULL) == NCVal)
break;
if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
break;
return;
}
default:
return;
}
// All assembler immediates are 64-bit integers.
Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
break;
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
//===----------------------------------------------------------------------===//
// AArch64 Advanced SIMD Support
//===----------------------------------------------------------------------===//
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
EVT VT = V64Reg.getValueType();
unsigned NarrowSize = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType().getSimpleVT();
MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
SDLoc DL(V64Reg);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
V64Reg, DAG.getConstant(0, DL, MVT::i64));
}
/// getExtFactor - Determine the adjustment factor for the position when
/// generating an "extract from vector registers" instruction.
static unsigned getExtFactor(SDValue &V) {
EVT EltType = V.getValueType().getVectorElementType();
return EltType.getSizeInBits() / 8;
}
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
EVT VT = V128Reg.getValueType();
unsigned WideSize = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType().getSimpleVT();
MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
SDLoc DL(V128Reg);
return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
}
// Gather data to see if the operation can be modelled as a
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
SDLoc dl(Op);
EVT VT = Op.getValueType();
assert(!VT.isScalableVector() &&
"Scalable vectors cannot be used with ISD::BUILD_VECTOR");
unsigned NumElts = VT.getVectorNumElements();
struct ShuffleSourceInfo {
SDValue Vec;
unsigned MinElt;
unsigned MaxElt;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
// ShuffleVec will be some sliding window into the original Vec.
SDValue ShuffleVec;
// Code should guarantee that element i in Vec starts at element
// "WindowBase + i * WindowScale" in ShuffleVec.
int WindowBase;
int WindowScale;
ShuffleSourceInfo(SDValue Vec)
: Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
// node.
SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(V.getOperand(1))) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: "
"a shuffle can only come from building a vector from "
"various elements of other vectors, provided their "
"indices are constant\n");
return SDValue();
}
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
auto Source = find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
// Update the minimum and maximum lane number seen.
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
Source->MinElt = std::min(Source->MinElt, EltNo);
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
if (Sources.size() > 2) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: currently only do something sane when at "
"most two source vectors are involved\n");
return SDValue();
}
// Find out the smallest element size among result and two sources, and use
// it as element size to build the shuffle_vector.
EVT SmallestEltTy = VT.getVectorElementType();
for (auto &Source : Sources) {
EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
if (SrcEltTy.bitsLT(SmallestEltTy)) {
SmallestEltTy = SrcEltTy;
}
}
unsigned ResMultiplier =
VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
uint64_t VTSize = VT.getFixedSizeInBits();
NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
// If the source vector is too wide or too narrow, we may nevertheless be able
// to construct a compatible shuffle either by concatenating it with UNDEF or
// extracting a suitable range of elements.
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
if (SrcVTSize == VTSize)
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
if (SrcVTSize < VTSize) {
assert(2 * SrcVTSize == VTSize);
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
Src.ShuffleVec =
DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
}
if (SrcVTSize != 2 * VTSize) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: result vector too small to extract\n");
return SDValue();
}
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
return SDValue();
}
if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i64));
} else {
// An actual VEXT is needed
SDValue VEXTSrc1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i64));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
if (!SrcVT.is64BitVector()) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
"for SVE vectors.");
return SDValue();
}
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
VEXTSrc2,
DAG.getConstant(Imm, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
// Another possible incompatibility occurs from the vector element types. We
// can fix this by bitcasting the source vectors to the same type we intend
// for the shuffle.
for (auto &Src : Sources) {
EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
Src.WindowScale =
SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
// Final sanity check before we try to actually produce a shuffle.
LLVM_DEBUG(for (auto Src
: Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT););
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
if (Entry.isUndef())
continue;
auto Src = find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
// starting at the appropriate offset.
int *LaneMask = &Mask[i * ResMultiplier];
int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
ExtractBase += NumElts * (Src - Sources.begin());
for (int j = 0; j < LanesDefined; ++j)
LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
return SDValue();
}
SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
dbgs() << "Reshuffle, creating node: "; V.dump(););
return V;
}
// check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are the same.
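// For example, on v8i8 the single-source mask <3, 4, 5, 6, 7, 0, 1, 2> is
// accepted with Imm = 3: the indices increase by one and wrap around at
// NumElts, which is exactly what EXT of a vector with itself produces.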
static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
// Assume that the first shuffle index is not UNDEF. Fail if it is.
if (M[0] < 0)
return false;
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
// element. The other shuffle indices must be the successive elements after
// the first one.
unsigned ExpectedElt = Imm;
for (unsigned i = 1; i < NumElts; ++i) {
// Increment the expected index. If it wraps around, just follow it
// back to index zero and keep going.
++ExpectedElt;
if (ExpectedElt == NumElts)
ExpectedElt = 0;
if (M[i] < 0)
continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
return true;
}
/// Check if a vector shuffle corresponds to a DUP instruction with a larger
/// element width than the vector lane type. If that is the case the function
/// returns true and writes the value of the DUP instruction lane operand into
/// DupLaneOp.
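///
/// For example, on v8i16 with BlockSize == 32 the mask <2, 3, 2, 3, 2, 3, 2, 3>
/// duplicates the 32-bit block formed by lanes 2 and 3, so DupLaneOp is 1.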
static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
unsigned &DupLaneOp) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for wide DUP are: 16, 32, 64");
if (BlockSize <= VT.getScalarSizeInBits())
return false;
if (BlockSize % VT.getScalarSizeInBits() != 0)
return false;
if (VT.getSizeInBits() % BlockSize != 0)
return false;
size_t SingleVecNumElements = VT.getVectorNumElements();
size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
size_t NumBlocks = VT.getSizeInBits() / BlockSize;
// We are looking for masks like
// [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
// might be replaced by 'undefined'. BlockElts will eventually contain
// lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
// for the above examples)
SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
for (size_t I = 0; I < NumEltsPerBlock; I++) {
int Elt = M[BlockIndex * NumEltsPerBlock + I];
if (Elt < 0)
continue;
// For now we don't support shuffles that use the second operand
if ((unsigned)Elt >= SingleVecNumElements)
return false;
if (BlockElts[I] < 0)
BlockElts[I] = Elt;
else if (BlockElts[I] != Elt)
return false;
}
// We found a candidate block (possibly with some undefs). It must be a
// sequence of consecutive integers starting with a value divisible by
// NumEltsPerBlock with some values possibly replaced by undef-s.
// Find first non-undef element
auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
assert(FirstRealEltIter != BlockElts.end() &&
"Shuffle with all-undefs must have been caught by previous cases, "
"e.g. isSplat()");
if (FirstRealEltIter == BlockElts.end()) {
DupLaneOp = 0;
return true;
}
// Index of FirstRealElt in BlockElts
size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
if ((unsigned)*FirstRealEltIter < FirstRealIndex)
return false;
// BlockElts[0] must have the following value if it isn't undef:
size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
// Check the first element
if (Elt0 % NumEltsPerBlock != 0)
return false;
// Check that the sequence indeed consists of consecutive integers (modulo
// undefs)
for (size_t I = 0; I < NumEltsPerBlock; I++)
if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
return false;
DupLaneOp = Elt0 / NumEltsPerBlock;
return true;
}
// check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are different.
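// For example, on v4i32 the mask <2, 3, 4, 5> selects a contiguous window of
// the concatenated sources, so it is accepted with Imm = 2 and ReverseEXT
// left false.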
static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
unsigned &Imm) {
// Look for the first non-undef element.
const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
// Benefit from APInt to handle overflow when calculating the expected element.
unsigned NumElts = VT.getVectorNumElements();
unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
// The following shuffle indices must be the successive elements after the
// first real element.
const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
[&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
if (FirstWrongElt != M.end())
return false;
// The index of an EXT is the first element if it is not UNDEF.
// Watch out for the beginning UNDEFs. The EXT index should be the expected
// value of the first element. E.g.
// <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
// <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
// ExpectedElt is the last mask index plus 1.
Imm = ExpectedElt.getZExtValue();
// There are two different cases that require reversing the input vectors.
// For example, for vector <4 x i32> we have the following cases,
// Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
// Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
// For both cases, we finally use mask <5, 6, 7, 0>, which requires
// reversing the two input vectors.
if (Imm < NumElts)
ReverseEXT = true;
else
Imm -= NumElts;
return true;
}
/// isREVMask - Check if a vector shuffle corresponds to a REV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
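///
/// For example, on v8i16 with BlockSize == 32 the mask <1, 0, 3, 2, 5, 4, 7, 6>
/// swaps the two 16-bit lanes inside every 32-bit block, matching REV32.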
static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for REV are: 16, 32, 64");
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
return false;
}
return true;
}
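// For example, on v4i32 the mask <0, 4, 1, 5> interleaves the low halves of
// the two sources (WhichResult == 0, i.e. ZIP1), while <2, 6, 3, 7>
// interleaves the high halves (WhichResult == 1, i.e. ZIP2).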
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
return false;
Idx += 1;
}
return true;
}
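// For example, on v4i32 the mask <0, 2, 4, 6> gathers the even lanes of the
// concatenated sources (WhichResult == 0, i.e. UZP1) and <1, 3, 5, 7> gathers
// the odd lanes (WhichResult == 1, i.e. UZP2).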
static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != 2 * i + WhichResult)
return false;
}
return true;
}
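// For example, on v4i32 the mask <0, 4, 2, 6> transposes the even lanes of
// the two sources (WhichResult == 0, i.e. TRN1) and <1, 5, 3, 7> transposes
// the odd lanes (WhichResult == 1, i.e. TRN2).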
static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
return false;
}
return true;
}
/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
return false;
Idx += 1;
}
return true;
}
/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned Half = VT.getVectorNumElements() / 2;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
int MIdx = M[i + j * Half];
if (MIdx >= 0 && (unsigned)MIdx != Idx)
return false;
Idx += 2;
}
}
return true;
}
/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
return false;
}
return true;
}
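// For example, with 4 input elements the mask <0, 6, 2, 3> keeps every lane
// of the LHS except lane 1, which comes from lane 2 of the RHS; it is
// accepted with DstIsLeft == true and Anomaly == 1, and the caller lowers it
// to a single INSERT_VECTOR_ELT of the extracted source lane.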
static bool isINSMask(ArrayRef<int> M, int NumInputElements,
bool &DstIsLeft, int &Anomaly) {
if (M.size() != static_cast<size_t>(NumInputElements))
return false;
int NumLHSMatch = 0, NumRHSMatch = 0;
int LastLHSMismatch = -1, LastRHSMismatch = -1;
for (int i = 0; i < NumInputElements; ++i) {
if (M[i] == -1) {
++NumLHSMatch;
++NumRHSMatch;
continue;
}
if (M[i] == i)
++NumLHSMatch;
else
LastLHSMismatch = i;
if (M[i] == i + NumInputElements)
++NumRHSMatch;
else
LastRHSMismatch = i;
}
if (NumLHSMatch == NumInputElements - 1) {
DstIsLeft = true;
Anomaly = LastLHSMismatch;
return true;
} else if (NumRHSMatch == NumInputElements - 1) {
DstIsLeft = false;
Anomaly = LastRHSMismatch;
return true;
}
return false;
}
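// For example, on v4i32 the mask <0, 1, 4, 5> takes the low half of each
// 128-bit source; isConcatMask accepts it with SplitLHS == true, and
// tryFormConcatFromShuffle then emits a single CONCAT_VECTORS of the two
// extracted halves.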
static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
if (VT.getSizeInBits() != 128)
return false;
unsigned NumElts = VT.getVectorNumElements();
for (int I = 0, E = NumElts / 2; I != E; I++) {
if (Mask[I] != I)
return false;
}
int Offset = NumElts / 2;
for (int I = NumElts / 2, E = NumElts; I != E; I++) {
if (Mask[I] != I + SplitLHS * Offset)
return false;
}
return true;
}
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue V0 = Op.getOperand(0);
SDValue V1 = Op.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
VT.getVectorElementType() != V1.getValueType().getVectorElementType())
return SDValue();
bool SplitV0 = V0.getValueSizeInBits() == 128;
if (!isConcatMask(Mask, VT, SplitV0))
return SDValue();
EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
if (SplitV0) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
DAG.getConstant(0, DL, MVT::i64));
}
if (V1.getValueSizeInBits() == 128) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
DAG.getConstant(0, DL, MVT::i64));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
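///
/// Each PFEntry packs the cost in bits 31-30, the operation in bits 29-26 and
/// two 13-bit operand IDs. An operand ID is the base-9 encoding of a
/// four-lane mask (digits 0-7 select a lane of the concatenated inputs, 8
/// means undef), which is why OP_COPY compares against (1*9+2)*9+3 for the
/// LHS and ((4*9+5)*9+6)*9+7 for the RHS below.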
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
enum {
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
OP_VREV,
OP_VDUP0,
OP_VDUP1,
OP_VDUP2,
OP_VDUP3,
OP_VEXT1,
OP_VEXT2,
OP_VEXT3,
OP_VUZPL, // VUZP, left result
OP_VUZPR, // VUZP, right result
OP_VZIPL, // VZIP, left result
OP_VZIPR, // VZIP, right result
OP_VTRNL, // VTRN, left result
OP_VTRNR // VTRN, right result
};
if (OpNum == OP_COPY) {
if (LHSID == (1 * 9 + 2) * 9 + 3)
return LHS;
assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
return RHS;
}
SDValue OpLHS, OpRHS;
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
EVT VT = OpLHS.getValueType();
switch (OpNum) {
default:
llvm_unreachable("Unknown shuffle opcode!");
case OP_VREV:
// VREV divides the vector in half and swaps within the half.
if (VT.getVectorElementType() == MVT::i32 ||
VT.getVectorElementType() == MVT::f32)
return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
// vrev <4 x i16> -> REV32
if (VT.getVectorElementType() == MVT::i16 ||
VT.getVectorElementType() == MVT::f16 ||
VT.getVectorElementType() == MVT::bf16)
return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
// vrev <4 x i8> -> REV16
assert(VT.getVectorElementType() == MVT::i8);
return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
case OP_VDUP0:
case OP_VDUP1:
case OP_VDUP2:
case OP_VDUP3: {
EVT EltTy = VT.getVectorElementType();
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
else if (EltTy == MVT::i64 || EltTy == MVT::f64)
Opcode = AArch64ISD::DUPLANE64;
else
llvm_unreachable("Invalid vector element type?");
if (VT.getSizeInBits() == 64)
OpLHS = WidenVector(OpLHS, DAG);
SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
}
case OP_VEXT1:
case OP_VEXT2:
case OP_VEXT3: {
unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
DAG.getConstant(Imm, dl, MVT::i32));
}
case OP_VUZPL:
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VUZPR:
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VZIPL:
return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VZIPR:
return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VTRNL:
return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VTRNR:
return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
}
}
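// Lower a shuffle through a TBL table lookup by expanding every mask element
// into per-byte indices. For example, a v8i16 mask element of 3 (with
// BytesPerElt == 2) contributes byte indices 6 and 7 to TBLMask.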
static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
SelectionDAG &DAG) {
// Check to see if we can use the TBL instruction.
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
EVT EltVT = Op.getValueType().getVectorElementType();
unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
SmallVector<SDValue, 8> TBLMask;
for (int Val : ShuffleMask) {
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
}
}
MVT IndexVT = MVT::v8i8;
unsigned IndexLen = 8;
if (Op.getValueSizeInBits() == 128) {
IndexVT = MVT::v16i8;
IndexLen = 16;
}
SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
SDValue Shuffle;
if (V2.getNode()->isUndef()) {
if (IndexLen == 8)
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
if (IndexLen == 8) {
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
// FIXME: We cannot, for the moment, emit a TBL2 instruction because we
// cannot currently represent the register constraints on the input
// table registers.
// Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
// DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
// IndexLen));
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
V2Cst, DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
}
}
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
}
static unsigned getDUPLANEOp(EVT EltType) {
if (EltType == MVT::i8)
return AArch64ISD::DUPLANE8;
if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
return AArch64ISD::DUPLANE16;
if (EltType == MVT::i32 || EltType == MVT::f32)
return AArch64ISD::DUPLANE32;
if (EltType == MVT::i64 || EltType == MVT::f64)
return AArch64ISD::DUPLANE64;
llvm_unreachable("Invalid vector element type?");
}
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
unsigned Opcode, SelectionDAG &DAG) {
// Try to eliminate a bitcasted extract subvector before a DUPLANE.
auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
// Match: dup (bitcast (extract_subv X, C)), LaneC
if (BitCast.getOpcode() != ISD::BITCAST ||
BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
// The extract index must align in the destination type. That may not
// happen if the bitcast is from narrow to wide type.
SDValue Extract = BitCast.getOperand(0);
unsigned ExtIdx = Extract.getConstantOperandVal(1);
unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
if (ExtIdxInBits % CastedEltBitWidth != 0)
return false;
// Update the lane value by offsetting with the scaled extract index.
LaneC += ExtIdxInBits / CastedEltBitWidth;
// Determine the casted vector type of the wide vector input.
// dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
// Examples:
// dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
// dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
unsigned SrcVecNumElts =
Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
SrcVecNumElts);
return true;
};
MVT CastVT;
if (getScaledOffsetDup(V, Lane, CastVT)) {
V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
} else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
// The lane is incremented by the index of the extract.
// Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
Lane += V.getConstantOperandVal(1);
V = V.getOperand(0);
} else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
// The lane is decremented if we are splatting from the 2nd operand.
// Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
Lane -= Idx * VT.getVectorNumElements() / 2;
V = WidenVector(V.getOperand(Idx), DAG);
} else if (VT.getSizeInBits() == 64) {
// Widen the operand to 128-bit register with undef.
V = WidenVector(V, DAG);
}
return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
}
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
// during code selection. This is more efficient and avoids the possibility
// of inconsistencies between legalization and selection.
ArrayRef<int> ShuffleMask = SVN->getMask();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
assert(ShuffleMask.size() == VT.getVectorNumElements() &&
"Unexpected VECTOR_SHUFFLE mask size!");
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
// If this is an undef splat, generate it via "just" vdup, if possible.
if (Lane == -1)
Lane = 0;
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
V1.getOperand(0));
// Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
// constant. If so, we can just reference the lane's definition directly.
if (V1.getOpcode() == ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(V1.getOperand(Lane)))
return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
// Otherwise, duplicate from the lane of the input vector.
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
return constructDup(V1, Lane, dl, VT, Opcode, DAG);
}
// Check if the mask matches a DUP for a wider element
for (unsigned LaneSize : {64U, 32U, 16U}) {
unsigned Lane = 0;
if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
: LaneSize == 32 ? AArch64ISD::DUPLANE32
: AArch64ISD::DUPLANE16;
// Cast V1 to an integer vector with required lane size
MVT NewEltTy = MVT::getIntegerVT(LaneSize);
unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
V1 = DAG.getBitcast(NewVecTy, V1);
// Construct the DUP instruction
V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
// Cast back to the original type
return DAG.getBitcast(VT, V1);
}
}
if (isREVMask(ShuffleMask, VT, 64))
return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 32))
return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 16))
return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
(VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
ShuffleVectorInst::isReverseMask(ShuffleMask)) {
SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
DAG.getConstant(8, dl, MVT::i32));
}
bool ReverseEXT = false;
unsigned Imm;
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
if (ReverseEXT)
std::swap(V1, V2);
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
DAG.getConstant(Imm, dl, MVT::i32));
} else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isUZPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isTRNMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
return Concat;
bool DstIsLeft;
int Anomaly;
int NumInputElements = V1.getValueType().getVectorNumElements();
if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
SDValue DstVec = DstIsLeft ? V1 : V2;
SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
SDValue SrcVec = V1;
int SrcLane = ShuffleMask[Anomaly];
if (SrcLane >= NumInputElements) {
SrcVec = V2;
SrcLane -= VT.getVectorNumElements();
}
SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
EVT ScalarVT = VT.getVectorElementType();
if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
ScalarVT = MVT::i32;
return DAG.getNode(
ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
DstLaneV);
}
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = ShuffleMask[i];
}
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
PFIndexes[2] * 9 + PFIndexes[3];
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
if (Cost <= 4)
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
return GenerateTBL(Op, ShuffleMask, DAG);
}
SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT ElemVT = VT.getScalarType();
SDValue SplatVal = Op.getOperand(0);
if (useSVEForFixedLengthVectorVT(VT))
return LowerToScalableOp(Op, DAG);
// Extend input splat value where needed to fit into a GPR (32b or 64b only).
// FPRs don't have this restriction.
switch (ElemVT.getSimpleVT().SimpleTy) {
case MVT::i1: {
// The only legal i1 vectors are SVE vectors, so we can use SVE-specific
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
// TODO: Add special case for constant false
}
// The general case of i1. There isn't any natural way to do this,
// so we use some trickery with whilelo.
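// Sign-extending the i1 gives either 0 or all-ones, so whilelo(0, 0) yields
// an all-false predicate and whilelo(0, ~0) an all-true one, which is
// exactly the splat we need.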
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
DAG.getValueType(MVT::i1));
SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
MVT::i64);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
DAG.getConstant(0, dl, MVT::i64), SplatVal);
}
case MVT::i8:
case MVT::i16:
case MVT::i32:
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
break;
case MVT::i64:
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
break;
case MVT::f16:
case MVT::bf16:
case MVT::f32:
case MVT::f64:
// Fine as is
break;
default:
report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
}
return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
}
SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (!isTypeLegal(VT) || !VT.isScalableVector())
return SDValue();
// Current lowering only supports the SVE-ACLE types.
if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
return SDValue();
// The DUPQ operation is independent of element type so normalise to i64s.
SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
SDValue Idx128 = Op.getOperand(2);
// DUPQ can be used when idx is in range.
auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
if (CIdx && (CIdx->getZExtValue() <= 3)) {
SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
SDNode *DUPQ =
DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
}
// The ACLE says this must produce the same result as:
// svtbl(data, svadd_x(svptrue_b64(),
// svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
// index * 2))
SDValue One = DAG.getConstant(1, DL, MVT::i64);
SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
// create the vector 0,1,0,1,...
SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
// create the vector idx64,idx64+1,idx64,idx64+1,...
SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
// create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
}
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
for (unsigned i = 0; i < NumSplats; ++i) {
CnstBits <<= SplatBitSize;
UndefBits <<= SplatBitSize;
CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
}
return true;
}
return false;
}
// Try 64-bit splatted SIMD immediate.
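// Only constants whose 64-bit pattern repeats across the vector and whose
// bytes are each 0x00 or 0xff (MOVI type 10) qualify here, e.g. a splat of
// 0x00ff00ff00ff00ff.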
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 32-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
Shift = 0;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
Shift = 8;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
Shift = 16;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
Shift = 24;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov;
if (LHS)
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 16-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
Shift = 0;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
Shift = 8;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov;
if (LHS)
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 32-bit splatted SIMD immediate with shifted ones.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
SelectionDAG &DAG, const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
Shift = 264;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
Shift = 272;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 8-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try FP splatted SIMD immediate.
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
bool isWide = (VT.getSizeInBits() == 128);
MVT MovTy;
bool isAdvSIMDModImm = false;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
}
else if (isWide &&
(isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
MovTy = MVT::v2f64;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Specialized code to quickly find if PotentialBVec is a BuildVector that
// consists of only the same constant int value, returned in reference arg
// ConstVal
static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
uint64_t &ConstVal) {
BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
if (!Bvec)
return false;
ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
if (!FirstElt)
return false;
EVT VT = Bvec->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 1; i < NumElts; ++i)
if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
return false;
ConstVal = FirstElt->getZExtValue();
return true;
}
static unsigned getIntrinsicID(const SDNode *N) {
unsigned Opcode = N->getOpcode();
switch (Opcode) {
default:
return Intrinsic::not_intrinsic;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return IID;
return Intrinsic::not_intrinsic;
}
}
}
// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
// BUILD_VECTOR with constant element C1, C2 is a constant, and:
// - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
// - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
// The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
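// For example, on v4i32 with C2 == 8 and C1 == 0x000000ff, the pattern
// (or (and X, 0xff), (shl Y, 8)) keeps the low 8 bits of each lane of X and
// inserts Y shifted left by 8, which is exactly what SLI X, Y, #8 does.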
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (!VT.isVector())
return SDValue();
SDLoc DL(N);
SDValue And;
SDValue Shift;
SDValue FirstOp = N->getOperand(0);
unsigned FirstOpc = FirstOp.getOpcode();
SDValue SecondOp = N->getOperand(1);
unsigned SecondOpc = SecondOp.getOpcode();
// Is one of the operands an AND or a BICi? The AND may have been optimised to
// a BICi in order to use an immediate instead of a register.
// Is the other operand a shl or lshr? This will have been turned into:
// AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
(SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
And = FirstOp;
Shift = SecondOp;
} else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
(FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
And = SecondOp;
Shift = FirstOp;
} else
return SDValue();
bool IsAnd = And.getOpcode() == ISD::AND;
bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
// Is the shift amount constant?
ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
if (!C2node)
return SDValue();
uint64_t C1;
if (IsAnd) {
// Is the and mask vector all constant?
if (!isAllConstantBuildVector(And.getOperand(1), C1))
return SDValue();
} else {
// Reconstruct the corresponding AND immediate from the two BICi immediates.
ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
assert(C1nodeImm && C1nodeShift);
C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
}
// Is C1 == ~(Ones(ElemSizeInBits) << C2) or
// C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
// how much one can shift elements of a particular size?
uint64_t C2 = C2node->getZExtValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
if (C2 > ElemSizeInBits)
return SDValue();
APInt C1AsAPInt(ElemSizeInBits, C1);
APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
: APInt::getLowBitsSet(ElemSizeInBits, C2);
if (C1AsAPInt != RequiredC1)
return SDValue();
SDValue X = And.getOperand(0);
SDValue Y = Shift.getOperand(0);
unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
LLVM_DEBUG(N->dump(&DAG));
LLVM_DEBUG(dbgs() << "into: \n");
LLVM_DEBUG(ResultSLI->dump(&DAG));
++NumShiftInserts;
return ResultSLI;
}
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToScalableOp(Op, DAG);
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
return Res;
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
if (!BVN) {
// OR commutes, so try swapping the operands.
LHS = Op.getOperand(1);
BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
}
if (!BVN)
return Op;
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
DefBits, &LHS)))
return NewOp;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
UndefBits, &LHS)))
return NewOp;
}
// We can always fall back to a non-immediate OR.
return Op;
}
// Normalize the operands of BUILD_VECTOR. The value of constant operands will
// be truncated to fit element width.
static SDValue NormalizeBuildVector(SDValue Op,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT EltTy = VT.getVectorElementType();
if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
return Op;
SmallVector<SDValue, 16> Ops;
for (SDValue Lane : Op->ops()) {
// For integer vectors, type legalization would have promoted the
// operands already. Otherwise, if Op is a floating-point splat
// (with operands cast to integers), then the only possibilities
// are constants and UNDEFs.
if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
} else if (Lane.getNode()->isUndef()) {
Lane = DAG.getUNDEF(MVT::i32);
} else {
assert(Lane.getValueType() == MVT::i32 &&
"Unexpected BUILD_VECTOR operand type");
}
Ops.push_back(Lane);
}
return DAG.getBuildVector(VT, dl, Ops);
}
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
DefBits = UndefBits;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
DefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Try to build a simple constant vector.
Op = NormalizeBuildVector(Op, DAG);
if (VT.isInteger()) {
// Certain vector constants, used to express things like logical NOT and
// arithmetic NEG, are passed through unmodified. This allows special
// patterns for these operations to match, which will lower these constants
// to whatever is proven necessary.
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (BVN->isConstant())
if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
unsigned BitSize = VT.getVectorElementType().getSizeInBits();
APInt Val(BitSize,
Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
if (Val.isNullValue() || Val.isAllOnesValue())
return Op;
}
}
if (SDValue V = ConstantBuildVector(Op, DAG))
return V;
// Scan through the operands to find some interesting properties we can
// exploit:
// 1) If only one value is used, we can use a DUP, or
// 2) if only the low element is not undef, we can just insert that, or
// 3) if only one constant value is used (w/ some non-constant lanes),
// we can splat the constant value into the whole vector then fill
// in the non-constant lanes.
// 4) FIXME: If different constant values are used, but we can intelligently
// select the values we'll be overwriting for the non-constant
// lanes such that we can directly materialize the vector
// some other way (MOVI, e.g.), we can be sneaky.
// 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
SDLoc dl(Op);
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
bool usesOnlyOneConstantValue = true;
bool isConstant = true;
bool AllLanesExtractElt = true;
unsigned NumConstantLanes = 0;
unsigned NumDifferentLanes = 0;
unsigned NumUndefLanes = 0;
SDValue Value;
SDValue ConstantValue;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
AllLanesExtractElt = false;
if (V.isUndef()) {
++NumUndefLanes;
continue;
}
if (i > 0)
isOnlyLowElement = false;
if (!isIntOrFPConstant(V))
isConstant = false;
if (isIntOrFPConstant(V)) {
++NumConstantLanes;
if (!ConstantValue.getNode())
ConstantValue = V;
else if (ConstantValue != V)
usesOnlyOneConstantValue = false;
}
if (!Value.getNode())
Value = V;
else if (V != Value) {
usesOnlyOneValue = false;
++NumDifferentLanes;
}
}
if (!Value.getNode()) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
return DAG.getUNDEF(VT);
}
// Convert BUILD_VECTOR where all elements but the lowest are undef into
// SCALAR_TO_VECTOR, except for when we have a single-element constant vector
// as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
"SCALAR_TO_VECTOR node\n");
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
}
if (AllLanesExtractElt) {
SDNode *Vector = nullptr;
bool Even = false;
bool Odd = false;
// Check whether the extract elements match the Even pattern <0,2,4,...> or
// the Odd pattern <1,3,5,...>.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
const SDNode *N = V.getNode();
if (!isa<ConstantSDNode>(N->getOperand(1)))
break;
SDValue N0 = N->getOperand(0);
// All elements are extracted from the same vector.
if (!Vector) {
Vector = N0.getNode();
// Check that the type of EXTRACT_VECTOR_ELT matches the type of
// BUILD_VECTOR.
if (VT.getVectorElementType() !=
N0.getValueType().getVectorElementType())
break;
} else if (Vector != N0.getNode()) {
Odd = false;
Even = false;
break;
}
// Extracted values are either at Even indices <0,2,4,...> or at Odd
// indices <1,3,5,...>.
uint64_t Val = N->getConstantOperandVal(1);
if (Val == 2 * i) {
Even = true;
continue;
}
if (Val - 1 == 2 * i) {
Odd = true;
continue;
}
// Something does not match: abort.
Odd = false;
Even = false;
break;
}
if (Even || Odd) {
SDValue LHS =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
DAG.getConstant(0, dl, MVT::i64));
SDValue RHS =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
DAG.getConstant(NumElts, dl, MVT::i64));
if (Even && !Odd)
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
RHS);
if (Odd && !Even)
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
RHS);
}
}
// Use DUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
if (usesOnlyOneValue) {
if (!isConstant) {
if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Value.getValueType() != VT) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
}
// This is actually a DUPLANExx operation, which keeps everything vectory.
SDValue Lane = Value.getOperand(1);
Value = Value.getOperand(0);
if (Value.getValueSizeInBits() == 64) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
"widening it\n");
Value = WidenVector(Value, DAG);
}
unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
return DAG.getNode(Opcode, dl, VT, Value, Lane);
}
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
EVT EltTy = VT.getVectorElementType();
assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
EltTy == MVT::f64) && "Unsupported floating-point vector type");
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
"BITCASTS, and try again\n");
MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
Val.dump(););
Val = LowerBUILD_VECTOR(Val, DAG);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
}
// If we need to insert a small number of different non-constant elements and
// the vector width is sufficiently large, prefer using DUP with the common
// value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
// skip the constant lane handling below.
bool PreferDUPAndInsert =
!isConstant && NumDifferentLanes >= 1 &&
NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
NumDifferentLanes >= NumConstantLanes;
// If only one constant value was used, across more than one lane, start by
// splatting that value, then replace the non-constant lanes. This
// is better than the default, which will perform a separate initialization
// for each lane.
if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
// Firstly, try to materialize the splat constant.
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
Val = ConstantBuildVector(Vec, DAG);
if (!Val) {
// Otherwise, materialize the constant and splat it.
Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
}
// Now insert the non-constant lanes.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
if (!isIntOrFPConstant(V))
// Note that type legalization likely mucked about with the VT of the
// source operand, so we may have to convert it here before inserting.
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
}
return Val;
}
// This will generate a load from the constant pool.
if (isConstant) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
"expansion\n");
return SDValue();
}
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
if (SDValue shuffle = ReconstructShuffle(Op, DAG))
return shuffle;
}
if (PreferDUPAndInsert) {
// First, build a constant vector with the common element.
SmallVector<SDValue, 8> Ops(NumElts, Value);
SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
// Next, insert the elements that do not match the common value.
for (unsigned I = 0; I < NumElts; ++I)
if (Op.getOperand(I) != Value)
NewVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
return NewVector;
}
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
// scalar_to_vector for the elements followed by a shuffle (provided the
// shuffle is valid for the target) and materialization element by element
// on the stack followed by a load for everything else.
if (!isConstant && !usesOnlyOneValue) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
"of INSERT_VECTOR_ELT\n");
SDValue Vec = DAG.getUNDEF(VT);
SDValue Op0 = Op.getOperand(0);
unsigned i = 0;
// Use SCALAR_TO_VECTOR for lane zero to
// a) Avoid a RMW dependency on the full vector register, and
// b) Allow the register coalescer to fold away the copy if the
// value is already in an S or D register, and we're forced to emit an
// INSERT_SUBREG that we can't fold anywhere.
//
// We also allow types like i8 and i16 which are illegal scalar but legal
// vector element types. After type-legalization the inserted value is
// extended (i32) and it is safe to cast them to the vector type by ignoring
// the upper bits of the lowest lane (e.g. v8i8, v4i16).
if (!Op0.isUndef()) {
LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
++i;
}
LLVM_DEBUG(if (i < NumElts) dbgs()
<< "Creating nodes for the other vector elements:\n";);
for (; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
}
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
"better alternative\n");
return SDValue();
}
SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
assert(Op.getValueType().isScalableVector() &&
isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthInsertVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
EVT VectorVT = getPromotedVTForPredicate(VT);
SDLoc DL(Op);
SDValue ExtendedVector =
DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
SDValue ExtendedValue =
DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
VectorVT.getScalarType().getSizeInBits() < 32
? MVT::i32
: VectorVT.getScalarType());
ExtendedVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
ExtendedValue, Op.getOperand(2));
return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
}
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform insertion by expanding the value
// to a V128 type and performing the insertion on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();
SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
Op.getOperand(1), Op.getOperand(2));
// Re-narrow the resultant vector.
return NarrowVector(Node, DAG);
}
SDValue
AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
// We can't directly extract from an SVE predicate; extend it first.
// (This isn't the only possible lowering, but it's straightforward.)
EVT VectorVT = getPromotedVTForPredicate(VT);
SDLoc DL(Op);
SDValue Extend =
DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
Extend, Op.getOperand(1));
return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
}
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthExtractVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform extraction by expanding the value
// to a V128 type and performing the extraction on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();
EVT ExtrTy = WideTy.getVectorElementType();
if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
ExtrTy = MVT::i32;
// For extractions, we just return the result directly.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isFixedLengthVector() &&
"Only cases that extract a fixed length vector are supported!");
EVT InVT = Op.getOperand(0).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Size = Op.getValueSizeInBits();
if (InVT.isScalableVector()) {
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG))
return Op;
return SDValue();
}
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
if (Idx == 0 && InVT.getSizeInBits() <= 128)
return Op;
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
InVT.getSizeInBits() == 128)
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isScalableVector() &&
"Only expect to lower inserts into scalable vectors!");
EVT InVT = Op.getOperand(1).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
if (InVT.isScalableVector()) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (!isTypeLegal(VT) || !VT.isInteger())
return SDValue();
SDValue Vec0 = Op.getOperand(0);
SDValue Vec1 = Op.getOperand(1);
// Ensure the subvector is half the size of the main vector.
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
// Extend elements of smaller vector...
EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
if (Idx == 0) {
SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
} else if (Idx == InVT.getVectorMinNumElements()) {
SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
}
return SDValue();
}
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
assert(VT.isScalableVector() && "Expected a scalable vector.");
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode);
// SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
// operations, and truncate the result.
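// For example, an nxv16i8 division is unpacked into two nxv8i16 halves
// (which are widened again to nxv4i32 when they reach this lowering),
// divided, and the narrowed results are recombined with UZP1.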
EVT WidenedVT;
if (VT == MVT::nxv16i8)
WidenedVT = MVT::nxv8i16;
else if (VT == MVT::nxv8i16)
WidenedVT = MVT::nxv4i32;
else
llvm_unreachable("Unexpected Custom DIV operation");
SDLoc dl(Op);
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
}
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
if (useSVEForFixedLengthVectorVT(VT))
return false;
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (M[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = M[i];
}
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
PFIndexes[2] * 9 + PFIndexes[3];
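// (For example, the identity mask <0,1,2,3> gives 0*729 + 1*81 + 2*9 + 3 = 102.)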
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
if (Cost <= 4)
return true;
}
bool DummyBool;
int DummyInt;
unsigned DummyUnsigned;
return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
// isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
isZIPMask(M, VT, DummyUnsigned) ||
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
isConcatMask(M, VT, VT.getSizeInBits() == 128));
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
// Ignore bit_converts.
while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
HasAnyUndefs, ElementBits) ||
SplatBitSize > ElementBits)
return false;
Cnt = SplatBits.getSExtValue();
return true;
}
/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation. That value must be in the range:
/// 0 <= Value < ElementBits for a left shift; or
/// 0 <= Value <= ElementBits for a long left shift.
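/// For example, with v8i16 elements (ElementBits = 16) a regular left shift
/// accepts shift amounts 0..15, while a long left shift also accepts 16.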
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
}
/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
/// 1 <= Value <= ElementBits for a right shift; or
/// 1 <= Value <= ElementBits/2 for a narrowing right shift.
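/// For example, with v8i16 elements (ElementBits = 16) a right shift accepts
/// shift amounts 1..16, while a narrowing right shift accepts only 1..8.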
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}
SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.getScalarType() == MVT::i1) {
// Lower i1 truncate to `(x & 1) != 0`.
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
SDValue Zero = DAG.getConstant(0, dl, OpVT);
SDValue One = DAG.getConstant(1, dl, OpVT);
SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
}
if (!VT.isVector() || VT.isScalableVector())
return SDValue();
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
int64_t Cnt;
if (!Op.getOperand(1).getValueType().isVector())
return Op;
unsigned EltSize = VT.getScalarSizeInBits();
switch (Op.getOpcode()) {
default:
llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
MVT::i32),
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
: AArch64ISD::SRL_PRED;
return LowerToPredicatedOp(Op, DAG, Opc);
}
// Right shift immediate
if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
}
// Right shift register. Note that there is no shift right register
// instruction, but the shift left register instruction takes a signed
// value, where negative numbers specify a right shift.
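// For example, a variable "srl x, y" is emitted below as the ushl intrinsic
// applied to x and (0 - y).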
unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
: Intrinsic::aarch64_neon_ushl;
// negate the shift amount
SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(1));
SDValue NegShiftLeft =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
NegShift);
return NegShiftLeft;
}
return SDValue();
}
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode CC, bool NoNans, EVT VT,
const SDLoc &dl, SelectionDAG &DAG) {
EVT SrcVT = LHS.getValueType();
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
"function only supposed to emit natural comparisons");
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
bool IsZero = IsCnst && (CnstBits == 0);
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
SDValue Fcmeq;
if (IsZero)
Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
else
Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Fcmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
case AArch64CC::LS:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (!NoNans)
return SDValue();
// If we ignore NaNs then we can use the MI implementation.
LLVM_FALLTHROUGH;
case AArch64CC::MI:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
}
}
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
SDValue Cmeq;
if (IsZero)
Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
else
Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Cmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
case AArch64CC::LS:
return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
case AArch64CC::LO:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
case AArch64CC::HI:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
case AArch64CC::HS:
return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
}
}
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
SDLoc dl(Op);
if (LHS.getValueType().getVectorElementType().isInteger()) {
assert(LHS.getValueType() == RHS.getValueType());
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
SDValue Cmp =
EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
// Make v4f16 (only) fcmp operations utilise vector instructions
// v8f16 support will be a little more complicated
if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
if (LHS.getValueType().getVectorNumElements() == 4) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
DAG.ReplaceAllUsesWith(Op, NewSetcc);
CmpVT = MVT::v4i32;
} else
return SDValue();
}
assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
LHS.getValueType().getVectorElementType() != MVT::f128);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
AArch64CC::CondCode CC1, CC2;
bool ShouldInvert;
changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
SDValue Cmp =
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
return SDValue();
if (CC2 != AArch64CC::AL) {
SDValue Cmp2 =
EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
if (!Cmp2.getNode())
return SDValue();
Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
}
Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
if (ShouldInvert)
Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
return Cmp;
}
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
SelectionDAG &DAG) {
SDValue VecOp = ScalarOp.getOperand(0);
auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
DAG.getConstant(0, DL, MVT::i64));
}
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
// Try to lower fixed length reductions to SVE.
EVT SrcVT = Src.getValueType();
bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
Op.getOpcode() == ISD::VECREDUCE_XOR ||
Op.getOpcode() == ISD::VECREDUCE_FADD ||
(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);
if (SrcVT.isScalableVector() ||
useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
if (SrcVT.getVectorElementType() == MVT::i1)
return LowerPredReductionToSVE(Op, DAG);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
case ISD::VECREDUCE_AND:
return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
case ISD::VECREDUCE_OR:
return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
case ISD::VECREDUCE_SMAX:
return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_SMIN:
return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
case ISD::VECREDUCE_UMAX:
return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_UMIN:
return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
case ISD::VECREDUCE_XOR:
return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
case ISD::VECREDUCE_FADD:
return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
case ISD::VECREDUCE_FMAX:
return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
case ISD::VECREDUCE_FMIN:
return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
default:
llvm_unreachable("Unhandled fixed length reduction");
}
}
// Lower NEON reductions.
SDLoc dl(Op);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
case ISD::VECREDUCE_SMAX:
return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
case ISD::VECREDUCE_SMIN:
return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
case ISD::VECREDUCE_UMAX:
return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
case ISD::VECREDUCE_UMIN:
return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
case ISD::VECREDUCE_FMAX: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
Src);
}
case ISD::VECREDUCE_FMIN: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
Src);
}
default:
llvm_unreachable("Unhandled reduction");
}
}
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-add instruction, but not a load-sub.
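// Instead, negate the operand and emit a load-add: subtracting RHS is the
// same as adding (0 - RHS).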
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
Op.getOperand(0), Op.getOperand(1), RHS,
AN->getMemOperand());
}
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-clear instruction, but not a load-and.
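// Instead, invert the operand and emit a load-clear: ANDing with RHS is the
// same as clearing the bits that are set in ~RHS.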
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
Op.getOperand(0), Op.getOperand(1), RHS,
AN->getMemOperand());
}
SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
Chain =
DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
DAG.getRegisterMask(Mask), Chain.getValue(1));
// To match the actual intent better, we should read the output from X15 here
// again (instead of potentially spilling it to the stack), but rereading Size
// from X15 here doesn't work at -O0, since it thinks that X15 is undefined
// here.
Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
return Chain;
}
SDValue
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() &&
"Only Windows alloca probing supported");
SDLoc dl(Op);
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
MaybeAlign Align =
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
EVT VT = Node->getValueType(0);
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
"no-stack-arg-probe")) {
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
}
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
}
SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT != MVT::i64 && "Expected illegal VSCALE node");
SDLoc DL(Op);
APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)),
DL, VT);
}
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
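/// For example, for aarch64_sve_st3 of nxv4i32 values the resulting memVT is
/// nxv12i32 (three times the element count of a single vector operand).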
template <unsigned NumVecs>
static bool
setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
Info.opc = ISD::INTRINSIC_VOID;
// Retrieve EC from first vector argument.
const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
ElementCount EC = VT.getVectorElementCount();
#ifndef NDEBUG
// Check the assumption that all input vectors are the same type.
for (unsigned I = 0; I < NumVecs; ++I)
assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
"Invalid type.");
#endif
// memVT is `NumVecs * VT`.
Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
EC * NumVecs);
Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1);
Info.offset = 0;
Info.align.reset();
Info.flags = MachineMemOperand::MOStore;
return true;
}
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
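/// For example, an aarch64_neon_ld2 returning two <4 x i32> vectors is given a
/// conservative memVT of v4i64 (256 bits in total).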
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_sve_st2:
return setInfoSVEStN<2>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st3:
return setInfoSVEStN<3>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st4:
return setInfoSVEStN<4>(*this, DL, Info, I);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
Info.align.reset();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
return true;
}
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
Info.align.reset();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
return true;
}
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_ldaxp:
case Intrinsic::aarch64_ldxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_stlxp:
case Intrinsic::aarch64_stxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_sve_ldnt1: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOLoad;
if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
Info.flags |= MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_sve_stnt1: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getOperand(0)->getType());
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOStore;
if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
Info.flags |= MachineMemOperand::MONonTemporal;
return true;
}
default:
break;
}
return false;
}
bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
// TODO: This may be worth removing. Check regression tests for diffs.
if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
return false;
// If we're reducing the load width in order to avoid having to use an extra
// instruction to do extension then it's probably a good idea.
if (ExtTy != ISD::NON_EXTLOAD)
return true;
// Don't reduce load width if it would prevent us from combining a shift into
// the offset.
MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
assert(Mem);
const SDValue &Base = Mem->getBasePtr();
if (Base.getOpcode() == ISD::ADD &&
Base.getOperand(1).getOpcode() == ISD::SHL &&
Base.getOperand(1).hasOneUse() &&
Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
// The shift can be combined if it matches the size of the value being
// loaded (and so reducing the width would make it not match).
uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
if (ShiftAmount == Log2_32(LoadBytes))
return false;
}
// We have no reason to disallow reducing the load width, so allow it.
return true;
}
// Truncations from 64-bit GPR to 32-bit GPR are free.
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
return NumBits1 > NumBits2;
}
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
uint64_t NumBits1 = VT1.getFixedSizeInBits();
uint64_t NumBits2 = VT2.getFixedSizeInBits();
return NumBits1 > NumBits2;
}
/// Check if it is profitable to hoist an instruction in then/else to if.
/// Not profitable if I and its user can form an FMA instruction
/// because we prefer FMSUB/FMADD.
bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
if (I->getOpcode() != Instruction::FMul)
return true;
if (!I->hasOneUse())
return true;
Instruction *User = I->user_back();
if (User &&
!(User->getOpcode() == Instruction::FSub ||
User->getOpcode() == Instruction::FAdd))
return true;
const TargetOptions &Options = getTargetMachine().Options;
const Function *F = I->getFunction();
const DataLayout &DL = F->getParent()->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath));
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
// 64-bit GPR.
bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
EVT VT1 = Val.getValueType();
if (isZExtFree(VT1, VT2)) {
return true;
}
if (Val.getOpcode() != ISD::LOAD)
return false;
// 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
VT1.getSizeInBits() <= 32);
}
bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
if (isa<FPExtInst>(Ext))
return false;
// Vector types are not free.
if (Ext->getType()->isVectorTy())
return false;
for (const Use &U : Ext->uses()) {
// The extension is free if we can fold it with a left shift in an
// addressing mode or an arithmetic operation: add, sub, and cmp.
// Is there a shift?
const Instruction *Instr = cast<Instruction>(U.getUser());
// Is this a constant shift?
switch (Instr->getOpcode()) {
case Instruction::Shl:
if (!isa<ConstantInt>(Instr->getOperand(1)))
return false;
break;
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
auto &DL = Ext->getModule()->getDataLayout();
std::advance(GTI, U.getOperandNo()-1);
Type *IdxTy = GTI.getIndexedType();
// This extension will end up with a shift because of the scaling factor.
// 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
// Get the shift amount based on the scaling factor:
// log2(sizeof(IdxTy)) - log2(8).
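// For example, an i64 index type has a 64-bit store size, so
// ShiftAmt = log2(64) - 3 = 3.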
uint64_t ShiftAmt =
countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
// Is the constant foldable in the shift of the addressing mode?
// I.e., shift amount is between 1 and 4 inclusive.
if (ShiftAmt == 0 || ShiftAmt > 4)
return false;
break;
}
case Instruction::Trunc:
// Check if this is a noop.
// trunc(sext ty1 to ty2) to ty1.
if (Instr->getType() == Ext->getOperand(0)->getType())
continue;
LLVM_FALLTHROUGH;
default:
return false;
}
// At this point we can use the bfm family, so this extension is free
// for that use.
}
return true;
}
/// Check if both Op1 and Op2 are shufflevector extracts of either the lower
/// or upper half of the vector elements.
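/// For example, two shuffles that each take the upper half (elements 8..15) of
/// their respective <16 x i8> sources as an <8 x i8> result satisfy this.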
static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
auto *FullTy = FullV->getType();
auto *HalfTy = HalfV->getType();
return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
};
auto extractHalf = [](Value *FullV, Value *HalfV) {
auto *FullVT = cast<FixedVectorType>(FullV->getType());
auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
};
ArrayRef<int> M1, M2;
Value *S1Op1, *S2Op1;
if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
!match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
return false;
// Check that the operands are half as wide as the result and we extract
// half of the elements of the input vectors.
if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
!extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
return false;
// Check the mask extracts either the lower or upper half of vector
// elements.
int M1Start = -1;
int M2Start = -1;
int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
!ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
return false;
return true;
}
/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
/// of the vector elements.
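/// For example, a pair of sext (or zext) instructions from <8 x i8> to
/// <8 x i16> qualifies, since each doubles the element width.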
static bool areExtractExts(Value *Ext1, Value *Ext2) {
auto areExtDoubled = [](Instruction *Ext) {
return Ext->getType()->getScalarSizeInBits() ==
2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
};
if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
!match(Ext2, m_ZExtOrSExt(m_Value())) ||
!areExtDoubled(cast<Instruction>(Ext1)) ||
!areExtDoubled(cast<Instruction>(Ext2)))
return false;
return true;
}
/// Check if Op could be used with vmull_high_p64 intrinsic.
static bool isOperandOfVmullHighP64(Value *Op) {
Value *VectorOperand = nullptr;
ConstantInt *ElementIndex = nullptr;
return match(Op, m_ExtractElt(m_Value(VectorOperand),
m_ConstantInt(ElementIndex))) &&
ElementIndex->getValue() == 1 &&
isa<FixedVectorType>(VectorOperand->getType()) &&
cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
}
/// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// shufflevector extracts and/or sext/zext can be folded into (u,s)subl(2).
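/// For example, if the operands of an add are extends of the upper halves of
/// two vectors, sinking the extends (and their extracting shuffles) next to
/// the add allows them to be folded into a single widening instruction such as
/// uaddl2.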
bool AArch64TargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
if (!I->getType()->isVectorTy())
return false;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::aarch64_neon_umull:
if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
return false;
Ops.push_back(&II->getOperandUse(0));
Ops.push_back(&II->getOperandUse(1));
return true;
case Intrinsic::aarch64_neon_pmull64:
if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
II->getArgOperand(1)))
return false;
Ops.push_back(&II->getArgOperandUse(0));
Ops.push_back(&II->getArgOperandUse(1));
return true;
default:
return false;
}
}
switch (I->getOpcode()) {
case Instruction::Sub:
case Instruction::Add: {
if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
return false;
// If the exts' operands extract either the lower or upper elements, we
// can sink them too.
auto Ext1 = cast<Instruction>(I->getOperand(0));
auto Ext2 = cast<Instruction>(I->getOperand(1));
if (areExtractShuffleVectors(Ext1, Ext2)) {
Ops.push_back(&Ext1->getOperandUse(0));
Ops.push_back(&Ext2->getOperandUse(0));
}
Ops.push_back(&I->getOperandUse(0));
Ops.push_back(&I->getOperandUse(1));
return true;
}
case Instruction::Mul: {
bool IsProfitable = false;
for (auto &Op : I->operands()) {
// Make sure we are not already sinking this operand
if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
if (!Shuffle || !Shuffle->isZeroEltSplat())
continue;
Value *ShuffleOperand = Shuffle->getOperand(0);
InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
if (!Insert)
continue;
Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
if (!OperandInstr)
continue;
ConstantInt *ElementConstant =
dyn_cast<ConstantInt>(Insert->getOperand(2));
// Check that the insertelement is inserting into element 0
if (!ElementConstant || ElementConstant->getZExtValue() != 0)
continue;
unsigned Opcode = OperandInstr->getOpcode();
if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
continue;
Ops.push_back(&Shuffle->getOperandUse(0));
Ops.push_back(&Op);
IsProfitable = true;
}
return IsProfitable;
}
default:
return false;
}
return false;
}
bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
Align &RequiredAligment) const {
if (!LoadedType.isSimple() ||
(!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
return false;
// Cyclone supports unaligned accesses.
RequiredAligment = Align(1);
unsigned NumBits = LoadedType.getSizeInBits();
return NumBits == 32 || NumBits == 64;
}
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
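/// For example, a 512-bit vector type such as <16 x i32> is lowered using
/// (512 + 127) / 128 = 4 interleaved accesses.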
unsigned
AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
const DataLayout &DL) const {
return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
}
MachineMemOperand::Flags
AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
return MOStridedAccess;
return MachineMemOperand::MONone;
}
bool AArch64TargetLowering::isLegalInterleavedAccessType(
VectorType *VecTy, const DataLayout &DL) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
// Ensure the number of vector elements is greater than 1.
if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
return false;
// Ensure the element type is legal.
if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
return false;
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
return VecSize == 64 || VecSize % 128 == 0;
}
/// Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
///
/// Into:
/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool AArch64TargetLowering::lowerInterleavedLoad(
LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices, unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
assert(!Shuffles.empty() && "Empty shufflevector input");
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");
const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VTy = Shuffles[0]->getType();
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
return false;
unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
auto *FVTy = cast<FixedVectorType>(VTy);
// A pointer vector cannot be the return type of the ldN intrinsics. We need
// to load integer vectors first and then convert them to pointer vectors.
Type *EltTy = FVTy->getElementType();
if (EltTy->isPointerTy())
FVTy =
FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
IRBuilder<> Builder(LI);
// The base address of the load.
Value *BaseAddr = LI->getPointerOperand();
if (NumLoads > 1) {
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
FVTy = FixedVectorType::get(FVTy->getElementType(),
FVTy->getNumElements() / NumLoads);
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
Type *Tys[2] = {FVTy, PtrTy};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
Intrinsic::aarch64_neon_ld3,
Intrinsic::aarch64_neon_ld4};
Function *LdNFunc =
Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
// replace.
DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
FVTy->getNumElements() * Factor);
CallInst *LdN = Builder.CreateCall(
LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
// Extract and store the sub-vectors returned by the load intrinsic.
for (unsigned i = 0; i < Shuffles.size(); i++) {
ShuffleVectorInst *SVI = Shuffles[i];
unsigned Index = Indices[i];
Value *SubVec = Builder.CreateExtractValue(LdN, Index);
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
FVTy->getNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
// Replace uses of the shufflevector instructions with the sub-vectors
// returned by the load intrinsic. If a shufflevector instruction is
// associated with more than one sub-vector, those sub-vectors will be
// concatenated into a single wide vector.
for (ShuffleVectorInst *SVI : Shuffles) {
auto &SubVec = SubVecs[SVI];
auto *WideVec =
SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
SVI->replaceAllUsesWith(WideVec);
}
return true;
}
/// Lower an interleaved store into a stN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// st3 instruction in CodeGen.
///
/// Example for a more general valid mask (Factor 3). Lower:
/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
unsigned LaneLen = VecTy->getNumElements() / Factor;
Type *EltTy = VecTy->getElementType();
auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
unsigned NumOpElts =
cast<FixedVectorType>(Op0->getType())->getNumElements();
// Convert to the corresponding integer vector.
auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
SubVecTy = FixedVectorType::get(IntTy, LaneLen);
}
// The base address of the store.
Value *BaseAddr = SI->getPointerOperand();
if (NumStores > 1) {
// If we're going to generate more than one store, reset the lane length
// and sub-vector type to something legal.
LaneLen /= NumStores;
SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
}
auto Mask = SVI->getShuffleMask();
Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
Type *Tys[2] = {SubVecTy, PtrTy};
static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
Intrinsic::aarch64_neon_st3,
Intrinsic::aarch64_neon_st4};
Function *StNFunc =
Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
SmallVector<Value *, 5> Ops;
// Split the shufflevector operands into sub vectors for the new stN call.
for (unsigned i = 0; i < Factor; i++) {
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
unsigned IdxJ = StoreCount * LaneLen * Factor + j;
if (Mask[IdxJ * Factor + IdxI] >= 0) {
StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
break;
}
}
// Note: Filling undef gaps with arbitrary elements is OK, since
// those elements were being written anyway (with undefs).
// In the all-undef case we default to using elements from 0.
// Note: StartMask cannot be negative; that is checked in
// isReInterleaveMask.
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
}
}
// If we're generating more than one store, compute the base address of
// subsequent stores as an offset from the previous one.
if (StoreCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
BaseAddr, LaneLen * Factor);
Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
Builder.CreateCall(StNFunc, Ops);
}
return true;
}
// Lower an SVE structured load intrinsic returning a tuple type to target
// specific intrinsic taking the same input but returning a multi-result value
// of the split tuple type.
//
// E.g. Lowering an LD3:
//
// call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
// <vscale x 4 x i1> %pred,
// <vscale x 4 x i32>* %addr)
//
// Output DAG:
//
// t0: ch = EntryToken
// t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
// t4: i64,ch = CopyFromReg t0, Register:i64 %1
// t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
// t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
//
// This is called pre-legalization to avoid widening/splitting issues with
// non-power-of-2 tuple types used for LD3, such as nxv12i32.
SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG,
const SDLoc &DL) const {
assert(VT.isScalableVector() && "Can only lower scalable vectors");
unsigned N, Opcode;
static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
{Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
{Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
{Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
"invalid tuple vector type!");
EVT SplitVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
VT.getVectorElementCount().divideCoefficientBy(N));
assert(isTypeLegal(SplitVT));
SmallVector<EVT, 5> VTs(N, SplitVT);
VTs.push_back(MVT::Other); // Chain
SDVTList NodeTys = DAG.getVTList(VTs);
SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
SmallVector<SDValue, 4> PseudoLoadOps;
for (unsigned I = 0; I < N; ++I)
PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
}
EVT AArch64TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat =
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memsets of 32 bytes and above. Below that, it
// would take one instruction to materialize the v2i64 zero and one store (with
// a restrictive addressing mode), so just do i64 stores instead.
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v2i64, Align(16)))
return MVT::v2i64;
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return MVT::f128;
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return MVT::i64;
if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return MVT::i32;
return MVT::Other;
}
LLT AArch64TargetLowering::getOptimalMemOpLLT(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat =
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memsets of 32 bytes and above. Below that, it
// would take one instruction to materialize the v2i64 zero and one store (with
// a restrictive addressing mode), so just do i64 stores instead.
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v2i64, Align(16)))
return LLT::fixed_vector(2, 64);
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return LLT::scalar(128);
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return LLT::scalar(64);
if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return LLT::scalar(32);
return LLT();
}
// 12-bit optionally shifted immediates are legal for adds.
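// For example, 0xabc and 0xabc000 (0xabc shifted left by 12) are legal add
// immediates, whereas 0xabc0 is not.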
bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
if (Immed == std::numeric_limits<int64_t>::min()) {
LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
<< ": avoid UB for INT64_MIN\n");
return false;
}
// Same encoding for add/sub, just flip the sign.
Immed = std::abs(Immed);
bool IsLegal = ((Immed >> 12) == 0 ||
((Immed & 0xfff) == 0 && Immed >> 24 == 0));
LLVM_DEBUG(dbgs() << "Is " << Immed
<< " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
return IsLegal;
}
// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
// immediates is the same as for an add or a sub.
bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
return isLegalAddImmediate(Immed);
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS, Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset
// reg + SIZE_IN_BYTES * 12-bit unsigned offset
// reg1 + reg2
// reg + SIZE_IN_BYTES * reg
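// For example, for an i64 access (SIZE_IN_BYTES = 8) the scaled unsigned
// immediate form covers byte offsets 0, 8, ..., 8 * 4095 = 32760.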
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// No reg+reg+imm addressing.
if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
return false;
// FIXME: Update this method to support scalable addressing modes.
if (isa<ScalableVectorType>(Ty)) {
uint64_t VecElemNumBytes =
DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
return AM.HasBaseReg && !AM.BaseOffs &&
(AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
}
// check reg + imm case:
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;
if (Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (!AM.Scale) {
int64_t Offset = AM.BaseOffs;
// 9-bit signed offset
if (isInt<9>(Offset))
return true;
// 12-bit unsigned offset
unsigned shift = Log2_64(NumBytes);
if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
// Must be a multiple of NumBytes (NumBytes is a power of 2)
(Offset >> shift) << shift == Offset)
return true;
return false;
}
// Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
}
bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
// Consider splitting large offset of struct or array.
return true;
}
InstructionCost AArch64TargetLowering::getScalingFactorCost(
const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
// Scaling factors are not free at all.
// Operands | Rt Latency
// -------------------------------------------
// Rt, [Xn, Xm] | 4
// -------------------------------------------
// Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
// Rt, [Xn, Wm, <extend> #imm] |
if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, so charge a cost of 1 if
// it is not equal to 0 or 1.
return AM.Scale != 0 && AM.Scale != 1;
return -1;
}
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
const MachineFunction &MF, EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f16:
return Subtarget->hasFullFP16();
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}
return false;
}
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
Type *Ty) const {
switch (Ty->getScalarType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
return true;
default:
return false;
}
}
bool AArch64TargetLowering::generateFMAsInMachineCombiner(
EVT VT, CodeGenOpt::Level OptLevel) const {
return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector();
}
const MCPhysReg *
AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
// site. Hence we include LR in the scratch registers, which are in turn added
// as implicit-defs for stackmaps and patchpoints.
static const MCPhysReg ScratchRegs[] = {
AArch64::X16, AArch64::X17, AArch64::LR, 0
};
return ScratchRegs;
}
bool
AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const {
N = N->getOperand(0).getNode();
EVT VT = N->getValueType(0);
// If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
// it with shift to let it be lowered to UBFX.
if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
isa<ConstantSDNode>(N->getOperand(1))) {
uint64_t TruncMask = N->getConstantOperandVal(1);
if (isMask_64(TruncMask) &&
N->getOperand(0).getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
return false;
}
return true;
}
bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
if (BitSize == 0)
return false;
int64_t Val = Imm.getSExtValue();
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
return true;
if ((int64_t)Val < 0)
Val = ~Val;
if (BitSize == 32)
Val &= (1LL << 32) - 1;
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free so return true for one or fewer MOVK.
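// For example (illustrative): 0x0000123400005678 has its highest set bit
// below bit 48 (Shift == 2), so it is materialized with MOVZ/MOVK rather than
// loaded from a constant pool, whereas 0x1234000000000000 (Shift == 3) is
// left as a load.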
return Shift < 3;
}
bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
return (Index == 0 || Index == ResVT.getVectorNumElements());
}
/// Turn vector tests of the signbit in the form of:
/// xor (sra X, elt_size(X)-1), -1
/// into:
/// cmge X, X, #0
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (!Subtarget->hasNEON() || !VT.isVector())
return SDValue();
// There must be a shift right algebraic before the xor, and the xor must be a
// 'not' operation.
SDValue Shift = N->getOperand(0);
SDValue Ones = N->getOperand(1);
if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
!ISD::isBuildVectorAllOnes(Ones.getNode()))
return SDValue();
// The shift should be smearing the sign bit across each vector element.
auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
return SDValue();
return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
}
// Given a vecreduce_add node, detect the below pattern and convert it to the
// node sequence with UABDL, [S|U]ABD and UADDLP.
//
// i32 vecreduce_add(
// v16i32 abs(
// v16i32 sub(
// v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
// =================>
// i32 vecreduce_add(
// v4i32 UADDLP(
// v8i16 add(
// v8i16 zext(
// v8i8 [S|U]ABD low8:v16i8 a, low8:v16i8 b
// v8i16 zext(
// v8i8 [S|U]ABD high8:v16i8 a, high8:v16i8 b
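// In C terms this is roughly a 16-byte sum-of-absolute-differences loop
// (illustrative):
//   for (int i = 0; i < 16; ++i)
//     Sum += abs((int)a[i] - (int)b[i]);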
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
SelectionDAG &DAG) {
// Assumed i32 vecreduce_add
if (N->getValueType(0) != MVT::i32)
return SDValue();
SDValue VecReduceOp0 = N->getOperand(0);
unsigned Opcode = VecReduceOp0.getOpcode();
// Assumed v16i32 abs
if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
return SDValue();
SDValue ABS = VecReduceOp0;
// Assumed v16i32 sub
if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
return SDValue();
SDValue SUB = ABS->getOperand(0);
unsigned Opcode0 = SUB->getOperand(0).getOpcode();
unsigned Opcode1 = SUB->getOperand(1).getOpcode();
// Assumed v16i32 type
if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
return SDValue();
// Assumed zext or sext
bool IsZExt = false;
if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
IsZExt = true;
} else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
IsZExt = false;
} else
return SDValue();
SDValue EXT0 = SUB->getOperand(0);
SDValue EXT1 = SUB->getOperand(1);
// Assumed zext's operand has v16i8 type
if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
return SDValue();
// Pattern is detected. Let's convert it to a sequence of nodes.
SDLoc DL(N);
// First, create the node pattern of UABD/SABD.
SDValue UABDHigh8Op0 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
DAG.getConstant(8, DL, MVT::i64));
SDValue UABDHigh8Op1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
DAG.getConstant(8, DL, MVT::i64));
SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
UABDHigh8Op0, UABDHigh8Op1);
SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);
// Second, create the node pattern of UABAL.
SDValue UABDLo8Op0 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
DAG.getConstant(0, DL, MVT::i64));
SDValue UABDLo8Op1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
DAG.getConstant(0, DL, MVT::i64));
SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
UABDLo8Op0, UABDLo8Op1);
SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);
// Third, create the node of UADDLP.
SDValue UADDLP = DAG.getNode(AArch64ISD::UADDLP, DL, MVT::v4i32, UABAL);
// Fourth, create the node of VECREDUCE_ADD.
return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
}
// Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
// vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
// vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *ST) {
if (!ST->hasDotProd())
return performVecReduceAddCombineWithUADDLP(N, DAG);
SDValue Op0 = N->getOperand(0);
if (N->getValueType(0) != MVT::i32 ||
Op0.getValueType().getVectorElementType() != MVT::i32)
return SDValue();
unsigned ExtOpcode = Op0.getOpcode();
SDValue A = Op0;
SDValue B;
if (ExtOpcode == ISD::MUL) {
A = Op0.getOperand(0);
B = Op0.getOperand(1);
if (A.getOpcode() != B.getOpcode() ||
A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
return SDValue();
ExtOpcode = A.getOpcode();
}
if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
return SDValue();
EVT Op0VT = A.getOperand(0).getValueType();
if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
return SDValue();
SDLoc DL(Op0);
// For non-mla reductions B can be set to 1. For MLA we take the operand of
// the extend B.
if (!B)
B = DAG.getConstant(1, DL, Op0VT);
else
B = B.getOperand(0);
SDValue Zeros =
DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
auto DotOpcode =
(ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
A.getOperand(0), B);
return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
}
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
}
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
// fold (sdiv X, pow2)
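// For example (illustrative), an i32 sdiv by 8 becomes roughly:
//   cmp  w0, #0
//   add  w8, w0, #7
//   csel w8, w8, w0, lt
//   asr  w0, w8, #3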
EVT VT = N->getValueType(0);
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countTrailingZeros();
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
Created.push_back(Cmp.getNode());
Created.push_back(Add.getNode());
Created.push_back(CSel.getNode());
// Divide by pow2.
SDValue SRA =
DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Divisor.isNonNegative())
return SRA;
Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
static bool IsSVECntIntrinsic(SDValue S) {
switch(getIntrinsicID(S.getNode())) {
default:
break;
case Intrinsic::aarch64_sve_cntb:
case Intrinsic::aarch64_sve_cnth:
case Intrinsic::aarch64_sve_cntw:
case Intrinsic::aarch64_sve_cntd:
return true;
}
return false;
}
/// Calculates what the pre-extend type is, based on the extension
/// operation node provided by \p Extend.
///
/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
/// pre-extend type is pulled directly from the operand, while other extend
/// operations need a bit more inspection to get this information.
///
/// \param Extend The SDNode from the DAG that represents the extend operation
/// \param DAG The SelectionDAG hosting the \p Extend node
///
/// \returns The type representing the \p Extend source type, or \p MVT::Other
/// if no valid type can be determined
static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
switch (Extend.getOpcode()) {
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return Extend.getOperand(0).getValueType();
case ISD::AssertSext:
case ISD::AssertZext:
case ISD::SIGN_EXTEND_INREG: {
VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
if (!TypeNode)
return MVT::Other;
return TypeNode->getVT();
}
case ISD::AND: {
ConstantSDNode *Constant =
dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
if (!Constant)
return MVT::Other;
uint32_t Mask = Constant->getZExtValue();
if (Mask == UCHAR_MAX)
return MVT::i8;
else if (Mask == USHRT_MAX)
return MVT::i16;
else if (Mask == UINT_MAX)
return MVT::i32;
return MVT::Other;
}
default:
return MVT::Other;
}
llvm_unreachable("Code path unhandled in calculatePreExtendType!");
}
/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
SelectionDAG &DAG) {
ShuffleVectorSDNode *ShuffleNode =
dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
if (!ShuffleNode)
return SDValue();
// Ensure the shuffle mask is all-zero (a splat of lane 0) before continuing
if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
return SDValue();
SDValue InsertVectorElt = VectorShuffle.getOperand(0);
if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
SDValue InsertLane = InsertVectorElt.getOperand(2);
ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
// Ensures the insert is inserting into lane 0
if (!Constant || Constant->getZExtValue() != 0)
return SDValue();
SDValue Extend = InsertVectorElt.getOperand(1);
unsigned ExtendOpcode = Extend.getOpcode();
bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
ExtendOpcode == ISD::AssertSext;
if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
return SDValue();
EVT TargetType = VectorShuffle.getValueType();
EVT PreExtendType = calculatePreExtendType(Extend, DAG);
if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
TargetType != MVT::v2i64) ||
(PreExtendType == MVT::Other))
return SDValue();
// Restrict valid pre-extend data type
if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
PreExtendType != MVT::i32)
return SDValue();
EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
return SDValue();
if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
return SDValue();
SDLoc DL(VectorShuffle);
SDValue InsertVectorNode = DAG.getNode(
InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
DAG.getConstant(0, DL, MVT::i64));
std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
SDValue VectorShuffleNode =
DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
DAG.getUNDEF(PreExtendVT), ShuffleMask);
SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
DL, TargetType, VectorShuffleNode);
return ExtendNode;
}
/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
// If the value type isn't a vector, none of the operands are going to be dups
if (!Mul->getValueType(0).isVector())
return SDValue();
SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
// Neither operand has been changed, so don't make any further changes
if (!Op0 && !Op1)
return SDValue();
SDLoc DL(Mul);
return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
Op0 ? Op0 : Mul->getOperand(0),
Op1 ? Op1 : Mul->getOperand(1));
}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
return Ext;
if (DCI.isBeforeLegalizeOps())
return SDValue();
// The below optimizations require a constant RHS.
if (!isa<ConstantSDNode>(N->getOperand(1)))
return SDValue();
SDValue N0 = N->getOperand(0);
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
const APInt &ConstValue = C->getAPIntValue();
// Allow the scaling to be folded into the `cnt` instruction by preventing
// the scaling from being obscured here. This makes it easier to pattern match.
if (IsSVECntIntrinsic(N0) ||
(N0->getOpcode() == ISD::TRUNCATE &&
(IsSVECntIntrinsic(N0->getOperand(0)))))
if (ConstValue.sge(1) && ConstValue.sle(16))
return SDValue();
// Multiplication of a power of two plus/minus one can be done more
// cheaply as a shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
// gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
// 64-bit is 5 cycles, so this is always a win.
// More aggressively, some multiplications N0 * C can be lowered to
// shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
// e.g. 6=3*2=(2+1)*2.
// TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
// which equals (1+2)*16-(1+2).
// TrailingZeroes is used to test if the mul can be lowered to
// shift+add+shift.
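// For example (illustrative), a multiply by 6 (6 == (2+1)*2) becomes roughly:
//   add w8, w0, w0, lsl #1   ; w8 = 3*w0
//   lsl w0, w8, #1           ; w0 = 6*w0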
unsigned TrailingZeroes = ConstValue.countTrailingZeros();
if (TrailingZeroes) {
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into smul or umul.
if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
isZeroExtended(N0.getNode(), DAG)))
return SDValue();
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into madd or msub.
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
N->use_begin()->getOpcode() == ISD::SUB))
return SDValue();
}
// Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
// and shift+add+shift.
APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
unsigned ShiftAmt, AddSubOpc;
// Is the shifted value the LHS operand of the add/sub?
bool ShiftValUseIsN0 = true;
// Do we need to negate the result?
bool NegateResult = false;
if (ConstValue.isNonNegative()) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
// (mul x, 2^N - 1) => (sub (shl x, N), x)
// (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
APInt SCVMinus1 = ShiftedConstValue - 1;
APInt CVPlus1 = ConstValue + 1;
if (SCVMinus1.isPowerOf2()) {
ShiftAmt = SCVMinus1.logBase2();
AddSubOpc = ISD::ADD;
} else if (CVPlus1.isPowerOf2()) {
ShiftAmt = CVPlus1.logBase2();
AddSubOpc = ISD::SUB;
} else
return SDValue();
} else {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
APInt CVNegPlus1 = -ConstValue + 1;
APInt CVNegMinus1 = -ConstValue - 1;
if (CVNegPlus1.isPowerOf2()) {
ShiftAmt = CVNegPlus1.logBase2();
AddSubOpc = ISD::SUB;
ShiftValUseIsN0 = false;
} else if (CVNegMinus1.isPowerOf2()) {
ShiftAmt = CVNegMinus1.logBase2();
AddSubOpc = ISD::ADD;
NegateResult = true;
} else
return SDValue();
}
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(ShiftAmt, DL, MVT::i64));
SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
assert(!(NegateResult && TrailingZeroes) &&
"NegateResult and TrailingZeroes cannot both be true for now.");
// Negate the result.
if (NegateResult)
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
// Shift the result.
if (TrailingZeroes)
return DAG.getNode(ISD::SHL, DL, VT, Res,
DAG.getConstant(TrailingZeroes, DL, MVT::i64));
return Res;
}
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
SelectionDAG &DAG) {
// Take advantage of vector comparisons producing 0 or -1 in each lane to
// optimize the operation away when its input comes from a constant.
//
// The general transformation is:
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
// AND(VECTOR_CMP(x,y), constant2)
// constant2 = UNARYOP(constant)
// Early exit if this isn't a vector operation, the operand of the
// unary operation isn't a bitwise AND, or if the sizes of the operations
// aren't the same.
EVT VT = N->getValueType(0);
if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
return SDValue();
// Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
if (BuildVectorSDNode *BV =
dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
// Bail out if the vector isn't a constant.
if (!BV->isConstant())
return SDValue();
// Everything checks out. Build up the new and improved node.
SDLoc DL(N);
EVT IntVT = BV->getValueType(0);
// Create a new constant of the appropriate type for the transformed
// DAG.
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
N->getOperand(0)->getOperand(0), MaskConst);
SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
return Res;
}
return SDValue();
}
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
// a constant. Vectors only.
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
EVT VT = N->getValueType(0);
if (VT != MVT::f32 && VT != MVT::f64)
return SDValue();
// Only optimize when the source and destination types have the same width.
if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
return SDValue();
// If the result of an integer load is only used by an integer-to-float
// conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
// This eliminates an "integer-to-vector-move" UOP and improves throughput.
SDValue N0 = N->getOperand(0);
if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
LN0->getPointerInfo(), LN0->getAlignment(),
LN0->getMemOperand()->getFlags());
// Make sure successors of the original load stay after it by updating them
// to use the new Chain.
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
unsigned Opcode =
(N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
return DAG.getNode(Opcode, SDLoc(N), VT, Load);
}
return SDValue();
}
/// Fold a floating-point multiply by power of two into floating-point to
/// fixed-point conversion.
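/// For example (illustrative), (fptosi (fmul v4f32 X, splat 16.0)) can be
/// selected as a single "fcvtzs v0.4s, v0.4s, #4", i.e. a conversion with four
/// fractional bits, instead of an fmul followed by an fcvtzs.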
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
if (!N->getValueType(0).isSimple())
return SDValue();
SDValue Op = N->getOperand(0);
if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
Op.getOpcode() != ISD::FMUL)
return SDValue();
SDValue ConstVec = Op->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
if (FloatBits != 32 && FloatBits != 64)
return SDValue();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
return SDValue();
// Avoid conversions where iN is larger than the float (e.g., float -> i64).
if (IntBits > FloatBits)
return SDValue();
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t Bits = IntBits == 64 ? 64 : 32;
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
if (C == -1 || C == 0 || C > Bits)
return SDValue();
MVT ResTy;
unsigned NumLanes = Op.getValueType().getVectorNumElements();
switch (NumLanes) {
default:
return SDValue();
case 2:
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
break;
case 4:
ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
break;
}
if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
return SDValue();
assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
"Illegal vector type after legalization");
SDLoc DL(N);
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
: Intrinsic::aarch64_neon_vcvtfp2fxu;
SDValue FixConv =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
// We can handle smaller integers by generating an extra trunc.
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
return FixConv;
}
/// Fold a floating-point divide by power of two into fixed-point to
/// floating-point conversion.
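/// For example (illustrative), (fdiv (sitofp v4i32 X), splat 16.0) can be
/// selected as a single "scvtf v0.4s, v0.4s, #4", i.e. a conversion with four
/// fractional bits, instead of an scvtf followed by an fdiv.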
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue Op = N->getOperand(0);
unsigned Opc = Op->getOpcode();
if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
!Op.getOperand(0).getValueType().isSimple() ||
(Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
return SDValue();
SDValue ConstVec = N->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
int32_t IntBits = IntTy.getSizeInBits();
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
return SDValue();
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
int32_t FloatBits = FloatTy.getSizeInBits();
if (FloatBits != 32 && FloatBits != 64)
return SDValue();
// Avoid conversions where iN is larger than the float (e.g., i64 -> float).
if (IntBits > FloatBits)
return SDValue();
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
if (C == -1 || C == 0 || C > FloatBits)
return SDValue();
MVT ResTy;
unsigned NumLanes = Op.getValueType().getVectorNumElements();
switch (NumLanes) {
default:
return SDValue();
case 2:
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
break;
case 4:
ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
break;
}
if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
return SDValue();
SDLoc DL(N);
SDValue ConvInput = Op.getOperand(0);
bool IsSigned = Opc == ISD::SINT_TO_FP;
if (IntBits < FloatBits)
ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
ResTy, ConvInput);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
: Intrinsic::aarch64_neon_vcvtfxu2fp;
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
DAG.getConstant(C, DL, MVT::i32));
}
/// An EXTR instruction is made up of two shifts, ORed together. This helper
/// searches for and classifies those shifts.
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
bool &FromHi) {
if (N.getOpcode() == ISD::SHL)
FromHi = false;
else if (N.getOpcode() == ISD::SRL)
FromHi = true;
else
return false;
if (!isa<ConstantSDNode>(N.getOperand(1)))
return false;
ShiftAmount = N->getConstantOperandVal(1);
Src = N->getOperand(0);
return true;
}
/// EXTR instruction extracts a contiguous chunk of bits from two existing
/// registers viewed as a high/low pair. This function looks for the pattern:
/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
/// with an EXTR. Can't quite be done in TableGen because the two immediates
/// aren't independent.
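/// For example (illustrative), on i64 values:
///   (or (shl X, #12), (srl Y, #52))  ==>  EXTR Xd, X, Y, #52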
static SDValue tryCombineToEXTR(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
assert(N->getOpcode() == ISD::OR && "Unexpected root");
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
SDValue LHS;
uint32_t ShiftLHS = 0;
bool LHSFromHi = false;
if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
return SDValue();
SDValue RHS;
uint32_t ShiftRHS = 0;
bool RHSFromHi = false;
if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
return SDValue();
// If they're both trying to come from the high part of the register, they're
// not really an EXTR.
if (LHSFromHi == RHSFromHi)
return SDValue();
if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
return SDValue();
if (LHSFromHi) {
std::swap(LHS, RHS);
std::swap(ShiftLHS, ShiftRHS);
}
return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
DAG.getConstant(ShiftRHS, DL, MVT::i64));
}
static SDValue tryCombineToBSL(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
if (!VT.isVector())
return SDValue();
// The combining code currently only works for NEON vectors. In particular,
// it does not work for SVE when dealing with vectors wider than 128 bits.
if (!VT.is64BitVector() && !VT.is128BitVector())
return SDValue();
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() != ISD::AND)
return SDValue();
// InstCombine does (not (neg a)) => (add a -1).
// Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
// Loop over all combinations of AND operands.
for (int i = 1; i >= 0; --i) {
for (int j = 1; j >= 0; --j) {
SDValue O0 = N0->getOperand(i);
SDValue O1 = N1->getOperand(j);
SDValue Sub, Add, SubSibling, AddSibling;
// Find a SUB and an ADD operand, one from each AND.
if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
Sub = O0;
Add = O1;
SubSibling = N0->getOperand(1 - i);
AddSibling = N1->getOperand(1 - j);
} else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
Add = O0;
Sub = O1;
AddSibling = N0->getOperand(1 - i);
SubSibling = N1->getOperand(1 - j);
} else
continue;
if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
continue;
// The constant all-ones vector is always the right-hand operand of the Add.
if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
continue;
if (Sub.getOperand(1) != Add.getOperand(0))
continue;
return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
}
}
// (or (and a b) (and (not a) c)) => (bsl a b c)
// We only have to look for constant vectors here since the general, variable
// case can be handled in TableGen.
unsigned Bits = VT.getScalarSizeInBits();
uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
for (int i = 1; i >= 0; --i)
for (int j = 1; j >= 0; --j) {
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
if (!BVN0 || !BVN1)
continue;
bool FoundMatch = true;
for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
if (!CN0 || !CN1 ||
CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
FoundMatch = false;
break;
}
}
if (FoundMatch)
return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
N0->getOperand(1 - i), N1->getOperand(1 - j));
}
return SDValue();
}
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
if (SDValue Res = tryCombineToEXTR(N, DCI))
return Res;
if (SDValue Res = tryCombineToBSL(N, DCI))
return Res;
return SDValue();
}
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
if (!MemVT.getVectorElementType().isSimple())
return false;
uint64_t MaskForTy = 0ull;
switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
case MVT::i8:
MaskForTy = 0xffull;
break;
case MVT::i16:
MaskForTy = 0xffffull;
break;
case MVT::i32:
MaskForTy = 0xffffffffull;
break;
default:
return false;
break;
}
if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
return false;
}
static SDValue performSVEAndCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
// Zero/any extend of an unsigned unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
SDValue UnpkOp = Src->getOperand(0);
SDValue Dup = N->getOperand(1);
if (Dup.getOpcode() != AArch64ISD::DUP)
return SDValue();
SDLoc DL(N);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
uint64_t ExtVal = C->getZExtValue();
// If the mask is fully covered by the unpack, we don't need to push
// a new AND onto the operand
EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
(ExtVal == 0xFFFF && EltTy == MVT::i16) ||
(ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
return Src;
// Truncate to prevent a DUP with an over-wide constant
APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
// Otherwise, make sure we propagate the AND to the operand
// of the unpack
Dup = DAG.getNode(AArch64ISD::DUP, DL,
UnpkOp->getValueType(0),
DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
SDValue And = DAG.getNode(ISD::AND, DL,
UnpkOp->getValueType(0), UnpkOp, Dup);
return DAG.getNode(Opc, DL, N->getValueType(0), And);
}
if (!EnableCombineMGatherIntrinsics)
return SDValue();
SDValue Mask = N->getOperand(1);
if (!Src.hasOneUse())
return SDValue();
EVT MemVT;
// SVE load instructions perform an implicit zero-extend, which makes them
// perfect candidates for combining.
switch (Opc) {
case AArch64ISD::LD1_MERGE_ZERO:
case AArch64ISD::LDNF1_MERGE_ZERO:
case AArch64ISD::LDFF1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
break;
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
case AArch64ISD::GLDFF1_MERGE_ZERO:
case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
case AArch64ISD::GLDNT1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
break;
default:
return SDValue();
}
if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
return Src;
return SDValue();
}
static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
EVT VT = N->getValueType(0);
if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
if (VT.isScalableVector())
return performSVEAndCombine(N, DCI);
// The combining code below works only for NEON vectors. In particular, it
// does not work for SVE when dealing with vectors wider than 128 bits.
if (!(VT.is64BitVector() || VT.is128BitVector()))
return SDValue();
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
if (!BVN)
return SDValue();
// AND does not accept an immediate, so check if we can use a BIC immediate
// instruction instead. We do this here instead of using a (and x, (mvni imm))
// pattern in isel, because some immediates may be lowered to the preferred
// (and x, (movi imm)) form, even though an mvni representation also exists.
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)))
return NewOp;
UndefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)))
return NewOp;
}
return SDValue();
}
static SDValue performSRLCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
// high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
// to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
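// For example (illustrative), if the top 16 bits of a 32-bit x are known zero,
// (srl (bswap x), 16) and (rotr (bswap x), 16) produce the same value (the low
// two bytes of x swapped), and the rotr form can typically be matched to a
// single REV16 by the existing ISel patterns.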
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() == ISD::BSWAP) {
SDLoc DL(N);
SDValue N1 = N->getOperand(1);
SDValue N00 = N0.getOperand(0);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
uint64_t ShiftAmt = C->getZExtValue();
if (VT == MVT::i32 && ShiftAmt == 16 &&
DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
if (VT == MVT::i64 && ShiftAmt == 32 &&
DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
}
}
return SDValue();
}
// Attempt to form urhadd(OpA, OpB) from
// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
// The original form of the first expression is
// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
// Before this function is called the srl will have been lowered to
// AArch64ISD::VLSHR.
// This pass can also recognize signed variants of the patterns that use sign
// extension instead of zero extension and form a srhadd(OpA, OpB) or a
// shadd(OpA, OpB) from them.
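// In C terms (illustrative), for unsigned bytes this recognises
//   r = (a + b + 1) >> 1;   ; urhadd
//   r = (a + b) >> 1;       ; uhadd
// computed without losing the carry bit, and likewise for the signed forms.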
static SDValue
performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Since we are looking for a right shift by a constant value of 1 and we are
// operating on types at least 16 bits in length (sign/zero extended OpA and
// OpB, which are at least 8 bits), it follows that the truncate will always
// discard the shifted-in bit and therefore the right shift will be logical
// regardless of the signedness of OpA and OpB.
SDValue Shift = N->getOperand(0);
if (Shift.getOpcode() != AArch64ISD::VLSHR)
return SDValue();
// Is the right shift using an immediate value of 1?
uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
if (ShiftAmount != 1)
return SDValue();
SDValue ExtendOpA, ExtendOpB;
SDValue ShiftOp0 = Shift.getOperand(0);
unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
if (ShiftOp0Opc == ISD::SUB) {
SDValue Xor = ShiftOp0.getOperand(1);
if (Xor.getOpcode() != ISD::XOR)
return SDValue();
// Is the XOR using an all-ones constant on the right-hand side?
uint64_t C;
if (!isAllConstantBuildVector(Xor.getOperand(1), C))
return SDValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
APInt CAsAPInt(ElemSizeInBits, C);
if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
return SDValue();
ExtendOpA = Xor.getOperand(0);
ExtendOpB = ShiftOp0.getOperand(0);
} else if (ShiftOp0Opc == ISD::ADD) {
ExtendOpA = ShiftOp0.getOperand(0);
ExtendOpB = ShiftOp0.getOperand(1);
} else
return SDValue();
unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
if (!(ExtendOpAOpc == ExtendOpBOpc &&
(ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
return SDValue();
// Is the result of the right shift being truncated to the same value type as
// the original operands, OpA and OpB?
SDValue OpA = ExtendOpA.getOperand(0);
SDValue OpB = ExtendOpB.getOperand(0);
EVT OpAVT = OpA.getValueType();
assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
return SDValue();
SDLoc DL(N);
bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
bool IsRHADD = ShiftOp0Opc == ISD::SUB;
unsigned HADDOpc = IsSignExtend
? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
: (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
return ResultHADD;
}
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
switch (Opcode) {
case ISD::FADD:
return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
case ISD::ADD:
return VT == MVT::i64;
default:
return false;
}
}
static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
// Rewrite for pairwise fadd pattern
// (f32 (extract_vector_elt
// (fadd (vXf32 Other)
// (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
// ->
// (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
// (extract_vector_elt (vXf32 Other) 1))
if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
SDLoc DL(N0);
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
SDValue Other = N00;
// And handle the commutative case.
if (!Shuffle) {
Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
Other = N01;
}
if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
Other == Shuffle->getOperand(0)) {
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
DAG.getConstant(0, DL, MVT::i64)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
DAG.getConstant(1, DL, MVT::i64)));
}
}
return SDValue();
}
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
// Optimize concat_vectors of truncated vectors, where the intermediate
// type is illegal, to avoid said illegality, e.g.,
// (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
// (v2i16 (truncate (v2i64)))))
// ->
// (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
// (v4i32 (bitcast (v2i64))),
// <0, 2, 4, 6>)))
// This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
// on both input and result type, so we might generate worse code.
// On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
N1Opc == ISD::TRUNCATE) {
SDValue N00 = N0->getOperand(0);
SDValue N10 = N1->getOperand(0);
EVT N00VT = N00.getValueType();
if (N00VT == N10.getValueType() &&
(N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
for (size_t i = 0; i < Mask.size(); ++i)
Mask[i] = i * 2;
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getVectorShuffle(
MidVT, dl,
DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
}
}
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
// subvectors from the same original vectors. Combine these into a single
// [us]rhadd or [us]hadd that operates on the two original vectors. Example:
// (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
// extract_subvector (v16i8 OpB,
// <0>))),
// (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
// extract_subvector (v16i8 OpB,
// <8>)))))
// ->
// (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
(N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
EVT N00VT = N00.getValueType();
EVT N10VT = N10.getValueType();
if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
SDValue N00Source = N00->getOperand(0);
SDValue N01Source = N01->getOperand(0);
SDValue N10Source = N10->getOperand(0);
SDValue N11Source = N11->getOperand(0);
if (N00Source == N10Source && N01Source == N11Source &&
N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
assert(N0.getValueType() == N1.getValueType());
uint64_t N00Index = N00.getConstantOperandVal(1);
uint64_t N01Index = N01.getConstantOperandVal(1);
uint64_t N10Index = N10.getConstantOperandVal(1);
uint64_t N11Index = N11.getConstantOperandVal(1);
if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
N10Index == N00VT.getVectorNumElements())
return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
}
}
}
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
if (N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getScalarSizeInBits() == 64);
return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, dl, MVT::i64));
}
// Canonicalise concat_vectors so that the right-hand vector has as few
// bit-casts as possible before its real operation. The primary matching
// destination for these operations will be the narrowing "2" instructions,
// which depend on the operation being performed on this right-hand vector.
// For example,
// (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
if (N1Opc != ISD::BITCAST)
return SDValue();
SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
return SDValue();
LLVM_DEBUG(
dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
RHS));
}
static SDValue tryCombineFixedPointConvert(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Wait until after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Transform a scalar conversion of a value from a lane extract into a
// lane extract of a vector conversion. E.g., from foo1 to foo2:
// double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
// double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
//
// The second form interacts better with instruction selection and the
// register allocator to avoid cross-class register copies that aren't
// coalescable due to a lane reference.
// Check the operand and see if it originates from a lane extract.
SDValue Op1 = N->getOperand(1);
if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
// Yep, no additional predication needed. Perform the transform.
SDValue IID = N->getOperand(0);
SDValue Shift = N->getOperand(2);
SDValue Vec = Op1.getOperand(0);
SDValue Lane = Op1.getOperand(1);
EVT ResTy = N->getValueType(0);
EVT VecResTy;
SDLoc DL(N);
// The vector width should be 128 bits by the time we get here, even
// if it started as 64 bits (the extract_vector handling will have
// done so).
assert(Vec.getValueSizeInBits() == 128 &&
"unexpected vector size on extract_vector_elt!");
if (Vec.getValueType() == MVT::v4i32)
VecResTy = MVT::v4f32;
else if (Vec.getValueType() == MVT::v2i64)
VecResTy = MVT::v2f64;
else
llvm_unreachable("unexpected vector type!");
SDValue Convert =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
}
return SDValue();
}
// AArch64 high-vector "long" operations are formed by performing the non-high
// version on an extract_subvector of each operand which gets the high half:
//
// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
//
// However, there are cases which don't have an extract_high explicitly, but
// have another operation that can be made compatible with one for free. For
// example:
//
// (dupv64 scalar) --> (extract_high (dup128 scalar))
//
// This routine does the actual conversion of such DUPs, once outer routines
// have determined that everything else is in order.
// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
// similarly here.
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
switch (N.getOpcode()) {
case AArch64ISD::DUP:
case AArch64ISD::DUPLANE8:
case AArch64ISD::DUPLANE16:
case AArch64ISD::DUPLANE32:
case AArch64ISD::DUPLANE64:
case AArch64ISD::MOVI:
case AArch64ISD::MOVIshift:
case AArch64ISD::MOVIedit:
case AArch64ISD::MOVImsl:
case AArch64ISD::MVNIshift:
case AArch64ISD::MVNImsl:
break;
default:
// FMOV could be supported, but isn't very useful, as it would only occur
// if you passed a bitcast'd floating-point immediate to an eligible long
// integer op (addl, smull, ...).
return SDValue();
}
MVT NarrowTy = N.getSimpleValueType();
if (!NarrowTy.is64BitVector())
return SDValue();
MVT ElementTy = NarrowTy.getVectorElementType();
unsigned NumElems = NarrowTy.getVectorNumElements();
MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
SDLoc dl(N);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
DAG.getConstant(NumElems, dl, MVT::i64));
}
static bool isEssentiallyExtractHighSubvector(SDValue N) {
if (N.getOpcode() == ISD::BITCAST)
N = N.getOperand(0);
if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
+ if (N.getOperand(0).getValueType().isScalableVector())
+ return false;
return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
N.getOperand(0).getValueType().getVectorNumElements() / 2;
}
/// Helper structure to keep track of ISD::SET_CC operands.
struct GenericSetCCInfo {
const SDValue *Opnd0;
const SDValue *Opnd1;
ISD::CondCode CC;
};
/// Helper structure to keep track of a SET_CC lowered into AArch64 code.
struct AArch64SetCCInfo {
const SDValue *Cmp;
AArch64CC::CondCode CC;
};
/// Helper structure to keep track of SetCC information.
union SetCCInfo {
GenericSetCCInfo Generic;
AArch64SetCCInfo AArch64;
};
/// Helper structure to be able to read SetCC information. If the IsAArch64
/// field is set to true, Info is an AArch64SetCCInfo, otherwise Info is a
/// GenericSetCCInfo.
struct SetCCInfoAndKind {
SetCCInfo Info;
bool IsAArch64;
};
/// Check whether or not \p Op is a SET_CC operation, either a generic or an
/// AArch64 lowered one.
/// \p SetCCInfo is filled accordingly.
/// \post SetCCInfo is meaningful only when this function returns true.
/// \return True when Op is a kind of SET_CC operation.
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
// If this is a setcc, this is straightforward.
if (Op.getOpcode() == ISD::SETCC) {
SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SetCCInfo.IsAArch64 = false;
return true;
}
// Otherwise, check if this is a matching csel instruction.
// In other words:
// - csel 1, 0, cc
// - csel 0, 1, !cc
if (Op.getOpcode() != AArch64ISD::CSEL)
return false;
// Set the information about the operands.
// TODO: we want the operands of the Cmp not the csel
SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
SetCCInfo.IsAArch64 = true;
SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
// Check that the operands match the constraints:
// (1) Both operands must be constants.
// (2) One must be 1 and the other must be 0.
ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
// Check (1).
if (!TValue || !FValue)
return false;
// Check (2).
if (!TValue->isOne()) {
// Update the comparison when we are interested in !cc.
std::swap(TValue, FValue);
SetCCInfo.Info.AArch64.CC =
AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
}
return TValue->isOne() && FValue->isNullValue();
}
// Returns true if Op is setcc or zext of setcc.
static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
if (isSetCC(Op, Info))
return true;
return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
isSetCC(Op->getOperand(0), Info));
}
// The folding we want to perform is:
// (add x, [zext] (setcc cc ...) )
// -->
// (csel x, (add x, 1), !cc ...)
//
// The latter will get matched to a CSINC instruction.
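// For example (illustrative), "x + (a < b ? 1 : 0)" becomes roughly:
//   cmp  w1, w2
//   cinc w0, w0, lt          ; alias of csinc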
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
SDValue LHS = Op->getOperand(0);
SDValue RHS = Op->getOperand(1);
SetCCInfoAndKind InfoAndKind;
// If both operands are a SET_CC, then we don't want to perform this
// folding and create another csel as this results in more instructions
// (and higher register usage).
if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
isSetCCOrZExtSetCC(RHS, InfoAndKind))
return SDValue();
// If neither operand is a SET_CC, give up.
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
std::swap(LHS, RHS);
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
return SDValue();
}
// FIXME: This could be generalized to work for FP comparisons.
EVT CmpVT = InfoAndKind.IsAArch64
? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
: InfoAndKind.Info.Generic.Opnd0->getValueType();
if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
return SDValue();
SDValue CCVal;
SDValue Cmp;
SDLoc dl(Op);
if (InfoAndKind.IsAArch64) {
CCVal = DAG.getConstant(
AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
MVT::i32);
Cmp = *InfoAndKind.Info.AArch64.Cmp;
} else
Cmp = getAArch64Cmp(
*InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
dl);
EVT VT = Op->getValueType(0);
LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Only scalar integer and vector types.
if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
return SDValue();
auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
return SDValue();
SDValue Op1 = LHS->getOperand(0);
SDValue Op2 = RHS->getOperand(0);
EVT OpVT1 = Op1.getValueType();
EVT OpVT2 = Op2.getValueType();
if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
Op2.getOpcode() != AArch64ISD::UADDV ||
OpVT1.getVectorElementType() != VT)
return SDValue();
SDValue Val1 = Op1.getOperand(0);
SDValue Val2 = Op2.getOperand(0);
EVT ValVT = Val1->getValueType(0);
SDLoc DL(N);
SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
DAG.getConstant(0, DL, MVT::i64));
}
// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (N->getOpcode() != ISD::ADD)
return SDValue();
SDValue Dot = N->getOperand(0);
SDValue A = N->getOperand(1);
// Handle commutativity
auto isZeroDot = [](SDValue Dot) {
return (Dot.getOpcode() == AArch64ISD::UDOT ||
Dot.getOpcode() == AArch64ISD::SDOT) &&
isZerosVector(Dot.getOperand(0).getNode());
};
if (!isZeroDot(Dot))
std::swap(Dot, A);
if (!isZeroDot(Dot))
return SDValue();
return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1),
Dot.getOperand(2));
}
// The basic add/sub long vector instructions have variants with "2" on the end
// which act on the high-half of their inputs. They are normally matched by
// patterns like:
//
// (add (zeroext (extract_high LHS)),
// (zeroext (extract_high RHS)))
// -> uaddl2 vD, vN, vM
//
// However, if one of the extracts is something like a duplicate, this
// instruction can still be used profitably. This function puts the DAG into a
// more appropriate form for those patterns to trigger.
static SDValue performAddSubLongCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
MVT VT = N->getSimpleValueType(0);
if (!VT.is128BitVector()) {
if (N->getOpcode() == ISD::ADD)
return performSetccAddFolding(N, DAG);
return SDValue();
}
// Make sure both branches are extended in the same way.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
LHS.getOpcode() != ISD::SIGN_EXTEND) ||
LHS.getOpcode() != RHS.getOpcode())
return SDValue();
unsigned ExtType = LHS.getOpcode();
// It's not worth doing if at least one of the inputs isn't already an
// extract, but we don't know which it'll be so we have to try both.
if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
if (!RHS.getNode())
return SDValue();
RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
} else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
if (!LHS.getNode())
return SDValue();
LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
}
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Try to change sum of two reductions.
if (SDValue Val = performUADDVCombine(N, DAG))
return Val;
if (SDValue Val = performAddDotCombine(N, DAG))
return Val;
return performAddSubLongCombine(N, DCI, DAG);
}
// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.
//
// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
// (aarch64_neon_umull (extract_high (v2i64 vec))
//                     (extract_high (v2i64 (dup128 scalar))))
//
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
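// When called for a plain node the data operands are at positions 0 and 1; for
// an intrinsic node operand 0 is the intrinsic ID, so they are shifted by one.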
SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
// Either node could be a DUP, but it's not worth doing both of them (you'd
// just as well use the non-high version) so look for a corresponding extract
// operation on the other "wing".
if (isEssentiallyExtractHighSubvector(LHS)) {
RHS = tryExtendDUPToExtractHigh(RHS, DAG);
if (!RHS.getNode())
return SDValue();
} else if (isEssentiallyExtractHighSubvector(RHS)) {
LHS = tryExtendDUPToExtractHigh(LHS, DAG);
if (!LHS.getNode())
return SDValue();
}
if (IID == Intrinsic::not_intrinsic)
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
N->getOperand(0), LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
MVT ElemTy = N->getSimpleValueType(0).getScalarType();
unsigned ElemBits = ElemTy.getSizeInBits();
int64_t ShiftAmount;
if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, ElemBits) ||
SplatBitSize != ElemBits)
return SDValue();
ShiftAmount = SplatValue.getSExtValue();
} else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
ShiftAmount = CVN->getSExtValue();
} else
return SDValue();
unsigned Opcode;
bool IsRightShift;
switch (IID) {
default:
llvm_unreachable("Unknown shift intrinsic");
case Intrinsic::aarch64_neon_sqshl:
Opcode = AArch64ISD::SQSHL_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_uqshl:
Opcode = AArch64ISD::UQSHL_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_srshl:
Opcode = AArch64ISD::SRSHR_I;
IsRightShift = true;
break;
case Intrinsic::aarch64_neon_urshl:
Opcode = AArch64ISD::URSHR_I;
IsRightShift = true;
break;
case Intrinsic::aarch64_neon_sqshlu:
Opcode = AArch64ISD::SQSHLU_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
// For positive shift amounts we can use SHL, as ushl/sshl perform a regular
// left shift for positive shift amounts. Below, we only replace the current
// node with VSHL if this condition is met.
Opcode = AArch64ISD::VSHL;
IsRightShift = false;
break;
}
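// Right-shift intrinsics encode their shift as a negative amount; translate it
// into the positive immediate expected by the shift-right-immediate nodes.
// Left shifts are emitted directly when the amount is within range.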
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
SDLoc dl(N);
return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
DAG.getConstant(-ShiftAmount, dl, MVT::i32));
} else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
SDLoc dl(N);
return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
DAG.getConstant(ShiftAmount, dl, MVT::i32));
}
return SDValue();
}
// The CRC32[BH] instructions ignore the high bits of their data operand. Since
// the intrinsics must be legal and take an i32, there's almost
// certainly going to be a zext in the DAG which we can eliminate.
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
SDValue AndN = N->getOperand(2);
if (AndN.getOpcode() != ISD::AND)
return SDValue();
ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
if (!CMask || CMask->getZExtValue() != Mask)
return SDValue();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
SelectionDAG &DAG) {
SDLoc dl(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
DAG.getNode(Opc, dl,
N->getOperand(1).getSimpleValueType(),
N->getOperand(1)),
DAG.getConstant(0, dl, MVT::i64));
}
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op1 = N->getOperand(1);
SDValue Op2 = N->getOperand(2);
EVT ScalarTy = Op2.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
ScalarTy = MVT::i32;
// Lower index_vector(base, step) to mul(step, step_vector(1)) + splat(base).
SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
}
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
SDValue Scalar = N->getOperand(3);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
SDValue Passthru = N->getOperand(1);
SDValue Pred = N->getOperand(2);
return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
Pred, Scalar, Passthru);
}
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
LLVMContext &Ctx = *DAG.getContext();
EVT VT = N->getValueType(0);
assert(VT.isScalableVector() && "Expected a scalable vector.");
// Current lowering only supports the SVE-ACLE types.
if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
return SDValue();
unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
EVT ByteVT =
EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
// Convert everything to the domain of EXT (i.e. bytes).
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
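// Scale the element index to a byte index, since EXT indexes in bytes.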
SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
DAG.getConstant(ElemSize, dl, MVT::i32));
SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
}
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize())
return SDValue();
SDValue Comparator = N->getOperand(3);
if (Comparator.getOpcode() == AArch64ISD::DUP ||
Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
unsigned IID = getIntrinsicID(N);
EVT VT = N->getValueType(0);
EVT CmpVT = N->getOperand(2).getValueType();
SDValue Pred = N->getOperand(1);
SDValue Imm;
SDLoc DL(N);
switch (IID) {
default:
llvm_unreachable("Called with wrong intrinsic!");
break;
// Signed comparisons
case Intrinsic::aarch64_sve_cmpeq_wide:
case Intrinsic::aarch64_sve_cmpne_wide:
case Intrinsic::aarch64_sve_cmpge_wide:
case Intrinsic::aarch64_sve_cmpgt_wide:
case Intrinsic::aarch64_sve_cmplt_wide:
case Intrinsic::aarch64_sve_cmple_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
int64_t ImmVal = CN->getSExtValue();
if (ImmVal >= -16 && ImmVal <= 15)
Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
else
return SDValue();
}
break;
}
// Unsigned comparisons
case Intrinsic::aarch64_sve_cmphs_wide:
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
case Intrinsic::aarch64_sve_cmpls_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
uint64_t ImmVal = CN->getZExtValue();
if (ImmVal <= 127)
Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
else
return SDValue();
}
break;
}
}
if (!Imm)
return SDValue();
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
N->getOperand(2), Splat, DAG.getCondCode(CC));
}
return SDValue();
}
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
AArch64CC::CondCode Cond) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(Op);
assert(Op.getValueType().isScalableVector() &&
TLI.isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
// Ensure target-specific opcodes use a legal type.
EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue TVal = DAG.getConstant(1, DL, OutVT);
SDValue FVal = DAG.getConstant(0, DL, OutVT);
// Set condition code (CC) flags.
SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
// Convert CC to integer based on requested condition.
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
return DAG.getZExtOrTrunc(Res, DL, VT);
}
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue VecToReduce = N->getOperand(2);
// NOTE: The integer reduction's result type is not always linked to the
// operand's element type so we construct it from the intrinsic's result type.
EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue VecToReduce = N->getOperand(2);
EVT ReduceVT = VecToReduce.getValueType();
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue InitVal = N->getOperand(2);
SDValue VecToReduce = N->getOperand(3);
EVT ReduceVT = VecToReduce.getValueType();
// Ordered reductions use the first lane of the result vector as the
// reduction's initial value.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
DAG.getUNDEF(ReduceVT), InitVal, Zero);
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static bool isAllActivePredicate(SDValue N) {
unsigned NumElts = N.getValueType().getVectorMinNumElements();
// Look through cast.
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
N = N.getOperand(0);
// When reinterpreting from a type with fewer elements the "new" elements
// are not active, so bail if they're likely to be used.
if (N.getValueType().getVectorMinNumElements() < NumElts)
return false;
}
// "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
// or smaller than the implicit element type represented by N.
// NOTE: A larger element count implies a smaller element type.
if (N.getOpcode() == AArch64ISD::PTRUE &&
N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
return N.getValueType().getVectorMinNumElements() >= NumElts;
return false;
}
// If a merged operation has no inactive lanes we can relax it to a predicated
// or unpredicated operation, which potentially allows better isel (perhaps
// using immediate forms) or relaxing register reuse requirements.
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
SelectionDAG &DAG,
bool UnpredOp = false) {
assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
SDValue Pg = N->getOperand(1);
// ISD way to specify an all active predicate.
if (isAllActivePredicate(Pg)) {
if (UnpredOp)
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), N->getOperand(2),
N->getOperand(3));
else
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg,
N->getOperand(2), N->getOperand(3));
}
// FUTURE: SplatVector(true)
return SDValue();
}
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
unsigned IID = getIntrinsicID(N);
switch (IID) {
default:
break;
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
case Intrinsic::aarch64_neon_saddv:
return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
case Intrinsic::aarch64_neon_uaddv:
return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
case Intrinsic::aarch64_neon_sminv:
return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
case Intrinsic::aarch64_neon_uminv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
case Intrinsic::aarch64_neon_smaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmaxnm:
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fminnm:
return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
return tryCombineLongOpWithDup(IID, N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
case Intrinsic::aarch64_neon_srshl:
case Intrinsic::aarch64_neon_urshl:
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
return tryCombineShiftImm(IID, N, DAG);
case Intrinsic::aarch64_crc32b:
case Intrinsic::aarch64_crc32cb:
return tryCombineCRC32(0xff, N, DAG);
case Intrinsic::aarch64_crc32h:
case Intrinsic::aarch64_crc32ch:
return tryCombineCRC32(0xffff, N, DAG);
case Intrinsic::aarch64_sve_saddv:
// There is no i64 version of SADDV because the sign is irrelevant.
if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
else
return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
case Intrinsic::aarch64_sve_uaddv:
return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
case Intrinsic::aarch64_sve_smaxv:
return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_umaxv:
return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_sminv:
return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
case Intrinsic::aarch64_sve_uminv:
return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
case Intrinsic::aarch64_sve_orv:
return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
case Intrinsic::aarch64_sve_eorv:
return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
case Intrinsic::aarch64_sve_andv:
return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
case Intrinsic::aarch64_sve_index:
return LowerSVEIntrinsicIndex(N, DAG);
case Intrinsic::aarch64_sve_dup:
return LowerSVEIntrinsicDUP(N, DAG);
case Intrinsic::aarch64_sve_dup_x:
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
N->getOperand(1));
case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);
case Intrinsic::aarch64_sve_mul:
return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
case Intrinsic::aarch64_sve_smulh:
return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
case Intrinsic::aarch64_sve_umulh:
return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
case Intrinsic::aarch64_sve_smin:
return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
case Intrinsic::aarch64_sve_umin:
return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
case Intrinsic::aarch64_sve_smax:
return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
case Intrinsic::aarch64_sve_umax:
return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
case Intrinsic::aarch64_sve_lsl:
return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
case Intrinsic::aarch64_sve_lsr:
return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
case Intrinsic::aarch64_sve_asr:
return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
case Intrinsic::aarch64_sve_fadd:
return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
case Intrinsic::aarch64_sve_fsub:
return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
case Intrinsic::aarch64_sve_fmul:
return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
case Intrinsic::aarch64_sve_add:
return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
case Intrinsic::aarch64_sve_sub:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
case Intrinsic::aarch64_sve_and:
return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
case Intrinsic::aarch64_sve_bic:
return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
case Intrinsic::aarch64_sve_eor:
return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
case Intrinsic::aarch64_sve_orr:
return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
case Intrinsic::aarch64_sve_sqadd:
return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
case Intrinsic::aarch64_sve_sqsub:
return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
case Intrinsic::aarch64_sve_uqadd:
return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
case Intrinsic::aarch64_sve_uqsub:
return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
case Intrinsic::aarch64_sve_sqadd_x:
return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_sqsub_x:
return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqadd_x:
return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqsub_x:
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
break;
case Intrinsic::aarch64_sve_cmphi:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
break;
case Intrinsic::aarch64_sve_fcmpge:
case Intrinsic::aarch64_sve_cmpge:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGE));
break;
case Intrinsic::aarch64_sve_fcmpgt:
case Intrinsic::aarch64_sve_cmpgt:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGT));
break;
case Intrinsic::aarch64_sve_fcmpeq:
case Intrinsic::aarch64_sve_cmpeq:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
break;
case Intrinsic::aarch64_sve_fcmpne:
case Intrinsic::aarch64_sve_cmpne:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETNE));
break;
case Intrinsic::aarch64_sve_fcmpuo:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUO));
break;
case Intrinsic::aarch64_sve_fadda:
return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
case Intrinsic::aarch64_sve_faddv:
return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxnmv:
return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxv:
return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_fminnmv:
return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fminv:
return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
case Intrinsic::aarch64_sve_sel:
return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmpeq_wide:
return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
case Intrinsic::aarch64_sve_cmpne_wide:
return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpge_wide:
return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpgt_wide:
return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplt_wide:
return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
case Intrinsic::aarch64_sve_cmple_wide:
return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphs_wide:
return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphi_wide:
return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplo_wide:
return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
case Intrinsic::aarch64_sve_cmpls_wide:
return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);
case Intrinsic::aarch64_sve_ptest_first:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::FIRST_ACTIVE);
case Intrinsic::aarch64_sve_ptest_last:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::LAST_ACTIVE);
}
return SDValue();
}
static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
// we can convert that DUP into another extract_high (of a bigger DUP), which
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
(N->getOperand(0).getOpcode() == ISD::ABDU ||
N->getOperand(0).getOpcode() == ISD::ABDS)) {
SDNode *ABDNode = N->getOperand(0).getNode();
SDValue NewABD =
tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
if (!NewABD.getNode())
return SDValue();
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
return SDValue();
}
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
SDValue SplatVal, unsigned NumVecElts) {
assert(!St.isTruncatingStore() && "cannot split truncating vector store");
unsigned OrigAlignment = St.getAlignment();
unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
// Create scalar stores. This is at least as good as the code sequence for a
// split unaligned store which is a dup.s, ext.b, and two stores.
// Most of the time the three stores should be replaced by store pair
// instructions (stp).
SDLoc DL(&St);
SDValue BasePtr = St.getBasePtr();
uint64_t BaseOffset = 0;
const MachinePointerInfo &PtrInfo = St.getPointerInfo();
SDValue NewST1 =
DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
OrigAlignment, St.getMemOperand()->getFlags());
// As this is in ISel, we will not merge this add, which may degrade results.
if (BasePtr->getOpcode() == ISD::ADD &&
isa<ConstantSDNode>(BasePtr->getOperand(1))) {
BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
BasePtr = BasePtr->getOperand(0);
}
unsigned Offset = EltOffset;
while (--NumVecElts) {
unsigned Alignment = MinAlign(OrigAlignment, Offset);
SDValue OffsetPtr =
DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
PtrInfo.getWithOffset(Offset), Alignment,
St.getMemOperand()->getFlags());
Offset += EltOffset;
}
return NewST1;
}
// Returns an SVE type that ContentTy can be trivially sign or zero extended
// into.
static MVT getSVEContainerType(EVT ContentTy) {
assert(ContentTy.isSimple() && "No SVE containers for extended types");
switch (ContentTy.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("No known SVE container for this MVT type");
case MVT::nxv2i8:
case MVT::nxv2i16:
case MVT::nxv2i32:
case MVT::nxv2i64:
case MVT::nxv2f32:
case MVT::nxv2f64:
return MVT::nxv2i64;
case MVT::nxv4i8:
case MVT::nxv4i16:
case MVT::nxv4i32:
case MVT::nxv4f32:
return MVT::nxv4i32;
case MVT::nxv8i8:
case MVT::nxv8i16:
case MVT::nxv8f16:
case MVT::nxv8bf16:
return MVT::nxv8i16;
case MVT::nxv16i8:
return MVT::nxv16i8;
}
}
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
EVT ContainerVT = VT;
if (ContainerVT.isInteger())
ContainerVT = getSVEContainerType(ContainerVT);
SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
SDValue Ops[] = { N->getOperand(0), // Chain
N->getOperand(2), // Pg
N->getOperand(3), // Base
DAG.getValueType(VT) };
SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
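// If the value was widened to its SVE container type, truncate it back down
// to the requested type.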
if (ContainerVT.isInteger() && (VT != ContainerVT))
Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
return DAG.getMergeValues({ Load, LoadChain }, DL);
}
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT PtrTy = N->getOperand(3).getValueType();
if (VT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
auto *MINode = cast<MemIntrinsicSDNode>(N);
SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
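// Lower the non-temporal load as a regular masked load with a zero passthru;
// operand 3 is the base address and operand 2 the governing predicate.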
SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
MINode->getOperand(3), DAG.getUNDEF(PtrTy),
MINode->getOperand(2), PassThru,
MINode->getMemoryVT(), MINode->getMemOperand(),
ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
if (VT.isFloatingPoint()) {
SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
return DAG.getMergeValues(Ops, DL);
}
return L;
}
template <unsigned Opcode>
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
"Unsupported opcode.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (VT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (VT.isFloatingPoint())
Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Data = N->getOperand(2);
EVT DataVT = Data.getValueType();
EVT HwSrcVt = getSVEContainerType(DataVT);
SDValue InputVT = DAG.getValueType(DataVT);
if (DataVT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (DataVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
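// Floating-point data is bitcast to the integer container type; integer data
// is any-extended to fill the container.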
SDValue SrcNew;
if (Data.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
SDValue Ops[] = { N->getOperand(0), // Chain
SrcNew,
N->getOperand(4), // Base
N->getOperand(3), // Pg
InputVT
};
return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
}
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Data = N->getOperand(2);
EVT DataVT = Data.getValueType();
EVT PtrTy = N->getOperand(4).getValueType();
if (DataVT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (DataVT.isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
auto *MINode = cast<MemIntrinsicSDNode>(N);
return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
DAG.getUNDEF(PtrTy), MINode->getOperand(3),
MINode->getMemoryVT(), MINode->getMemOperand(),
ISD::UNINDEXED, false, false);
}
/// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
/// load store optimizer pass will merge them to store pair stores. This should
/// be better than a movi to create the vector zero followed by a vector store
/// if the zero constant is not re-used, since one instruction and one register
/// live range will be removed.
///
/// For example, the final generated code should be:
///
/// stp xzr, xzr, [x0]
///
/// instead of:
///
/// movi v0.2d, #0
/// str q0, [x0]
///
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Avoid scalarizing zero splat stores for scalable vectors.
if (VT.isScalableVector())
return SDValue();
// It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
// 2, 3 or 4 i32 elements.
int NumVecElts = VT.getVectorNumElements();
if (!(((NumVecElts == 2 || NumVecElts == 3) &&
VT.getVectorElementType().getSizeInBits() == 64) ||
((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
VT.getVectorElementType().getSizeInBits() == 32)))
return SDValue();
if (StVal.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
// If the zero constant has more than one use then the vector store could be
// better since the constant mov will be amortized and stp q instructions
// should be able to be formed.
if (!StVal.hasOneUse())
return SDValue();
// If the store is truncating then it's going down to i16 or smaller, which
// means it can be implemented in a single store anyway.
if (St.isTruncatingStore())
return SDValue();
// If the immediate offset of the address operand is too large for the stp
// instruction, then bail out.
if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
if (Offset < -512 || Offset > 504)
return SDValue();
}
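// Every element of the build_vector must be a zero constant (integer or FP).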
for (int I = 0; I < NumVecElts; ++I) {
SDValue EltVal = StVal.getOperand(I);
if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
return SDValue();
}
// Use a CopyFromReg WZR/XZR here to prevent
// DAGCombiner::MergeConsecutiveStores from undoing this transformation.
SDLoc DL(&St);
unsigned ZeroReg;
EVT ZeroVT;
if (VT.getVectorElementType().getSizeInBits() == 32) {
ZeroReg = AArch64::WZR;
ZeroVT = MVT::i32;
} else {
ZeroReg = AArch64::XZR;
ZeroVT = MVT::i64;
}
SDValue SplatVal =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
/// Replace a splat of a scalar to a vector store by scalar stores of the scalar
/// value. The load store optimizer pass will merge them to store pair stores.
/// This has better performance than a splat of the scalar followed by a split
/// vector store. Even if the stores are not merged it is four stores vs a dup,
/// followed by an ext.b and two stores.
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Don't replace floating point stores, they possibly won't be transformed to
// stp because of the store pair suppress pass.
if (VT.isFloatingPoint())
return SDValue();
// We can express a splat as store pair(s) for 2 or 4 elements.
unsigned NumVecElts = VT.getVectorNumElements();
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
// If the store is truncating then it's going down to i16 or smaller, which
// means it can be implemented in a single store anyway.
if (St.isTruncatingStore())
return SDValue();
// Check that this is a splat.
// Make sure that each of the relevant vector element locations are inserted
// to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
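// We track this with a bitset: start with the low NumVecElts bits set and
// clear the bit for each lane an insert is found for.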
std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
SDValue SplatVal;
for (unsigned I = 0; I < NumVecElts; ++I) {
// Check for insert vector elements.
if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
// Check that same value is inserted at each vector element.
if (I == 0)
SplatVal = StVal.getOperand(1);
else if (StVal.getOperand(1) != SplatVal)
return SDValue();
// Check insert element index.
ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
if (!CIndex)
return SDValue();
uint64_t IndexVal = CIndex->getZExtValue();
if (IndexVal >= NumVecElts)
return SDValue();
IndexNotInserted.reset(IndexVal);
StVal = StVal.getOperand(0);
}
// Check that all vector element locations were inserted to.
if (IndexNotInserted.any())
return SDValue();
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
StoreSDNode *S = cast<StoreSDNode>(N);
if (S->isVolatile() || S->isIndexed())
return SDValue();
SDValue StVal = S->getValue();
EVT VT = StVal.getValueType();
if (!VT.isFixedLengthVector())
return SDValue();
// If we get a splat of zeros, convert this vector store to a store of
// scalars. They will be merged into store pairs of xzr thereby removing one
// instruction and one register.
if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
return ReplacedZeroSplat;
// FIXME: The logic for deciding if an unaligned store should be split should
// be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
// a call to that function here.
if (!Subtarget->isMisaligned128StoreSlow())
return SDValue();
// Don't split at -Oz.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
// Don't split v2i64 vectors. Memcpy lowering produces those and splitting
// those up regresses performance on micro-benchmarks and olden/bh.
if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
return SDValue();
// Split unaligned 16B stores. They are terrible for performance.
// Don't split stores with alignment of 1 or 2. Code that uses clang vector
// extensions can use this to mark that it does not want splitting to happen
// (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
// eliminating alignment hazards is only 1 in 8 for alignment of 2.
if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
S->getAlignment() <= 2)
return SDValue();
// If we get a splat of a scalar convert this vector store to a store of
// scalars. They will be merged into store pairs thereby removing two
// instructions.
if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
return ReplacedSplat;
SDLoc DL(S);
// Split VT into two.
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
unsigned NumElts = HalfVT.getVectorNumElements();
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(0, DL, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(NumElts, DL, MVT::i64));
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
S->getAlignment(), S->getMemOperand()->getFlags());
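// The second half of the 128-bit vector starts 8 bytes past the base pointer.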
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(8, DL, MVT::i64));
return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
S->getPointerInfo(), S->getAlignment(),
S->getMemOperand()->getFlags());
}
static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
// splice(pg, op1, undef) -> op1
if (N->getOperand(2).isUndef())
return N->getOperand(1);
return SDValue();
}
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
EVT ResVT = N->getValueType(0);
// uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
SDValue X = Op0.getOperand(0).getOperand(0);
return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
}
}
// uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
SDValue Z = Op1.getOperand(0).getOperand(1);
return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
}
}
return SDValue();
}
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
(Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
"Invalid opcode.");
const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Pg = N->getOperand(1);
SDValue Base = N->getOperand(2);
SDValue Offset = N->getOperand(3);
SDValue Ty = N->getOperand(4);
EVT ResVT = N->getValueType(0);
const auto OffsetOpc = Offset.getOpcode();
const bool OffsetIsZExt =
OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
const bool OffsetIsSExt =
OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
// Fold sign/zero extensions of vector offsets into GLD1 nodes where possible.
if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
SDValue ExtPg = Offset.getOperand(0);
VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
// If the predicate for the sign- or zero-extended offset is the
// same as the predicate used for this load and the sign-/zero-extension
// was from 32 bits...
if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
SDValue UnextendedOffset = Offset.getOperand(1);
unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
if (Signed)
NewOpc = getSignExtendedGatherOpcode(NewOpc);
return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
{Chain, Pg, Base, UnextendedOffset, Ty});
}
}
return SDValue();
}
/// Optimize a vector shift instruction and its operand if shifted out
/// bits are not used.
static SDValue performVectorShiftCombine(SDNode *N,
const AArch64TargetLowering &TLI,
TargetLowering::DAGCombinerInfo &DCI) {
assert(N->getOpcode() == AArch64ISD::VASHR ||
N->getOpcode() == AArch64ISD::VLSHR);
SDValue Op = N->getOperand(0);
unsigned OpScalarSize = Op.getScalarValueSizeInBits();
unsigned ShiftImm = N->getConstantOperandVal(1);
assert(OpScalarSize > ShiftImm && "Invalid shift imm");
APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
APInt DemandedMask = ~ShiftedOutBits;
if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
return SDValue(N, 0);
return SDValue();
}
/// Target-specific DAG combine function for post-increment LD1 (lane) and
/// post-increment LD1R.
static SDValue performPostLD1Combine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
bool IsLaneOp) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (VT.isScalableVector())
return SDValue();
unsigned LoadIdx = IsLaneOp ? 1 : 0;
SDNode *LD = N->getOperand(LoadIdx).getNode();
// If it is not a LOAD, we cannot do this combine.
if (LD->getOpcode() != ISD::LOAD)
return SDValue();
// The vector lane must be a constant in the LD1LANE opcode.
SDValue Lane;
if (IsLaneOp) {
Lane = N->getOperand(2);
auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
}
LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
EVT MemVT = LoadSDN->getMemoryVT();
// Check if memory operand is the same type as the vector element.
if (MemVT != VT.getVectorElementType())
return SDValue();
// Check if there are other uses. If so, do not combine as it will introduce
// an extra load.
for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
++UI) {
if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
continue;
if (*UI != N)
return SDValue();
}
SDValue Addr = LD->getOperand(1);
SDValue Vector = N->getOperand(0);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD
|| UI.getUse().getResNo() != Addr.getResNo())
continue;
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
unsigned NumBytes = VT.getScalarSizeInBits() / 8;
if (IncVal != NumBytes)
continue;
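// An increment equal to the access size is represented with XZR, which lets
// isel select the immediate post-indexed form.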
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
// To avoid cycle construction make sure that neither the load nor the add
// are predecessors to each other or the Vector.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(Addr.getNode());
Worklist.push_back(User);
Worklist.push_back(LD);
Worklist.push_back(Vector.getNode());
if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
SmallVector<SDValue, 8> Ops;
Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
Ops.push_back(Vector); // The vector to be inserted
Ops.push_back(Lane); // The lane to be inserted in the vector
}
Ops.push_back(Addr);
Ops.push_back(Inc);
EVT Tys[3] = { VT, MVT::i64, MVT::Other };
SDVTList SDTys = DAG.getVTList(Tys);
unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
MemVT,
LoadSDN->getMemOperand());
// Update the uses.
SDValue NewResults[] = {
SDValue(LD, 0), // The result of load
SDValue(UpdN.getNode(), 2) // Chain
};
DCI.CombineTo(LD, NewResults);
DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
break;
}
return SDValue();
}
/// Simplify ``Addr`` given that the top byte of it is ignored by HW during
/// address translation.
static bool performTBISimplification(SDValue Addr,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
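// With top-byte-ignore only the low 56 bits of the address are demanded, so
// try to simplify the address computation under that mask.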
APInt DemandedMask = APInt::getLowBitsSet(64, 56);
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
return true;
}
return false;
}
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
"Expected STORE dag node in input!");
if (auto Store = dyn_cast<StoreSDNode>(N)) {
if (!Store->isTruncatingStore() || Store->isIndexed())
return SDValue();
SDValue Ext = Store->getValue();
auto ExtOpCode = Ext.getOpcode();
if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
ExtOpCode != ISD::ANY_EXTEND)
return SDValue();
SDValue Orig = Ext->getOperand(0);
if (Store->getMemoryVT() != Orig->getValueType(0))
return SDValue();
return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
Store->getBasePtr(), Store->getPointerInfo(),
Store->getAlign());
}
return SDValue();
}
static SDValue performSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
if (Subtarget->supportsAddressTopByteIgnored() &&
performTBISimplification(N->getOperand(2), DCI, DAG))
return SDValue(N, 0);
if (SDValue Store = foldTruncStoreOfExt(DAG, N))
return Store;
return SDValue();
}
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
static SDValue performNEONPostLDSTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
unsigned AddrOpIdx = N->getNumOperands() - 1;
SDValue Addr = N->getOperand(AddrOpIdx);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD ||
UI.getUse().getResNo() != Addr.getResNo())
continue;
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(Addr.getNode());
Worklist.push_back(N);
Worklist.push_back(User);
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
bool IsStore = false;
bool IsLaneOp = false;
bool IsDupOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
NumVecs = 2; IsStore = true; break;
case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
NumVecs = 3; IsStore = true; break;
case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
NumVecs = 4; IsStore = true; break;
case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
NumVecs = 2; IsStore = true; break;
case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
NumVecs = 3; IsStore = true; break;
case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
NumVecs = 4; IsStore = true; break;
case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
NumVecs = 2; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
NumVecs = 3; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
NumVecs = 4; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
NumVecs = 2; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
NumVecs = 3; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
NumVecs = 4; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
NumVecs = 2; IsStore = true; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
NumVecs = 3; IsStore = true; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
NumVecs = 4; IsStore = true; IsLaneOp = true; break;
}
EVT VecTy;
if (IsStore)
VecTy = N->getOperand(2).getValueType();
else
VecTy = N->getValueType(0);
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (IsLaneOp || IsDupOp)
NumBytes /= VecTy.getVectorNumElements();
if (IncVal != NumBytes)
continue;
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // Incoming chain
// Lane operations and stores have a vector list as input.
if (IsLaneOp || IsStore)
for (unsigned i = 2; i < AddrOpIdx; ++i)
Ops.push_back(N->getOperand(i));
Ops.push_back(Addr); // Base register
Ops.push_back(Inc);
// Return Types.
EVT Tys[6];
unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = VecTy;
Tys[n++] = MVT::i64; // Type of write back register
Tys[n] = MVT::Other; // Type of the chain
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
MemInt->getMemoryVT(),
MemInt->getMemOperand());
// Update the uses.
std::vector<SDValue> NewResults;
for (unsigned i = 0; i < NumResultVecs; ++i) {
NewResults.push_back(SDValue(UpdN.getNode(), i));
}
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
break;
}
return SDValue();
}
// Checks to see if the value is the prescribed width and returns information
// about its extension mode.
static
bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
ExtType = ISD::NON_EXTLOAD;
switch(V.getNode()->getOpcode()) {
default:
return false;
case ISD::LOAD: {
LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
|| (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
ExtType = LoadNode->getExtensionType();
return true;
}
return false;
}
case ISD::AssertSext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
if ((TypeNode->getVT() == MVT::i8 && width == 8)
|| (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::SEXTLOAD;
return true;
}
return false;
}
case ISD::AssertZext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
if ((TypeNode->getVT() == MVT::i8 && width == 8)
|| (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::ZEXTLOAD;
return true;
}
return false;
}
case ISD::Constant:
case ISD::TargetConstant: {
return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
1LL << (width - 1);
}
}
return true;
}
// This function does a whole lot of voodoo to determine if the tests are
// equivalent without and with a mask. Essentially what happens is that given a
// DAG resembling:
//
// +-------------+ +-------------+ +-------------+ +-------------+
// | Input | | AddConstant | | CompConstant| | CC |
// +-------------+ +-------------+ +-------------+ +-------------+
// | | | |
// V V | +----------+
// +-------------+ +----+ | |
// | ADD | |0xff| | |
// +-------------+ +----+ | |
// | | | |
// V V | |
// +-------------+ | |
// | AND | | |
// +-------------+ | |
// | | |
// +-----+ | |
// | | |
// V V V
// +-------------+
// | CMP |
// +-------------+
//
// The AND node may be safely removed for some combinations of inputs. In
// particular we need to take into account the extension type of the Input,
// the exact values of AddConstant, CompConstant, and CC, along with the nominal
// width of the input (this can work for any input width; the above graph is
// specific to 8 bits).
//
// The specific equations were worked out by generating output tables for each
// AArch64CC value in terms of AddConstant (w1) and CompConstant (w2). The
// problem was simplified by working with 4 bit inputs, which means we only
// needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
// extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
// patterns present in both extensions (0,7). For every distinct set of
// AddConstant and CompConstant bit patterns we can consider the masked and
// unmasked versions to be equivalent if the result of this function is true for
// all 16 distinct bit patterns for the current extension type of Input (w0).
//
// sub w8, w0, w1
// and w10, w8, #0x0f
// cmp w8, w2
// cset w9, AArch64CC
// cmp w10, w2
// cset w11, AArch64CC
// cmp w9, w11
// cset w0, eq
// ret
//
// Since the above function shows when the outputs are equivalent it defines
// when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
// would be expensive to run during compiles. The equations below were written
// in a test harness that confirmed they gave outputs equivalent to the above
// function for all inputs, so they can be used to determine if the removal is
// legal instead.
//
// isEquivalentMaskless() is the code for testing if the AND can be removed,
// factored out of the DAG recognition since the DAG can take several forms.
static bool isEquivalentMaskless(unsigned CC, unsigned width,
ISD::LoadExtType ExtType, int AddConstant,
int CompConstant) {
// By being careful about our equations and only writing them in terms of
// symbolic values and well-known constants (0, 1, -1, MaxUInt) we can
// make them generally applicable to all bit widths.
int MaxUInt = (1 << width);
// For the purposes of these comparisons sign extending the type is
// equivalent to zero extending the add and displacing it by half the integer
// width. Provided we are careful and make sure our equations are valid over
// the whole range we can just adjust the input and avoid writing equations
// for sign extended inputs.
if (ExtType == ISD::SEXTLOAD)
AddConstant -= (1 << (width-1));
switch(CC) {
case AArch64CC::LE:
case AArch64CC::GT:
if ((AddConstant == 0) ||
(CompConstant == MaxUInt - 1 && AddConstant < 0) ||
(AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
return true;
break;
case AArch64CC::LT:
case AArch64CC::GE:
if ((AddConstant == 0) ||
(AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::HI:
case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant >= -1 &&
CompConstant < AddConstant + MaxUInt))
return true;
break;
case AArch64CC::PL:
case AArch64CC::MI:
if ((AddConstant == 0) ||
(AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::LO:
case AArch64CC::HS:
if ((AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant >= 0 &&
CompConstant <= AddConstant + MaxUInt))
return true;
break;
case AArch64CC::EQ:
case AArch64CC::NE:
if ((AddConstant > 0 && CompConstant < 0) ||
(AddConstant < 0 && CompConstant >= 0 &&
CompConstant < AddConstant + MaxUInt) ||
(AddConstant >= 0 && CompConstant >= 0 &&
CompConstant >= AddConstant) ||
(AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
return true;
break;
case AArch64CC::VS:
case AArch64CC::VC:
case AArch64CC::AL:
case AArch64CC::NV:
return true;
case AArch64CC::Invalid:
break;
}
return false;
}
static
SDValue performCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG, unsigned CCIndex,
unsigned CmpIndex) {
unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
unsigned CondOpcode = SubsNode->getOpcode();
if (CondOpcode != AArch64ISD::SUBS)
return SDValue();
// There is a SUBS feeding this condition. Is it fed by a mask we can
// use?
SDNode *AndNode = SubsNode->getOperand(0).getNode();
unsigned MaskBits = 0;
if (AndNode->getOpcode() != ISD::AND)
return SDValue();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
uint32_t CNV = CN->getZExtValue();
if (CNV == 255)
MaskBits = 8;
else if (CNV == 65535)
MaskBits = 16;
}
if (!MaskBits)
return SDValue();
SDValue AddValue = AndNode->getOperand(0);
if (AddValue.getOpcode() != ISD::ADD)
return SDValue();
// The basic dag structure is correct, grab the inputs and validate them.
SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
SDValue SubsInputValue = SubsNode->getOperand(1);
// The mask is present and the provenance of all the values is a smaller type,
// let's see if the mask is superfluous.
if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
!isa<ConstantSDNode>(SubsInputValue.getNode()))
return SDValue();
ISD::LoadExtType ExtType;
if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue1, MaskBits, ExtType) )
return SDValue();
if (!isEquivalentMaskless(CC, MaskBits, ExtType,
cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
return SDValue();
// The AND is not necessary, remove it.
SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
SubsNode->getValueType(1));
SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
return SDValue(N, 0);
}
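// As a sketch of the rewrite performed above (operand order simplified), a
// flag-setting node of the form
//   (SUBS (and (add x, C1), 0xff), C2)
// becomes
//   (SUBS (add x, C1), C2)
// once isEquivalentMaskless() proves that every condition read through
// CCIndex sees identical flags, so the masking AND becomes dead.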
// Optimize compare with zero and branch.
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
return SDValue();
if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
N = NV.getNode();
SDValue Chain = N->getOperand(0);
SDValue Dest = N->getOperand(1);
SDValue CCVal = N->getOperand(2);
SDValue Cmp = N->getOperand(3);
assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
return SDValue();
unsigned CmpOpc = Cmp.getOpcode();
if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
return SDValue();
// Only attempt folding if there is only one use of the flag and no use of the
// value.
if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
return SDValue();
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
assert(LHS.getValueType() == RHS.getValueType() &&
"Expected the value type to be the same for both operands!");
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return SDValue();
if (isNullConstant(LHS))
std::swap(LHS, RHS);
if (!isNullConstant(RHS))
return SDValue();
if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
LHS.getOpcode() == ISD::SRL)
return SDValue();
// Fold the compare into the branch instruction.
SDValue BR;
if (CC == AArch64CC::EQ)
BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
else
BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, BR, false);
return SDValue();
}
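// For example, assuming a value x whose only flag use is this branch,
//   (brcond (subs x, 0), eq)  is folded to  (cbz x, dest)
// and the NE form to (cbnz x, dest); the hasNUsesOfValue checks above make
// sure no other user still needs the value or the flags of the SUBS.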
// Optimize CSEL instructions
static SDValue performCSELCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// CSEL x, x, cc -> x
if (N->getOperand(0) == N->getOperand(1))
return N->getOperand(0);
return performCONDCombine(N, DCI, DAG, 2, 3);
}
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
// setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
if (Cond == ISD::SETNE && isOneConstant(RHS) &&
LHS->getOpcode() == AArch64ISD::CSEL &&
isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
LHS->hasOneUse()) {
SDLoc DL(N);
// Invert CSEL's condition.
auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
auto NewCond = getInvertedCondCode(OldCond);
// csel 0, 1, !cond, X
SDValue CSEL =
DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
LHS.getOperand(3));
return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
}
return SDValue();
}
static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
"Unexpected opcode!");
SDValue Pred = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
// setcc_merge_zero pred (sign_extend (setcc_merge_zero ... pred ...)), 0, ne
// => inner setcc_merge_zero
if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
LHS->getOpcode() == ISD::SIGN_EXTEND &&
LHS->getOperand(0)->getValueType(0) == N->getValueType(0) &&
LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
LHS->getOperand(0)->getOperand(0) == Pred)
return LHS->getOperand(0);
return SDValue();
}
// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
// as well as whether the test should be inverted. This code is required to
// catch these cases (as opposed to standard dag combines) because
// AArch64ISD::TBZ is matched during legalization.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
SelectionDAG &DAG) {
if (!Op->hasOneUse())
return Op;
// We don't handle undef/constant-fold cases below, as they should have
// already been taken care of (e.g. and of 0, test of undefined shifted bits,
// etc.)
// (tbz (trunc x), b) -> (tbz x, b)
// This case is just here to enable more of the below cases to be caught.
if (Op->getOpcode() == ISD::TRUNCATE &&
Bit < Op->getValueType(0).getSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
// (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
if (Op->getOpcode() == ISD::ANY_EXTEND &&
Bit < Op->getOperand(0).getValueSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
if (Op->getNumOperands() != 2)
return Op;
auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!C)
return Op;
switch (Op->getOpcode()) {
default:
return Op;
// (tbz (and x, m), b) -> (tbz x, b)
case ISD::AND:
if ((C->getZExtValue() >> Bit) & 1)
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
return Op;
// (tbz (shl x, c), b) -> (tbz x, b-c)
case ISD::SHL:
if (C->getZExtValue() <= Bit &&
(Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit - C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
case ISD::SRA:
Bit = Bit + C->getZExtValue();
if (Bit >= Op->getValueType(0).getSizeInBits())
Bit = Op->getValueType(0).getSizeInBits() - 1;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
// (tbz (srl x, c), b) -> (tbz x, b+c)
case ISD::SRL:
if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit + C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (xor x, -1), b) -> (tbnz x, b)
case ISD::XOR:
if ((C->getZExtValue() >> Bit) & 1)
Invert = !Invert;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
}
// Optimize test single bit zero/non-zero and branch.
static SDValue performTBZCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
bool Invert = false;
SDValue TestSrc = N->getOperand(1);
SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
if (TestSrc == NewTestSrc)
return SDValue();
unsigned NewOpc = N->getOpcode();
if (Invert) {
if (NewOpc == AArch64ISD::TBZ)
NewOpc = AArch64ISD::TBNZ;
else {
assert(NewOpc == AArch64ISD::TBNZ);
NewOpc = AArch64ISD::TBZ;
}
}
SDLoc DL(N);
return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
}
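// For example, starting from (tbz (srl (and x, 0xff), 2), 1), the walk above
// first moves through the SRL (bit 1 becomes bit 3) and then through the AND
// (bit 3 of 0xff is set), leaving a plain (tbz x, 3) with no extra
// instructions emitted.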
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
// condition. If it can legalize "VSELECT v1i1" correctly, there is no need to
// combine such a VSELECT.
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
EVT CCVT = N0.getValueType();
// Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
SDValue SetCC = N->getOperand(0);
if (SetCC.getOpcode() == ISD::SETCC &&
SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
SDValue CmpLHS = SetCC.getOperand(0);
EVT VT = CmpLHS.getValueType();
SDNode *CmpRHS = SetCC.getOperand(1).getNode();
SDNode *SplatLHS = N->getOperand(1).getNode();
SDNode *SplatRHS = N->getOperand(2).getNode();
APInt SplatLHSVal;
if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
VT.isSimple() &&
is_contained(
makeArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v2i64}),
VT.getSimpleVT().SimpleTy) &&
ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
SplatLHSVal.isOneValue() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(
NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
VT.getScalarType()));
SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
return Or;
}
}
if (N0.getOpcode() != ISD::SETCC ||
CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
CCVT.getVectorElementType() != MVT::i1)
return SDValue();
EVT ResVT = N->getValueType(0);
EVT CmpVT = N0.getOperand(0).getValueType();
// Only combine when the result type is of the same size as the compared
// operands.
if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
return SDValue();
SDValue IfTrue = N->getOperand(1);
SDValue IfFalse = N->getOperand(2);
SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
IfTrue, IfFalse);
}
/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
/// the compare-mask instructions rather than going via NZCV, even if LHS and
/// RHS are really scalar. This replaces any scalar setcc in the above pattern
/// with a vector one followed by a DUP shuffle on the result.
static SDValue performSelectCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
if (N0.getOpcode() != ISD::SETCC)
return SDValue();
if (ResVT.isScalableVector())
return SDValue();
// Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
// scalar SetCCResultType. We also don't expect vectors, because we assume
// that selects fed by vector SETCCs are canonicalized to VSELECT.
assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
"Scalar-SETCC feeding SELECT has unexpected result type!");
// If NumMaskElts == 0, the comparison is larger than the select result. The
// largest real NEON comparison is 64 bits per lane, which means the result is
// at most 32 bits and an illegal vector. Just bail out for now.
EVT SrcVT = N0.getOperand(0).getValueType();
// Don't try to do this optimization when the setcc itself has i1 operands.
// There are no legal vectors of i1, so this would be pointless.
if (SrcVT == MVT::i1)
return SDValue();
int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
if (!ResVT.isVector() || NumMaskElts == 0)
return SDValue();
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
// Also bail out if the vector CCVT isn't the same size as ResVT.
// This can happen if the SETCC operand size doesn't divide the ResVT size
// (e.g., f64 vs v3f32).
if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
return SDValue();
// Make sure we didn't create illegal types, if we're not supposed to.
assert(DCI.isBeforeLegalize() ||
DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
SDValue LHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
SDValue RHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
// Now duplicate the comparison mask we want across all other lanes.
SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
Mask = DAG.getNode(ISD::BITCAST, DL,
ResVT.changeVectorElementTypeToInteger(), Mask);
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N) {
if (N->getValueType(0) == N->getOperand(0).getValueType())
return N->getOperand(0);
return SDValue();
}
// If all users of the globaladdr are of the form (globaladdr + constant), find
// the smallest constant, fold it into the globaladdr's offset and rewrite the
// globaladdr as (globaladdr + constant) - constant.
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget,
const TargetMachine &TM) {
auto *GN = cast<GlobalAddressSDNode>(N);
if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
AArch64II::MO_NO_FLAG)
return SDValue();
uint64_t MinOffset = -1ull;
for (SDNode *N : GN->uses()) {
if (N->getOpcode() != ISD::ADD)
return SDValue();
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
if (!C)
C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
MinOffset = std::min(MinOffset, C->getZExtValue());
}
uint64_t Offset = MinOffset + GN->getOffset();
// Require that the new offset is larger than the existing one. Otherwise, we
// can end up oscillating between two possible DAGs, for example,
// (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
if (Offset <= uint64_t(GN->getOffset()))
return SDValue();
// Check whether folding this offset is legal. It must not go out of bounds of
// the referenced object to avoid violating the code model, and must be
// smaller than 2^21 because this is the largest offset expressible in all
// object formats.
//
// This check also prevents us from folding negative offsets, which will end
// up being treated in the same way as large positive ones. They could also
// cause code model violations, and aren't really common enough to matter.
if (Offset >= (1 << 21))
return SDValue();
const GlobalValue *GV = GN->getGlobal();
Type *T = GV->getValueType();
if (!T->isSized() ||
Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
return SDValue();
SDLoc DL(GN);
SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
DAG.getConstant(MinOffset, DL, MVT::i64));
}
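// For example (offsets chosen for exposition), if a globaladdr G with offset
// 0 is used only as (add G, 8) and (add G, 12), then MinOffset == 8 and G is
// rewritten as (sub (globaladdr G + 8), 8); generic constant folding then
// turns the first user into the globaladdr itself and the second into
// (add (globaladdr G + 8), 4).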
// Turns the vector of indices into a vector of byte offsets by scaling Offset
// by (BitWidth / 8).
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
SDLoc DL, unsigned BitWidth) {
assert(Offset.getValueType().isScalableVector() &&
"This method is only for scalable vectors of offsets");
SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
}
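// For example, with BitWidth == 32 the splatted shift amount is
// Log2_32(32 / 8) == 2, so each index i becomes the byte offset i * 4.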
/// Check if the value of \p OffsetInBytes can be used as an immediate for
/// the gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
unsigned ScalarSizeInBytes) {
// The immediate is not a multiple of the scalar size.
if (OffsetInBytes % ScalarSizeInBytes)
return false;
// The immediate is out of range.
if (OffsetInBytes / ScalarSizeInBytes > 31)
return false;
return true;
}
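// For example, with ScalarSizeInBytes == 4 the valid immediates are
// 0, 4, 8, ..., 124, i.e. the multiples of 4 up to 31 * 4.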
/// Check if the value of \p Offset represents a valid immediate for the SVE
/// gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
unsigned ScalarSizeInBytes) {
ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
return OffsetConst && isValidImmForSVEVecImmAddrMode(
OffsetConst->getZExtValue(), ScalarSizeInBytes);
}
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const SDValue Src = N->getOperand(2);
const EVT SrcVT = Src->getValueType(0);
assert(SrcVT.isScalableVector() &&
"Scatter stores are only possible for SVE vectors");
SDLoc DL(N);
MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
// Make sure that source data will fit into an SVE register
if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// For FPs, ACLE only supports _packed_ single and double precision types.
if (SrcElVT.isFloatingPoint())
if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
SDValue Base = N->getOperand(4);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal scatters because there's no instruction that takes
// indicies.
if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
Offset =
getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
Opcode = AArch64ISD::SSTNT1_PRED;
}
// In the case of non-temporal scatter stores there's only one SVE instruction
// per data-size: "scalar + vector", i.e.
// * stnt1{b|h|w|d} { z0.s }, p0, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
std::swap(Base, Offset);
// SST1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the stored items. For
// immediates outside that range and non-immediate scalar offsets use SST1 or
// SST1_UXTW instead.
if (Opcode == AArch64ISD::SST1_IMM_PRED) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
SrcVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = AArch64ISD::SST1_UXTW_PRED;
else
Opcode = AArch64ISD::SST1_PRED;
std::swap(Base, Offset);
}
}
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some scatter store variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
if (!TLI.isTypeLegal(Offset.getValueType()))
return SDValue();
// Source value type that is representable in hardware
EVT HwSrcVt = getSVEContainerType(SrcVT);
// Keep the original type of the input data to store - this is needed to be
// able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
// FP values we want the integer equivalent, so just use HwSrcVt.
SDValue InputVT = DAG.getValueType(SrcVT);
if (SrcVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue SrcNew;
if (Src.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
SDValue Ops[] = {N->getOperand(0), // Chain
SrcNew,
N->getOperand(3), // Pg
Base,
Offset,
InputVT};
return DAG.getNode(Opcode, DL, VTs, Ops);
}
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const EVT RetVT = N->getValueType(0);
assert(RetVT.isScalableVector() &&
"Gather loads are only possible for SVE vectors");
SDLoc DL(N);
// Make sure that the loaded data will fit into an SVE register
if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
SDValue Base = N->getOperand(3);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(4);
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal gathers because there's no instruction that takes
// indicies.
if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
RetVT.getScalarSizeInBits());
Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
}
// In the case of non-temporal gather loads there's only one SVE instruction
// per data-size: "scalar + vector", i.e.
// * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
Offset.getValueType().isVector())
std::swap(Base, Offset);
// GLD{FF}1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the loaded items. For
// immediates outside that range and non-immediate scalar offsets use
// GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
RetVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
? AArch64ISD::GLD1_UXTW_MERGE_ZERO
: AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
else
Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
? AArch64ISD::GLD1_MERGE_ZERO
: AArch64ISD::GLDFF1_MERGE_ZERO;
std::swap(Base, Offset);
}
}
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some gather load variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
// Return value type that is representable in hardware
EVT HwRetVt = getSVEContainerType(RetVT);
// Keep the original output value type around - this is needed to be able to
// select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
// values we want the integer equivalent, so just use HwRetVt.
SDValue OutVT = DAG.getValueType(RetVT);
if (RetVT.isFloatingPoint())
OutVT = DAG.getValueType(HwRetVt);
SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
SDValue Ops[] = {N->getOperand(0), // Chain
N->getOperand(2), // Pg
Base, Offset, OutVT};
SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (RetVT.isInteger() && (RetVT != HwRetVt))
Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
// If the original return value was FP, bitcast accordingly. Doing it here
// means that we can avoid adding TableGen patterns for FPs.
if (RetVT.isFloatingPoint())
Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue
performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
// Sign extend of an unsigned unpack -> signed unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
: AArch64ISD::SUNPKLO;
// Push the sign extend to the operand of the unpack
// This is necessary where, for example, the operand of the unpack
// is another unpack:
// 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
// ->
// 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8)
// ->
// 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
SDValue ExtOp = Src->getOperand(0);
auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT EltTy = VT.getVectorElementType();
(void)EltTy;
assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
"Sign extending from an invalid type");
EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
ExtOp, DAG.getValueType(ExtVT));
return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
}
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (!EnableCombineMGatherIntrinsics)
return SDValue();
// SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
// for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
unsigned NewOpc;
unsigned MemVTOpNum = 4;
switch (Opc) {
case AArch64ISD::LD1_MERGE_ZERO:
NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::LDNF1_MERGE_ZERO:
NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::LDFF1_MERGE_ZERO:
NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::GLD1_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
break;
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
break;
case AArch64ISD::GLDNT1_MERGE_ZERO:
NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
break;
default:
return SDValue();
}
EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
return SDValue();
EVT DstVT = N->getValueType(0);
SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
SmallVector<SDValue, 5> Ops;
for (unsigned I = 0; I < Src->getNumOperands(); ++I)
Ops.push_back(Src->getOperand(I));
SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
DCI.CombineTo(N, ExtLoad);
DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
// Return N so it doesn't get rechecked
return SDValue(N, 0);
}
/// Legalize the gather prefetch (scalar + vector addressing mode) when the
/// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
/// != nxv2i32) do not need legalization.
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
const unsigned OffsetPos = 4;
SDValue Offset = N->getOperand(OffsetPos);
// Not an unpacked vector, bail out.
if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
return SDValue();
// Extend the unpacked offset vector to 64-bit lanes.
SDLoc DL(N);
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
// Replace the offset operand with the 64-bit one.
Ops[OffsetPos] = Offset;
return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
/// Combines a node carrying the intrinsic
/// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
/// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
/// SVE gather prefetch instruction with vector plus immediate addressing mode.
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
unsigned ScalarSizeInBytes) {
const unsigned ImmPos = 4, OffsetPos = 3;
// No need to combine the node if the immediate is valid...
if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
return SDValue();
// ...otherwise swap the offset base with the offset...
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
std::swap(Ops[ImmPos], Ops[OffsetPos]);
// ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
// `aarch64_sve_prfb_gather_uxtw_index`.
SDLoc DL(N);
Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
MVT::i64);
return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
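// For example (argument order shown only for illustration), a prefetch such
// as @llvm.aarch64.sve.prfw.gather.scalar.offset(pg, zbases, 3, prfop) has an
// offset of 3, which is not a multiple of the 4-byte element size, so the
// node is rewritten above to aarch64_sve_prfb_gather_uxtw_index with the base
// and offset operands swapped.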
// Return true if the vector operation can guarantee only the first lane of its
// result contains data, with all bits in other lanes set to zero.
static bool isLanes1toNKnownZero(SDValue Op) {
switch (Op.getOpcode()) {
default:
return false;
case AArch64ISD::ANDV_PRED:
case AArch64ISD::EORV_PRED:
case AArch64ISD::FADDA_PRED:
case AArch64ISD::FADDV_PRED:
case AArch64ISD::FMAXNMV_PRED:
case AArch64ISD::FMAXV_PRED:
case AArch64ISD::FMINNMV_PRED:
case AArch64ISD::FMINV_PRED:
case AArch64ISD::ORV_PRED:
case AArch64ISD::SADDV_PRED:
case AArch64ISD::SMAXV_PRED:
case AArch64ISD::SMINV_PRED:
case AArch64ISD::UADDV_PRED:
case AArch64ISD::UMAXV_PRED:
case AArch64ISD::UMINV_PRED:
return true;
}
}
static SDValue removeRedundantInsertVectorElt(SDNode *N) {
assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
SDValue InsertVec = N->getOperand(0);
SDValue InsertElt = N->getOperand(1);
SDValue InsertIdx = N->getOperand(2);
// We only care about inserts into the first element...
if (!isNullConstant(InsertIdx))
return SDValue();
// ...of a zero'd vector...
if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
return SDValue();
// ...where the inserted data was previously extracted...
if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue ExtractVec = InsertElt.getOperand(0);
SDValue ExtractIdx = InsertElt.getOperand(1);
// ...from the first element of a vector.
if (!isNullConstant(ExtractIdx))
return SDValue();
// If we get here we are effectively trying to zero lanes 1-N of a vector.
// Ensure there's no type conversion going on.
if (N->getValueType(0) != ExtractVec.getValueType())
return SDValue();
if (!isLanes1toNKnownZero(ExtractVec))
return SDValue();
// The explicit zeroing is redundant.
return ExtractVec;
}
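// For example, a pattern such as
//   (insert_vector_elt (splat 0), (extract_vector_elt (UADDV_PRED pg, z), 0), 0)
// is replaced by the UADDV_PRED node itself: per isLanes1toNKnownZero above,
// the predicated reduction already leaves lanes 1..N zeroed, so the explicit
// zeroing buys nothing.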
static SDValue
performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
if (SDValue Res = removeRedundantInsertVectorElt(N))
return Res;
return performPostLD1Combine(N, DCI, true);
}
SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
EVT Ty = N->getValueType(0);
if (Ty.isInteger())
return SDValue();
EVT IntTy = Ty.changeVectorElementTypeToInteger();
EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
IntTy.getVectorElementType().getScalarSizeInBits())
return SDValue();
SDLoc DL(N);
SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
DL, ExtIntTy);
SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
DL, ExtIntTy);
SDValue Idx = N->getOperand(2);
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
return DAG.getBitcast(Ty, Trunc);
}
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;
case ISD::ADD:
case ISD::SUB:
return performAddSubCombine(N, DCI, DAG);
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
return performMulCombine(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DCI);
case ISD::SRL:
return performSRLCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
return performExtendCombine(N, DCI, DAG);
case ISD::SIGN_EXTEND_INREG:
return performSignExtendInRegCombine(N, DCI, DAG);
case ISD::TRUNCATE:
return performVectorTruncateCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
case ISD::SETCC:
return performSETCCCombine(N, DAG);
case ISD::LOAD:
if (performTBISimplification(N->getOperand(1), DCI, DAG))
return SDValue(N, 0);
break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case ISD::VECTOR_SPLICE:
return performSVESpliceCombine(N, DAG);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
case AArch64ISD::TBZ:
return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::DUP:
return performPostLD1Combine(N, DCI, false);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
return performSpliceCombine(N, DAG);
case AArch64ISD::UZP1:
return performUzpCombine(N, DAG);
case AArch64ISD::SETCC_MERGE_ZERO:
return performSetccMergeZeroCombine(N, DAG);
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
case AArch64ISD::GLD1S_MERGE_ZERO:
case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
return performGLD1Combine(N, DAG);
case AArch64ISD::VASHR:
case AArch64ISD::VLSHR:
return performVectorShiftCombine(N, *this, DCI);
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DAG);
case ISD::VECREDUCE_ADD:
return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
return legalizeSVEGatherPrefetchOffsVec(N, DAG);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r:
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane:
return performNEONPostLDSTCombine(N, DCI, DAG);
case Intrinsic::aarch64_sve_ldnt1:
return performLDNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_ld1rq:
return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ld1ro:
return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1:
return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnf1:
return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1:
return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1:
return performST1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1:
return performSTNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_index:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
case Intrinsic::aarch64_sve_ld1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_sxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
case Intrinsic::aarch64_sve_st1_scatter_index:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
return performScatterStoreCombine(N, DAG,
AArch64ISD::SST1_SXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
return performScatterStoreCombine(N, DAG,
AArch64ISD::SST1_UXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
case Intrinsic::aarch64_sve_tuple_get: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Src1 = N->getOperand(2);
SDValue Idx = N->getOperand(3);
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
EVT ResVT = N->getValueType(0);
uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
SDValue Val =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
return DAG.getMergeValues({Val, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_set: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Tuple = N->getOperand(2);
SDValue Idx = N->getOperand(3);
SDValue Vec = N->getOperand(4);
EVT TupleVT = Tuple.getValueType();
uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
uint64_t NumLanes =
Vec.getValueType().getVectorElementCount().getKnownMinValue();
if ((TupleLanes % NumLanes) != 0)
report_fatal_error("invalid tuple vector!");
uint64_t NumVecs = TupleLanes / NumLanes;
SmallVector<SDValue, 4> Opnds;
for (unsigned I = 0; I < NumVecs; ++I) {
if (I == IdxConst)
Opnds.push_back(Vec);
else {
SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
Vec.getValueType(), Tuple, ExtIdx));
}
}
SDValue Concat =
DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
return DAG.getMergeValues({Concat, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_create2:
case Intrinsic::aarch64_sve_tuple_create3:
case Intrinsic::aarch64_sve_tuple_create4: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SmallVector<SDValue, 4> Opnds;
for (unsigned I = 2; I < N->getNumOperands(); ++I)
Opnds.push_back(N->getOperand(I));
EVT VT = Opnds[0].getValueType();
EVT EltVT = VT.getVectorElementType();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VT.getVectorElementCount() *
(N->getNumOperands() - 2));
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
return DAG.getMergeValues({Concat, Chain}, DL);
}
case Intrinsic::aarch64_sve_ld2:
case Intrinsic::aarch64_sve_ld3:
case Intrinsic::aarch64_sve_ld4: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Mask = N->getOperand(2);
SDValue BasePtr = N->getOperand(3);
SDValue LoadOps[] = {Chain, Mask, BasePtr};
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
SDValue Result =
LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
return DAG.getMergeValues({Result, Chain}, DL);
}
case Intrinsic::aarch64_rndr:
case Intrinsic::aarch64_rndrrs: {
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
auto Register =
(IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
: AArch64SysReg::RNDRRS);
SDLoc DL(N);
SDValue A = DAG.getNode(
AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
SDValue B = DAG.getNode(
AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
return DAG.getMergeValues(
{A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
}
default:
break;
}
break;
case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}
return SDValue();
}
// Check if the return value is used only as a return value, as otherwise
// we can't perform a tail-call. In particular, we need to check for
// target ISD nodes that are returns and any other "odd" constructs
// that the generic analysis code won't necessarily catch.
bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (SDNode *Node : Copy->uses()) {
if (Node->getOpcode() != AArch64ISD::RET_FLAG)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
// Return whether an instruction can potentially be optimized to a tail
// call. This will cause the optimizers to attempt to move, or duplicate,
// return instructions to help enable tail call optimizations for this
// instruction.
bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
bool &IsInc,
SelectionDAG &DAG) const {
if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
return false;
Base = Op->getOperand(0);
// All of the indexed addressing mode instructions take a signed
// 9 bit immediate offset.
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
int64_t RHSC = RHS->getSExtValue();
if (Op->getOpcode() == ISD::SUB)
RHSC = -(uint64_t)RHSC;
if (!isInt<9>(RHSC))
return false;
IsInc = (Op->getOpcode() == ISD::ADD);
Offset = Op->getOperand(1);
return true;
}
return false;
}
bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
} else
return false;
bool IsInc;
if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
return false;
AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
return true;
}
bool AArch64TargetLowering::getPostIndexedAddressParts(
SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
} else
return false;
bool IsInc;
if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
return false;
// Post-indexing updates the base, so it's not a valid transform
// if that's not the same as the load's pointer.
if (Ptr != Base)
return false;
AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}
void AArch64TargetLowering::ReplaceBITCASTResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
SDValue Op = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = Op.getValueType();
if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"Expected fp->int bitcast!");
SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
return;
}
if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
return;
Op = SDValue(
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
DAG.getUNDEF(MVT::i32), Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
static void ReplaceReductionResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned InterOp,
unsigned AcrossOp) {
EVT LoVT, HiVT;
SDValue Lo, Hi;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
Results.push_back(SplitVal);
}
static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
DAG.getNode(ISD::SRL, DL, MVT::i128, N,
DAG.getConstant(64, DL, MVT::i64)));
return std::make_pair(Lo, Hi);
}
void AArch64TargetLowering::ReplaceExtractSubVectorResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
// Common code will handle these just fine.
if (!InVT.isScalableVector() || !InVT.isInteger())
return;
SDLoc DL(N);
EVT VT = N->getValueType(0);
// The following checks bail if this is not a halving operation.
ElementCount ResEC = VT.getVectorElementCount();
if (InVT.getVectorElementCount() != (ResEC * 2))
return;
auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!CIndex)
return;
unsigned Index = CIndex->getZExtValue();
if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
return;
unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
}
// Create an even/odd pair of X registers holding integer value V.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
dl, MVT::i64);
if (DAG.getDataLayout().isBigEndian())
std::swap (VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
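// For example, an i128 value V is split into lo = trunc(V) and
// hi = trunc(V >> 64), placed into the sube64/subo64 subregisters of a
// REG_SEQUENCE (swapped on big-endian), which is the even/odd X register
// pair layout expected by the CASP instructions used below.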
static void ReplaceCMP_SWAP_128Results(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
assert(N->getValueType(0) == MVT::i128 &&
"AtomicCmpSwap on types less than 128 should be legal");
MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
// LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
// so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
SDValue Ops[] = {
createGPRPairNode(DAG, N->getOperand(2)), // Compare value
createGPRPairNode(DAG, N->getOperand(3)), // Store value
N->getOperand(1), // Ptr
N->getOperand(0), // Chain in
};
unsigned Opcode;
switch (MemOp->getMergedOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CASPX;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CASPAX;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CASPLX;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CASPALX;
break;
default:
llvm_unreachable("Unexpected ordering!");
}
MachineSDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
DAG.setNodeMemRefs(CmpSwap, {MemOp});
unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
if (DAG.getDataLayout().isBigEndian())
std::swap(SubReg1, SubReg2);
SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0));
SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0));
Results.push_back(
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
Results.push_back(SDValue(CmpSwap, 1)); // Chain out
return;
}
unsigned Opcode;
switch (MemOp->getMergedOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CMP_SWAP_128_RELEASE;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CMP_SWAP_128;
break;
default:
llvm_unreachable("Unexpected ordering!");
}
auto Desired = splitInt128(N->getOperand(2), DAG);
auto New = splitInt128(N->getOperand(3), DAG);
SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
New.first, New.second, N->getOperand(0)};
SDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
Ops);
DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
Results.push_back(SDValue(CmpSwap, 3));
}
void AArch64TargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this");
case ISD::BITCAST:
ReplaceBITCASTResults(N, Results, DAG);
return;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
return;
case ISD::CTPOP:
if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
Results.push_back(Result);
return;
case AArch64ISD::SADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
return;
case AArch64ISD::UADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
return;
case AArch64ISD::SMINV:
ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
return;
case AArch64ISD::UMINV:
ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
return;
case AArch64ISD::SMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
return;
case AArch64ISD::UMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
// Let normal code take care of it by not adding anything to Results.
return;
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
case ISD::LOAD: {
assert(SDValue(N, 0).getValueType() == MVT::i128 &&
"unexpected load's value type");
LoadSDNode *LoadNode = cast<LoadSDNode>(N);
if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
// Non-volatile loads are optimized later in AArch64's load/store
// optimizer.
return;
}
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::LDP, SDLoc(N),
DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
{LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
LoadNode->getMemOperand());
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
Result.getValue(0), Result.getValue(1));
Results.append({Pair, Result.getValue(2) /* Chain */});
return;
}
case ISD::EXTRACT_SUBVECTOR:
ReplaceExtractSubVectorResults(N, Results, DAG);
return;
case ISD::INSERT_SUBVECTOR:
// Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
// to common code for result type legalisation.
return;
case ISD::INTRINSIC_WO_CHAIN: {
EVT VT = N->getValueType(0);
assert((VT == MVT::i8 || VT == MVT::i16) &&
"custom lowering for unexpected type");
ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default:
return;
case Intrinsic::aarch64_sve_clasta_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
N->getOperand(1), Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_clastb_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
N->getOperand(1), Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lasta: {
SDLoc DL(N);
auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lastb: {
SDLoc DL(N);
auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
}
}
}
}
bool AArch64TargetLowering::useLoadStackGuardNode() const {
if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
return TargetLowering::useLoadStackGuardNode();
return true;
}
unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are three or more FDIVs.
return 3;
}
TargetLoweringBase::LegalizeTypeAction
AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
// During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
// v4i16, v2i32 instead of to promote.
if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
VT == MVT::v1f32)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
// Loads and stores less than 128 bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
return Size == 128;
}
// Loads and stores less than 128 bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
if (Subtarget->hasLSE())
return AtomicExpansionKind::None;
if (Subtarget->outlineAtomics()) {
// [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
// Don't outline them unless
// (1) high level <atomic> support approved:
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
// (2) low level libgcc and compiler-rt support implemented by:
// min/max outline atomics helpers
if (AI->getOperation() != AtomicRMWInst::Min &&
AI->getOperation() != AtomicRMWInst::Max &&
AI->getOperation() != AtomicRMWInst::UMin &&
AI->getOperation() != AtomicRMWInst::UMax) {
return AtomicExpansionKind::None;
}
}
}
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement atomicrmw without spilling. If the target address is also on the
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::LLSC;
}
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// If subtarget has LSE, leave cmpxchg intact for codegen.
if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::None;
// 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
// it.
unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
if (Size > 64)
return AtomicExpansionKind::None;
return AtomicExpansionKind::LLSC;
}
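// Emit a load-linked for atomic expansion: LDXP/LDAXP for 128-bit values
// (the two halves are recombined into a single i128), otherwise LDXR/LDAXR
// of the appropriate width.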
Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
Type *ValueTy, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsAcquire = isAcquireOrStronger(Ord);
// Since i128 isn't legal and intrinsics don't get type-lowered, the ldxp
// intrinsic must return {i64, i64} and we have to recombine them into a
// single i128 here.
if (ValueTy->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
Function *Ldxr = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
}
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
return Builder.CreateBitCast(Trunc, ValueTy);
}
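// Clear the exclusive monitor (CLREX) on the path where a cmpxchg expansion
// skips the store-conditional.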
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilderBase &Builder) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}
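// Emit a store-conditional for atomic expansion: STXP/STLXP for 128-bit
// values (Val is split into two i64 halves), otherwise STXR/STLXR.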
Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsRelease = isReleaseOrStronger(Ord);
// Since the intrinsics must have legal type, the i128 intrinsics take two
// parameters: "i64, i64". We must marshal Val into the appropriate form
// before the call.
if (Val->getType()->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
Function *Stxr = Intrinsic::getDeclaration(M, Int);
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
Val = Builder.CreateBitCast(Val, IntValTy);
return Builder.CreateCall(Stxr,
{Builder.CreateZExtOrBitCast(
Val, Stxr->getFunctionType()->getParamType(0)),
Addr});
}
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const {
if (!Ty->isArrayTy()) {
const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
return TySize.isScalable() && TySize.getKnownMinSize() > 128;
}
// All non-aggregate members of the type must have the same type.
SmallVector<EVT> ValueVTs;
ComputeValueVTs(*this, DL, Ty, ValueVTs);
return is_splat(ValueVTs);
}
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
EVT) const {
return false;
}
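// Return an i8** pointing at the given byte offset from the thread pointer;
// used for the fixed Android/Fuchsia TLS slots below.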
static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
return IRB.CreatePointerCast(
IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
Offset),
IRB.getInt8PtrTy()->getPointerTo(0));
}
Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the stack cookie. See the definition
// of TLS_SLOT_STACK_GUARD in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x28);
// Fuchsia is similar.
// <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x10);
return TargetLowering::getIRStackGuard(IRB);
}
void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
Type::getInt8PtrTy(M.getContext()));
// MSVC CRT has a function to validate security cookie.
FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
"__security_check_cookie", Type::getVoidTy(M.getContext()),
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::Win64);
F->addAttribute(1, Attribute::AttrKind::InReg);
}
return;
}
TargetLowering::insertSSPDeclarations(M);
}
Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
// MSVC CRT has a global variable holding security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getGlobalVariable("__security_cookie");
return TargetLowering::getSDagStackGuard(M);
}
Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getFunction("__security_check_cookie");
return TargetLowering::getSSPStackGuardCheck(M);
}
Value *
AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x48);
// Fuchsia is similar.
// <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x8);
return TargetLowering::getSafeStackPointerLocation(IRB);
}
bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
// Only sink 'and' mask to cmp use block if it is masking a single bit, since
// this likely allows the and/cmp/br to be folded into a single tbz instruction. It
// may be beneficial to sink in other cases, but we would have to check that
// the cmp would not get folded into the br to form a cbz for these to be
// beneficial.
ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
return false;
return Mask->getValue().isPowerOf2();
}
bool AArch64TargetLowering::
shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const {
// Does baseline recommend not to perform the fold by default?
if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
return false;
// Else, if this is a vector shift, prefer 'shl'.
return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
}
bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
SDNode *N) const {
if (DAG.getMachineFunction().getFunction().hasMinSize() &&
!Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
return false;
return true;
}
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in AArch64FunctionInfo.
AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
AFI->setIsSplitCSR(true);
}
void AArch64TargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (AArch64::GPR64RegClass.contains(*I))
RC = &AArch64::GPR64RegClass;
else if (AArch64::FPR64RegClass.contains(*I))
RC = &AArch64::FPR64RegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
assert(Entry->getParent()->getFunction().hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on AArch64 is expensive. However, when aggressively
// optimizing for code size, we prefer to use a div instruction, as it is
// usually smaller than the alternative sequence.
// The exception to this is vector division. Since AArch64 doesn't have vector
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
return OptSize && !VT.isVector();
}
bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
// We want inc-of-add for scalars and sub-of-not for vectors.
return VT.isScalarInteger();
}
bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
}
unsigned
AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
return getPointerTy(DL).getSizeInBits();
return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
}
void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
MF.getFrameInfo().computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
// Unlike X86, we let frame lowering assign offsets to all catch objects.
bool AArch64TargetLowering::needsFixedCatchObjects() const {
return false;
}
bool AArch64TargetLowering::shouldLocalize(
const MachineInstr &MI, const TargetTransformInfo *TTI) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_GLOBAL_VALUE: {
// On Darwin, TLS global vars get selected into function calls, which
// we don't want localized, as they can get moved into the middle of
// another call sequence.
const GlobalValue &GV = *MI.getOperand(1).getGlobal();
if (GV.isThreadLocal() && Subtarget->isTargetMachO())
return false;
break;
}
// If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
// localizable.
case AArch64::ADRP:
case AArch64::G_ADD_LOW:
return true;
default:
break;
}
return TargetLoweringBase::shouldLocalize(MI, TTI);
}
bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
if (isa<ScalableVectorType>(Inst.getType()))
return true;
for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
return true;
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isa<ScalableVectorType>(AI->getAllocatedType()))
return true;
}
return false;
}
// Return the largest legal scalable vector type that matches VT's element type.
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for SVE container");
case MVT::i8:
return EVT(MVT::nxv16i8);
case MVT::i16:
return EVT(MVT::nxv8i16);
case MVT::i32:
return EVT(MVT::nxv4i32);
case MVT::i64:
return EVT(MVT::nxv2i64);
case MVT::f16:
return EVT(MVT::nxv8f16);
case MVT::f32:
return EVT(MVT::nxv4f32);
case MVT::f64:
return EVT(MVT::nxv2f64);
}
}
// Return a PTRUE with active lanes corresponding to the extent of VT.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
int PgPattern;
switch (VT.getVectorNumElements()) {
default:
llvm_unreachable("unexpected element count for SVE predicate");
case 1:
PgPattern = AArch64SVEPredPattern::vl1;
break;
case 2:
PgPattern = AArch64SVEPredPattern::vl2;
break;
case 4:
PgPattern = AArch64SVEPredPattern::vl4;
break;
case 8:
PgPattern = AArch64SVEPredPattern::vl8;
break;
case 16:
PgPattern = AArch64SVEPredPattern::vl16;
break;
case 32:
PgPattern = AArch64SVEPredPattern::vl32;
break;
case 64:
PgPattern = AArch64SVEPredPattern::vl64;
break;
case 128:
PgPattern = AArch64SVEPredPattern::vl128;
break;
case 256:
PgPattern = AArch64SVEPredPattern::vl256;
break;
}
// TODO: For vectors that are exactly getMaxSVEVectorSizeInBits big, we can
// use AArch64SVEPredPattern::all, which can enable the use of unpredicated
// variants of instructions when available.
MVT MaskVT;
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for SVE predicate");
case MVT::i8:
MaskVT = MVT::nxv16i1;
break;
case MVT::i16:
case MVT::f16:
MaskVT = MVT::nxv8i1;
break;
case MVT::i32:
case MVT::f32:
MaskVT = MVT::nxv4i1;
break;
case MVT::i64:
case MVT::f64:
MaskVT = MVT::nxv2i1;
break;
}
return DAG.getNode(AArch64ISD::PTRUE, DL, MaskVT,
DAG.getTargetConstant(PgPattern, DL, MVT::i64));
}
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal scalable vector!");
auto PredTy = VT.changeVectorElementType(MVT::i1);
return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
}
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
if (VT.isFixedLengthVector())
return getPredicateForFixedLengthVector(DAG, DL, VT);
return getPredicateForScalableVector(DAG, DL, VT);
}
// Grow V to consume an entire SVE register.
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
assert(VT.isScalableVector() &&
"Expected to convert into a scalable vector!");
assert(V.getValueType().isFixedLengthVector() &&
"Expected a fixed length vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
assert(VT.isFixedLengthVector() &&
"Expected to convert into a fixed length vector!");
assert(V.getValueType().isScalableVector() &&
"Expected a scalable vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
// Convert all fixed length vector loads larger than NEON to masked_loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<LoadSDNode>(Op);
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
auto NewLoad = DAG.getMaskedLoad(
ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
Load->getExtensionType());
auto Result = convertFromScalableVector(DAG, VT, NewLoad);
SDValue MergedValues[2] = {Result, Load->getChain()};
return DAG.getMergeValues(MergedValues, DL);
}
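// Convert a fixed-length vector mask into an SVE predicate by comparing the
// mask (widened to its scalable container) against zero.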
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG) {
SDLoc DL(Mask);
EVT InVT = Mask.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
auto Op2 = DAG.getConstant(0, DL, ContainerVT);
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
EVT CmpVT = Pg.getValueType();
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
{Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
}
// Convert all fixed length vector loads larger than NEON to masked_loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<MaskedLoadSDNode>(Op);
if (Load->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD)
return SDValue();
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);
SDValue PassThru;
bool IsPassThruZeroOrUndef = false;
if (Load->getPassThru()->isUndef()) {
PassThru = DAG.getUNDEF(ContainerVT);
IsPassThruZeroOrUndef = true;
} else {
if (ContainerVT.isInteger())
PassThru = DAG.getConstant(0, DL, ContainerVT);
else
PassThru = DAG.getConstantFP(0, DL, ContainerVT);
if (isZerosVector(Load->getPassThru().getNode()))
IsPassThruZeroOrUndef = true;
}
auto NewLoad = DAG.getMaskedLoad(
ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
Load->getAddressingMode(), Load->getExtensionType());
if (!IsPassThruZeroOrUndef) {
SDValue OldPassThru =
convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
NewLoad = DAG.getSelect(DL, ContainerVT, Mask, NewLoad, OldPassThru);
}
auto Result = convertFromScalableVector(DAG, VT, NewLoad);
SDValue MergedValues[2] = {Result, Load->getChain()};
return DAG.getMergeValues(MergedValues, DL);
}
// Convert all fixed length vector stores larger than NEON to masked_stores.
SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Store = cast<StoreSDNode>(Op);
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
return DAG.getMaskedStore(
Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
Store->getMemOperand(), Store->getAddressingMode(),
Store->isTruncatingStore());
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Store = cast<MaskedStoreSDNode>(Op);
if (Store->isTruncatingStore())
return SDValue();
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
return DAG.getMaskedStore(
Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
Mask, Store->getMemoryVT(), Store->getMemOperand(),
Store->getAddressingMode(), Store->isTruncatingStore());
}
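// Lower fixed-length vector SDIV/UDIV to SVE. i32/i64 elements map directly
// onto the predicated DIV; i8/i16 elements are widened to i32 (by extension
// or unpacking), divided, and narrowed back.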
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
// Scalable vector i32/i64 DIV is supported.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
// Scalable vector i8/i16 DIV is not supported. Promote it to i32.
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
// If this is not a full vector, extend, div, and truncate it.
EVT WidenedVT = VT.widenIntegerVectorElementType(*DAG.getContext());
if (DAG.getTargetLoweringInfo().isTypeLegal(WidenedVT)) {
unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(0));
SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(1));
SDValue Div = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0, Op1);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
}
// Convert the operands to scalable vectors.
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
// Extend the scalable operands.
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
// Convert back to fixed vectors so the DIV can be further lowered.
Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
Op0Hi, Op1Hi);
// Convert again to scalable vectors to truncate.
ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
ResultLo, ResultHi);
return convertFromScalableVector(DAG, VT, ScalableResult);
}
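// Lower a fixed-length vector sign/zero extension by repeatedly unpacking the
// low half (SUNPKLO/UUNPKLO) until the requested element width is reached.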
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
Val = convertToScalableVector(DAG, ContainerVT, Val);
bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
// Repeatedly unpack Val until the result is of the desired element type.
switch (ContainerVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unimplemented container type");
case MVT::nxv16i8:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
if (VT.getVectorElementType() == MVT::i16)
break;
LLVM_FALLTHROUGH;
case MVT::nxv8i16:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
if (VT.getVectorElementType() == MVT::i32)
break;
LLVM_FALLTHROUGH;
case MVT::nxv4i32:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
break;
}
return convertFromScalableVector(DAG, VT, Val);
}
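// Lower a fixed-length vector truncate by repeatedly halving the element
// width with UZP1 until the requested element type is reached.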
SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
Val = convertToScalableVector(DAG, ContainerVT, Val);
// Repeatedly truncate Val until the result is of the desired element type.
switch (ContainerVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unimplemented container type");
case MVT::nxv2i64:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
if (VT.getVectorElementType() == MVT::i32)
break;
LLVM_FALLTHROUGH;
case MVT::nxv4i32:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
if (VT.getVectorElementType() == MVT::i16)
break;
LLVM_FALLTHROUGH;
case MVT::nxv8i16:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
break;
}
return convertFromScalableVector(DAG, VT, Val);
}
SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT InVT = Op.getOperand(0).getValueType();
assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
EVT InVT = Op.getOperand(0).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
Op.getOperand(1), Op.getOperand(2));
return convertFromScalableVector(DAG, VT, ScalableRes);
}
// Convert vector operation 'Op' to an equivalent predicated operation whereby
// the original operation's type is used to construct a suitable predicate.
// NOTE: The results for inactive lanes are undefined.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SelectionDAG &DAG,
unsigned NewOp,
bool OverrideNEON) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
auto Pg = getPredicateForVector(DAG, DL, VT);
if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
if (isa<CondCodeSDNode>(V)) {
Operands.push_back(V);
continue;
}
if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
EVT VTArg = VTNode->getVT().getVectorElementType();
EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
Operands.push_back(DAG.getValueType(NewVTArg));
continue;
}
assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
"Only fixed length vectors are supported!");
Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(ContainerVT));
auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
assert((!V.getValueType().isVector() ||
V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");
Operands.push_back(V);
}
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(VT));
return DAG.getNode(NewOp, DL, VT, Operands);
}
// If a fixed length vector operation has no side effects when applied to
// undefined elements, we can safely use scalable vectors to perform the same
// operation without needing to worry about predication.
SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(useSVEForFixedLengthVectorVT(VT) &&
"Only expected to lower fixed length vector operation!");
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Ops;
for (const SDValue &V : Op->op_values()) {
assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
// Pass through non-vector operands.
if (!V.getValueType().isVector()) {
Ops.push_back(V);
continue;
}
// "cast" fixed length vector to a scalable vector.
assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
"Only fixed length vectors are supported!");
Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
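// Lower a strictly-ordered floating-point reduction (VECREDUCE_SEQ_FADD) to
// the predicated FADDA, with the accumulator inserted into lane 0 first.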
SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue AccOp = ScalarOp.getOperand(0);
SDValue VecOp = ScalarOp.getOperand(1);
EVT SrcVT = VecOp.getValueType();
EVT ResVT = SrcVT.getVectorElementType();
EVT ContainerVT = SrcVT;
if (SrcVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
// Convert operands to Scalable.
AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), AccOp, Zero);
// Perform reduction.
SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
Pg, AccOp, VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
}
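// Lower reductions of i1 (predicate) vectors: OR and AND are lowered to a
// PTEST, while XOR is computed via the aarch64_sve_cntp intrinsic.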
SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
SelectionDAG &DAG) const {
SDLoc DL(ReduceOp);
SDValue Op = ReduceOp.getOperand(0);
EVT OpVT = Op.getValueType();
EVT VT = ReduceOp.getValueType();
if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
return SDValue();
SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
switch (ReduceOp.getOpcode()) {
default:
return SDValue();
case ISD::VECREDUCE_OR:
return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
case ISD::VECREDUCE_AND: {
Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
}
case ISD::VECREDUCE_XOR: {
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
SDValue Cntp =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
}
}
return SDValue();
}
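// Lower an integer or floating-point vector reduction to the corresponding
// predicated SVE reduction and extract lane 0 of the result.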
SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();
if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
// UADDV always returns an i64 result.
EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
SrcVT.getVectorElementType();
EVT RdxVT = SrcVT;
if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
RdxVT = getPackedSVEVectorVT(ResVT);
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
Rdx, DAG.getConstant(0, DL, MVT::i64));
// The VEC_REDUCE nodes expect an element-sized result.
if (ResVT != ScalarOp.getValueType())
Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
return Res;
}
SDValue
AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
EVT InVT = Op.getOperand(1).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
// Convert the mask to a predicate (NOTE: We don't need to worry about
// inactive lanes since VSELECT is safe when given undefined elements).
EVT MaskVT = Op.getOperand(0).getValueType();
EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
Mask = DAG.getNode(ISD::TRUNCATE, DL,
MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
Mask, Op1, Op2);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getOperand(0).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
assert(useSVEForFixedLengthVectorVT(InVT) &&
"Only expected to lower fixed length vector operation!");
assert(Op.getValueType() == InVT.changeTypeToInteger() &&
"Expected integer result of the same bit length as the inputs!");
auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
EVT CmpVT = Pg.getValueType();
auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
{Pg, Op1, Op2, Op.getOperand(2)});
EVT PromoteVT = ContainerVT.changeTypeToInteger();
auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
return convertFromScalableVector(DAG, Op.getValueType(), Promote);
}
SDValue
AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
auto SrcOp = Op.getOperand(0);
EVT VT = Op.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT =
getContainerForFixedLengthVector(DAG, SrcOp.getValueType());
SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp);
Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
return convertFromScalableVector(DAG, VT, Op);
}
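// Lower CONCAT_VECTORS of fixed-length vectors: more than two operands are
// concatenated pairwise, and a two-operand concat is lowered to SVE SPLICE.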
SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
unsigned NumOperands = Op->getNumOperands();
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
auto SrcOp1 = Op.getOperand(0);
auto SrcOp2 = Op.getOperand(1);
EVT VT = Op.getValueType();
EVT SrcVT = SrcOp1.getValueType();
if (NumOperands > 2) {
SmallVector<SDValue, 4> Ops;
EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
for (unsigned I = 0; I < NumOperands; I += 2)
Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
Op->getOperand(I), Op->getOperand(I + 1)));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
}
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);
Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
return convertFromScalableVector(DAG, VT, Op);
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
SDValue Pg = getPredicateForVector(DAG, DL, VT);
EVT SrcVT = Val.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT ExtendVT = ContainerVT.changeVectorElementType(
SrcVT.getVectorElementType());
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
Val = getSVESafeBitCast(ExtendVT, Val, DAG);
Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
Pg, Val, DAG.getUNDEF(ContainerVT));
return convertFromScalableVector(DAG, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
EVT RoundVT = ContainerSrcVT.changeVectorElementType(
VT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
Op.getOperand(1), DAG.getUNDEF(RoundVT));
Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
// Safe to use a larger than specified operand since we just unpacked the
// data, hence the upper bits are zero.
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeVectorElementType(
ContainerDstVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT, Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
: AArch64ISD::FCVTZU_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
EVT CvtVT = ContainerDstVT.changeVectorElementType(
ContainerSrcVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(CvtVT, Val, DAG);
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
// Safe to use a larger than specified result since an fp_to_int where the
// result doesn't fit into the destination is undefined.
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
}
}
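// Lower a fixed-length VECTOR_SHUFFLE. Only EXT-style masks that shift by a
// single element are currently handled (via INSR); other shuffles return an
// empty SDValue.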
SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
auto ShuffleMask = SVN->getMask();
SDLoc DL(Op);
SDValue Op1 = Op.getOperand(0);
SDValue Op2 = Op.getOperand(1);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
bool ReverseEXT = false;
unsigned Imm;
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
Imm == VT.getVectorNumElements() - 1) {
if (ReverseEXT)
std::swap(Op1, Op2);
EVT ScalarTy = VT.getVectorElementType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
ScalarTy = MVT::i32;
SDValue Scalar = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
return convertFromScalableVector(DAG, VT, Op);
}
return SDValue();
}
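// Bitcast between two legal scalable vector types. Predicate types go
// straight through REINTERPRET_CAST; unpacked data layouts are first
// reinterpreted as their packed container types so that a plain BITCAST
// between same-sized types can be used.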
SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
(void)TLI;
assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
"Only expect to cast between legal scalable vector types!");
assert((VT.getVectorElementType() == MVT::i1) ==
(InVT.getVectorElementType() == MVT::i1) &&
"Cannot cast between data and predicate scalable vector types!");
if (InVT == VT)
return Op;
if (VT.getVectorElementType() == MVT::i1)
return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
// Pack input if required.
if (InVT != PackedInVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
// Unpack result if required.
if (VT != PackedVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
return Op;
}
bool AArch64TargetLowering::isAllActivePredicate(SDValue N) const {
return ::isAllActivePredicate(N);
}
EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
return ::getPromotedVTForPredicate(VT);
}
bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth) const {
unsigned Opc = Op.getOpcode();
switch (Opc) {
case AArch64ISD::VSHL: {
// Match (VSHL (VLSHR Val X) X)
SDValue ShiftL = Op;
SDValue ShiftR = Op->getOperand(0);
if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
return false;
if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
return false;
unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
// Other cases can be handled as well, but this is not
// implemented.
if (ShiftRBits != ShiftLBits)
return false;
unsigned ScalarSize = Op.getScalarValueSizeInBits();
assert(ScalarSize > ShiftLBits && "Invalid shift imm");
APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
APInt UnusedBits = ~OriginalDemandedBits;
if ((ZeroBits & UnusedBits) != ZeroBits)
return false;
// All bits that are zeroed by (VSHL (VLSHR Val X) X) are not
// used - simplify to just Val.
return TLO.CombineTo(Op, ShiftR->getOperand(0));
}
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
unsigned Opc, LLT Ty1, LLT Ty2) const {
return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 62089166f4b7..00fd374587bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1,792 +1,792 @@
//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SME Outer Products
//===----------------------------------------------------------------------===//
class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
ZPRRegOp zpr_ty, string mnemonic>
: I<(outs za_ty:$ZAda),
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
"", []>,
Sched<[]> {
bits<5> Zm;
bits<3> Pm;
bits<3> Pn;
bits<5> Zn;
let Inst{31-23} = 0b100000001;
let Inst{22} = sz;
let Inst{21} = 0b0;
let Inst{20-16} = Zm;
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4} = S;
let Inst{3} = 0b0;
}
class sme_outer_product_fp32<bit S, string mnemonic>
: sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
bits<2> ZAda;
let Inst{1-0} = ZAda;
let Inst{2} = 0b0;
}
class sme_outer_product_fp64<bit S, string mnemonic>
: sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
bits<3> ZAda;
let Inst{2-0} = ZAda;
}
class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
string mnemonic>
: I<(outs za_ty:$ZAda),
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
"", []>,
Sched<[]> {
bits<5> Zm;
bits<3> Pm;
bits<3> Pn;
bits<5> Zn;
let Inst{31-25} = 0b1010000;
let Inst{24} = u0;
let Inst{23} = 0b1;
let Inst{22} = sz;
let Inst{21} = u1;
let Inst{20-16} = Zm;
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4} = S;
let Inst{3} = 0b0;
}
class sme_int_outer_product_i32<bits<3> opc, string mnemonic>
: sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32, ZPR8,
mnemonic> {
bits<2> ZAda;
let Inst{1-0} = ZAda;
let Inst{2} = 0b0;
}
class sme_int_outer_product_i64<bits<3> opc, string mnemonic>
: sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64, ZPR16,
mnemonic> {
bits<3> ZAda;
let Inst{2-0} = ZAda;
}
class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
: I<(outs TileOp32:$ZAda),
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
"", []>,
Sched<[]> {
bits<5> Zm;
bits<3> Pm;
bits<3> Pn;
bits<5> Zn;
bits<2> ZAda;
let Inst{31-22} = 0b1000000110;
let Inst{21} = op;
let Inst{20-16} = Zm;
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4} = S;
let Inst{3-2} = 0b00;
let Inst{1-0} = ZAda;
}
multiclass sme_bf16_outer_product<bit S, string mnemonic> {
def : sme_outer_product_widening_inst<0b0, S, mnemonic>;
}
multiclass sme_f16_outer_product<bit S, string mnemonic> {
def : sme_outer_product_widening_inst<0b1, S, mnemonic>;
}
//===----------------------------------------------------------------------===//
// SME Add Vector to Tile
//===----------------------------------------------------------------------===//
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
ZPRRegOp zpr_ty, string mnemonic>
: I<(outs tile_ty:$ZAda),
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
"", []>, Sched<[]> {
bits<3> Pm;
bits<3> Pn;
bits<5> Zn;
let Inst{31-23} = 0b110000001;
let Inst{22} = op;
let Inst{21-17} = 0b01000;
let Inst{16} = V;
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4-3} = 0b00;
}
class sme_add_vector_to_tile_u32<bit V, string mnemonic>
: sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
bits<2> ZAda;
let Inst{2} = 0b0;
let Inst{1-0} = ZAda;
}
class sme_add_vector_to_tile_u64<bit V, string mnemonic>
: sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
bits<3> ZAda;
let Inst{2-0} = ZAda;
}
//===----------------------------------------------------------------------===//
// SME Contiguous Loads
//===----------------------------------------------------------------------===//
class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
string mnemonic, string argstr>
: I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
bits<5> Rm;
bits<2> Rv;
bits<3> Pg;
bits<5> Rn;
let Inst{31-25} = 0b1110000;
let Inst{24} = Q;
let Inst{23-22} = msz;
let Inst{21} = 0b0;
let Inst{20-16} = Rm;
let Inst{15} = V;
let Inst{14-13} = Rv;
let Inst{12-10} = Pg;
let Inst{9-5} = Rn;
let Inst{4} = 0b0;
let mayLoad = 1;
}
class sme_mem_ld_ss_inst_BHSD<bits<2> msz, string mnemonic,
MatrixTileVectorOperand tile_ty, bit is_col,
Operand imm_ty, RegisterOperand gpr_ty>
: sme_mem_ld_ss_base<
0b0, is_col, msz, (outs tile_ty:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
gpr_ty:$Rm),
mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
class sme_mem_ld_ss_inst_Q<string mnemonic, MatrixTileVectorOperand tile_ty,
bit is_col>
: sme_mem_ld_ss_base<
0b1, is_col, 0b11, (outs tile_ty:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn,
GPR64shifted128:$Rm),
mnemonic, "\t\\{$ZAt[$Rv]\\}, $Pg/z, [$Rn, $Rm]">;
multiclass sme_mem_ss_aliases_BHSD<string mnemonic, Instruction inst,
MatrixTileVectorOperand tile_ty, Operand imm_ty,
RegisterOperand gpr_ty,
string pg_suffix=""> {
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
// Default XZR offset aliases
def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}
multiclass sme_mem_ss_aliases_Q<string mnemonic, Instruction inst,
MatrixTileVectorOperand tile_ty,
string pg_suffix=""> {
def : InstAlias<mnemonic # "\t$ZAt[$Rv], $Pg" # pg_suffix # ", [$Rn, $Rm]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, GPR64shifted128:$Rm), 0>;
// Default XZR offset aliases
def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv]\\}, $Pg" # pg_suffix # ", [$Rn]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 2>;
def : InstAlias<mnemonic # "\t$ZAt[$Rv], $Pg" # pg_suffix # ", [$Rn]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
string pg_suffix=""> {
defm : sme_mem_ss_aliases_BHSD<mnemonic # "b", !cast<Instruction>(inst # _B),
!if(is_col, TileVectorOpV8, TileVectorOpH8),
imm0_15, GPR64shifted8, pg_suffix>;
defm : sme_mem_ss_aliases_BHSD<mnemonic # "h", !cast<Instruction>(inst # _H),
!if(is_col, TileVectorOpV16, TileVectorOpH16),
imm0_7, GPR64shifted16, pg_suffix>;
defm : sme_mem_ss_aliases_BHSD<mnemonic # "w", !cast<Instruction>(inst # _S),
!if(is_col, TileVectorOpV32, TileVectorOpH32),
imm0_3, GPR64shifted32, pg_suffix>;
defm : sme_mem_ss_aliases_BHSD<mnemonic # "d", !cast<Instruction>(inst # _D),
!if(is_col, TileVectorOpV64, TileVectorOpH64),
imm0_1, GPR64shifted64, pg_suffix>;
defm : sme_mem_ss_aliases_Q <mnemonic # "q", !cast<Instruction>(inst # _Q),
!if(is_col, TileVectorOpV128, TileVectorOpH128),
pg_suffix>;
}
multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
}
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
def _B : sme_mem_ld_ss_inst_BHSD<0b00, mnemonic # "b",
!if(is_col, TileVectorOpV8,
TileVectorOpH8),
is_col, imm0_15, GPR64shifted8> {
bits<4> imm;
let Inst{3-0} = imm;
}
def _H : sme_mem_ld_ss_inst_BHSD<0b01, mnemonic # "h",
!if(is_col, TileVectorOpV16,
TileVectorOpH16),
is_col, imm0_7, GPR64shifted16> {
bits<1> ZAt;
bits<3> imm;
let Inst{3} = ZAt;
let Inst{2-0} = imm;
}
def _S : sme_mem_ld_ss_inst_BHSD<0b10, mnemonic # "w",
!if(is_col, TileVectorOpV32,
TileVectorOpH32),
is_col, imm0_3, GPR64shifted32> {
bits<2> ZAt;
bits<2> imm;
let Inst{3-2} = ZAt;
let Inst{1-0} = imm;
}
def _D : sme_mem_ld_ss_inst_BHSD<0b11, mnemonic # "d",
!if(is_col, TileVectorOpV64,
TileVectorOpH64),
is_col, imm0_1, GPR64shifted64> {
bits<3> ZAt;
bits<1> imm;
let Inst{3-1} = ZAt;
let Inst{0} = imm;
}
def _Q : sme_mem_ld_ss_inst_Q<mnemonic # "q",
!if(is_col, TileVectorOpV128,
TileVectorOpH128),
is_col> {
bits<4> ZAt;
let Inst{3-0} = ZAt;
}
defm : sme_mem_ld_ss_aliases<NAME, is_col>;
}
multiclass sme_mem_ld_ss<string mnemonic> {
defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
}
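The _B/_H/_S/_D/_Q variants above share the low four instruction bits between the tile number (ZAt) and the slice immediate, trading immediate range for tile bits as the element size grows (_B: 4 immediate bits and no tile bits, _H: 1+3, _S: 2+2, _D: 3+1, _Q: 4 tile bits and no immediate). A minimal C++ sketch of that packing, for illustration only; the helper name and signature are invented and are not part of the TableGen or backend sources:
// Illustrative helper, not from the sources above: packs tile and slice into
// the shared low nibble the way sme_mem_ld_v_ss/_st_v_ss encode them.
static unsigned packTileAndSlice(unsigned TileBits, unsigned Tile,
                                 unsigned Slice) {
  return ((Tile << (4 - TileBits)) | Slice) & 0xF;
}
// e.g. packTileAndSlice(2, /*ZA2.S*/ 2, /*slice*/ 1) == 0b1001, matching
// Inst{3-2} = ZAt and Inst{1-0} = imm in the _S definition above.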
//===----------------------------------------------------------------------===//
// SME Contiguous Stores
//===----------------------------------------------------------------------===//
class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
string mnemonic, string argstr>
: I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
bits<5> Rm;
bits<2> Rv;
bits<3> Pg;
bits<5> Rn;
let Inst{31-25} = 0b1110000;
let Inst{24} = Q;
let Inst{23-22} = msz;
let Inst{21} = 0b1;
let Inst{20-16} = Rm;
let Inst{15} = V;
let Inst{14-13} = Rv;
let Inst{12-10} = Pg;
let Inst{9-5} = Rn;
let Inst{4} = 0b0;
let mayStore = 1;
let hasSideEffects = 1;
}
class sme_mem_st_ss_inst_BHSD<bits<2> msz, string mnemonic,
MatrixTileVectorOperand tile_ty, bit is_col,
Operand imm_ty, RegisterOperand gpr_ty>
: sme_mem_st_ss_base<
0b0, is_col, msz,
(ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
GPR64sp:$Rn, gpr_ty:$Rm),
mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
class sme_mem_st_ss_inst_Q<string mnemonic, MatrixTileVectorOperand tile_ty,
bit is_col>
: sme_mem_st_ss_base<
0b1, is_col, 0b11,
(ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg,
GPR64sp:$Rn, GPR64shifted128:$Rm),
mnemonic, "\t\\{$ZAt[$Rv]\\}, $Pg, [$Rn, $Rm]">;
multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
}
multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
def _B : sme_mem_st_ss_inst_BHSD<0b00, mnemonic # "b",
!if(is_col, TileVectorOpV8,
TileVectorOpH8),
is_col, imm0_15, GPR64shifted8> {
bits<4> imm;
let Inst{3-0} = imm;
}
def _H : sme_mem_st_ss_inst_BHSD<0b01, mnemonic # "h",
!if(is_col, TileVectorOpV16,
TileVectorOpH16),
is_col, imm0_7, GPR64shifted16> {
bits<1> ZAt;
bits<3> imm;
let Inst{3} = ZAt;
let Inst{2-0} = imm;
}
def _S : sme_mem_st_ss_inst_BHSD<0b10, mnemonic # "w",
!if(is_col, TileVectorOpV32,
TileVectorOpH32),
is_col, imm0_3, GPR64shifted32> {
bits<2> ZAt;
bits<2> imm;
let Inst{3-2} = ZAt;
let Inst{1-0} = imm;
}
def _D : sme_mem_st_ss_inst_BHSD<0b11, mnemonic # "d",
!if(is_col, TileVectorOpV64,
TileVectorOpH64),
is_col, imm0_1, GPR64shifted64> {
bits<3> ZAt;
bits<1> imm;
let Inst{3-1} = ZAt;
let Inst{0} = imm;
}
def _Q : sme_mem_st_ss_inst_Q<mnemonic # "q",
!if(is_col, TileVectorOpV128,
TileVectorOpH128),
is_col> {
bits<4> ZAt;
let Inst{3-0} = ZAt;
}
defm : sme_mem_st_ss_aliases<NAME, is_col>;
}
multiclass sme_mem_st_ss<string mnemonic> {
defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
}
//===----------------------------------------------------------------------===//
// SME Save and Restore Array
//===----------------------------------------------------------------------===//
class sme_spill_fill_inst<bit isStore, dag outs, dag ins, string opcodestr>
: I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
[]>,
Sched<[]> {
bits<2> Rv;
bits<5> Rn;
bits<4> imm4;
let Inst{31-22} = 0b1110000100;
let Inst{21} = isStore;
let Inst{20-15} = 0b000000;
let Inst{14-13} = Rv;
let Inst{12-10} = 0b000;
let Inst{9-5} = Rn;
let Inst{4} = 0b0;
let Inst{3-0} = imm4;
let mayLoad = !not(isStore);
let mayStore = isStore;
}
multiclass sme_spill_fill<bit isStore, dag outs, dag ins, string opcodestr> {
def NAME : sme_spill_fill_inst<isStore, outs, ins, opcodestr>;
def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
(!cast<Instruction>(NAME) MatrixOp:$ZAt,
MatrixIndexGPR32Op12_15:$Rv, imm0_15:$imm4, GPR64sp:$Rn, 0), 1>;
}
multiclass sme_spill<string opcodestr> {
defm NAME : sme_spill_fill<0b1, (outs),
(ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
imm0_15:$imm4, GPR64sp:$Rn,
imm0_15:$offset),
opcodestr>;
}
multiclass sme_fill<string opcodestr> {
defm NAME : sme_spill_fill<0b0, (outs MatrixOp:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv,
imm0_15:$imm4, GPR64sp:$Rn,
imm0_15:$offset),
opcodestr>;
}
//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//
class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
string mnemonic, string argstr>
: I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
bits<2> Rv;
bits<3> Pg;
bits<5> Zn;
let Inst{31-24} = 0b11000000;
let Inst{23-22} = sz;
let Inst{21-17} = 0b00000;
let Inst{16} = Q;
let Inst{15} = V;
let Inst{14-13} = Rv;
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4} = 0b0;
}
class sme_vector_to_tile_inst<bits<2> sz, MatrixTileVectorOperand tile_ty,
bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
string mnemonic>
: sme_vector_to_tile_base<0b0, is_col, sz, (outs tile_ty:$ZAd),
(ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">;
class sme_vector_to_tile_inst_Q<MatrixTileVectorOperand tile_ty,
bit is_col, string mnemonic>
: sme_vector_to_tile_base<0b1, is_col, 0b11, (outs tile_ty:$ZAd),
(ins MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, ZPR128:$Zn),
mnemonic, "\t$ZAd[$Rv], $Pg/m, $Zn">;
multiclass sme_vector_to_tile_aliases<Instruction inst,
MatrixTileVectorOperand tile_ty,
ZPRRegOp zpr_ty, Operand imm_ty> {
def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
- (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm0_15:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
+ (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
}
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
def _B : sme_vector_to_tile_inst<0b00, !if(is_col, TileVectorOpV8,
TileVectorOpH8),
is_col, imm0_15, ZPR8, mnemonic> {
bits<4> imm;
let Inst{3-0} = imm;
}
def _H : sme_vector_to_tile_inst<0b01, !if(is_col, TileVectorOpV16,
TileVectorOpH16),
is_col, imm0_7, ZPR16, mnemonic> {
bits<1> ZAd;
bits<3> imm;
let Inst{3} = ZAd;
let Inst{2-0} = imm;
}
def _S : sme_vector_to_tile_inst<0b10, !if(is_col, TileVectorOpV32,
TileVectorOpH32),
is_col, imm0_3, ZPR32, mnemonic> {
bits<2> ZAd;
bits<2> imm;
let Inst{3-2} = ZAd;
let Inst{1-0} = imm;
}
def _D : sme_vector_to_tile_inst<0b11, !if(is_col, TileVectorOpV64,
TileVectorOpH64),
is_col, imm0_1, ZPR64, mnemonic> {
bits<3> ZAd;
bits<1> imm;
let Inst{3-1} = ZAd;
let Inst{0} = imm;
}
def _Q : sme_vector_to_tile_inst_Q<!if(is_col, TileVectorOpV128,
TileVectorOpH128),
is_col, mnemonic> {
bits<4> ZAd;
bits<1> imm;
let Inst{3-0} = ZAd;
}
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
!if(is_col, TileVectorOpV8,
TileVectorOpH8),
ZPR8, imm0_15>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
!if(is_col, TileVectorOpV16,
TileVectorOpH16),
ZPR16, imm0_7>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
!if(is_col, TileVectorOpV32,
TileVectorOpH32),
ZPR32, imm0_3>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
!if(is_col, TileVectorOpV64,
TileVectorOpH64),
ZPR64, imm0_1>;
def : InstAlias<"mov\t$ZAd[$Rv], $Pg/m, $Zn",
(!cast<Instruction>(NAME # _Q) !if(is_col,
TileVectorOpV128,
TileVectorOpH128):$ZAd,
MatrixIndexGPR32Op12_15:$Rv,
PPR3bAny:$Pg, ZPR128:$Zn), 1>;
}
multiclass sme_vector_to_tile<string mnemonic> {
defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
}
class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
string mnemonic, string argstr>
: I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
bits<2> Rv;
bits<3> Pg;
bits<5> Zd;
let Inst{31-24} = 0b11000000;
let Inst{23-22} = sz;
let Inst{21-17} = 0b00001;
let Inst{16} = Q;
let Inst{15} = V;
let Inst{14-13} = Rv;
let Inst{12-10} = Pg;
let Inst{9} = 0b0;
let Inst{4-0} = Zd;
}
class sme_tile_to_vector_inst<bits<2> sz, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
bit is_col, Operand imm_ty, string mnemonic>
: sme_tile_to_vector_base<0b0, is_col, sz, (outs zpr_ty:$Zd),
(ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]">;
class sme_tile_to_vector_inst_Q<MatrixTileVectorOperand tile_ty,
bit is_col, string mnemonic>
: sme_tile_to_vector_base<0b1, is_col, 0b11, (outs ZPR128:$Zd),
(ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv),
mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv]">;
multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
Operand imm_ty > {
def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
(inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
}
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
def _B : sme_tile_to_vector_inst<0b00, ZPR8, !if(is_col, TileVectorOpV8,
TileVectorOpH8),
is_col, imm0_15, mnemonic> {
bits<4> imm;
let Inst{8-5} = imm;
}
def _H : sme_tile_to_vector_inst<0b01, ZPR16, !if(is_col, TileVectorOpV16,
TileVectorOpH16),
is_col, imm0_7, mnemonic> {
bits<1> ZAn;
bits<3> imm;
let Inst{8} = ZAn;
let Inst{7-5} = imm;
}
def _S : sme_tile_to_vector_inst<0b10, ZPR32, !if(is_col, TileVectorOpV32,
TileVectorOpH32),
is_col, imm0_3, mnemonic> {
bits<2> ZAn;
bits<2> imm;
let Inst{8-7} = ZAn;
let Inst{6-5} = imm;
}
def _D : sme_tile_to_vector_inst<0b11, ZPR64, !if(is_col, TileVectorOpV64,
TileVectorOpH64),
is_col, imm0_1, mnemonic> {
bits<3> ZAn;
bits<1> imm;
let Inst{8-6} = ZAn;
let Inst{5} = imm;
}
def _Q : sme_tile_to_vector_inst_Q<!if(is_col, TileVectorOpV128,
TileVectorOpH128),
is_col, mnemonic> {
bits<4> ZAn;
let Inst{8-5} = ZAn;
}
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
!if(is_col, TileVectorOpV8,
TileVectorOpH8), imm0_15>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
!if(is_col, TileVectorOpV16,
TileVectorOpH16), imm0_7>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
!if(is_col, TileVectorOpV32,
TileVectorOpH32), imm0_3>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
!if(is_col, TileVectorOpV64,
TileVectorOpH64), imm0_1>;
def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv]",
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, PPR3bAny:$Pg,
!if(is_col,
TileVectorOpV128,
TileVectorOpH128):$ZAn,
MatrixIndexGPR32Op12_15:$Rv), 1>;
}
multiclass sme_tile_to_vector<string mnemonic> {
defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>;
defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>;
}
//===----------------------------------------------------------------------===//
// SME Zero
//===----------------------------------------------------------------------===//
class sme_zero_inst<string mnemonic>
: I<(outs MatrixTileList:$imm), (ins),
mnemonic, "\t$imm", "", []>, Sched<[]> {
bits<8> imm;
let Inst{31-8} = 0b110000000000100000000000;
let Inst{7-0} = imm;
}
multiclass sme_zero<string mnemonic> {
def NAME : sme_zero_inst<mnemonic>;
def : InstAlias<"zero\t\\{za\\}", (!cast<Instruction>(NAME) 0b11111111), 1>;
def : InstAlias<"zero\t\\{za0.h\\}", (!cast<Instruction>(NAME) 0b01010101), 1>;
def : InstAlias<"zero\t\\{za1.h\\}", (!cast<Instruction>(NAME) 0b10101010), 1>;
def : InstAlias<"zero\t\\{za0.s\\}", (!cast<Instruction>(NAME) 0b00010001), 1>;
def : InstAlias<"zero\t\\{za1.s\\}", (!cast<Instruction>(NAME) 0b00100010), 1>;
def : InstAlias<"zero\t\\{za2.s\\}", (!cast<Instruction>(NAME) 0b01000100), 1>;
def : InstAlias<"zero\t\\{za3.s\\}", (!cast<Instruction>(NAME) 0b10001000), 1>;
def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast<Instruction>(NAME) 0b00110011), 1>;
def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10011001), 1>;
def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01100110), 1>;
def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11001100), 1>;
def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01110111), 1>;
def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
}
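The alias immediates above follow a simple pattern: the whole-array alias sets all eight mask bits, each 16-bit tile ZAn.H owns every other bit starting at n, and each 32-bit tile ZAn.S owns bits n and n+4, so the multi-tile aliases are just bitwise ORs of the single-tile masks. A small illustrative helper (hypothetical, not part of this file) that reproduces the .s masks listed above:
// Illustrative only: 8-bit ZERO tile mask for a 32-bit tile ZAn.S, matching
// the aliases above (za0.s -> 0b00010001, ..., za3.s -> 0b10001000). Masks
// for tile lists are the bitwise OR of the individual tile masks.
static unsigned char zeroMaskForSTile(unsigned N) { // N in 0..3
  return static_cast<unsigned char>((1u << N) | (1u << (N + 4)));
}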
//===----------------------------------------------------------------------===//
// SVE2 Instructions
//===----------------------------------------------------------------------===//
class sve2_int_perm_revd<string asm>
: I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
asm, "\t$Zd, $Pg/m, $Zn", "", []>,
Sched<[]> {
bits<5> Zd;
bits<3> Pg;
bits<5> Zn;
let Inst{31-24} = 0b00000101;
let Inst{23-22} = 0b00; // size
let Inst{21-13} = 0b101110100;
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveUnary;
let ElementSize = ZPR128.ElementSize;
}
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
: I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
asm, "\t$Zd, $Zn, $Zm", "", []>,
Sched<[]> {
bits<5> Zm;
bits<5> Zn;
bits<5> Zd;
let Inst{31-24} = 0b01000100;
let Inst{23-22} = sz;
let Inst{21} = 0b0;
let Inst{20-16} = Zm;
let Inst{15-11} = 0b11000;
let Inst{10} = U;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zpr_ty.ElementSize;
}
multiclass sve2_clamp<string asm, bit U> {
def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
}
class sve2_int_perm_dup_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
: I<(outs ppr_ty:$Pd), (ins PPRAny:$Pg, ppr_ty:$Pn,
MatrixIndexGPR32Op12_15:$Rm, imm_ty:$imm),
asm, "\t$Pd, $Pg/z, $Pn[$Rm, $imm]", "", []>,
Sched<[]> {
bits<2> Rm;
bits<4> Pg;
bits<4> Pn;
bits<4> Pd;
let Inst{31-24} = 0b00100101;
let Inst{21} = 0b1;
let Inst{17-16} = Rm;
let Inst{15-14} = 0b01;
let Inst{13-10} = Pg;
let Inst{9} = 0b0;
let Inst{8-5} = Pn;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
}
multiclass sve2_int_perm_dup_p<string asm> {
def _B : sve2_int_perm_dup_p<asm, PPR8, imm0_15> {
bits<4> imm;
let Inst{23-22} = imm{3-2};
let Inst{20-19} = imm{1-0};
let Inst{18} = 0b1;
}
def _H : sve2_int_perm_dup_p<asm, PPR16, imm0_7> {
bits<3> imm;
let Inst{23-22} = imm{2-1};
let Inst{20} = imm{0};
let Inst{19-18} = 0b10;
}
def _S : sve2_int_perm_dup_p<asm, PPR32, imm0_3> {
bits<2> imm;
let Inst{23-22} = imm{1-0};
let Inst{20-18} = 0b100;
}
def _D : sve2_int_perm_dup_p<asm, PPR64, imm0_1> {
bits<1> imm;
let Inst{23} = imm;
let Inst{22} = 0b1;
let Inst{20-18} = 0b000;
}
def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
(!cast<Instruction>(NAME # _B) PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
(!cast<Instruction>(NAME # _H) PPR16:$Pd, PPRAny:$Pg, PPR16:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
(!cast<Instruction>(NAME # _S) PPR32:$Pd, PPRAny:$Pg, PPR32:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
(!cast<Instruction>(NAME # _D) PPR64:$Pd, PPRAny:$Pg, PPR64:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index d8465f6d682b..94126e179462 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -1,861 +1,861 @@
//===---- M68kAsmParser.cpp - Parse M68k assembly to MCInst instructions --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "M68kInstrInfo.h"
#include "M68kRegisterInfo.h"
#include "TargetInfo/M68kTargetInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/TargetRegistry.h"
#include <sstream>
#define DEBUG_TYPE "m68k-asm-parser"
using namespace llvm;
static cl::opt<bool> RegisterPrefixOptional(
"m68k-register-prefix-optional", cl::Hidden,
cl::desc("Enable specifying registers without the % prefix"),
cl::init(false));
namespace {
/// Parses M68k assembly from a stream.
class M68kAsmParser : public MCTargetAsmParser {
const MCSubtargetInfo &STI;
MCAsmParser &Parser;
const MCRegisterInfo *MRI;
#define GET_ASSEMBLER_HEADER
#include "M68kGenAsmMatcher.inc"
// Helpers for Match&Emit.
bool invalidOperand(const SMLoc &Loc, const OperandVector &Operands,
const uint64_t &ErrorInfo);
bool missingFeature(const SMLoc &Loc, const uint64_t &ErrorInfo);
bool emit(MCInst &Inst, SMLoc const &Loc, MCStreamer &Out) const;
bool parseRegisterName(unsigned int &RegNo, SMLoc Loc,
StringRef RegisterName);
OperandMatchResultTy parseRegister(unsigned int &RegNo);
// Parser functions.
void eatComma();
bool isExpr();
OperandMatchResultTy parseImm(OperandVector &Operands);
OperandMatchResultTy parseMemOp(OperandVector &Operands);
public:
M68kAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII), STI(STI), Parser(Parser) {
MCAsmParserExtension::Initialize(Parser);
MRI = getContext().getRegisterInfo();
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
};
struct M68kMemOp {
enum class Kind {
Addr,
Reg,
RegIndirect,
RegPostIncrement,
RegPreDecrement,
RegIndirectDisplacement,
RegIndirectDisplacementIndex,
};
// These variables are used for the following forms:
// Addr: (OuterDisp)
// Reg: %OuterReg
// RegIndirect: (%OuterReg)
// RegPostIncrement: (%OuterReg)+
// RegPreDecrement: -(%OuterReg)
// RegIndirectDisplacement: OuterDisp(%OuterReg)
// RegIndirectDisplacementIndex:
// OuterDisp(%OuterReg, %InnerReg.Size * Scale, InnerDisp)
Kind Op;
unsigned OuterReg;
unsigned InnerReg;
const MCExpr *OuterDisp;
const MCExpr *InnerDisp;
uint8_t Size : 4;
uint8_t Scale : 4;
const MCExpr *Expr;
M68kMemOp() {}
M68kMemOp(Kind Op) : Op(Op) {}
void print(raw_ostream &OS) const;
};
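As a concrete illustration of the forms listed in the comment above (values chosen arbitrarily; this snippet is not part of the parser), a register-indirect-with-displacement operand such as -8(%a1) would populate the struct roughly like this:
// Illustrative only: building the operand for "-8(%a1)". Ctx stands in for
// the MCContext the parser obtains via Parser.getContext().
static M68kMemOp makeDispOperandExample(MCContext &Ctx) {
  M68kMemOp DispOp(M68kMemOp::Kind::RegIndirectDisplacement);
  DispOp.OuterReg = M68k::A1;
  DispOp.OuterDisp = MCConstantExpr::create(-8, Ctx);
  return DispOp;
}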
/// A parsed M68k assembly operand.

class M68kOperand : public MCParsedAsmOperand {
typedef MCParsedAsmOperand Base;
- enum class Kind {
+ enum class KindTy {
Invalid,
Token,
Imm,
MemOp,
};
- Kind Kind;
+ KindTy Kind;
SMLoc Start, End;
union {
StringRef Token;
int64_t Imm;
const MCExpr *Expr;
M68kMemOp MemOp;
};
public:
- M68kOperand(enum Kind Kind, SMLoc Start, SMLoc End)
+ M68kOperand(KindTy Kind, SMLoc Start, SMLoc End)
: Base(), Kind(Kind), Start(Start), End(End) {}
SMLoc getStartLoc() const override { return Start; }
SMLoc getEndLoc() const override { return End; }
void print(raw_ostream &OS) const override;
bool isMem() const override { return false; }
- bool isMemOp() const { return Kind == Kind::MemOp; }
+ bool isMemOp() const { return Kind == KindTy::MemOp; }
static void addExpr(MCInst &Inst, const MCExpr *Expr);
// Reg
bool isReg() const override;
unsigned getReg() const override;
void addRegOperands(MCInst &Inst, unsigned N) const;
static std::unique_ptr<M68kOperand> createMemOp(M68kMemOp MemOp, SMLoc Start,
SMLoc End);
// Token
bool isToken() const override;
StringRef getToken() const;
static std::unique_ptr<M68kOperand> createToken(StringRef Token, SMLoc Start,
SMLoc End);
// Imm
bool isImm() const override;
void addImmOperands(MCInst &Inst, unsigned N) const;
static std::unique_ptr<M68kOperand> createImm(const MCExpr *Expr, SMLoc Start,
SMLoc End);
// Addr
bool isAddr() const;
void addAddrOperands(MCInst &Inst, unsigned N) const;
// ARI
bool isARI() const;
void addARIOperands(MCInst &Inst, unsigned N) const;
// ARID
bool isARID() const;
void addARIDOperands(MCInst &Inst, unsigned N) const;
// ARII
bool isARII() const;
void addARIIOperands(MCInst &Inst, unsigned N) const;
// ARIPD
bool isARIPD() const;
void addARIPDOperands(MCInst &Inst, unsigned N) const;
// ARIPI
bool isARIPI() const;
void addARIPIOperands(MCInst &Inst, unsigned N) const;
// PCD
bool isPCD() const;
void addPCDOperands(MCInst &Inst, unsigned N) const;
// PCI
bool isPCI() const;
void addPCIOperands(MCInst &Inst, unsigned N) const;
};
} // end anonymous namespace.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kAsmParser() {
RegisterMCAsmParser<M68kAsmParser> X(getTheM68kTarget());
}
#define GET_MATCHER_IMPLEMENTATION
#include "M68kGenAsmMatcher.inc"
void M68kMemOp::print(raw_ostream &OS) const {
switch (Op) {
case Kind::Addr:
OS << OuterDisp;
break;
case Kind::Reg:
OS << '%' << OuterReg;
break;
case Kind::RegIndirect:
OS << "(%" << OuterReg << ')';
break;
case Kind::RegPostIncrement:
OS << "(%" << OuterReg << ")+";
break;
case Kind::RegPreDecrement:
OS << "-(%" << OuterReg << ")";
break;
case Kind::RegIndirectDisplacement:
OS << OuterDisp << "(%" << OuterReg << ")";
break;
case Kind::RegIndirectDisplacementIndex:
OS << OuterDisp << "(%" << OuterReg << ", " << InnerReg << "." << Size
<< ", " << InnerDisp << ")";
break;
}
}
void M68kOperand::addExpr(MCInst &Inst, const MCExpr *Expr) {
if (auto Const = dyn_cast<MCConstantExpr>(Expr)) {
Inst.addOperand(MCOperand::createImm(Const->getValue()));
return;
}
Inst.addOperand(MCOperand::createExpr(Expr));
}
// Reg
bool M68kOperand::isReg() const {
- return Kind == Kind::MemOp && MemOp.Op == M68kMemOp::Kind::Reg;
+ return Kind == KindTy::MemOp && MemOp.Op == M68kMemOp::Kind::Reg;
}
unsigned M68kOperand::getReg() const {
assert(isReg());
return MemOp.OuterReg;
}
void M68kOperand::addRegOperands(MCInst &Inst, unsigned N) const {
assert(isReg() && "wrong operand kind");
assert((N == 1) && "can only handle one register operand");
Inst.addOperand(MCOperand::createReg(getReg()));
}
std::unique_ptr<M68kOperand> M68kOperand::createMemOp(M68kMemOp MemOp,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::MemOp, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::MemOp, Start, End);
Op->MemOp = MemOp;
return Op;
}
// Token
-bool M68kOperand::isToken() const { return Kind == Kind::Token; }
+bool M68kOperand::isToken() const { return Kind == KindTy::Token; }
StringRef M68kOperand::getToken() const {
assert(isToken());
return Token;
}
std::unique_ptr<M68kOperand> M68kOperand::createToken(StringRef Token,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::Token, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::Token, Start, End);
Op->Token = Token;
return Op;
}
// Imm
-bool M68kOperand::isImm() const { return Kind == Kind::Imm; }
+bool M68kOperand::isImm() const { return Kind == KindTy::Imm; }
void M68kOperand::addImmOperands(MCInst &Inst, unsigned N) const {
assert(isImm() && "wrong operand kind");
assert((N == 1) && "can only handle one immediate operand");
M68kOperand::addExpr(Inst, Expr);
}
std::unique_ptr<M68kOperand> M68kOperand::createImm(const MCExpr *Expr,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::Imm, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::Imm, Start, End);
Op->Expr = Expr;
return Op;
}
// Addr
bool M68kOperand::isAddr() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::Addr;
}
void M68kOperand::addAddrOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
}
// ARI
bool M68kOperand::isARI() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::RegIndirect &&
M68k::AR32RegClass.contains(MemOp.OuterReg);
}
void M68kOperand::addARIOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::createReg(MemOp.OuterReg));
}
// ARID
bool M68kOperand::isARID() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::RegIndirectDisplacement &&
M68k::AR32RegClass.contains(MemOp.OuterReg);
}
void M68kOperand::addARIDOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
Inst.addOperand(MCOperand::createReg(MemOp.OuterReg));
}
// ARII
bool M68kOperand::isARII() const {
return isMemOp() &&
MemOp.Op == M68kMemOp::Kind::RegIndirectDisplacementIndex &&
M68k::AR32RegClass.contains(MemOp.OuterReg);
}
void M68kOperand::addARIIOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
Inst.addOperand(MCOperand::createReg(MemOp.OuterReg));
Inst.addOperand(MCOperand::createReg(MemOp.InnerReg));
}
// ARIPD
bool M68kOperand::isARIPD() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::RegPreDecrement &&
M68k::AR32RegClass.contains(MemOp.OuterReg);
}
void M68kOperand::addARIPDOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::createReg(MemOp.OuterReg));
}
// ARIPI
bool M68kOperand::isARIPI() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::RegPostIncrement &&
M68k::AR32RegClass.contains(MemOp.OuterReg);
}
void M68kOperand::addARIPIOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::createReg(MemOp.OuterReg));
}
// PCD
bool M68kOperand::isPCD() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::RegIndirectDisplacement &&
MemOp.OuterReg == M68k::PC;
}
void M68kOperand::addPCDOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
}
// PCI
bool M68kOperand::isPCI() const {
return isMemOp() &&
MemOp.Op == M68kMemOp::Kind::RegIndirectDisplacementIndex &&
MemOp.OuterReg == M68k::PC;
}
void M68kOperand::addPCIOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
Inst.addOperand(MCOperand::createReg(MemOp.InnerReg));
}
static inline bool checkRegisterClass(unsigned RegNo, bool Data, bool Address,
bool SP) {
switch (RegNo) {
case M68k::A0:
case M68k::A1:
case M68k::A2:
case M68k::A3:
case M68k::A4:
case M68k::A5:
case M68k::A6:
return Address;
case M68k::SP:
return SP;
case M68k::D0:
case M68k::D1:
case M68k::D2:
case M68k::D3:
case M68k::D4:
case M68k::D5:
case M68k::D6:
case M68k::D7:
return Data;
case M68k::SR:
case M68k::CCR:
return false;
default:
llvm_unreachable("unexpected register type");
return false;
}
}
unsigned M68kAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) {
M68kOperand &Operand = (M68kOperand &)Op;
switch (Kind) {
case MCK_XR16:
case MCK_SPILL:
if (Operand.isReg() &&
checkRegisterClass(Operand.getReg(), true, true, true)) {
return Match_Success;
}
break;
case MCK_AR16:
case MCK_AR32:
if (Operand.isReg() &&
checkRegisterClass(Operand.getReg(), false, true, true)) {
return Match_Success;
}
break;
case MCK_AR32_NOSP:
if (Operand.isReg() &&
checkRegisterClass(Operand.getReg(), false, true, false)) {
return Match_Success;
}
break;
case MCK_DR8:
case MCK_DR16:
case MCK_DR32:
if (Operand.isReg() &&
checkRegisterClass(Operand.getReg(), true, false, false)) {
return Match_Success;
}
break;
case MCK_AR16_TC:
if (Operand.isReg() &&
((Operand.getReg() == M68k::A0) || (Operand.getReg() == M68k::A1))) {
return Match_Success;
}
break;
case MCK_DR16_TC:
if (Operand.isReg() &&
((Operand.getReg() == M68k::D0) || (Operand.getReg() == M68k::D1))) {
return Match_Success;
}
break;
case MCK_XR16_TC:
if (Operand.isReg() &&
((Operand.getReg() == M68k::D0) || (Operand.getReg() == M68k::D1) ||
(Operand.getReg() == M68k::A0) || (Operand.getReg() == M68k::A1))) {
return Match_Success;
}
break;
}
return Match_InvalidOperand;
}
bool M68kAsmParser::parseRegisterName(unsigned &RegNo, SMLoc Loc,
StringRef RegisterName) {
auto RegisterNameLower = RegisterName.lower();
// CCR register
if (RegisterNameLower == "ccr") {
RegNo = M68k::CCR;
return true;
}
// Parse simple general-purpose registers.
if (RegisterNameLower.size() == 2) {
static unsigned RegistersByIndex[] = {
M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
M68k::A4, M68k::A5, M68k::A6, M68k::SP,
};
switch (RegisterNameLower[0]) {
case 'd':
case 'a': {
if (isdigit(RegisterNameLower[1])) {
unsigned IndexOffset = (RegisterNameLower[0] == 'a') ? 8 : 0;
unsigned RegIndex = (unsigned)(RegisterNameLower[1] - '0');
if (RegIndex < 8) {
RegNo = RegistersByIndex[IndexOffset + RegIndex];
return true;
}
}
break;
}
case 's':
if (RegisterNameLower[1] == 'p') {
RegNo = M68k::SP;
return true;
} else if (RegisterNameLower[1] == 'r') {
RegNo = M68k::SR;
return true;
}
break;
case 'p':
if (RegisterNameLower[1] == 'c') {
RegNo = M68k::PC;
return true;
}
break;
}
}
return false;
}
OperandMatchResultTy M68kAsmParser::parseRegister(unsigned &RegNo) {
bool HasPercent = false;
AsmToken PercentToken;
LLVM_DEBUG(dbgs() << "parseRegister "; getTok().dump(dbgs()); dbgs() << "\n");
if (getTok().is(AsmToken::Percent)) {
HasPercent = true;
PercentToken = Lex();
} else if (!RegisterPrefixOptional.getValue()) {
return MatchOperand_NoMatch;
}
if (!Parser.getTok().is(AsmToken::Identifier)) {
if (HasPercent) {
getLexer().UnLex(PercentToken);
}
return MatchOperand_NoMatch;
}
auto RegisterName = Parser.getTok().getString();
if (!parseRegisterName(RegNo, Parser.getLexer().getLoc(), RegisterName)) {
if (HasPercent) {
getLexer().UnLex(PercentToken);
}
return MatchOperand_NoMatch;
}
Parser.Lex();
return MatchOperand_Success;
}
bool M68kAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
auto Result = tryParseRegister(RegNo, StartLoc, EndLoc);
if (Result != MatchOperand_Success) {
return Error(StartLoc, "expected register");
}
return false;
}
OperandMatchResultTy M68kAsmParser::tryParseRegister(unsigned &RegNo,
SMLoc &StartLoc,
SMLoc &EndLoc) {
StartLoc = getLexer().getLoc();
auto Result = parseRegister(RegNo);
EndLoc = getLexer().getLoc();
return Result;
}
bool M68kAsmParser::isExpr() {
switch (Parser.getTok().getKind()) {
case AsmToken::Identifier:
case AsmToken::Integer:
return true;
case AsmToken::Minus:
return getLexer().peekTok().getKind() == AsmToken::Integer;
default:
return false;
}
}
OperandMatchResultTy M68kAsmParser::parseImm(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::Hash)) {
return MatchOperand_NoMatch;
}
SMLoc Start = getLexer().getLoc();
Parser.Lex();
SMLoc End;
const MCExpr *Expr;
if (getParser().parseExpression(Expr, End)) {
return MatchOperand_ParseFail;
}
Operands.push_back(M68kOperand::createImm(Expr, Start, End));
return MatchOperand_Success;
}
OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
SMLoc Start = getLexer().getLoc();
bool IsPD = false;
M68kMemOp MemOp;
// Check for a plain register.
auto Result = parseRegister(MemOp.OuterReg);
if (Result == MatchOperand_Success) {
MemOp.Op = M68kMemOp::Kind::Reg;
Operands.push_back(
M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
return MatchOperand_Success;
}
if (Result == MatchOperand_ParseFail) {
return Result;
}
// Check for pre-decrement & outer displacement.
bool HasDisplacement = false;
if (getLexer().is(AsmToken::Minus)) {
IsPD = true;
Parser.Lex();
} else if (isExpr()) {
if (Parser.parseExpression(MemOp.OuterDisp)) {
return MatchOperand_ParseFail;
}
HasDisplacement = true;
}
if (getLexer().isNot(AsmToken::LParen)) {
if (HasDisplacement) {
MemOp.Op = M68kMemOp::Kind::Addr;
Operands.push_back(
M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
return MatchOperand_Success;
} else if (IsPD) {
Error(getLexer().getLoc(), "expected (");
return MatchOperand_ParseFail;
}
return MatchOperand_NoMatch;
}
Parser.Lex();
// Check for constant dereference & MIT-style displacement
if (!HasDisplacement && isExpr()) {
if (Parser.parseExpression(MemOp.OuterDisp)) {
return MatchOperand_ParseFail;
}
HasDisplacement = true;
// If we're not followed by a comma, we're a constant dereference.
if (getLexer().isNot(AsmToken::Comma)) {
MemOp.Op = M68kMemOp::Kind::Addr;
Operands.push_back(
M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
return MatchOperand_Success;
}
Parser.Lex();
}
Result = parseRegister(MemOp.OuterReg);
if (Result == MatchOperand_ParseFail) {
return MatchOperand_ParseFail;
}
if (Result != MatchOperand_Success) {
Error(getLexer().getLoc(), "expected register");
return MatchOperand_ParseFail;
}
// Check for Index.
bool HasIndex = false;
if (Parser.getTok().is(AsmToken::Comma)) {
Parser.Lex();
Result = parseRegister(MemOp.InnerReg);
if (Result == MatchOperand_ParseFail) {
return Result;
}
if (Result == MatchOperand_NoMatch) {
Error(getLexer().getLoc(), "expected register");
return MatchOperand_ParseFail;
}
// TODO: parse size, scale and inner displacement.
MemOp.Size = 4;
MemOp.Scale = 1;
MemOp.InnerDisp = MCConstantExpr::create(0, Parser.getContext(), true, 4);
HasIndex = true;
}
if (Parser.getTok().isNot(AsmToken::RParen)) {
Error(getLexer().getLoc(), "expected )");
return MatchOperand_ParseFail;
}
Parser.Lex();
bool IsPI = false;
if (!IsPD && Parser.getTok().is(AsmToken::Plus)) {
Parser.Lex();
IsPI = true;
}
SMLoc End = getLexer().getLoc();
unsigned OpCount = IsPD + IsPI + (HasIndex || HasDisplacement);
if (OpCount > 1) {
Error(Start, "only one of post-increment, pre-decrement or displacement "
"can be used");
return MatchOperand_ParseFail;
}
if (IsPD) {
MemOp.Op = M68kMemOp::Kind::RegPreDecrement;
} else if (IsPI) {
MemOp.Op = M68kMemOp::Kind::RegPostIncrement;
} else if (HasIndex) {
MemOp.Op = M68kMemOp::Kind::RegIndirectDisplacementIndex;
} else if (HasDisplacement) {
MemOp.Op = M68kMemOp::Kind::RegIndirectDisplacement;
} else {
MemOp.Op = M68kMemOp::Kind::RegIndirect;
}
Operands.push_back(M68kOperand::createMemOp(MemOp, Start, End));
return MatchOperand_Success;
}
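To summarize the control flow above, here is an illustrative (not exhaustive, and not part of the source) mapping from operand spellings accepted by parseMemOp to the M68kMemOp::Kind it records:
// Illustrative summary of parseMemOp above (not part of the source):
//   "%d0"            -> Kind::Reg
//   "(%a0)"          -> Kind::RegIndirect
//   "(%a0)+"         -> Kind::RegPostIncrement
//   "-(%a0)"         -> Kind::RegPreDecrement
//   "12(%a0)"        -> Kind::RegIndirectDisplacement
//   "12(%a0,%d1)"    -> Kind::RegIndirectDisplacementIndex (size, scale and
//                       inner displacement currently defaulted, see the TODO)
//   "(1234)", "1234" -> Kind::Addr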
void M68kAsmParser::eatComma() {
if (Parser.getTok().is(AsmToken::Comma)) {
Parser.Lex();
}
}
bool M68kAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
SMLoc Start = getLexer().getLoc();
Operands.push_back(M68kOperand::createToken(Name, Start, Start));
bool First = true;
while (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
if (!First) {
eatComma();
} else {
First = false;
}
auto MatchResult = MatchOperandParserImpl(Operands, Name);
if (MatchResult == MatchOperand_Success) {
continue;
}
// Add custom operand formats here...
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token parsing operands");
}
// Eat EndOfStatement.
Parser.Lex();
return false;
}
bool M68kAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
bool M68kAsmParser::invalidOperand(SMLoc const &Loc,
OperandVector const &Operands,
uint64_t const &ErrorInfo) {
SMLoc ErrorLoc = Loc;
char const *Diag = nullptr;
if (ErrorInfo != ~0U) {
if (ErrorInfo >= Operands.size()) {
Diag = "too few operands for instruction.";
} else {
auto const &Op = (M68kOperand const &)*Operands[ErrorInfo];
if (Op.getStartLoc() != SMLoc()) {
ErrorLoc = Op.getStartLoc();
}
}
}
if (!Diag) {
Diag = "invalid operand for instruction";
}
return Error(ErrorLoc, Diag);
}
bool M68kAsmParser::missingFeature(llvm::SMLoc const &Loc,
uint64_t const &ErrorInfo) {
return Error(Loc, "instruction requires a CPU feature not currently enabled");
}
bool M68kAsmParser::emit(MCInst &Inst, SMLoc const &Loc,
MCStreamer &Out) const {
Inst.setLoc(Loc);
Out.emitInstruction(Inst, STI);
return false;
}
bool M68kAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
case Match_Success:
return emit(Inst, Loc, Out);
case Match_MissingFeature:
return missingFeature(Loc, ErrorInfo);
case Match_InvalidOperand:
return invalidOperand(Loc, Operands, ErrorInfo);
case Match_MnemonicFail:
return Error(Loc, "invalid instruction");
default:
return true;
}
}
void M68kOperand::print(raw_ostream &OS) const {
switch (Kind) {
- case Kind::Invalid:
+ case KindTy::Invalid:
OS << "invalid";
break;
- case Kind::Token:
+ case KindTy::Token:
OS << "token '" << Token << "'";
break;
- case Kind::Imm:
+ case KindTy::Imm:
OS << "immediate " << Imm;
break;
- case Kind::MemOp:
+ case KindTy::MemOp:
MemOp.print(OS);
break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp
index 5b8fd3d41b14..cb7d8f8b25e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp
@@ -1,188 +1,192 @@
//===-- M68kTargetMachine.cpp - M68k target machine ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementation for M68k target machine.
///
//===----------------------------------------------------------------------===//
#include "M68kTargetMachine.h"
#include "M68k.h"
#include "M68kSubtarget.h"
#include "M68kTargetObjectFile.h"
#include "TargetInfo/M68kTargetInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/TargetRegistry.h"
#include <memory>
using namespace llvm;
#define DEBUG_TYPE "m68k"
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kTarget() {
RegisterTargetMachine<M68kTargetMachine> X(getTheM68kTarget());
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
}
namespace {
std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
std::string Ret = "";
// M68k is Big Endian
Ret += "E";
// FIXME how to wire it with the used object format?
Ret += "-m:e";
- // M68k pointers are always 32 bit wide even for 16 bit cpus
- Ret += "-p:32:32";
-
- // M68k requires i8 to align on 2 byte boundry
+ // M68k pointers are always 32 bit wide even for 16-bit CPUs.
+ // The ABI only specifies 16-bit alignment.
+ // On at least the 68020+ with a 32-bit bus, there is a performance benefit
+ // to having 32-bit alignment.
+ Ret += "-p:32:16:32";
+
+ // Bytes do not require special alignment, words are word aligned and
+ // long words are word aligned at minimum.
Ret += "-i8:8:8-i16:16:16-i32:16:32";
// FIXME no floats at the moment
// The registers can hold 8, 16, 32 bits
Ret += "-n8:16:32";
Ret += "-a:0:16-S16";
return Ret;
}
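For reference, the pieces appended above concatenate to the following layout string; the constant below is purely illustrative and its name is invented, not part of the target sources:
// Illustrative only: the data layout string computeDataLayout now produces.
static constexpr const char M68kDataLayoutAfterThisChange[] =
    "E-m:e-p:32:16:32-i8:8:8-i16:16:16-i32:16:32-n8:16:32-a:0:16-S16";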
Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
// If not defined, we default to static relocation.
if (!RM.hasValue()) {
return Reloc::Static;
}
return *RM;
}
CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
bool JIT) {
if (!CM) {
return CodeModel::Small;
} else if (CM == CodeModel::Large) {
llvm_unreachable("Large code model is not supported");
} else if (CM == CodeModel::Kernel) {
llvm_unreachable("Kernel code model is not implemented yet");
}
return CM.getValue();
}
} // end anonymous namespace
M68kTargetMachine::M68kTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Optional<Reloc::Model> RM,
Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
Options, getEffectiveRelocModel(TT, RM),
::getEffectiveCodeModel(CM, JIT), OL),
TLOF(std::make_unique<M68kELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
M68kTargetMachine::~M68kTargetMachine() {}
const M68kSubtarget *
M68kTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
auto CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
auto FS = FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
auto &I = SubtargetMap[CPU + FS];
if (!I) {
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
I = std::make_unique<M68kSubtarget>(TargetTriple, CPU, FS, *this);
}
return I.get();
}
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
namespace {
class M68kPassConfig : public TargetPassConfig {
public:
M68kPassConfig(M68kTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
M68kTargetMachine &getM68kTargetMachine() const {
return getTM<M68kTargetMachine>();
}
const M68kSubtarget &getM68kSubtarget() const {
return *getM68kTargetMachine().getSubtargetImpl();
}
bool addIRTranslator() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
bool addInstSelector() override;
void addPreSched2() override;
void addPreEmitPass() override;
};
} // namespace
TargetPassConfig *M68kTargetMachine::createPassConfig(PassManagerBase &PM) {
return new M68kPassConfig(*this, PM);
}
bool M68kPassConfig::addInstSelector() {
// Install an instruction selector.
addPass(createM68kISelDag(getM68kTargetMachine()));
addPass(createM68kGlobalBaseRegPass());
return false;
}
bool M68kPassConfig::addIRTranslator() {
addPass(new IRTranslator());
return false;
}
bool M68kPassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
}
bool M68kPassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
}
bool M68kPassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
return false;
}
void M68kPassConfig::addPreSched2() { addPass(createM68kExpandPseudoPass()); }
void M68kPassConfig::addPreEmitPass() {
addPass(createM68kCollapseMOVEMPass());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index a541daaff9f4..207101763ac2 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1,1617 +1,1617 @@
//===-- RISCVInstrInfo.cpp - RISCV Instruction Information ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISCV implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "RISCVInstrInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
#define GEN_CHECK_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"
#define GET_INSTRINFO_CTOR_DTOR
#include "RISCVGenInstrInfo.inc"
namespace llvm {
namespace RISCVVPseudosTable {
using namespace RISCV;
#define GET_RISCVVPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCVVPseudosTable
} // namespace llvm
RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
: RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
STI(STI) {}
MCInst RISCVInstrInfo::getNop() const {
if (STI.getFeatureBits()[RISCV::FeatureStdExtC])
return MCInstBuilder(RISCV::C_NOP);
return MCInstBuilder(RISCV::ADDI)
.addReg(RISCV::X0)
.addReg(RISCV::X0)
.addImm(0);
}
unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
default:
return 0;
case RISCV::LB:
case RISCV::LBU:
case RISCV::LH:
case RISCV::LHU:
case RISCV::FLH:
case RISCV::LW:
case RISCV::FLW:
case RISCV::LWU:
case RISCV::LD:
case RISCV::FLD:
break;
}
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
return 0;
}
unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
default:
return 0;
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
case RISCV::FSH:
case RISCV::FSW:
case RISCV::SD:
case RISCV::FSD:
break;
}
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
return 0;
}
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
unsigned NumRegs) {
// We really want the positive remainder mod 32 here, which happens to be
// easily obtainable with a mask.
return ((DstReg - SrcReg) & 0x1f) < NumRegs;
}
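Two concrete cases for the predicate above (illustrative only): with register encodings compared modulo 32, a destination that starts inside the source group forces the tuple copy in copyPhysReg below to be emitted in reverse order.
// Illustrative only: the forward-copy overlap check on concrete encodings.
// Copying a 4-register group v8..v11 to v10..v13 overlaps, so copy backwards:
static_assert(((10u - 8u) & 0x1f) < 4u, "overlapping tuple, reverse the copy");
// Copying v8..v11 to v16..v19 is disjoint, so a forward copy is fine:
static_assert(!(((16u - 8u) & 0x1f) < 4u), "disjoint tuple, forward copy ok");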
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg,
MCRegister SrcReg, bool KillSrc) const {
if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0);
return;
}
// FPR->FPR copies and VR->VR copies.
unsigned Opc;
bool IsScalableVector = true;
unsigned NF = 1;
unsigned LMul = 1;
unsigned SubRegIdx = RISCV::sub_vrm1_0;
if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_H;
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_S;
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_D;
IsScalableVector = false;
} else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
} else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
} else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV4R_V;
} else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV8R_V;
} else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 2;
LMul = 1;
} else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 2;
LMul = 2;
} else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV4R_V;
SubRegIdx = RISCV::sub_vrm4_0;
NF = 2;
LMul = 4;
} else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 3;
LMul = 1;
} else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 3;
LMul = 2;
} else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 4;
LMul = 1;
} else if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 4;
LMul = 2;
} else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 5;
LMul = 1;
} else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 6;
LMul = 1;
} else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 7;
LMul = 1;
} else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 8;
LMul = 1;
} else {
llvm_unreachable("Impossible reg-to-reg copy");
}
if (IsScalableVector) {
if (NF == 1) {
BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
int I = 0, End = NF, Incr = 1;
unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned DstEncoding = TRI->getEncodingValue(DstReg);
if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMul)) {
I = NF - 1;
End = -1;
Incr = -1;
}
for (; I != End; I += Incr) {
BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I))
.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
getKillRegState(KillSrc));
}
}
} else {
BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(SrcReg, getKillRegState(KillSrc));
}
}
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register SrcReg, bool IsKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end())
DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
bool IsScalableVector = true;
bool IsZvlsseg = true;
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::SW : RISCV::SD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSH;
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSW;
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSD;
IsScalableVector = false;
} else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M1;
IsZvlsseg = false;
} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M2;
IsZvlsseg = false;
} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M4;
IsZvlsseg = false;
} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M8;
IsZvlsseg = false;
} else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL2_M1;
else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL2_M2;
else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL2_M4;
else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL3_M1;
else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL3_M2;
else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL4_M1;
else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL4_M2;
else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL5_M1;
else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL6_M1;
else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL7_M1;
else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL8_M1;
else
llvm_unreachable("Can't store this register to stack slot");
if (IsScalableVector) {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
auto MIB = BuildMI(MBB, I, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addMemOperand(MMO);
if (IsZvlsseg) {
// For spilling/reloading Zvlsseg registers, append the dummy field for
// the scaled vector length. The argument will be used when expanding
// these pseudo instructions.
MIB.addReg(RISCV::X0);
}
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
BuildMI(MBB, I, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
}
void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register DstReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end())
DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
bool IsScalableVector = true;
bool IsZvlsseg = true;
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::LW : RISCV::LD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLH;
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLW;
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLD;
IsScalableVector = false;
} else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M1;
IsZvlsseg = false;
} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M2;
IsZvlsseg = false;
} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M4;
IsZvlsseg = false;
} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M8;
IsZvlsseg = false;
} else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD2_M1;
else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD2_M2;
else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD2_M4;
else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD3_M1;
else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD3_M2;
else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD4_M1;
else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD4_M2;
else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD5_M1;
else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD6_M1;
else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD7_M1;
else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD8_M1;
else
llvm_unreachable("Can't load this register from stack slot");
if (IsScalableVector) {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
auto MIB = BuildMI(MBB, I, DL, get(Opcode), DstReg)
.addFrameIndex(FI)
.addMemOperand(MMO);
if (IsZvlsseg) {
// For spilling/reloading Zvlsseg registers, append the dummy field for
// the scaled vector length. The argument will be used when expanding
// these pseudo instructions.
MIB.addReg(RISCV::X0);
}
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
BuildMI(MBB, I, DL, get(Opcode), DstReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
}
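// movImm materializes the constant Val into DstReg by expanding the
// instruction sequence returned by RISCVMatInt::generateInstSeq. Intermediate
// results are written to a scratch virtual register; only the final
// instruction in the sequence writes DstReg, and only the first one reads X0.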
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
MachineInstr::MIFlag Flag) const {
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
Register SrcReg = RISCV::X0;
Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass);
unsigned Num = 0;
if (!STI.is64Bit() && !isInt<32>(Val))
report_fatal_error("Should only materialize 32-bit constants for RV32");
RISCVMatInt::InstSeq Seq =
RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
assert(!Seq.empty());
for (RISCVMatInt::Inst &Inst : Seq) {
// Write the final result to DstReg if it's the last instruction in the Seq.
// Otherwise, write the result to the temp register.
if (++Num == Seq.size())
Result = DstReg;
if (Inst.Opc == RISCV::LUI) {
BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result)
.addImm(Inst.Imm)
.setMIFlag(Flag);
} else if (Inst.Opc == RISCV::ADDUW) {
BuildMI(MBB, MBBI, DL, get(RISCV::ADDUW), Result)
.addReg(SrcReg, RegState::Kill)
.addReg(RISCV::X0)
.setMIFlag(Flag);
} else {
BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
.addReg(SrcReg, RegState::Kill)
.addImm(Inst.Imm)
.setMIFlag(Flag);
}
// Only the first instruction has X0 as its source.
SrcReg = Result;
}
}
// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
// push BranchOpcode, Reg1, Reg2.
static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
SmallVectorImpl<MachineOperand> &Cond) {
// Block ends with fall-through condbranch.
assert(LastInst.getDesc().isConditionalBranch() &&
"Unknown conditional branch");
Target = LastInst.getOperand(2).getMBB();
Cond.push_back(MachineOperand::CreateImm(LastInst.getOpcode()));
Cond.push_back(LastInst.getOperand(0));
Cond.push_back(LastInst.getOperand(1));
}
static unsigned getOppositeBranchOpcode(int Opc) {
switch (Opc) {
default:
llvm_unreachable("Unrecognized conditional branch");
case RISCV::BEQ:
return RISCV::BNE;
case RISCV::BNE:
return RISCV::BEQ;
case RISCV::BLT:
return RISCV::BGE;
case RISCV::BGE:
return RISCV::BLT;
case RISCV::BLTU:
return RISCV::BGEU;
case RISCV::BGEU:
return RISCV::BLTU;
}
}
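// analyzeBranch recognises three terminator shapes: a single unconditional
// branch, a single conditional branch, and a conditional branch followed by
// an unconditional one. Indirect branches and blocks with more than two
// terminators are reported as not analyzable. With AllowModify set, any
// terminators after the first unconditional or indirect branch are erased.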
bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
TBB = FBB = nullptr;
Cond.clear();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end() || !isUnpredicatedTerminator(*I))
return false;
// Count the number of terminators and find the first unconditional or
// indirect branch.
MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
int NumTerminators = 0;
for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
J++) {
NumTerminators++;
if (J->getDesc().isUnconditionalBranch() ||
J->getDesc().isIndirectBranch()) {
FirstUncondOrIndirectBr = J.getReverse();
}
}
// If AllowModify is true, we can erase any terminators after
// FirstUncondOrIndirectBR.
if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
std::next(FirstUncondOrIndirectBr)->eraseFromParent();
NumTerminators--;
}
I = FirstUncondOrIndirectBr;
}
// We can't handle blocks that end in an indirect branch.
if (I->getDesc().isIndirectBranch())
return true;
// We can't handle blocks with more than 2 terminators.
if (NumTerminators > 2)
return true;
// Handle a single unconditional branch.
if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
TBB = getBranchDestBlock(*I);
return false;
}
// Handle a single conditional branch.
if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
parseCondBranch(*I, TBB, Cond);
return false;
}
// Handle a conditional branch followed by an unconditional branch.
if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
I->getDesc().isUnconditionalBranch()) {
parseCondBranch(*std::prev(I), TBB, Cond);
FBB = getBranchDestBlock(*I);
return false;
}
// Otherwise, we can't handle this.
return true;
}
unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
if (BytesRemoved)
*BytesRemoved = 0;
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
if (!I->getDesc().isUnconditionalBranch() &&
!I->getDesc().isConditionalBranch())
return 0;
// Remove the branch.
if (BytesRemoved)
*BytesRemoved += getInstSizeInBytes(*I);
I->eraseFromParent();
I = MBB.end();
if (I == MBB.begin())
return 1;
--I;
if (!I->getDesc().isConditionalBranch())
return 1;
// Remove the branch.
if (BytesRemoved)
*BytesRemoved += getInstSizeInBytes(*I);
I->eraseFromParent();
return 2;
}
// Inserts a branch into the end of the specific MachineBasicBlock, returning
// the number of instructions inserted.
unsigned RISCVInstrInfo::insertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
if (BytesAdded)
*BytesAdded = 0;
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 3 || Cond.size() == 0) &&
"RISCV branch conditions have two components!");
// Unconditional branch.
if (Cond.empty()) {
MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(MI);
return 1;
}
// Either a one or two-way conditional branch.
unsigned Opc = Cond[0].getImm();
MachineInstr &CondMI =
*BuildMI(&MBB, DL, get(Opc)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(CondMI);
// One-way conditional branch.
if (!FBB)
return 1;
// Two-way conditional branch.
MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(MI);
return 2;
}
unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
MachineBasicBlock &DestBB,
const DebugLoc &DL,
int64_t BrOffset,
RegScavenger *RS) const {
assert(RS && "RegScavenger required for long branching");
assert(MBB.empty() &&
"new block should be inserted for expanding unconditional branch");
assert(MBB.pred_size() == 1);
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
if (!isInt<32>(BrOffset))
report_fatal_error(
"Branch offsets outside of the signed 32-bit range not supported");
// FIXME: A virtual register must be used initially, as the register
// scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
// uses the same workaround).
Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
auto II = MBB.end();
MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
.addReg(ScratchReg, RegState::Define | RegState::Dead)
.addMBB(&DestBB, RISCVII::MO_CALL);
RS->enterBasicBlockEnd(MBB);
unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
MI.getIterator(), false, 0);
MRI.replaceRegWith(ScratchReg, Scav);
MRI.clearVirtRegs();
RS->setRegUsed(Scav);
return 8;
}
bool RISCVInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert((Cond.size() == 3) && "Invalid branch condition!");
Cond[0].setImm(getOppositeBranchOpcode(Cond[0].getImm()));
return false;
}
MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
assert(MI.getDesc().isBranch() && "Unexpected opcode!");
// The branch target is always the last operand.
int NumOp = MI.getNumExplicitOperands();
return MI.getOperand(NumOp - 1).getMBB();
}
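// Conditional branches encode a 12-bit immediate scaled by two, giving a
// 13-bit signed byte offset; JAL and PseudoBR encode a 20-bit immediate
// scaled by two (21-bit offset); PseudoJump expands to an AUIPC/JALR pair,
// so its reach is 32 bits once the +0x800 hi/lo rounding is accounted for.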
bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
int64_t BrOffset) const {
unsigned XLen = STI.getXLen();
// Ideally we could determine the supported branch offset from the
// RISCVII::FormMask, but this can't be used for Pseudo instructions like
// PseudoBR.
switch (BranchOp) {
default:
llvm_unreachable("Unexpected opcode!");
case RISCV::BEQ:
case RISCV::BNE:
case RISCV::BLT:
case RISCV::BGE:
case RISCV::BLTU:
case RISCV::BGEU:
return isIntN(13, BrOffset);
case RISCV::JAL:
case RISCV::PseudoBR:
return isIntN(21, BrOffset);
case RISCV::PseudoJump:
return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
}
}
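// getInstSizeInBytes reports the size used by branch relaxation and the
// outliner: compressible instructions count as 2 bytes, meta instructions as
// 0, and the pseudos below use the size of their eventual expansion.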
unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
default: {
if (MI.getParent() && MI.getParent()->getParent()) {
const auto MF = MI.getMF();
const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
const MCRegisterInfo &MRI = *TM.getMCRegisterInfo();
const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
const RISCVSubtarget &ST = MF->getSubtarget<RISCVSubtarget>();
if (isCompressibleInst(MI, &ST, MRI, STI))
return 2;
}
return get(Opcode).getSize();
}
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
case TargetOpcode::DBG_VALUE:
return 0;
// These values are determined by RISCVExpandAtomicPseudoInsts,
// RISCVExpandPseudoInsts and RISCVMCCodeEmitter, depending on where the
// pseudos are expanded.
case RISCV::PseudoCALLReg:
case RISCV::PseudoCALL:
case RISCV::PseudoJump:
case RISCV::PseudoTAIL:
case RISCV::PseudoLLA:
case RISCV::PseudoLA:
case RISCV::PseudoLA_TLS_IE:
case RISCV::PseudoLA_TLS_GD:
return 8;
case RISCV::PseudoAtomicLoadNand32:
case RISCV::PseudoAtomicLoadNand64:
return 20;
case RISCV::PseudoMaskedAtomicSwap32:
case RISCV::PseudoMaskedAtomicLoadAdd32:
case RISCV::PseudoMaskedAtomicLoadSub32:
return 28;
case RISCV::PseudoMaskedAtomicLoadNand32:
return 32;
case RISCV::PseudoMaskedAtomicLoadMax32:
case RISCV::PseudoMaskedAtomicLoadMin32:
return 44;
case RISCV::PseudoMaskedAtomicLoadUMax32:
case RISCV::PseudoMaskedAtomicLoadUMin32:
return 36;
case RISCV::PseudoCmpXchg32:
case RISCV::PseudoCmpXchg64:
return 16;
case RISCV::PseudoMaskedCmpXchg32:
return 32;
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR: {
const MachineFunction &MF = *MI.getParent()->getParent();
const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
*TM.getMCAsmInfo());
}
case RISCV::PseudoVSPILL2_M1:
case RISCV::PseudoVSPILL2_M2:
case RISCV::PseudoVSPILL2_M4:
case RISCV::PseudoVSPILL3_M1:
case RISCV::PseudoVSPILL3_M2:
case RISCV::PseudoVSPILL4_M1:
case RISCV::PseudoVSPILL4_M2:
case RISCV::PseudoVSPILL5_M1:
case RISCV::PseudoVSPILL6_M1:
case RISCV::PseudoVSPILL7_M1:
case RISCV::PseudoVSPILL8_M1:
case RISCV::PseudoVRELOAD2_M1:
case RISCV::PseudoVRELOAD2_M2:
case RISCV::PseudoVRELOAD2_M4:
case RISCV::PseudoVRELOAD3_M1:
case RISCV::PseudoVRELOAD3_M2:
case RISCV::PseudoVRELOAD4_M1:
case RISCV::PseudoVRELOAD4_M2:
case RISCV::PseudoVRELOAD5_M1:
case RISCV::PseudoVRELOAD6_M1:
case RISCV::PseudoVRELOAD7_M1:
case RISCV::PseudoVRELOAD8_M1: {
// The values are determined by expandVSPILL and expandVRELOAD, which
// expand these pseudos depending on NF.
unsigned NF = isRVVSpillForZvlsseg(Opcode)->first;
return 4 * (2 * NF - 1);
}
}
}
bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
const unsigned Opcode = MI.getOpcode();
switch (Opcode) {
default:
break;
case RISCV::FSGNJ_D:
case RISCV::FSGNJ_S:
// The canonical floating-point move is fsgnj rd, rs, rs.
return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
case RISCV::ADDI:
case RISCV::ORI:
case RISCV::XORI:
return (MI.getOperand(1).isReg() &&
MI.getOperand(1).getReg() == RISCV::X0) ||
(MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
}
return MI.isAsCheapAsAMove();
}
Optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
if (MI.isMoveReg())
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
switch (MI.getOpcode()) {
default:
break;
case RISCV::ADDI:
// Operand 1 can be a frame index, but callers expect registers.
if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0)
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
break;
case RISCV::FSGNJ_D:
case RISCV::FSGNJ_S:
// The canonical floating-point move is fsgnj rd, rs, rs.
if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
break;
}
return None;
}
bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const {
const MCInstrInfo *MCII = STI.getInstrInfo();
MCInstrDesc const &Desc = MCII->get(MI.getOpcode());
for (auto &OI : enumerate(Desc.operands())) {
unsigned OpType = OI.value().OperandType;
if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
const MachineOperand &MO = MI.getOperand(OI.index());
if (MO.isImm()) {
int64_t Imm = MO.getImm();
bool Ok;
switch (OpType) {
default:
llvm_unreachable("Unexpected operand type");
case RISCVOp::OPERAND_UIMM4:
Ok = isUInt<4>(Imm);
break;
case RISCVOp::OPERAND_UIMM5:
Ok = isUInt<5>(Imm);
break;
case RISCVOp::OPERAND_UIMM12:
Ok = isUInt<12>(Imm);
break;
case RISCVOp::OPERAND_SIMM12:
Ok = isInt<12>(Imm);
break;
case RISCVOp::OPERAND_UIMM20:
Ok = isUInt<20>(Imm);
break;
case RISCVOp::OPERAND_UIMMLOG2XLEN:
if (STI.getTargetTriple().isArch64Bit())
Ok = isUInt<6>(Imm);
else
Ok = isUInt<5>(Imm);
break;
}
if (!Ok) {
ErrInfo = "Invalid immediate";
return false;
}
}
}
}
return true;
}
// Return true on success, setting the base operand, the byte offset, and the
// memory width of the instruction. Width is the size of memory that is being
// loaded/stored.
bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
// Here we assume the standard RISC-V ISA, which uses a base+offset
// addressing mode. You'll need to relax these conditions to support custom
// load/store instructions.
if (LdSt.getNumExplicitOperands() != 3)
return false;
if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
return false;
if (!LdSt.hasOneMemOperand())
return false;
Width = (*LdSt.memoperands_begin())->getSize();
BaseReg = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm();
return true;
}
bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
const MachineInstr &MIa, const MachineInstr &MIb) const {
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
// Retrieve the base register, the offset from the base register, and the
// width. Width is the size of memory that is being loaded/stored (e.g. 1, 2,
// 4). If the base registers are identical and the offset of the lower memory
// access plus its width does not reach the offset of the higher memory
// access, then the two accesses are trivially disjoint.
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned int WidthA = 0, WidthB = 0;
if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = std::min(OffsetA, OffsetB);
int HighOffset = std::max(OffsetA, OffsetB);
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowOffset + LowWidth <= HighOffset)
return true;
}
}
return false;
}
std::pair<unsigned, unsigned>
RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
return std::make_pair(TF & Mask, TF & ~Mask);
}
ArrayRef<std::pair<unsigned, const char *>>
RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace RISCVII;
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_CALL, "riscv-call"},
{MO_PLT, "riscv-plt"},
{MO_LO, "riscv-lo"},
{MO_HI, "riscv-hi"},
{MO_PCREL_LO, "riscv-pcrel-lo"},
{MO_PCREL_HI, "riscv-pcrel-hi"},
{MO_GOT_HI, "riscv-got-hi"},
{MO_TPREL_LO, "riscv-tprel-lo"},
{MO_TPREL_HI, "riscv-tprel-hi"},
{MO_TPREL_ADD, "riscv-tprel-add"},
{MO_TLS_GOT_HI, "riscv-tls-got-hi"},
{MO_TLS_GD_HI, "riscv-tls-gd-hi"}};
return makeArrayRef(TargetFlags);
}
bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
const Function &F = MF.getFunction();
// Can F be deduplicated by the linker? If it can, don't outline from it.
if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
return false;
// Don't outline from functions with section markings; the program could
// expect that all the code is in the named section.
if (F.hasSection())
return false;
// It's safe to outline from MF.
return true;
}
bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const {
// More accurate safety checking is done in getOutliningCandidateInfo.
return true;
}
// Enum values indicating how an outlined call should be constructed.
enum MachineOutlinerConstructionID {
MachineOutlinerDefault
};
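// Candidates that cannot use X5 (t0) to set up the call are filtered out
// first. The cost model below charges 8 bytes per call site for
// "call t0, <fn>" and 4 bytes of frame overhead for the "jr t0" return,
// reduced to 2 bytes when the C extension is enabled.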
outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
// First we need to filter out candidates where the X5 register (i.e. t0)
// can't be used to set up the function call.
auto CannotInsertCall = [](outliner::Candidate &C) {
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
C.initLRU(*TRI);
LiveRegUnits LRU = C.LRU;
return !LRU.available(RISCV::X5);
};
llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
return outliner::OutlinedFunction();
unsigned SequenceSize = 0;
auto I = RepeatedSequenceLocs[0].front();
auto E = std::next(RepeatedSequenceLocs[0].back());
for (; I != E; ++I)
SequenceSize += getInstSizeInBytes(*I);
// call t0, function = 8 bytes.
unsigned CallOverhead = 8;
for (auto &C : RepeatedSequenceLocs)
C.setCallInfo(MachineOutlinerDefault, CallOverhead);
// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
unsigned FrameOverhead = 4;
if (RepeatedSequenceLocs[0].getMF()->getSubtarget()
.getFeatureBits()[RISCV::FeatureStdExtC])
FrameOverhead = 2;
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
FrameOverhead, MachineOutlinerDefault);
}
outliner::InstrType
RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI,
unsigned Flags) const {
MachineInstr &MI = *MBBI;
MachineBasicBlock *MBB = MI.getParent();
const TargetRegisterInfo *TRI =
MBB->getParent()->getSubtarget().getRegisterInfo();
// Positions generally can't safely be outlined.
if (MI.isPosition()) {
// We can manually strip out CFI instructions later.
if (MI.isCFIInstruction())
return outliner::InstrType::Invisible;
return outliner::InstrType::Illegal;
}
// Don't trust the user to write safe inline assembly.
if (MI.isInlineAsm())
return outliner::InstrType::Illegal;
// We can't outline branches to other basic blocks.
if (MI.isTerminator() && !MBB->succ_empty())
return outliner::InstrType::Illegal;
// We need support for tail calls to outlined functions before return
// statements can be allowed.
if (MI.isReturn())
return outliner::InstrType::Illegal;
// Don't allow modifying the X5 register which we use for return addresses for
// these outlined functions.
if (MI.modifiesRegister(RISCV::X5, TRI) ||
MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
return outliner::InstrType::Illegal;
// Make sure the operands don't reference something unsafe.
for (const auto &MO : MI.operands())
if (MO.isMBB() || MO.isBlockAddress() || MO.isCPI())
return outliner::InstrType::Illegal;
// Don't allow instructions which won't be materialized to impact outlining
// analysis.
if (MI.isMetaInstruction())
return outliner::InstrType::Invisible;
return outliner::InstrType::Legal;
}
void RISCVInstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
// Strip out any CFI instructions
bool Changed = true;
while (Changed) {
Changed = false;
auto I = MBB.begin();
auto E = MBB.end();
for (; I != E; ++I) {
if (I->isCFIInstruction()) {
I->removeFromParent();
Changed = true;
break;
}
}
}
MBB.addLiveIn(RISCV::X5);
// Add in a return instruction to the end of the outlined frame.
MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
.addReg(RISCV::X0, RegState::Define)
.addReg(RISCV::X5)
.addImm(0));
}
MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
MachineFunction &MF, const outliner::Candidate &C) const {
// Add in a call instruction to the outlined function at the given location.
It = MBB.insert(It,
BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
.addGlobalAddress(M.getNamedValue(MF.getName()), 0,
RISCVII::MO_CALL));
return It;
}
// clang-format off
#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL##_COMMUTABLE
#define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \
CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8)
#define CASE_VFMA_SPLATS(OP) \
CASE_VFMA_OPCODE_LMULS(OP, VF16): \
case CASE_VFMA_OPCODE_LMULS(OP, VF32): \
case CASE_VFMA_OPCODE_LMULS(OP, VF64)
// clang-format on
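// The CASE_VFMA_* macros above expand into case labels covering every LMUL
// variant of a commutable VFMA pseudo (and, for the splat forms, every
// floating-point element width), so the switches below only need to name
// each base operation once.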
bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
const MCInstrDesc &Desc = MI.getDesc();
if (!Desc.isCommutable())
return false;
switch (MI.getOpcode()) {
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FMACC):
case CASE_VFMA_SPLATS(FMSAC):
case CASE_VFMA_SPLATS(FNMADD):
case CASE_VFMA_SPLATS(FNMSUB):
case CASE_VFMA_SPLATS(FNMACC):
case CASE_VFMA_SPLATS(FNMSAC):
case CASE_VFMA_OPCODE_LMULS(FMACC, VV):
case CASE_VFMA_OPCODE_LMULS(FMSAC, VV):
case CASE_VFMA_OPCODE_LMULS(FNMACC, VV):
case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VX):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VX):
case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VV):
case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
// For these instructions we can only swap operand 1 and operand 3 by
// changing the opcode.
unsigned CommutableOpIdx1 = 1;
unsigned CommutableOpIdx2 = 3;
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
CommutableOpIdx2))
return false;
return true;
}
case CASE_VFMA_OPCODE_LMULS(FMADD, VV):
case CASE_VFMA_OPCODE_LMULS(FMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(FNMADD, VV):
case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VV):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
// For these instructions we have more freedom. We can commute with the
// other multiplicand or with the addend/subtrahend/minuend.
// Any fixed operand must be from source 1, 2 or 3.
if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
return false;
if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
return false;
// If both ops are fixed, one must be the tied source.
if (SrcOpIdx1 != CommuteAnyOperandIndex &&
SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
return false;
// Look for two different register operands assumed to be commutable
// regardless of the FMA opcode. The FMA opcode is adjusted later if
// needed.
if (SrcOpIdx1 == CommuteAnyOperandIndex ||
SrcOpIdx2 == CommuteAnyOperandIndex) {
// At least one of the operands to be commuted is not specified and
// this method is free to choose appropriate commutable operands.
unsigned CommutableOpIdx1 = SrcOpIdx1;
if (SrcOpIdx1 == SrcOpIdx2) {
// Neither operand is fixed. Set one of the commutable
// operands to the tied source.
CommutableOpIdx1 = 1;
- } else if (SrcOpIdx1 == CommutableOpIdx1) {
+ } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
// Only one of the operands is not fixed.
CommutableOpIdx1 = SrcOpIdx2;
}
// CommutableOpIdx1 is well defined now. Let's choose another commutable
// operand and assign its index to CommutableOpIdx2.
unsigned CommutableOpIdx2;
if (CommutableOpIdx1 != 1) {
// If we haven't already used the tied source, we must use it now.
CommutableOpIdx2 = 1;
} else {
Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
// The commuted operands should have different registers.
// Otherwise, the commute transformation does not change anything and
// is useless. We use this as a hint to make our decision.
if (Op1Reg != MI.getOperand(2).getReg())
CommutableOpIdx2 = 2;
else
CommutableOpIdx2 = 3;
}
// Assign the found pair of commutable indices to SrcOpIdx1 and
// SrcOpIdx2 to return those values.
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
CommutableOpIdx2))
return false;
}
return true;
}
}
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_COMMUTABLE: \
Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_COMMUTABLE; \
break;
#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF16) \
CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF32) \
CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF64)
MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const {
auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
if (NewMI)
return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
return MI;
};
switch (MI.getOpcode()) {
case CASE_VFMA_SPLATS(FMACC):
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSAC):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FNMACC):
case CASE_VFMA_SPLATS(FNMADD):
case CASE_VFMA_SPLATS(FNMSAC):
case CASE_VFMA_SPLATS(FNMSUB):
case CASE_VFMA_OPCODE_LMULS(FMACC, VV):
case CASE_VFMA_OPCODE_LMULS(FMSAC, VV):
case CASE_VFMA_OPCODE_LMULS(FNMACC, VV):
case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VX):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VX):
case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VV):
case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
// It only makes sense to toggle these between clobbering the
// addend/subtrahend/minuend or one of the multiplicands.
assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
unsigned Opc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
CASE_VFMA_CHANGE_OPCODE_LMULS(FMACC, FMADD, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FMSAC, FMSUB, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FNMACC, FNMADD, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSAC, FNMSUB, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
}
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case CASE_VFMA_OPCODE_LMULS(FMADD, VV):
case CASE_VFMA_OPCODE_LMULS(FMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(FNMADD, VV):
case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VV):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
// If one of the operands is the addend, we need to change the opcode.
// Otherwise we're just swapping two of the multiplicands.
if (OpIdx1 == 3 || OpIdx2 == 3) {
unsigned Opc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
CASE_VFMA_CHANGE_OPCODE_LMULS(FMADD, FMACC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FMSUB, FMSAC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FNMADD, FNMACC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSUB, FNMSAC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
}
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
// Let the default code handle it.
break;
}
}
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
#undef CASE_VFMA_CHANGE_OPCODE_SPLATS
#undef CASE_VFMA_CHANGE_OPCODE_LMULS
#undef CASE_VFMA_CHANGE_OPCODE_COMMON
#undef CASE_VFMA_SPLATS
#undef CASE_VFMA_OPCODE_LMULS
#undef CASE_VFMA_OPCODE_COMMON
// clang-format off
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
RISCV::PseudoV##OP##_##LMUL##_TIED
#define CASE_WIDEOP_OPCODE_LMULS(OP) \
CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
case CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
// clang-format on
#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
case RISCV::PseudoV##OP##_##LMUL##_TIED: \
NewOpc = RISCV::PseudoV##OP##_##LMUL; \
break;
#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
MachineInstr *RISCVInstrInfo::convertToThreeAddress(
MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const {
switch (MI.getOpcode()) {
default:
break;
case CASE_WIDEOP_OPCODE_LMULS(FWADD_WV):
case CASE_WIDEOP_OPCODE_LMULS(FWSUB_WV):
case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
// clang-format off
unsigned NewOpc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
}
// clang-format on
MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
.add(MI.getOperand(4));
MIB.copyImplicitOps(MI);
if (LV) {
unsigned NumOps = MI.getNumOperands();
for (unsigned I = 1; I < NumOps; ++I) {
MachineOperand &Op = MI.getOperand(I);
if (Op.isReg() && Op.isKill())
LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
}
}
return MIB;
}
}
return nullptr;
}
#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
#undef CASE_WIDEOP_OPCODE_LMULS
#undef CASE_WIDEOP_OPCODE_COMMON
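// getVLENFactoredAmount emits code that computes (Amount / 8) * VLENB into a
// fresh virtual register. A power-of-two factor becomes a single shift,
// 2^N +/- 1 becomes a shift plus an add/sub, and anything else falls back to
// materializing the factor and using MUL, which requires the M extension.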
Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator II,
const DebugLoc &DL,
int64_t Amount,
MachineInstr::MIFlag Flag) const {
assert(Amount > 0 && "There is no need to get VLEN scaled value.");
assert(Amount % 8 == 0 &&
"Reserve the stack by the multiple of one vector size.");
MachineRegisterInfo &MRI = MF.getRegInfo();
const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
int64_t NumOfVReg = Amount / 8;
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL)
.setMIFlag(Flag);
assert(isInt<32>(NumOfVReg) &&
"Expect the number of vector registers within 32-bits.");
if (isPowerOf2_32(NumOfVReg)) {
uint32_t ShiftAmount = Log2_32(NumOfVReg);
if (ShiftAmount == 0)
return VL;
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
.addReg(VL, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
} else if (isPowerOf2_32(NumOfVReg - 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg - 1);
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), ScaledRegister)
.addReg(VL)
.addImm(ShiftAmount)
.setMIFlag(Flag);
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), VL)
.addReg(ScaledRegister, RegState::Kill)
.addReg(VL, RegState::Kill)
.setMIFlag(Flag);
} else if (isPowerOf2_32(NumOfVReg + 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg + 1);
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), ScaledRegister)
.addReg(VL)
.addImm(ShiftAmount)
.setMIFlag(Flag);
BuildMI(MBB, II, DL, TII->get(RISCV::SUB), VL)
.addReg(ScaledRegister, RegState::Kill)
.addReg(VL, RegState::Kill)
.setMIFlag(Flag);
} else {
Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
if (!isInt<12>(NumOfVReg))
movImm(MBB, II, DL, N, NumOfVReg);
else {
BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), N)
.addReg(RISCV::X0)
.addImm(NumOfVReg)
.setMIFlag(Flag);
}
if (!MF.getSubtarget<RISCVSubtarget>().hasStdExtM())
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
MF.getFunction(),
"M-extension must be enabled to calculate the vscaled size/offset."});
BuildMI(MBB, II, DL, TII->get(RISCV::MUL), VL)
.addReg(VL, RegState::Kill)
.addReg(N, RegState::Kill)
.setMIFlag(Flag);
}
return VL;
}
static bool isRVVWholeLoadStore(unsigned Opcode) {
switch (Opcode) {
default:
return false;
case RISCV::VS1R_V:
case RISCV::VS2R_V:
case RISCV::VS4R_V:
case RISCV::VS8R_V:
case RISCV::VL1RE8_V:
case RISCV::VL2RE8_V:
case RISCV::VL4RE8_V:
case RISCV::VL8RE8_V:
case RISCV::VL1RE16_V:
case RISCV::VL2RE16_V:
case RISCV::VL4RE16_V:
case RISCV::VL8RE16_V:
case RISCV::VL1RE32_V:
case RISCV::VL2RE32_V:
case RISCV::VL4RE32_V:
case RISCV::VL8RE32_V:
case RISCV::VL1RE64_V:
case RISCV::VL2RE64_V:
case RISCV::VL4RE64_V:
case RISCV::VL8RE64_V:
return true;
}
}
bool RISCVInstrInfo::isRVVSpill(const MachineInstr &MI, bool CheckFIs) const {
// RVV lacks any support for immediate addressing for stack addresses, so be
// conservative.
unsigned Opcode = MI.getOpcode();
if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
!isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
return false;
return !CheckFIs || any_of(MI.operands(), [](const MachineOperand &MO) {
return MO.isFI();
});
}
Optional<std::pair<unsigned, unsigned>>
RISCVInstrInfo::isRVVSpillForZvlsseg(unsigned Opcode) const {
switch (Opcode) {
default:
return None;
case RISCV::PseudoVSPILL2_M1:
case RISCV::PseudoVRELOAD2_M1:
return std::make_pair(2u, 1u);
case RISCV::PseudoVSPILL2_M2:
case RISCV::PseudoVRELOAD2_M2:
return std::make_pair(2u, 2u);
case RISCV::PseudoVSPILL2_M4:
case RISCV::PseudoVRELOAD2_M4:
return std::make_pair(2u, 4u);
case RISCV::PseudoVSPILL3_M1:
case RISCV::PseudoVRELOAD3_M1:
return std::make_pair(3u, 1u);
case RISCV::PseudoVSPILL3_M2:
case RISCV::PseudoVRELOAD3_M2:
return std::make_pair(3u, 2u);
case RISCV::PseudoVSPILL4_M1:
case RISCV::PseudoVRELOAD4_M1:
return std::make_pair(4u, 1u);
case RISCV::PseudoVSPILL4_M2:
case RISCV::PseudoVRELOAD4_M2:
return std::make_pair(4u, 2u);
case RISCV::PseudoVSPILL5_M1:
case RISCV::PseudoVRELOAD5_M1:
return std::make_pair(5u, 1u);
case RISCV::PseudoVSPILL6_M1:
case RISCV::PseudoVRELOAD6_M1:
return std::make_pair(6u, 1u);
case RISCV::PseudoVSPILL7_M1:
case RISCV::PseudoVRELOAD7_M1:
return std::make_pair(7u, 1u);
case RISCV::PseudoVSPILL8_M1:
case RISCV::PseudoVRELOAD8_M1:
return std::make_pair(8u, 1u);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 171d59ae4c6b..ae5108b0cb0d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -1,1436 +1,1434 @@
//===-- WebAssemblyFastISel.cpp - WebAssembly FastISel implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific support for the FastISel
/// class. Some of the target-specific code is generated by tablegen in the file
/// WebAssemblyGenFastISel.inc, which is #included here.
///
/// TODO: kill flags
///
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "wasm-fastisel"
namespace {
class WebAssemblyFastISel final : public FastISel {
// All possible address modes.
class Address {
public:
using BaseKind = enum { RegBase, FrameIndexBase };
private:
BaseKind Kind = RegBase;
union {
unsigned Reg;
int FI;
} Base;
// Whether the base has been determined yet
bool IsBaseSet = false;
int64_t Offset = 0;
const GlobalValue *GV = nullptr;
public:
// Innocuous defaults for our address.
Address() { Base.Reg = 0; }
void setKind(BaseKind K) {
assert(!isSet() && "Can't change kind with non-zero base");
Kind = K;
}
BaseKind getKind() const { return Kind; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
assert(!IsBaseSet && "Base cannot be reset");
Base.Reg = Reg;
IsBaseSet = true;
}
unsigned getReg() const {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
assert(!IsBaseSet && "Base cannot be reset");
Base.FI = FI;
IsBaseSet = true;
}
unsigned getFI() const {
assert(isFIBase() && "Invalid base frame index access!");
return Base.FI;
}
void setOffset(int64_t NewOffset) {
assert(NewOffset >= 0 && "Offsets must be non-negative");
Offset = NewOffset;
}
int64_t getOffset() const { return Offset; }
void setGlobalValue(const GlobalValue *G) { GV = G; }
const GlobalValue *getGlobalValue() const { return GV; }
bool isSet() const { return IsBaseSet; }
};
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
/// right decision when generating code for different targets.
const WebAssemblySubtarget *Subtarget;
LLVMContext *Context;
private:
// Utility helper routines
MVT::SimpleValueType getSimpleType(Type *Ty) {
EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
return VT.isSimple() ? VT.getSimpleVT().SimpleTy
: MVT::INVALID_SIMPLE_VALUE_TYPE;
}
MVT::SimpleValueType getLegalType(MVT::SimpleValueType VT) {
switch (VT) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
return MVT::i32;
case MVT::i32:
case MVT::i64:
case MVT::f32:
case MVT::f64:
return VT;
case MVT::funcref:
case MVT::externref:
if (Subtarget->hasReferenceTypes())
return VT;
break;
case MVT::f16:
return MVT::f32;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v4f32:
case MVT::v2i64:
case MVT::v2f64:
if (Subtarget->hasSIMD128())
return VT;
break;
default:
break;
}
return MVT::INVALID_SIMPLE_VALUE_TYPE;
}
bool computeAddress(const Value *Obj, Address &Addr);
void materializeLoadStoreOperands(Address &Addr);
void addLoadStoreOperands(const Address &Addr, const MachineInstrBuilder &MIB,
MachineMemOperand *MMO);
unsigned maskI1Value(unsigned Reg, const Value *V);
- unsigned getRegForI1Value(const Value *V, bool &Not);
+ unsigned getRegForI1Value(const Value *V, const BasicBlock *BB, bool &Not);
unsigned zeroExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From);
unsigned signExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From);
unsigned zeroExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From,
MVT::SimpleValueType To);
unsigned signExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From,
MVT::SimpleValueType To);
unsigned getRegForUnsignedValue(const Value *V);
unsigned getRegForSignedValue(const Value *V);
unsigned getRegForPromotedValue(const Value *V, bool IsSigned);
unsigned notValue(unsigned Reg);
unsigned copyValue(unsigned Reg);
// Backend specific FastISel code.
unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
unsigned fastMaterializeConstant(const Constant *C) override;
bool fastLowerArguments() override;
// Selection routines.
bool selectCall(const Instruction *I);
bool selectSelect(const Instruction *I);
bool selectTrunc(const Instruction *I);
bool selectZExt(const Instruction *I);
bool selectSExt(const Instruction *I);
bool selectICmp(const Instruction *I);
bool selectFCmp(const Instruction *I);
bool selectBitCast(const Instruction *I);
bool selectLoad(const Instruction *I);
bool selectStore(const Instruction *I);
bool selectBr(const Instruction *I);
bool selectRet(const Instruction *I);
bool selectUnreachable(const Instruction *I);
public:
// Backend specific FastISel code.
WebAssemblyFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
Subtarget = &FuncInfo.MF->getSubtarget<WebAssemblySubtarget>();
Context = &FuncInfo.Fn->getContext();
}
bool fastSelectInstruction(const Instruction *I) override;
#include "WebAssemblyGenFastISel.inc"
};
} // end anonymous namespace
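// computeAddress tries to express Obj as a wasm-style address: a register or
// frame-index base plus a non-negative constant offset, optionally anchored
// on a global value. It folds through bitcasts, no-op int<->ptr casts,
// inbounds GEPs, and adds/subs of constants, and gives up otherwise.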
bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const auto *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
// another block, otherwise it may not have a virtual register assigned.
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
Opcode = I->getOpcode();
U = I;
}
} else if (const auto *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
}
if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
if (const auto *GV = dyn_cast<GlobalValue>(Obj)) {
if (TLI.isPositionIndependent())
return false;
if (Addr.getGlobalValue())
return false;
if (GV->isThreadLocal())
return false;
Addr.setGlobalValue(GV);
return true;
}
switch (Opcode) {
default:
break;
case Instruction::BitCast: {
// Look through bitcasts.
return computeAddress(U->getOperand(0), Addr);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr);
break;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr);
break;
}
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
uint64_t TmpOffset = Addr.getOffset();
// Non-inbounds geps can wrap; wasm's offsets can't.
if (!cast<GEPOperator>(U)->isInBounds())
goto unsupported_gep;
// Iterate through the GEP folding the constants into offsets where
// we can.
for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
GTI != E; ++GTI) {
const Value *Op = GTI.getOperand();
if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
for (;;) {
if (const auto *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
break;
}
if (S == 1 && Addr.isRegBase() && Addr.getReg() == 0) {
// An unscaled add of a register. Set it as the new base.
unsigned Reg = getRegForValue(Op);
if (Reg == 0)
return false;
Addr.setReg(Reg);
break;
}
if (canFoldAddIntoGEP(U, Op)) {
// A compatible add with a constant operand. Fold the constant.
auto *CI = cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
// Iterate on the other operand.
Op = cast<AddOperator>(Op)->getOperand(0);
continue;
}
// Unsupported
goto unsupported_gep;
}
}
}
// Don't fold in negative offsets.
if (int64_t(TmpOffset) >= 0) {
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
if (computeAddress(U->getOperand(0), Addr))
return true;
}
// We failed, restore everything and try the other options.
Addr = SavedAddr;
unsupported_gep:
break;
}
case Instruction::Alloca: {
const auto *AI = cast<AllocaInst>(Obj);
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
if (Addr.isSet()) {
return false;
}
Addr.setKind(Address::FrameIndexBase);
Addr.setFI(SI->second);
return true;
}
break;
}
case Instruction::Add: {
// Adds of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
if (isa<ConstantInt>(LHS))
std::swap(LHS, RHS);
if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
uint64_t TmpOffset = Addr.getOffset() + CI->getSExtValue();
if (int64_t(TmpOffset) >= 0) {
Addr.setOffset(TmpOffset);
return computeAddress(LHS, Addr);
}
}
Address Backup = Addr;
if (computeAddress(LHS, Addr) && computeAddress(RHS, Addr))
return true;
Addr = Backup;
break;
}
case Instruction::Sub: {
// Subs of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
int64_t TmpOffset = Addr.getOffset() - CI->getSExtValue();
if (TmpOffset >= 0) {
Addr.setOffset(TmpOffset);
return computeAddress(LHS, Addr);
}
}
break;
}
}
if (Addr.isSet()) {
return false;
}
unsigned Reg = getRegForValue(Obj);
if (Reg == 0)
return false;
Addr.setReg(Reg);
return Addr.getReg() != 0;
}
void WebAssemblyFastISel::materializeLoadStoreOperands(Address &Addr) {
if (Addr.isRegBase()) {
unsigned Reg = Addr.getReg();
if (Reg == 0) {
Reg = createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
: &WebAssembly::I32RegClass);
unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
: WebAssembly::CONST_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), Reg)
.addImm(0);
Addr.setReg(Reg);
}
}
}
void WebAssemblyFastISel::addLoadStoreOperands(const Address &Addr,
const MachineInstrBuilder &MIB,
MachineMemOperand *MMO) {
// Set the alignment operand (this is rewritten in SetP2AlignOperands).
// TODO: Disable SetP2AlignOperands for FastISel and just do it here.
MIB.addImm(0);
if (const GlobalValue *GV = Addr.getGlobalValue())
MIB.addGlobalAddress(GV, Addr.getOffset());
else
MIB.addImm(Addr.getOffset());
if (Addr.isRegBase())
MIB.addReg(Addr.getReg());
else
MIB.addFrameIndex(Addr.getFI());
MIB.addMemOperand(MMO);
}
unsigned WebAssemblyFastISel::maskI1Value(unsigned Reg, const Value *V) {
return zeroExtendToI32(Reg, V, MVT::i1);
}
-unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
+unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V,
+ const BasicBlock *BB,
+ bool &Not) {
if (const auto *ICmp = dyn_cast<ICmpInst>(V))
if (const ConstantInt *C = dyn_cast<ConstantInt>(ICmp->getOperand(1)))
- if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32)) {
+ if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32) &&
+ ICmp->getParent() == BB) {
Not = ICmp->isTrueWhenEqual();
return getRegForValue(ICmp->getOperand(0));
}
- Value *NotV;
- if (match(V, m_Not(m_Value(NotV))) && V->getType()->isIntegerTy(32)) {
- Not = true;
- return getRegForValue(NotV);
- }
-
Not = false;
unsigned Reg = getRegForValue(V);
if (Reg == 0)
return 0;
return maskI1Value(Reg, V);
}
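// zeroExtendToI32 masks the value down to its original width by ANDing with
// (1 << bits) - 1. i1 values that are already known to be zero-extended
// (e.g. zeroext arguments) and full i32 values are simply copied.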
unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From) {
if (Reg == 0)
return 0;
switch (From) {
case MVT::i1:
// If the value is naturally an i1, we don't need to mask it. We only know
// if a value is naturally an i1 if it is definitely lowered by FastISel,
// not a DAG ISel fallback.
if (V != nullptr && isa<Argument>(V) && cast<Argument>(V)->hasZExtAttr())
return copyValue(Reg);
break;
case MVT::i8:
case MVT::i16:
break;
case MVT::i32:
return copyValue(Reg);
default:
return 0;
}
unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
.addImm(~(~uint64_t(0) << MVT(From).getSizeInBits()));
unsigned Result = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::AND_I32), Result)
.addReg(Reg)
.addReg(Imm);
return Result;
}
unsigned WebAssemblyFastISel::signExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From) {
if (Reg == 0)
return 0;
switch (From) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
break;
case MVT::i32:
return copyValue(Reg);
default:
return 0;
}
unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
.addImm(32 - MVT(From).getSizeInBits());
unsigned Left = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHL_I32), Left)
.addReg(Reg)
.addReg(Imm);
unsigned Right = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHR_S_I32), Right)
.addReg(Left)
.addReg(Imm);
return Right;
}
unsigned WebAssemblyFastISel::zeroExtend(unsigned Reg, const Value *V,
MVT::SimpleValueType From,
MVT::SimpleValueType To) {
if (To == MVT::i64) {
if (From == MVT::i64)
return copyValue(Reg);
Reg = zeroExtendToI32(Reg, V, From);
unsigned Result = createResultReg(&WebAssembly::I64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::I64_EXTEND_U_I32), Result)
.addReg(Reg);
return Result;
}
if (To == MVT::i32)
return zeroExtendToI32(Reg, V, From);
return 0;
}
unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V,
MVT::SimpleValueType From,
MVT::SimpleValueType To) {
if (To == MVT::i64) {
if (From == MVT::i64)
return copyValue(Reg);
Reg = signExtendToI32(Reg, V, From);
unsigned Result = createResultReg(&WebAssembly::I64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::I64_EXTEND_S_I32), Result)
.addReg(Reg);
return Result;
}
if (To == MVT::i32)
return signExtendToI32(Reg, V, From);
return 0;
}
unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) {
MVT::SimpleValueType From = getSimpleType(V->getType());
MVT::SimpleValueType To = getLegalType(From);
unsigned VReg = getRegForValue(V);
if (VReg == 0)
return 0;
return zeroExtend(VReg, V, From, To);
}
unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) {
MVT::SimpleValueType From = getSimpleType(V->getType());
MVT::SimpleValueType To = getLegalType(From);
unsigned VReg = getRegForValue(V);
if (VReg == 0)
return 0;
return signExtend(VReg, V, From, To);
}
unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V,
bool IsSigned) {
return IsSigned ? getRegForSignedValue(V) : getRegForUnsignedValue(V);
}
unsigned WebAssemblyFastISel::notValue(unsigned Reg) {
assert(MRI.getRegClass(Reg) == &WebAssembly::I32RegClass);
unsigned NotReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::EQZ_I32), NotReg)
.addReg(Reg);
return NotReg;
}
unsigned WebAssemblyFastISel::copyValue(unsigned Reg) {
unsigned ResultReg = createResultReg(MRI.getRegClass(Reg));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(WebAssembly::COPY),
ResultReg)
.addReg(Reg);
return ResultReg;
}
unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
unsigned ResultReg =
createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
: &WebAssembly::I32RegClass);
unsigned Opc =
Subtarget->hasAddr64() ? WebAssembly::COPY_I64 : WebAssembly::COPY_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addFrameIndex(SI->second);
return ResultReg;
}
return 0;
}
unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
if (TLI.isPositionIndependent())
return 0;
if (GV->isThreadLocal())
return 0;
unsigned ResultReg =
createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
: &WebAssembly::I32RegClass);
unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
: WebAssembly::CONST_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addGlobalAddress(GV);
return ResultReg;
}
// Let target-independent code handle it.
return 0;
}
bool WebAssemblyFastISel::fastLowerArguments() {
if (!FuncInfo.CanLowerReturn)
return false;
const Function *F = FuncInfo.Fn;
if (F->isVarArg())
return false;
if (FuncInfo.Fn->getCallingConv() == CallingConv::Swift)
return false;
unsigned I = 0;
for (auto const &Arg : F->args()) {
const AttributeList &Attrs = F->getAttributes();
if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
Attrs.hasParamAttribute(I, Attribute::Nest))
return false;
Type *ArgTy = Arg.getType();
if (ArgTy->isStructTy() || ArgTy->isArrayTy())
return false;
if (!Subtarget->hasSIMD128() && ArgTy->isVectorTy())
return false;
unsigned Opc;
const TargetRegisterClass *RC;
switch (getSimpleType(ArgTy)) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
Opc = WebAssembly::ARGUMENT_i32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
Opc = WebAssembly::ARGUMENT_i64;
RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
Opc = WebAssembly::ARGUMENT_f32;
RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
Opc = WebAssembly::ARGUMENT_f64;
RC = &WebAssembly::F64RegClass;
break;
case MVT::v16i8:
Opc = WebAssembly::ARGUMENT_v16i8;
RC = &WebAssembly::V128RegClass;
break;
case MVT::v8i16:
Opc = WebAssembly::ARGUMENT_v8i16;
RC = &WebAssembly::V128RegClass;
break;
case MVT::v4i32:
Opc = WebAssembly::ARGUMENT_v4i32;
RC = &WebAssembly::V128RegClass;
break;
case MVT::v2i64:
Opc = WebAssembly::ARGUMENT_v2i64;
RC = &WebAssembly::V128RegClass;
break;
case MVT::v4f32:
Opc = WebAssembly::ARGUMENT_v4f32;
RC = &WebAssembly::V128RegClass;
break;
case MVT::v2f64:
Opc = WebAssembly::ARGUMENT_v2f64;
RC = &WebAssembly::V128RegClass;
break;
case MVT::funcref:
Opc = WebAssembly::ARGUMENT_funcref;
RC = &WebAssembly::FUNCREFRegClass;
break;
case MVT::externref:
Opc = WebAssembly::ARGUMENT_externref;
RC = &WebAssembly::EXTERNREFRegClass;
break;
default:
return false;
}
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(I);
updateValueMap(&Arg, ResultReg);
++I;
}
MRI.addLiveIn(WebAssembly::ARGUMENTS);
auto *MFI = MF->getInfo<WebAssemblyFunctionInfo>();
for (auto const &Arg : F->args()) {
MVT::SimpleValueType ArgTy = getLegalType(getSimpleType(Arg.getType()));
if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE) {
MFI->clearParamsAndResults();
return false;
}
MFI->addParam(ArgTy);
}
if (!F->getReturnType()->isVoidTy()) {
MVT::SimpleValueType RetTy =
getLegalType(getSimpleType(F->getReturnType()));
if (RetTy == MVT::INVALID_SIMPLE_VALUE_TYPE) {
MFI->clearParamsAndResults();
return false;
}
MFI->addResult(RetTy);
}
return true;
}
bool WebAssemblyFastISel::selectCall(const Instruction *I) {
const auto *Call = cast<CallInst>(I);
// TODO: Support tail calls in FastISel
if (Call->isMustTailCall() || Call->isInlineAsm() ||
Call->getFunctionType()->isVarArg())
return false;
Function *Func = Call->getCalledFunction();
if (Func && Func->isIntrinsic())
return false;
if (Call->getCallingConv() == CallingConv::Swift)
return false;
bool IsDirect = Func != nullptr;
if (!IsDirect && isa<ConstantExpr>(Call->getCalledOperand()))
return false;
FunctionType *FuncTy = Call->getFunctionType();
unsigned Opc = IsDirect ? WebAssembly::CALL : WebAssembly::CALL_INDIRECT;
bool IsVoid = FuncTy->getReturnType()->isVoidTy();
unsigned ResultReg;
if (!IsVoid) {
if (!Subtarget->hasSIMD128() && Call->getType()->isVectorTy())
return false;
MVT::SimpleValueType RetTy = getSimpleType(Call->getType());
switch (RetTy) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
ResultReg = createResultReg(&WebAssembly::I32RegClass);
break;
case MVT::i64:
ResultReg = createResultReg(&WebAssembly::I64RegClass);
break;
case MVT::f32:
ResultReg = createResultReg(&WebAssembly::F32RegClass);
break;
case MVT::f64:
ResultReg = createResultReg(&WebAssembly::F64RegClass);
break;
case MVT::v16i8:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v8i16:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4i32:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v2i64:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4f32:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v2f64:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::funcref:
ResultReg = createResultReg(&WebAssembly::FUNCREFRegClass);
break;
case MVT::externref:
ResultReg = createResultReg(&WebAssembly::EXTERNREFRegClass);
break;
default:
return false;
}
}
SmallVector<unsigned, 8> Args;
for (unsigned I = 0, E = Call->getNumArgOperands(); I < E; ++I) {
Value *V = Call->getArgOperand(I);
MVT::SimpleValueType ArgTy = getSimpleType(V->getType());
if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
const AttributeList &Attrs = Call->getAttributes();
if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
Attrs.hasParamAttribute(I, Attribute::Nest))
return false;
unsigned Reg;
if (Attrs.hasParamAttribute(I, Attribute::SExt))
Reg = getRegForSignedValue(V);
else if (Attrs.hasParamAttribute(I, Attribute::ZExt))
Reg = getRegForUnsignedValue(V);
else
Reg = getRegForValue(V);
if (Reg == 0)
return false;
Args.push_back(Reg);
}
unsigned CalleeReg = 0;
if (!IsDirect) {
CalleeReg = getRegForValue(Call->getCalledOperand());
if (!CalleeReg)
return false;
}
auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
if (!IsVoid)
MIB.addReg(ResultReg, RegState::Define);
if (IsDirect) {
MIB.addGlobalAddress(Func);
} else {
// Placeholder for the type index.
MIB.addImm(0);
// The table into which this call_indirect indexes.
MCSymbolWasm *Table = WebAssembly::getOrCreateFunctionTableSymbol(
MF->getMMI().getContext(), Subtarget);
if (Subtarget->hasReferenceTypes()) {
MIB.addSym(Table);
} else {
// Otherwise for the MVP there is at most one table whose number is 0, but
// we can't write a table symbol or issue relocations. Instead we just
// ensure the table is live.
Table->setNoStrip();
MIB.addImm(0);
}
// See if we must truncate the function pointer.
// CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
// as 64-bit for uniformity with other pointer types.
// See also: WebAssemblyISelLowering.cpp: LowerCallResults
if (Subtarget->hasAddr64()) {
auto Wrap = BuildMI(*FuncInfo.MBB, std::prev(FuncInfo.InsertPt), DbgLoc,
TII.get(WebAssembly::I32_WRAP_I64));
unsigned Reg32 = createResultReg(&WebAssembly::I32RegClass);
Wrap.addReg(Reg32, RegState::Define);
Wrap.addReg(CalleeReg);
CalleeReg = Reg32;
}
}
for (unsigned ArgReg : Args)
MIB.addReg(ArgReg);
if (!IsDirect)
MIB.addReg(CalleeReg);
if (!IsVoid)
updateValueMap(Call, ResultReg);
return true;
}
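// Note on the wasm64 path above: function pointers are modeled as i64 for
// uniformity with other pointer types, but CALL_INDIRECT consumes an i32
// operand, hence the I32_WRAP_I64 emitted just before the call. In scalar
// terms the wrap is a plain truncation (a sketch, not code from this file):
//
//   uint64_t Callee64 = /* materialized 64-bit callee value */ 0;
//   uint32_t Callee32 = static_cast<uint32_t>(Callee64); // i32.wrap_i64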
bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
const auto *Select = cast<SelectInst>(I);
bool Not;
- unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
+ unsigned CondReg =
+ getRegForI1Value(Select->getCondition(), I->getParent(), Not);
if (CondReg == 0)
return false;
unsigned TrueReg = getRegForValue(Select->getTrueValue());
if (TrueReg == 0)
return false;
unsigned FalseReg = getRegForValue(Select->getFalseValue());
if (FalseReg == 0)
return false;
if (Not)
std::swap(TrueReg, FalseReg);
unsigned Opc;
const TargetRegisterClass *RC;
switch (getSimpleType(Select->getType())) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
Opc = WebAssembly::SELECT_I32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
Opc = WebAssembly::SELECT_I64;
RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
Opc = WebAssembly::SELECT_F32;
RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
Opc = WebAssembly::SELECT_F64;
RC = &WebAssembly::F64RegClass;
break;
case MVT::funcref:
Opc = WebAssembly::SELECT_FUNCREF;
RC = &WebAssembly::FUNCREFRegClass;
break;
case MVT::externref:
Opc = WebAssembly::SELECT_EXTERNREF;
RC = &WebAssembly::EXTERNREFRegClass;
break;
default:
return false;
}
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(TrueReg)
.addReg(FalseReg)
.addReg(CondReg);
updateValueMap(Select, ResultReg);
return true;
}
bool WebAssemblyFastISel::selectTrunc(const Instruction *I) {
const auto *Trunc = cast<TruncInst>(I);
unsigned Reg = getRegForValue(Trunc->getOperand(0));
if (Reg == 0)
return false;
if (Trunc->getOperand(0)->getType()->isIntegerTy(64)) {
unsigned Result = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::I32_WRAP_I64), Result)
.addReg(Reg);
Reg = Result;
}
updateValueMap(Trunc, Reg);
return true;
}
bool WebAssemblyFastISel::selectZExt(const Instruction *I) {
const auto *ZExt = cast<ZExtInst>(I);
const Value *Op = ZExt->getOperand(0);
MVT::SimpleValueType From = getSimpleType(Op->getType());
MVT::SimpleValueType To = getLegalType(getSimpleType(ZExt->getType()));
unsigned In = getRegForValue(Op);
if (In == 0)
return false;
unsigned Reg = zeroExtend(In, Op, From, To);
if (Reg == 0)
return false;
updateValueMap(ZExt, Reg);
return true;
}
bool WebAssemblyFastISel::selectSExt(const Instruction *I) {
const auto *SExt = cast<SExtInst>(I);
const Value *Op = SExt->getOperand(0);
MVT::SimpleValueType From = getSimpleType(Op->getType());
MVT::SimpleValueType To = getLegalType(getSimpleType(SExt->getType()));
unsigned In = getRegForValue(Op);
if (In == 0)
return false;
unsigned Reg = signExtend(In, Op, From, To);
if (Reg == 0)
return false;
updateValueMap(SExt, Reg);
return true;
}
bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
const auto *ICmp = cast<ICmpInst>(I);
bool I32 = getSimpleType(ICmp->getOperand(0)->getType()) != MVT::i64;
unsigned Opc;
bool IsSigned = false;
switch (ICmp->getPredicate()) {
case ICmpInst::ICMP_EQ:
Opc = I32 ? WebAssembly::EQ_I32 : WebAssembly::EQ_I64;
break;
case ICmpInst::ICMP_NE:
Opc = I32 ? WebAssembly::NE_I32 : WebAssembly::NE_I64;
break;
case ICmpInst::ICMP_UGT:
Opc = I32 ? WebAssembly::GT_U_I32 : WebAssembly::GT_U_I64;
break;
case ICmpInst::ICMP_UGE:
Opc = I32 ? WebAssembly::GE_U_I32 : WebAssembly::GE_U_I64;
break;
case ICmpInst::ICMP_ULT:
Opc = I32 ? WebAssembly::LT_U_I32 : WebAssembly::LT_U_I64;
break;
case ICmpInst::ICMP_ULE:
Opc = I32 ? WebAssembly::LE_U_I32 : WebAssembly::LE_U_I64;
break;
case ICmpInst::ICMP_SGT:
Opc = I32 ? WebAssembly::GT_S_I32 : WebAssembly::GT_S_I64;
IsSigned = true;
break;
case ICmpInst::ICMP_SGE:
Opc = I32 ? WebAssembly::GE_S_I32 : WebAssembly::GE_S_I64;
IsSigned = true;
break;
case ICmpInst::ICMP_SLT:
Opc = I32 ? WebAssembly::LT_S_I32 : WebAssembly::LT_S_I64;
IsSigned = true;
break;
case ICmpInst::ICMP_SLE:
Opc = I32 ? WebAssembly::LE_S_I32 : WebAssembly::LE_S_I64;
IsSigned = true;
break;
default:
return false;
}
unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), IsSigned);
if (LHS == 0)
return false;
unsigned RHS = getRegForPromotedValue(ICmp->getOperand(1), IsSigned);
if (RHS == 0)
return false;
unsigned ResultReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(LHS)
.addReg(RHS);
updateValueMap(ICmp, ResultReg);
return true;
}
bool WebAssemblyFastISel::selectFCmp(const Instruction *I) {
const auto *FCmp = cast<FCmpInst>(I);
unsigned LHS = getRegForValue(FCmp->getOperand(0));
if (LHS == 0)
return false;
unsigned RHS = getRegForValue(FCmp->getOperand(1));
if (RHS == 0)
return false;
bool F32 = getSimpleType(FCmp->getOperand(0)->getType()) != MVT::f64;
unsigned Opc;
bool Not = false;
switch (FCmp->getPredicate()) {
case FCmpInst::FCMP_OEQ:
Opc = F32 ? WebAssembly::EQ_F32 : WebAssembly::EQ_F64;
break;
case FCmpInst::FCMP_UNE:
Opc = F32 ? WebAssembly::NE_F32 : WebAssembly::NE_F64;
break;
case FCmpInst::FCMP_OGT:
Opc = F32 ? WebAssembly::GT_F32 : WebAssembly::GT_F64;
break;
case FCmpInst::FCMP_OGE:
Opc = F32 ? WebAssembly::GE_F32 : WebAssembly::GE_F64;
break;
case FCmpInst::FCMP_OLT:
Opc = F32 ? WebAssembly::LT_F32 : WebAssembly::LT_F64;
break;
case FCmpInst::FCMP_OLE:
Opc = F32 ? WebAssembly::LE_F32 : WebAssembly::LE_F64;
break;
case FCmpInst::FCMP_UGT:
Opc = F32 ? WebAssembly::LE_F32 : WebAssembly::LE_F64;
Not = true;
break;
case FCmpInst::FCMP_UGE:
Opc = F32 ? WebAssembly::LT_F32 : WebAssembly::LT_F64;
Not = true;
break;
case FCmpInst::FCMP_ULT:
Opc = F32 ? WebAssembly::GE_F32 : WebAssembly::GE_F64;
Not = true;
break;
case FCmpInst::FCMP_ULE:
Opc = F32 ? WebAssembly::GT_F32 : WebAssembly::GT_F64;
Not = true;
break;
default:
return false;
}
unsigned ResultReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(LHS)
.addReg(RHS);
if (Not)
ResultReg = notValue(ResultReg);
updateValueMap(FCmp, ResultReg);
return true;
}
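// A note on the unordered predicates handled above: FCMP_UGT/UGE/ULT/ULE have
// no direct opcode in this switch, so each is lowered as the EQZ-based
// negation (notValue) of the opposite ordered comparison. A minimal scalar
// sketch of the identity being used (illustrative, not code from this file):
//
//   bool fcmpUGT(double A, double B) {
//     return !(A <= B); // true when A > B or when either operand is NaN
//   }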
bool WebAssemblyFastISel::selectBitCast(const Instruction *I) {
// Target-independent code can handle this, except it doesn't set the dead
// flag on the ARGUMENTS clobber, so we have to do that manually in order
// to satisfy code that expects this of isBitcast() instructions.
EVT VT = TLI.getValueType(DL, I->getOperand(0)->getType());
EVT RetVT = TLI.getValueType(DL, I->getType());
if (!VT.isSimple() || !RetVT.isSimple())
return false;
unsigned In = getRegForValue(I->getOperand(0));
if (In == 0)
return false;
if (VT == RetVT) {
// No-op bitcast.
updateValueMap(I, In);
return true;
}
Register Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(),
In);
if (!Reg)
return false;
MachineBasicBlock::iterator Iter = FuncInfo.InsertPt;
--Iter;
assert(Iter->isBitcast());
Iter->setPhysRegsDeadExcept(ArrayRef<Register>(), TRI);
updateValueMap(I, Reg);
return true;
}
bool WebAssemblyFastISel::selectLoad(const Instruction *I) {
const auto *Load = cast<LoadInst>(I);
if (Load->isAtomic())
return false;
if (!WebAssembly::isDefaultAddressSpace(Load->getPointerAddressSpace()))
return false;
if (!Subtarget->hasSIMD128() && Load->getType()->isVectorTy())
return false;
Address Addr;
if (!computeAddress(Load->getPointerOperand(), Addr))
return false;
// TODO: Fold a following sign-/zero-extend into the load instruction.
unsigned Opc;
const TargetRegisterClass *RC;
bool A64 = Subtarget->hasAddr64();
switch (getSimpleType(Load->getType())) {
case MVT::i1:
case MVT::i8:
Opc = A64 ? WebAssembly::LOAD8_U_I32_A64 : WebAssembly::LOAD8_U_I32_A32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i16:
Opc = A64 ? WebAssembly::LOAD16_U_I32_A64 : WebAssembly::LOAD16_U_I32_A32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i32:
Opc = A64 ? WebAssembly::LOAD_I32_A64 : WebAssembly::LOAD_I32_A32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
Opc = A64 ? WebAssembly::LOAD_I64_A64 : WebAssembly::LOAD_I64_A32;
RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
Opc = A64 ? WebAssembly::LOAD_F32_A64 : WebAssembly::LOAD_F32_A32;
RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
Opc = A64 ? WebAssembly::LOAD_F64_A64 : WebAssembly::LOAD_F64_A32;
RC = &WebAssembly::F64RegClass;
break;
default:
return false;
}
materializeLoadStoreOperands(Addr);
unsigned ResultReg = createResultReg(RC);
auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
ResultReg);
addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(Load));
updateValueMap(Load, ResultReg);
return true;
}
bool WebAssemblyFastISel::selectStore(const Instruction *I) {
const auto *Store = cast<StoreInst>(I);
if (Store->isAtomic())
return false;
if (!WebAssembly::isDefaultAddressSpace(Store->getPointerAddressSpace()))
return false;
if (!Subtarget->hasSIMD128() &&
Store->getValueOperand()->getType()->isVectorTy())
return false;
Address Addr;
if (!computeAddress(Store->getPointerOperand(), Addr))
return false;
unsigned Opc;
bool VTIsi1 = false;
bool A64 = Subtarget->hasAddr64();
switch (getSimpleType(Store->getValueOperand()->getType())) {
case MVT::i1:
VTIsi1 = true;
LLVM_FALLTHROUGH;
case MVT::i8:
Opc = A64 ? WebAssembly::STORE8_I32_A64 : WebAssembly::STORE8_I32_A32;
break;
case MVT::i16:
Opc = A64 ? WebAssembly::STORE16_I32_A64 : WebAssembly::STORE16_I32_A32;
break;
case MVT::i32:
Opc = A64 ? WebAssembly::STORE_I32_A64 : WebAssembly::STORE_I32_A32;
break;
case MVT::i64:
Opc = A64 ? WebAssembly::STORE_I64_A64 : WebAssembly::STORE_I64_A32;
break;
case MVT::f32:
Opc = A64 ? WebAssembly::STORE_F32_A64 : WebAssembly::STORE_F32_A32;
break;
case MVT::f64:
Opc = A64 ? WebAssembly::STORE_F64_A64 : WebAssembly::STORE_F64_A32;
break;
default:
return false;
}
materializeLoadStoreOperands(Addr);
unsigned ValueReg = getRegForValue(Store->getValueOperand());
if (ValueReg == 0)
return false;
if (VTIsi1)
ValueReg = maskI1Value(ValueReg, Store->getValueOperand());
auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(Store));
MIB.addReg(ValueReg);
return true;
}
bool WebAssemblyFastISel::selectBr(const Instruction *I) {
const auto *Br = cast<BranchInst>(I);
if (Br->isUnconditional()) {
MachineBasicBlock *MSucc = FuncInfo.MBBMap[Br->getSuccessor(0)];
fastEmitBranch(MSucc, Br->getDebugLoc());
return true;
}
MachineBasicBlock *TBB = FuncInfo.MBBMap[Br->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[Br->getSuccessor(1)];
bool Not;
- unsigned CondReg = getRegForI1Value(Br->getCondition(), Not);
+ unsigned CondReg = getRegForI1Value(Br->getCondition(), Br->getParent(), Not);
if (CondReg == 0)
return false;
unsigned Opc = WebAssembly::BR_IF;
if (Not)
Opc = WebAssembly::BR_UNLESS;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addMBB(TBB)
.addReg(CondReg);
finishCondBranch(Br->getParent(), TBB, FBB);
return true;
}
bool WebAssemblyFastISel::selectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
const auto *Ret = cast<ReturnInst>(I);
if (Ret->getNumOperands() == 0) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::RETURN));
return true;
}
// TODO: support multiple return in FastISel
if (Ret->getNumOperands() > 1)
return false;
Value *RV = Ret->getOperand(0);
if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy())
return false;
switch (getSimpleType(RV->getType())) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f32:
case MVT::f64:
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
case MVT::funcref:
case MVT::externref:
break;
default:
return false;
}
unsigned Reg;
if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::SExt))
Reg = getRegForSignedValue(RV);
else if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::ZExt))
Reg = getRegForUnsignedValue(RV);
else
Reg = getRegForValue(RV);
if (Reg == 0)
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::RETURN))
.addReg(Reg);
return true;
}
bool WebAssemblyFastISel::selectUnreachable(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::UNREACHABLE));
return true;
}
bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Call:
if (selectCall(I))
return true;
break;
case Instruction::Select:
return selectSelect(I);
case Instruction::Trunc:
return selectTrunc(I);
case Instruction::ZExt:
return selectZExt(I);
case Instruction::SExt:
return selectSExt(I);
case Instruction::ICmp:
return selectICmp(I);
case Instruction::FCmp:
return selectFCmp(I);
case Instruction::BitCast:
return selectBitCast(I);
case Instruction::Load:
return selectLoad(I);
case Instruction::Store:
return selectStore(I);
case Instruction::Br:
return selectBr(I);
case Instruction::Ret:
return selectRet(I);
case Instruction::Unreachable:
return selectUnreachable(I);
default:
break;
}
// Fall back to target-independent instruction selection.
return selectOperator(I, I->getOpcode());
}
FastISel *WebAssembly::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
return new WebAssemblyFastISel(FuncInfo, LibInfo);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 37329b489555..eea848d3eb2f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -1,578 +1,599 @@
//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
namespace {
+// Determine if a promotion alias should be created for a symbol name.
+static bool allowPromotionAlias(const std::string &Name) {
+ // Promotion aliases are used only in inline assembly, so it is safe to
+ // simply skip unusual names. This check is a subset of
+ // MCAsmInfo::isAcceptableChar() and MCAsmInfoXCOFF::isAcceptableChar().
+ for (const char &C : Name) {
+ if (isAlnum(C) || C == '_' || C == '.')
+ continue;
+ return false;
+ }
+ return true;
+}
+
// Promote each local-linkage entity defined by ExportM and used by ImportM by
// changing visibility and appending the given ModuleId.
void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
SetVector<GlobalValue *> &PromoteExtra) {
DenseMap<const Comdat *, Comdat *> RenamedComdats;
for (auto &ExportGV : ExportM.global_values()) {
if (!ExportGV.hasLocalLinkage())
continue;
auto Name = ExportGV.getName();
GlobalValue *ImportGV = nullptr;
if (!PromoteExtra.count(&ExportGV)) {
ImportGV = ImportM.getNamedValue(Name);
if (!ImportGV)
continue;
ImportGV->removeDeadConstantUsers();
if (ImportGV->use_empty()) {
ImportGV->eraseFromParent();
continue;
}
}
+ std::string OldName = Name.str();
std::string NewName = (Name + ModuleId).str();
if (const auto *C = ExportGV.getComdat())
if (C->getName() == Name)
RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));
ExportGV.setName(NewName);
ExportGV.setLinkage(GlobalValue::ExternalLinkage);
ExportGV.setVisibility(GlobalValue::HiddenVisibility);
if (ImportGV) {
ImportGV->setName(NewName);
ImportGV->setVisibility(GlobalValue::HiddenVisibility);
}
+
+ if (isa<Function>(&ExportGV) && allowPromotionAlias(OldName)) {
+ // Create a local alias with the original name to avoid breaking
+ // references from inline assembly.
+ std::string Alias = ".set " + OldName + "," + NewName + "\n";
+ ExportM.appendModuleInlineAsm(Alias);
+ }
}
if (!RenamedComdats.empty())
for (auto &GO : ExportM.global_objects())
if (auto *C = GO.getComdat()) {
auto Replacement = RenamedComdats.find(C);
if (Replacement != RenamedComdats.end())
GO.setComdat(Replacement->second);
}
}
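// To make the renaming concrete, suppose ModuleId is "$abc123" and ExportM
// defines an internal function @frob that ImportM uses (both names are
// hypothetical, chosen only for illustration):
//
//   before:  define internal void @frob()
//   after:   define hidden void @frob$abc123()
//            module asm ".set frob,frob$abc123"
//
// The exported definition gets external linkage and hidden visibility, the
// importing module's reference is renamed to match, and because "frob" passes
// allowPromotionAlias, the ".set" line is appended to ExportM's inline
// assembly so that existing inline-asm references to the old name still
// resolve.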
// Promote all internal (i.e. distinct) type ids used by the module by replacing
// them with external type ids formed using the module id.
//
// Note that this needs to be done before we clone the module because each clone
// will receive its own set of distinct metadata nodes.
void promoteTypeIds(Module &M, StringRef ModuleId) {
DenseMap<Metadata *, Metadata *> LocalToGlobal;
auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
Metadata *MD =
cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
Metadata *&GlobalMD = LocalToGlobal[MD];
if (!GlobalMD) {
std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();
GlobalMD = MDString::get(M.getContext(), NewName);
}
CI->setArgOperand(ArgNo,
MetadataAsValue::get(M.getContext(), GlobalMD));
}
};
if (Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
for (const Use &U : TypeTestFunc->uses()) {
auto CI = cast<CallInst>(U.getUser());
ExternalizeTypeId(CI, 1);
}
}
if (Function *TypeCheckedLoadFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
for (const Use &U : TypeCheckedLoadFunc->uses()) {
auto CI = cast<CallInst>(U.getUser());
ExternalizeTypeId(CI, 2);
}
}
for (GlobalObject &GO : M.global_objects()) {
SmallVector<MDNode *, 1> MDs;
GO.getMetadata(LLVMContext::MD_type, MDs);
GO.eraseMetadata(LLVMContext::MD_type);
for (auto MD : MDs) {
auto I = LocalToGlobal.find(MD->getOperand(1));
if (I == LocalToGlobal.end()) {
GO.addMetadata(LLVMContext::MD_type, *MD);
continue;
}
GO.addMetadata(
LLVMContext::MD_type,
*MDNode::get(M.getContext(), {MD->getOperand(0), I->second}));
}
}
}
// Drop unused globals, and drop type information from function declarations.
// FIXME: If we made functions typeless then there would be no need to do this.
void simplifyExternals(Module &M) {
FunctionType *EmptyFT =
FunctionType::get(Type::getVoidTy(M.getContext()), false);
for (auto I = M.begin(), E = M.end(); I != E;) {
Function &F = *I++;
if (F.isDeclaration() && F.use_empty()) {
F.eraseFromParent();
continue;
}
if (!F.isDeclaration() || F.getFunctionType() == EmptyFT ||
// Changing the type of an intrinsic may invalidate the IR.
F.getName().startswith("llvm."))
continue;
Function *NewF =
Function::Create(EmptyFT, GlobalValue::ExternalLinkage,
F.getAddressSpace(), "", &M);
NewF->copyAttributesFrom(&F);
// Only copy function attributes.
NewF->setAttributes(
AttributeList::get(M.getContext(), AttributeList::FunctionIndex,
F.getAttributes().getFnAttributes()));
NewF->takeName(&F);
F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
F.eraseFromParent();
}
for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
GlobalVariable &GV = *I++;
if (GV.isDeclaration() && GV.use_empty()) {
GV.eraseFromParent();
continue;
}
}
}
static void
filterModule(Module *M,
function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
std::vector<GlobalValue *> V;
for (GlobalValue &GV : M->global_values())
if (!ShouldKeepDefinition(&GV))
V.push_back(&GV);
for (GlobalValue *GV : V)
if (!convertToDeclaration(*GV))
GV->eraseFromParent();
}
void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
if (auto *F = dyn_cast<Function>(C))
return Fn(F);
if (isa<GlobalValue>(C))
return;
for (Value *Op : C->operands())
forEachVirtualFunction(cast<Constant>(Op), Fn);
}
// Clone any @llvm[.compiler].used over to the new module and append
// values whose defs were cloned into that module.
static void cloneUsedGlobalVariables(const Module &SrcM, Module &DestM,
bool CompilerUsed) {
SmallVector<GlobalValue *, 4> Used, NewUsed;
// First collect those in the llvm[.compiler].used set.
collectUsedGlobalVariables(SrcM, Used, CompilerUsed);
// Next build a set of the equivalent values defined in DestM.
for (auto *V : Used) {
auto *GV = DestM.getNamedValue(V->getName());
if (GV && !GV->isDeclaration())
NewUsed.push_back(GV);
}
// Finally, add them to a llvm[.compiler].used variable in DestM.
if (CompilerUsed)
appendToCompilerUsed(DestM, NewUsed);
else
appendToUsed(DestM, NewUsed);
}
// If it's possible to split M into regular and thin LTO parts, do so and write
// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
// regular LTO bitcode file to OS.
void splitAndWriteThinLTOBitcode(
raw_ostream &OS, raw_ostream *ThinLinkOS,
function_ref<AAResults &(Function &)> AARGetter, Module &M) {
std::string ModuleId = getUniqueModuleId(&M);
if (ModuleId.empty()) {
// We couldn't generate a module ID for this module; write it out as a
// regular LTO module with an index for summary-based dead stripping.
ProfileSummaryInfo PSI(M);
M.addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index);
if (ThinLinkOS)
// We don't have a ThinLTO part, but still write the module to the
// ThinLinkOS if requested so that the expected output file is produced.
WriteBitcodeToFile(M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
&Index);
return;
}
promoteTypeIds(M, ModuleId);
// Returns whether a global or its associated global has attached type
// metadata. The former may participate in CFI or whole-program
// devirtualization, so they need to appear in the merged module instead of
// the thin LTO module. Similarly, globals that are associated with globals
// with type metadata need to appear in the merged module because they will
// reference the global's section directly.
auto HasTypeMetadata = [](const GlobalObject *GO) {
if (MDNode *MD = GO->getMetadata(LLVMContext::MD_associated))
if (auto *AssocVM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(0)))
if (auto *AssocGO = dyn_cast<GlobalObject>(AssocVM->getValue()))
if (AssocGO->hasMetadata(LLVMContext::MD_type))
return true;
return GO->hasMetadata(LLVMContext::MD_type);
};
// Collect the set of virtual functions that are eligible for virtual constant
// propagation. Each eligible function must not access memory, must return
// an integer of width <=64 bits, must take at least one argument, must not
// use its first argument (assumed to be "this") and all arguments other than
// the first one must be of <=64 bit integer type.
//
// Note that we test whether this copy of the function is readnone, rather
// than testing function attributes, which must hold for any copy of the
// function, even a less optimized version substituted at link time. This is
// sound because the virtual constant propagation optimizations effectively
// inline all implementations of the virtual function into each call site,
// rather than using function attributes to perform local optimization.
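// As a concrete (hypothetical) illustration, a C++ virtual function such as
//
//   struct Base { virtual int tag(int Kind) const = 0; };
//   struct Impl : Base {
//     int tag(int Kind) const override { return Kind == 0 ? 3 : 7; }
//   };
//
// qualifies: it returns a 32-bit integer, takes only small integer parameters
// besides "this", never uses "this", and touches no memory, so the readnone
// check below accepts it. A body that loads from a member variable would be
// rejected.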
DenseSet<const Function *> EligibleVirtualFns;
// If any member of a comdat lives in MergedM, put all members of that
// comdat in MergedM to keep the comdat together.
DenseSet<const Comdat *> MergedMComdats;
for (GlobalVariable &GV : M.globals())
if (HasTypeMetadata(&GV)) {
if (const auto *C = GV.getComdat())
MergedMComdats.insert(C);
forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
auto *RT = dyn_cast<IntegerType>(F->getReturnType());
if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
!F->arg_begin()->use_empty())
return;
for (auto &Arg : drop_begin(F->args())) {
auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
if (!ArgT || ArgT->getBitWidth() > 64)
return;
}
if (!F->isDeclaration() &&
computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
EligibleVirtualFns.insert(F);
});
}
ValueToValueMapTy VMap;
std::unique_ptr<Module> MergedM(
CloneModule(M, VMap, [&](const GlobalValue *GV) -> bool {
if (const auto *C = GV->getComdat())
if (MergedMComdats.count(C))
return true;
if (auto *F = dyn_cast<Function>(GV))
return EligibleVirtualFns.count(F);
if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
return HasTypeMetadata(GVar);
return false;
}));
StripDebugInfo(*MergedM);
MergedM->setModuleInlineAsm("");
// Clone any llvm.*used globals to ensure the included values are
// not deleted.
cloneUsedGlobalVariables(M, *MergedM, /*CompilerUsed*/ false);
cloneUsedGlobalVariables(M, *MergedM, /*CompilerUsed*/ true);
for (Function &F : *MergedM)
if (!F.isDeclaration()) {
// Reset the linkage of all functions eligible for virtual constant
// propagation. The canonical definitions live in the thin LTO module so
// that they can be imported.
F.setLinkage(GlobalValue::AvailableExternallyLinkage);
F.setComdat(nullptr);
}
SetVector<GlobalValue *> CfiFunctions;
for (auto &F : M)
if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F))
CfiFunctions.insert(&F);
// Remove all globals with type metadata, globals with comdats that live in
// MergedM, and aliases pointing to such globals from the thin LTO module.
filterModule(&M, [&](const GlobalValue *GV) {
if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
if (HasTypeMetadata(GVar))
return false;
if (const auto *C = GV->getComdat())
if (MergedMComdats.count(C))
return false;
return true;
});
promoteInternals(*MergedM, M, ModuleId, CfiFunctions);
promoteInternals(M, *MergedM, ModuleId, CfiFunctions);
auto &Ctx = MergedM->getContext();
SmallVector<MDNode *, 8> CfiFunctionMDs;
for (auto V : CfiFunctions) {
Function &F = *cast<Function>(V);
SmallVector<MDNode *, 2> Types;
F.getMetadata(LLVMContext::MD_type, Types);
SmallVector<Metadata *, 4> Elts;
Elts.push_back(MDString::get(Ctx, F.getName()));
CfiFunctionLinkage Linkage;
if (lowertypetests::isJumpTableCanonical(&F))
Linkage = CFL_Definition;
else if (F.hasExternalWeakLinkage())
Linkage = CFL_WeakDeclaration;
else
Linkage = CFL_Declaration;
Elts.push_back(ConstantAsMetadata::get(
llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
append_range(Elts, Types);
CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
}
if (!CfiFunctionMDs.empty()) {
NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions");
for (auto MD : CfiFunctionMDs)
NMD->addOperand(MD);
}
SmallVector<MDNode *, 8> FunctionAliases;
for (auto &A : M.aliases()) {
if (!isa<Function>(A.getAliasee()))
continue;
auto *F = cast<Function>(A.getAliasee());
Metadata *Elts[] = {
MDString::get(Ctx, A.getName()),
MDString::get(Ctx, F->getName()),
ConstantAsMetadata::get(
ConstantInt::get(Type::getInt8Ty(Ctx), A.getVisibility())),
ConstantAsMetadata::get(
ConstantInt::get(Type::getInt8Ty(Ctx), A.isWeakForLinker())),
};
FunctionAliases.push_back(MDTuple::get(Ctx, Elts));
}
if (!FunctionAliases.empty()) {
NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("aliases");
for (auto MD : FunctionAliases)
NMD->addOperand(MD);
}
SmallVector<MDNode *, 8> Symvers;
ModuleSymbolTable::CollectAsmSymvers(M, [&](StringRef Name, StringRef Alias) {
Function *F = M.getFunction(Name);
if (!F || F->use_empty())
return;
Symvers.push_back(MDTuple::get(
Ctx, {MDString::get(Ctx, Name), MDString::get(Ctx, Alias)}));
});
if (!Symvers.empty()) {
NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("symvers");
for (auto MD : Symvers)
NMD->addOperand(MD);
}
simplifyExternals(*MergedM);
// FIXME: Try to re-use BSI and PFI from the original module here.
ProfileSummaryInfo PSI(M);
ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
// Mark the merged module as requiring full LTO. We still want an index for
// it though, so that it can participate in summary-based dead stripping.
MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
ModuleSummaryIndex MergedMIndex =
buildModuleSummaryIndex(*MergedM, nullptr, &PSI);
SmallVector<char, 0> Buffer;
BitcodeWriter W(Buffer);
// Save the module hash produced for the full bitcode, which will
// be used in the backends, and use that in the minimized bitcode
// produced for the thin link.
ModuleHash ModHash = {{0}};
W.writeModule(M, /*ShouldPreserveUseListOrder=*/false, &Index,
/*GenerateHash=*/true, &ModHash);
W.writeModule(*MergedM, /*ShouldPreserveUseListOrder=*/false, &MergedMIndex);
W.writeSymtab();
W.writeStrtab();
OS << Buffer;
// If a minimized bitcode module was requested for the thin link, only
// the information that is needed by the thin link will be written to the
// given OS (the merged module will be written as usual).
if (ThinLinkOS) {
Buffer.clear();
BitcodeWriter W2(Buffer);
StripDebugInfo(M);
W2.writeThinLinkBitcode(M, Index, ModHash);
W2.writeModule(*MergedM, /*ShouldPreserveUseListOrder=*/false,
&MergedMIndex);
W2.writeSymtab();
W2.writeStrtab();
*ThinLinkOS << Buffer;
}
}
// Check if the LTO Unit splitting has been enabled.
bool enableSplitLTOUnit(Module &M) {
bool EnableSplitLTOUnit = false;
if (auto *MD = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("EnableSplitLTOUnit")))
EnableSplitLTOUnit = MD->getZExtValue();
return EnableSplitLTOUnit;
}
// Returns whether this module needs to be split because it uses type metadata.
bool hasTypeMetadata(Module &M) {
for (auto &GO : M.global_objects()) {
if (GO.hasMetadata(LLVMContext::MD_type))
return true;
}
return false;
}
void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
function_ref<AAResults &(Function &)> AARGetter,
Module &M, const ModuleSummaryIndex *Index) {
std::unique_ptr<ModuleSummaryIndex> NewIndex = nullptr;
// See if this module has any type metadata. If so, we try to split it
// or at least promote type ids to enable WPD.
if (hasTypeMetadata(M)) {
if (enableSplitLTOUnit(M))
return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
// Promote type ids as needed for index-based WPD.
std::string ModuleId = getUniqueModuleId(&M);
if (!ModuleId.empty()) {
promoteTypeIds(M, ModuleId);
// Need to rebuild the index so that it contains type metadata
// for the newly promoted type ids.
// FIXME: Probably should not bother building the index at all
// in the caller of writeThinLTOBitcode (which does so via the
// ModuleSummaryIndexAnalysis pass), since we have to rebuild it
// anyway whenever there is type metadata (here or in
// splitAndWriteThinLTOBitcode). Just always build it once via the
// buildModuleSummaryIndex when Module(s) are ready.
ProfileSummaryInfo PSI(M);
NewIndex = std::make_unique<ModuleSummaryIndex>(
buildModuleSummaryIndex(M, nullptr, &PSI));
Index = NewIndex.get();
}
}
// Write it out as an unsplit ThinLTO module.
// Save the module hash produced for the full bitcode, which will
// be used in the backends, and use that in the minimized bitcode
// produced for the thin link.
ModuleHash ModHash = {{0}};
WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
/*GenerateHash=*/true, &ModHash);
// If a minimized bitcode module was requested for the thin link, only
// the information that is needed by the thin link will be written to the
// given OS.
if (ThinLinkOS && Index)
WriteThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);
}
class WriteThinLTOBitcode : public ModulePass {
raw_ostream &OS; // raw_ostream to print on
// The output stream on which to emit a minimized module for use
// just in the thin link, if requested.
raw_ostream *ThinLinkOS;
public:
static char ID; // Pass identification, replacement for typeid
WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
: ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
bool runOnModule(Module &M) override {
const ModuleSummaryIndex *Index =
&(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
return true;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ModuleSummaryIndexWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
} // anonymous namespace
char WriteThinLTOBitcode::ID = 0;
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
"Write ThinLTO Bitcode", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
"Write ThinLTO Bitcode", false, true)
ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
raw_ostream *ThinLinkOS) {
return new WriteThinLTOBitcode(Str, ThinLinkOS);
}
PreservedAnalyses
llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
writeThinLTOBitcode(OS, ThinLinkOS,
[&FAM](Function &F) -> AAResults & {
return FAM.getResult<AAManager>(F);
},
M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
return PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index be21db9087d2..e4ec5f266eb8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -1,360 +1,364 @@
//===----------------------- AlignmentFromAssumptions.cpp -----------------===//
// Set Load/Store Alignments From Assumptions
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a ScalarEvolution-based transformation to set
// the alignments of loads, stores, and memory intrinsics based on the truth
// expressions of assume intrinsics. The primary motivation is to handle
// complex alignment assumptions that apply to vector loads and stores that
// appear after vectorization and unrolling.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#define AA_NAME "alignment-from-assumptions"
#define DEBUG_TYPE AA_NAME
using namespace llvm;
STATISTIC(NumLoadAlignChanged,
"Number of loads changed by alignment assumptions");
STATISTIC(NumStoreAlignChanged,
"Number of stores changed by alignment assumptions");
STATISTIC(NumMemIntAlignChanged,
"Number of memory intrinsics changed by alignment assumptions");
namespace {
struct AlignmentFromAssumptions : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
AlignmentFromAssumptions() : FunctionPass(ID) {
initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
}
AlignmentFromAssumptionsPass Impl;
};
}
char AlignmentFromAssumptions::ID = 0;
static const char aip_name[] = "Alignment from assumptions";
INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME,
aip_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME,
aip_name, false, false)
FunctionPass *llvm::createAlignmentFromAssumptionsPass() {
return new AlignmentFromAssumptions();
}
// Given an expression for the (constant) alignment, AlignSCEV, and an
// expression for the displacement between a pointer and the aligned address,
// DiffSCEV, compute the alignment of the displaced pointer if it can be reduced
// to a constant. Using SCEV to compute alignment handles the case where
// DiffSCEV is a recurrence with constant start such that the aligned offset
// is constant. e.g. {16,+,32} % 32 -> 16.
static MaybeAlign getNewAlignmentDiff(const SCEV *DiffSCEV,
const SCEV *AlignSCEV,
ScalarEvolution *SE) {
// DiffUnits = Diff % int64_t(Alignment)
const SCEV *DiffUnitsSCEV = SE->getURemExpr(DiffSCEV, AlignSCEV);
LLVM_DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is "
<< *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n");
if (const SCEVConstant *ConstDUSCEV =
dyn_cast<SCEVConstant>(DiffUnitsSCEV)) {
int64_t DiffUnits = ConstDUSCEV->getValue()->getSExtValue();
// If the displacement is an exact multiple of the alignment, then the
// displaced pointer has the same alignment as the aligned pointer, so
// return the alignment value.
if (!DiffUnits)
return cast<SCEVConstant>(AlignSCEV)->getValue()->getAlignValue();
// If the displacement is not an exact multiple, but the remainder is a
// constant, then return this remainder (but only if it is a power of 2).
uint64_t DiffUnitsAbs = std::abs(DiffUnits);
if (isPowerOf2_64(DiffUnitsAbs))
return Align(DiffUnitsAbs);
}
return None;
}
// There is an address given by an offset OffSCEV from AASCEV which has an
// alignment AlignSCEV. Use that information, if possible, to compute a new
// alignment for Ptr.
static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
const SCEV *OffSCEV, Value *Ptr,
ScalarEvolution *SE) {
const SCEV *PtrSCEV = SE->getSCEV(Ptr);
// On a platform with 32-bit allocas, but 64-bit flat/global pointer sizes
// (*cough* AMDGPU), the effective SCEV type of AASCEV and PtrSCEV
// may disagree. Trunc/extend so they agree.
PtrSCEV = SE->getTruncateOrZeroExtend(
PtrSCEV, SE->getEffectiveSCEVType(AASCEV->getType()));
const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
if (isa<SCEVCouldNotCompute>(DiffSCEV))
return Align(1);
// On 32-bit platforms, DiffSCEV might now have type i32 -- we've always
// sign-extended OffSCEV to i64, so make sure they agree again.
DiffSCEV = SE->getNoopOrSignExtend(DiffSCEV, OffSCEV->getType());
// What we really want to know is the overall offset to the aligned
// address. This address is displaced by the provided offset.
DiffSCEV = SE->getAddExpr(DiffSCEV, OffSCEV);
LLVM_DEBUG(dbgs() << "AFI: alignment of " << *Ptr << " relative to "
<< *AlignSCEV << " and offset " << *OffSCEV
<< " using diff " << *DiffSCEV << "\n");
if (MaybeAlign NewAlignment = getNewAlignmentDiff(DiffSCEV, AlignSCEV, SE)) {
LLVM_DEBUG(dbgs() << "\tnew alignment: " << DebugStr(NewAlignment) << "\n");
return *NewAlignment;
}
if (const SCEVAddRecExpr *DiffARSCEV = dyn_cast<SCEVAddRecExpr>(DiffSCEV)) {
// The relative offset to the alignment assumption did not yield a constant,
// but we should try harder: if we assume that a is 32-byte aligned, then in
// for (i = 0; i < 1024; i += 4) r += a[i]; not all of the loads from a are
// 32-byte aligned; instead they alternate between 32- and 16-byte alignment.
// As a result, the new alignment will not be a constant, but can still
// be improved over the default (of 4) to 16.
const SCEV *DiffStartSCEV = DiffARSCEV->getStart();
const SCEV *DiffIncSCEV = DiffARSCEV->getStepRecurrence(*SE);
LLVM_DEBUG(dbgs() << "\ttrying start/inc alignment using start "
<< *DiffStartSCEV << " and inc " << *DiffIncSCEV << "\n");
// Now compute the new alignment using the displacement to the value in the
// first iteration, and also the alignment using the per-iteration delta.
// If these are the same, then use that answer. Otherwise, use the smaller
// one, but only if it divides the larger one.
MaybeAlign NewAlignment = getNewAlignmentDiff(DiffStartSCEV, AlignSCEV, SE);
MaybeAlign NewIncAlignment =
getNewAlignmentDiff(DiffIncSCEV, AlignSCEV, SE);
LLVM_DEBUG(dbgs() << "\tnew start alignment: " << DebugStr(NewAlignment)
<< "\n");
LLVM_DEBUG(dbgs() << "\tnew inc alignment: " << DebugStr(NewIncAlignment)
<< "\n");
if (!NewAlignment || !NewIncAlignment)
return Align(1);
const Align NewAlign = *NewAlignment;
const Align NewIncAlign = *NewIncAlignment;
if (NewAlign > NewIncAlign) {
LLVM_DEBUG(dbgs() << "\tnew start/inc alignment: "
<< DebugStr(NewIncAlign) << "\n");
return NewIncAlign;
}
if (NewIncAlign > NewAlign) {
LLVM_DEBUG(dbgs() << "\tnew start/inc alignment: " << DebugStr(NewAlign)
<< "\n");
return NewAlign;
}
assert(NewIncAlign == NewAlign);
LLVM_DEBUG(dbgs() << "\tnew start/inc alignment: " << DebugStr(NewAlign)
<< "\n");
return NewAlign;
}
return Align(1);
}
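// In scalar terms, the two helpers above compute: a use pointer P is given
// alignment Align when Diff = (P - AAPtr) + Off is a multiple of Align, and
// otherwise the remainder itself when that remainder is a power of two. A
// minimal standalone sketch (the function name and use of std::optional are
// illustrative assumptions, not code from this file):
//
//   #include <cstdint>
//   #include <optional>
//
//   std::optional<uint64_t> provableAlignment(int64_t P, int64_t AAPtr,
//                                             int64_t Off, uint64_t Align) {
//     uint64_t Rem = static_cast<uint64_t>(P - AAPtr + Off) % Align; // urem
//     if (Rem == 0)
//       return Align;                     // exact multiple: full alignment
//     if ((Rem & (Rem - 1)) == 0)
//       return Rem;                       // constant power-of-two remainder
//     return std::nullopt;                // nothing provable
//   }
//
// The SCEVAddRecExpr branch extends this to displacements that are
// recurrences, e.g. {16,+,32} % 32 == 16 on every iteration.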
bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
unsigned Idx,
Value *&AAPtr,
const SCEV *&AlignSCEV,
const SCEV *&OffSCEV) {
Type *Int64Ty = Type::getInt64Ty(I->getContext());
OperandBundleUse AlignOB = I->getOperandBundleAt(Idx);
if (AlignOB.getTagName() != "align")
return false;
assert(AlignOB.Inputs.size() >= 2);
AAPtr = AlignOB.Inputs[0].get();
// TODO: Consider accumulating the offset to the base.
AAPtr = AAPtr->stripPointerCastsSameRepresentation();
AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get());
AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty);
+ if (!isa<SCEVConstant>(AlignSCEV))
+ // Added to suppress a crash because the consumer doesn't expect
+ // non-constant alignments in the assume bundle. TODO: Consider
+ // generalizing the caller.
+ return false;
if (AlignOB.Inputs.size() == 3)
OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get());
else
OffSCEV = SE->getZero(Int64Ty);
OffSCEV = SE->getTruncateOrZeroExtend(OffSCEV, Int64Ty);
return true;
}
bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
unsigned Idx) {
Value *AAPtr;
const SCEV *AlignSCEV, *OffSCEV;
if (!extractAlignmentInfo(ACall, Idx, AAPtr, AlignSCEV, OffSCEV))
return false;
// Skip ConstantPointerNull and UndefValue. Assumptions on these shouldn't
// affect other users.
if (isa<ConstantData>(AAPtr))
return false;
const SCEV *AASCEV = SE->getSCEV(AAPtr);
// Apply the assumption to all other users of the specified pointer.
SmallPtrSet<Instruction *, 32> Visited;
SmallVector<Instruction*, 16> WorkList;
for (User *J : AAPtr->users()) {
if (J == ACall)
continue;
if (Instruction *K = dyn_cast<Instruction>(J))
WorkList.push_back(K);
}
while (!WorkList.empty()) {
Instruction *J = WorkList.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
if (!isValidAssumeForContext(ACall, J, DT))
continue;
Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
LI->getPointerOperand(), SE);
if (NewAlignment > LI->getAlign()) {
LI->setAlignment(NewAlignment);
++NumLoadAlignChanged;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
if (!isValidAssumeForContext(ACall, J, DT))
continue;
Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
SI->getPointerOperand(), SE);
if (NewAlignment > SI->getAlign()) {
SI->setAlignment(NewAlignment);
++NumStoreAlignChanged;
}
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
if (!isValidAssumeForContext(ACall, J, DT))
continue;
Align NewDestAlignment =
getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MI->getDest(), SE);
LLVM_DEBUG(dbgs() << "\tmem inst: " << DebugStr(NewDestAlignment)
<< "\n";);
if (NewDestAlignment > *MI->getDestAlign()) {
MI->setDestAlignment(NewDestAlignment);
++NumMemIntAlignChanged;
}
// For memory transfers, there is also a source alignment that
// can be set.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
Align NewSrcAlignment =
getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MTI->getSource(), SE);
LLVM_DEBUG(dbgs() << "\tmem trans: " << DebugStr(NewSrcAlignment)
<< "\n";);
if (NewSrcAlignment > *MTI->getSourceAlign()) {
MTI->setSourceAlignment(NewSrcAlignment);
++NumMemIntAlignChanged;
}
}
}
// Now that we've updated that use of the pointer, look for other uses of
// the pointer to update.
Visited.insert(J);
for (User *UJ : J->users()) {
Instruction *K = cast<Instruction>(UJ);
if (!Visited.count(K))
WorkList.push_back(K);
}
}
return true;
}
bool AlignmentFromAssumptions::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return Impl.runImpl(F, AC, SE, DT);
}
bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
ScalarEvolution *SE_,
DominatorTree *DT_) {
SE = SE_;
DT = DT_;
bool Changed = false;
for (auto &AssumeVH : AC.assumptions())
if (AssumeVH) {
CallInst *Call = cast<CallInst>(AssumeVH);
for (unsigned Idx = 0; Idx < Call->getNumOperandBundles(); Idx++)
Changed |= processAssumption(Call, Idx);
}
return Changed;
}
PreservedAnalyses
AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
if (!runImpl(F, AC, &SE, &DT))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<ScalarEvolutionAnalysis>();
return PA;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3d60e205b002..a153f393448c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1,2787 +1,2792 @@
//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements an idiom recognizer that transforms simple loops into a
// non-loop form. In cases where this kicks in, it can be a significant
// performance win.
//
// If compiling for code size we avoid idiom recognition if the resulting
// code could be larger than the code for the original loop. One way this could
// happen is if the loop is not removable after idiom recognition due to the
// presence of non-idiom instructions. The initial implementation of the
// heuristics applies to idioms in multi-block loops.
//
//===----------------------------------------------------------------------===//
//
// TODO List:
//
// Future loop memory idioms to recognize:
// memcmp, strlen, etc.
// Future floating point idioms to recognize in -ffast-math mode:
// fpowi
// Future integer operation idioms to recognize:
// ctpop
//
// Beware that isel's default lowering for ctpop is highly inefficient for
// i64 and larger types when i64 is legal and the value has few bits set. It
// would be good to enhance isel to emit a loop for ctpop in this case.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "loop-idiom"
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
STATISTIC(
NumShiftUntilBitTest,
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
STATISTIC(NumShiftUntilZero,
"Number of uncountable loops recognized as 'shift until zero' idiom");
bool DisableLIRP::All;
static cl::opt<bool, true>
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
cl::desc("Options to disable Loop Idiom Recognize Pass."),
cl::location(DisableLIRP::All), cl::init(false),
cl::ReallyHidden);
bool DisableLIRP::Memset;
static cl::opt<bool, true>
DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
cl::desc("Proceed with loop idiom recognize pass, but do "
"not convert loop(s) to memset."),
cl::location(DisableLIRP::Memset), cl::init(false),
cl::ReallyHidden);
bool DisableLIRP::Memcpy;
static cl::opt<bool, true>
DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
cl::desc("Proceed with loop idiom recognize pass, but do "
"not convert loop(s) to memcpy."),
cl::location(DisableLIRP::Memcpy), cl::init(false),
cl::ReallyHidden);
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling"
"with -Os/-Oz"),
cl::init(true), cl::Hidden);
namespace {
class LoopIdiomRecognize {
Loop *CurLoop = nullptr;
AliasAnalysis *AA;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
std::unique_ptr<MemorySSAUpdater> MSSAU;
public:
explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, MemorySSA *MSSA,
const DataLayout *DL,
OptimizationRemarkEmitter &ORE)
: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
bool runOnLoop(Loop *L);
private:
using StoreList = SmallVector<StoreInst *, 8>;
using StoreListMap = MapVector<Value *, StoreList>;
StoreListMap StoreRefsForMemset;
StoreListMap StoreRefsForMemsetPattern;
StoreList StoreRefsForMemcpy;
bool HasMemset;
bool HasMemsetPattern;
bool HasMemcpy;
/// Return code for isLegalStore()
enum LegalStoreKind {
None = 0,
Memset,
MemsetPattern,
Memcpy,
UnorderedAtomicMemcpy,
DontUse // Dummy retval never to be used. Allows catching errors in retval
// handling.
};
/// \name Countable Loop Idiom Handling
/// @{
bool runOnCountableLoop();
bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
SmallVectorImpl<BasicBlock *> &ExitBlocks);
void collectStores(BasicBlock *BB);
LegalStoreKind isLegalStore(StoreInst *SI);
enum class ForMemset { No, Yes };
bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
ForMemset For);
template <typename MemInst>
bool processLoopMemIntrinsic(
BasicBlock *BB,
bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
const SCEV *BECount);
bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
MaybeAlign StoreAlignment, Value *StoredVal,
Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores,
const SCEVAddRecExpr *Ev, const SCEV *BECount,
bool NegStride, bool IsLoopMemset = false);
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
unsigned StoreSize, MaybeAlign StoreAlign,
MaybeAlign LoadAlign, Instruction *TheStore,
Instruction *TheLoad,
const SCEVAddRecExpr *StoreEv,
const SCEVAddRecExpr *LoadEv,
const SCEV *BECount);
bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
bool IsLoopMemset = false);
/// @}
/// \name Noncountable Loop Idiom Handling
/// @{
bool runOnNoncountableLoop();
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
Instruction *CntInst, PHINode *CntPhi,
Value *Var, Instruction *DefX,
const DebugLoc &DL, bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop);
bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
/// @}
};
class LoopIdiomRecognizeLegacyPass : public LoopPass {
public:
static char ID;
explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
initializeLoopIdiomRecognizeLegacyPassPass(
*PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (DisableLIRP::All)
return false;
if (skipLoop(L))
return false;
AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
*L->getHeader()->getParent());
const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
MemorySSA *MSSA = nullptr;
if (MSSAAnalysis)
MSSA = &MSSAAnalysis->getMSSA();
// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, MSSA, DL, ORE);
return LIR.runOnLoop(L);
}
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
}
};
} // end anonymous namespace
char LoopIdiomRecognizeLegacyPass::ID = 0;
PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
if (DisableLIRP::All)
return PreservedAnalyses::all();
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
AR.MSSA, DL, ORE);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
}
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
"Recognize loop idioms", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
"Recognize loop idioms", false, false)
Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
static void deleteDeadInstruction(Instruction *I) {
I->replaceAllUsesWith(UndefValue::get(I->getType()));
I->eraseFromParent();
}
//===----------------------------------------------------------------------===//
//
// Implementation of LoopIdiomRecognize
//
//===----------------------------------------------------------------------===//
bool LoopIdiomRecognize::runOnLoop(Loop *L) {
CurLoop = L;
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
if (!L->getLoopPreheader())
return false;
// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
if (Name == "memset" || Name == "memcpy")
return false;
// Determine if code size heuristics need to be applied.
ApplyCodeSizeHeuristics =
L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
HasMemset = TLI->has(LibFunc_memset);
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
HasMemcpy = TLI->has(LibFunc_memcpy);
if (HasMemset || HasMemsetPattern || HasMemcpy)
if (SE->hasLoopInvariantBackedgeTakenCount(L))
return runOnCountableLoop();
return runOnNoncountableLoop();
}
bool LoopIdiomRecognize::runOnCountableLoop() {
const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
assert(!isa<SCEVCouldNotCompute>(BECount) &&
"runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count");
// If this loop executes exactly one time, then it should be peeled, not
// optimized by this pass.
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
if (BECst->getAPInt() == 0)
return false;
SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName()
<< "] Countable Loop %" << CurLoop->getHeader()->getName()
<< "\n");
// The following transforms hoist stores/memsets into the loop pre-header.
// Give up if the loop has instructions that may throw.
SimpleLoopSafetyInfo SafetyInfo;
SafetyInfo.computeLoopSafetyInfo(CurLoop);
if (SafetyInfo.anyBlockMayThrow())
return false;
bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
for (auto *BB : CurLoop->getBlocks()) {
// Ignore blocks in subloops.
if (LI->getLoopFor(BB) != CurLoop)
continue;
MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
}
return MadeChange;
}
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
return ConstStride->getAPInt();
}
/// getMemSetPatternValue - If a strided store of the specified value is safe to
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
/// be passed in. Otherwise, return null.
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
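///
/// As an illustrative sketch (hypothetical value): a store of the i32 constant
/// 0x01020304 has a 4-byte element and no single-byte splat, so the pattern
/// returned is the replicated 16-byte array
/// \code
///   [4 x i32] [i32 0x01020304, i32 0x01020304, i32 0x01020304, i32 0x01020304]
/// \endcode
/// which memset_pattern16 then tiles across the destination.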
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
// FIXME: This could check for UndefValue because it can be merged into any
// other valid pattern.
// If the value isn't a constant, we can't promote it to being in a constant
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
Constant *C = dyn_cast<Constant>(V);
if (!C)
return nullptr;
// Only handle simple values that are a power of two bytes in size.
uint64_t Size = DL->getTypeSizeInBits(V->getType());
if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
return nullptr;
// Don't care enough about darwin/ppc to implement this.
if (DL->isBigEndian())
return nullptr;
// Convert to size in bytes.
Size /= 8;
// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
// if the top and bottom are the same (e.g. for vectors and large integers).
if (Size > 16)
return nullptr;
// If the constant is exactly 16 bytes, just use it.
if (Size == 16)
return C;
// Otherwise, we'll use an array of the constants.
unsigned ArraySize = 16 / Size;
ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}
LoopIdiomRecognize::LegalStoreKind
LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// Don't touch volatile stores.
if (SI->isVolatile())
return LegalStoreKind::None;
// We only want simple or unordered-atomic stores.
if (!SI->isUnordered())
return LegalStoreKind::None;
// Avoid merging nontemporal stores.
if (SI->getMetadata(LLVMContext::MD_nontemporal))
return LegalStoreKind::None;
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
// Don't convert stores of non-integral pointer types to memsets (which store
// integers).
if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
return LegalStoreKind::None;
// Reject stores that are so large that they overflow an unsigned.
// When storing out scalable vectors we bail out for now, since the code
// below currently only works for constant strides.
TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) ||
(SizeInBits.getFixedSize() >> 32) != 0)
return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *StoreEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return LegalStoreKind::None;
// Check to see if we have a constant stride.
if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
return LegalStoreKind::None;
// See if the store can be turned into a memset.
// If the stored value is a byte-wise value (like i32 -1), then it may be
// turned into a memset of i8 -1, assuming that all the consecutive bytes
// are stored. A store of i32 0x01020304 can never be turned into a memset,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal, *DL);
// Note: memset and memset_pattern on unordered-atomic stores are not yet supported.
bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
// If we're allowed to form a memset, and the stored value would be
// acceptable for memset, use it.
if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
// Verify that the stored value is loop invariant. If not, we can't
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// It looks like we can use SplatValue.
return LegalStoreKind::Memset;
}
if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
// Don't create memset_pattern16s with address spaces.
StorePtr->getType()->getPointerAddressSpace() == 0 &&
getMemSetPatternValue(StoredVal, DL)) {
// It looks like we can use PatternValue!
return LegalStoreKind::MemsetPattern;
}
// Otherwise, see if the store can be turned into a memcpy.
if (HasMemcpy && !DisableLIRP::Memcpy) {
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
APInt Stride = getStoreStride(StoreEv);
unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
if (StoreSize != Stride && StoreSize != -Stride)
return LegalStoreKind::None;
// The store must be feeding a non-volatile load.
LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
// Only allow non-volatile loads
if (!LI || LI->isVolatile())
return LegalStoreKind::None;
// Only allow simple or unordered-atomic loads
if (!LI->isUnordered())
return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load. If we have something else, it's a
// random load we can't handle.
const SCEVAddRecExpr *LoadEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
return LegalStoreKind::None;
// The store and load must share the same stride.
if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
return LegalStoreKind::None;
// Success. This store can be converted into a memcpy.
UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
: LegalStoreKind::Memcpy;
}
// This store can't be transformed into a memset/memcpy.
return LegalStoreKind::None;
}
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
StoreRefsForMemset.clear();
StoreRefsForMemsetPattern.clear();
StoreRefsForMemcpy.clear();
for (Instruction &I : *BB) {
StoreInst *SI = dyn_cast<StoreInst>(&I);
if (!SI)
continue;
// Make sure this is a strided store with a constant stride.
switch (isLegalStore(SI)) {
case LegalStoreKind::None:
// Nothing to do
break;
case LegalStoreKind::Memset: {
// Find the base pointer.
Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
StoreRefsForMemset[Ptr].push_back(SI);
} break;
case LegalStoreKind::MemsetPattern: {
// Find the base pointer.
Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
StoreRefsForMemsetPattern[Ptr].push_back(SI);
} break;
case LegalStoreKind::Memcpy:
case LegalStoreKind::UnorderedAtomicMemcpy:
StoreRefsForMemcpy.push_back(SI);
break;
default:
assert(false && "unhandled return value");
break;
}
}
}
/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count. This block is known to be in the current
/// loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(
BasicBlock *BB, const SCEV *BECount,
SmallVectorImpl<BasicBlock *> &ExitBlocks) {
// We can only promote stores in this block if they are unconditionally
// executed in the loop. For a block to be unconditionally executed, it has
// to dominate all the exit blocks of the loop. Verify this now.
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
if (!DT->dominates(BB, ExitBlocks[i]))
return false;
bool MadeChange = false;
// Look for store instructions, which may be optimized to memset/memcpy.
collectStores(BB);
// Look for a single store or sets of stores with a common base, which can be
// optimized into a memset (memset_pattern). The latter most commonly happens
// with structs and hand-unrolled loops.
for (auto &SL : StoreRefsForMemset)
MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
for (auto &SL : StoreRefsForMemsetPattern)
MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
// Optimize the store into a memcpy, if it feeds a similarly strided load.
for (auto &SI : StoreRefsForMemcpy)
MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
MadeChange |= processLoopMemIntrinsic<MemSetInst>(
BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
return MadeChange;
}
/// See if this store(s) can be promoted to a memset.
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
const SCEV *BECount, ForMemset For) {
// Try to find consecutive stores that can be transformed into memsets.
SetVector<StoreInst *> Heads, Tails;
SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
// Do a quadratic search on all of the given stores and find
// all of the pairs of stores that follow each other.
SmallVector<unsigned, 16> IndexQueue;
for (unsigned i = 0, e = SL.size(); i < e; ++i) {
assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
Value *FirstStoredVal = SL[i]->getValueOperand();
Value *FirstStorePtr = SL[i]->getPointerOperand();
const SCEVAddRecExpr *FirstStoreEv =
cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
APInt FirstStride = getStoreStride(FirstStoreEv);
unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
// See if we can optimize just this store in isolation.
if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
Heads.insert(SL[i]);
continue;
}
Value *FirstSplatValue = nullptr;
Constant *FirstPatternValue = nullptr;
if (For == ForMemset::Yes)
FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
else
FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
assert((FirstSplatValue || FirstPatternValue) &&
"Expected either splat value or pattern value.");
IndexQueue.clear();
// If a store has multiple consecutive store candidates, search the Stores
// array according to the sequence: from i+1 to e, then from i-1 to 0.
// This is because pairing with the immediately succeeding or preceding
// candidate usually creates the best chance of finding a memset opportunity.
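// For example (illustrative): with i == 2 and e == 5 the visit order is
// 3, 4, 1, 0: first the candidates after SL[i] in order, then the ones
// before it in reverse order.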
unsigned j = 0;
for (j = i + 1; j < e; ++j)
IndexQueue.push_back(j);
for (j = i; j > 0; --j)
IndexQueue.push_back(j - 1);
for (auto &k : IndexQueue) {
assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
Value *SecondStorePtr = SL[k]->getPointerOperand();
const SCEVAddRecExpr *SecondStoreEv =
cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
APInt SecondStride = getStoreStride(SecondStoreEv);
if (FirstStride != SecondStride)
continue;
Value *SecondStoredVal = SL[k]->getValueOperand();
Value *SecondSplatValue = nullptr;
Constant *SecondPatternValue = nullptr;
if (For == ForMemset::Yes)
SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
else
SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
assert((SecondSplatValue || SecondPatternValue) &&
"Expected either splat value or pattern value.");
if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
if (For == ForMemset::Yes) {
if (isa<UndefValue>(FirstSplatValue))
FirstSplatValue = SecondSplatValue;
if (FirstSplatValue != SecondSplatValue)
continue;
} else {
if (isa<UndefValue>(FirstPatternValue))
FirstPatternValue = SecondPatternValue;
if (FirstPatternValue != SecondPatternValue)
continue;
}
Tails.insert(SL[k]);
Heads.insert(SL[i]);
ConsecutiveChain[SL[i]] = SL[k];
break;
}
}
}
// We may run into multiple chains that merge into a single chain. We mark the
// stores that we transformed so that we don't visit the same store twice.
SmallPtrSet<Value *, 16> TransformedStores;
bool Changed = false;
// For stores that start but don't end a link in the chain:
for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
it != e; ++it) {
if (Tails.count(*it))
continue;
// We found a store instr that starts a chain. Now follow the chain and try
// to transform it.
SmallPtrSet<Instruction *, 8> AdjacentStores;
StoreInst *I = *it;
StoreInst *HeadStore = I;
unsigned StoreSize = 0;
// Collect the chain into a list.
while (Tails.count(I) || Heads.count(I)) {
if (TransformedStores.count(I))
break;
AdjacentStores.insert(I);
StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
// Move to the next value in the chain.
I = ConsecutiveChain[I];
}
Value *StoredVal = HeadStore->getValueOperand();
Value *StorePtr = HeadStore->getPointerOperand();
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
APInt Stride = getStoreStride(StoreEv);
// Check to see if the stride matches the size of the stores. If so, then
// we know that every byte is touched in the loop.
if (StoreSize != Stride && StoreSize != -Stride)
continue;
bool NegStride = StoreSize == -Stride;
if (processLoopStridedStore(StorePtr, StoreSize,
MaybeAlign(HeadStore->getAlignment()),
StoredVal, HeadStore, AdjacentStores, StoreEv,
BECount, NegStride)) {
TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
Changed = true;
}
}
return Changed;
}
/// processLoopMemIntrinsic - Template function for calling different processor
/// functions based on mem intrinsic type.
template <typename MemInst>
bool LoopIdiomRecognize::processLoopMemIntrinsic(
BasicBlock *BB,
bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
const SCEV *BECount) {
bool MadeChange = false;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
// Look for memory instructions, which may be optimized to a larger one.
if (MemInst *MI = dyn_cast<MemInst>(Inst)) {
WeakTrackingVH InstPtr(&*I);
if (!(this->*Processor)(MI, BECount))
continue;
MadeChange = true;
// If processing the instruction invalidated our iterator, start over from
// the top of the block.
if (!InstPtr)
I = BB->begin();
}
}
return MadeChange;
}
/// processLoopMemCpy - See if this memcpy can be promoted to a large memcpy
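///
/// A sketch of the form handled here (illustrative, assuming the constant
/// memcpy length equals the stride of both pointers):
/// \code
///   for (i = 0; i != n; ++i)
///     memcpy(&A[i], &B[i], sizeof(A[0])); // widened to one memcpy of n elements
/// \endcode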
bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
const SCEV *BECount) {
// We can only handle non-volatile memcpys with a constant size.
if (MCI->isVolatile() || !isa<ConstantInt>(MCI->getLength()))
return false;
// If we're not allowed to hack on memcpy, we fail.
if ((!HasMemcpy && !isa<MemCpyInlineInst>(MCI)) || DisableLIRP::Memcpy)
return false;
Value *Dest = MCI->getDest();
Value *Source = MCI->getSource();
if (!Dest || !Source)
return false;
// See if the load and store pointer expressions are AddRec like {base,+,1} on
// the current loop, which indicates a strided load and store. If we have
// something else, it's a random load or store we can't handle.
const SCEVAddRecExpr *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Dest));
if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return false;
const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Source));
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
return false;
// Reject memcpys that are so large that they overflow an unsigned.
uint64_t SizeInBytes = cast<ConstantInt>(MCI->getLength())->getZExtValue();
if ((SizeInBytes >> 32) != 0)
return false;
// Check if the stride matches the size of the memcpy. If so, then we know
// that every byte is touched in the loop.
const SCEVConstant *StoreStride =
dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
const SCEVConstant *LoadStride =
dyn_cast<SCEVConstant>(LoadEv->getOperand(1));
if (!StoreStride || !LoadStride)
return false;
APInt StoreStrideValue = StoreStride->getAPInt();
APInt LoadStrideValue = LoadStride->getAPInt();
// Huge stride value - give up
if (StoreStrideValue.getBitWidth() > 64 || LoadStrideValue.getBitWidth() > 64)
return false;
if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "SizeStrideUnequal", MCI)
<< ore::NV("Inst", "memcpy") << " in "
<< ore::NV("Function", MCI->getFunction())
<< " function will not be hoised: "
<< ore::NV("Reason", "memcpy size is not equal to stride");
});
return false;
}
int64_t StoreStrideInt = StoreStrideValue.getSExtValue();
int64_t LoadStrideInt = LoadStrideValue.getSExtValue();
// Check if the load stride matches the store stride.
if (StoreStrideInt != LoadStrideInt)
return false;
return processLoopStoreOfLoopLoad(Dest, Source, (unsigned)SizeInBytes,
MCI->getDestAlign(), MCI->getSourceAlign(),
MCI, MCI, StoreEv, LoadEv, BECount);
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
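///
/// A sketch of the form handled here (illustrative, assuming the constant
/// memset length equals the stride):
/// \code
///   for (i = 0; i != n; ++i)
///     memset(&A[i], 0, sizeof(A[0])); // widened to one memset of n elements
/// \endcode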
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
const SCEV *BECount) {
// We can only handle non-volatile memsets with a constant size.
if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
return false;
// If we're not allowed to hack on memset, we fail.
if (!HasMemset || DisableLIRP::Memset)
return false;
Value *Pointer = MSI->getDest();
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
return false;
// Reject memsets that are so large that they overflow an unsigned.
uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
if ((SizeInBytes >> 32) != 0)
return false;
// Check to see if the stride matches the size of the memset. If so, then we
// know that every byte is touched in the loop.
const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
if (!ConstStride)
return false;
APInt Stride = ConstStride->getAPInt();
if (SizeInBytes != Stride && SizeInBytes != -Stride)
return false;
// Verify that the memset value is loop invariant. If not, we can't promote
// the memset.
Value *SplatValue = MSI->getValue();
if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
return false;
SmallPtrSet<Instruction *, 1> MSIs;
MSIs.insert(MSI);
bool NegStride = SizeInBytes == -Stride;
return processLoopStridedStore(
Pointer, (unsigned)SizeInBytes, MaybeAlign(MSI->getDestAlignment()),
SplatValue, MSI, MSIs, Ev, BECount, NegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
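///
/// For example (illustrative): when forming a memset over a range of A, any
/// load or store of A inside the loop other than the stores being replaced
/// makes this return true and blocks the transform.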
static bool
mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
const SCEV *BECount, unsigned StoreSize,
AliasAnalysis &AA,
SmallPtrSetImpl<Instruction *> &IgnoredStores) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
LocationSize AccessSize = LocationSize::afterPointer();
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
StoreSize);
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. A store to &A[i] of 100 will always return may-alias
// with a store of &A[100]; we need StoreLoc to be "A" with size of 100,
// which will then no-alias a store to &A[100].
MemoryLocation StoreLoc(Ptr, AccessSize);
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
++BI)
for (Instruction &I : **BI)
if (IgnoredStores.count(&I) == 0 &&
isModOrRefSet(
intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
return true;
return false;
}
// If we have a negative stride, Start refers to the end of the memory location
// we're trying to memset. Therefore, we need to recompute the base pointer,
// which is just Start - BECount*Size.
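// For example (illustrative numbers): with Start == &A[9], BECount == 9 and
// StoreSize == 1, the loop covers A[9] down to A[0], so the recomputed base
// pointer is Start - 9 * 1 == &A[0].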
static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
Type *IntPtr, unsigned StoreSize,
ScalarEvolution *SE) {
const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
if (StoreSize != 1)
Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
SCEV::FlagNUW);
return SE->getMinusSCEV(Start, Index);
}
/// Compute the number of bytes as a SCEV from the backedge taken count.
///
/// This also maps the SCEV into the provided type and tries to handle the
/// computation in a way that will fold cleanly.
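///
/// For example (illustrative): a loop with BECount == 99 that stores 4-byte
/// elements touches (99 + 1) * 4 == 400 bytes, which is the length given to
/// the emitted memset/memcpy.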
static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
unsigned StoreSize, Loop *CurLoop,
const DataLayout *DL, ScalarEvolution *SE) {
const SCEV *NumBytesS;
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
//
// If we're going to need to zero extend the BE count, check if we can add
// one to it prior to zero extending without overflow. Provided this is safe,
// it allows better simplification of the +1.
if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() <
DL->getTypeSizeInBits(IntPtr).getFixedSize() &&
SE->isLoopEntryGuardedByCond(
CurLoop, ICmpInst::ICMP_NE, BECount,
SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
NumBytesS = SE->getZeroExtendExpr(
SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
IntPtr);
} else {
NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
SE->getOne(IntPtr), SCEV::FlagNUW);
}
// And scale it based on the store size.
if (StoreSize != 1) {
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
SCEV::FlagNUW);
}
return NumBytesS;
}
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
Value *DestPtr, unsigned StoreSize, MaybeAlign StoreAlignment,
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
if (!SplatValue)
PatternValue = getMemSetPatternValue(StoredVal, DL);
assert((SplatValue || PatternValue) &&
"Expected either splat value or pattern value.");
// The trip count of the loop and the base pointer of the addrec SCEV are
// guaranteed to be loop invariant, which means that they should dominate the
// header. This allows us to insert code for them in the preheader.
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
SCEVExpanderCleaner ExpCleaner(Expander, *DT);
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
bool Changed = false;
const SCEV *Start = Ev->getStart();
// Handle negative strided loops.
if (NegStride)
Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSize, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
if (!isSafeToExpand(Start, *SE))
return Changed;
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
Value *BasePtr =
Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
// From here on out, conservatively report to the pass manager that we've
// changed the IR, even if we later clean up these added instructions. There
// may be structural differences e.g. in the order of use lists not accounted
// for in just a textual dump of the IR. This is written as a variable, even
// though statically all the places this dominates could be replaced with
// 'true', with the hope that anyone trying to be clever / "more precise" with
// the return value will read this comment, and leave them alone.
Changed = true;
if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSize, *AA, Stores))
return Changed;
if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
return Changed;
// Okay, everything looks good, insert the memset.
const SCEV *NumBytesS =
getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
if (!isSafeToExpand(NumBytesS, *SE))
return Changed;
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
CallInst *NewCall;
if (SplatValue) {
NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
MaybeAlign(StoreAlignment));
} else {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
Module *M = TheStore->getModule();
StringRef FuncName = "memset_pattern16";
FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
Int8PtrTy, Int8PtrTy, IntIdxTy);
inferLibFuncAttributes(M, FuncName, *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// a constant array of 16 bytes. Plop the value into a mergeable global.
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
GlobalValue::PrivateLinkage,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
GV->setAlignment(Align(16));
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
}
NewCall->setDebugLoc(TheStore->getDebugLoc());
if (MSSAU) {
MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
}
LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
<< " from store to: " << *Ev << " at: " << *TheStore
<< "\n");
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
NewCall->getDebugLoc(), Preheader)
<< "Transformed loop-strided store in "
<< ore::NV("Function", TheStore->getFunction())
<< " function into a call to "
<< ore::NV("NewFunction", NewCall->getCalledFunction())
<< "() intrinsic";
});
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
for (auto *I : Stores) {
if (MSSAU)
MSSAU->removeMemoryAccess(I, true);
deleteDeadInstruction(I);
}
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
++NumMemSet;
ExpCleaner.markResultUsed();
return true;
}
/// If the stored value is a strided load in the same loop with the same stride
/// this may be transformable into a memcpy. This kicks in for stuff like
/// for (i) A[i] = B[i];
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const SCEV *BECount) {
assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
Value *StorePtr = SI->getPointerOperand();
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
// The store must be feeding a non-volatile load.
LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load. If we have something else, it's a
// random load we can't handle.
Value *LoadPtr = LI->getPointerOperand();
const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSize,
SI->getAlign(), LI->getAlign(), SI, LI,
StoreEv, LoadEv, BECount);
}
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
Value *DestPtr, Value *SourcePtr, unsigned StoreSize, MaybeAlign StoreAlign,
MaybeAlign LoadAlign, Instruction *TheStore, Instruction *TheLoad,
const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv,
const SCEV *BECount) {
// FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to
// conservatively bail here, since otherwise we may have to transform
// llvm.memcpy.inline into llvm.memcpy which is illegal.
if (isa<MemCpyInlineInst>(TheStore))
return false;
// The trip count of the loop and the base pointer of the addrec SCEV are
// guaranteed to be loop invariant, which means that they should dominate the
// header. This allows us to insert code for them in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
SCEVExpanderCleaner ExpCleaner(Expander, *DT);
bool Changed = false;
const SCEV *StrStart = StoreEv->getStart();
unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
APInt Stride = getStoreStride(StoreEv);
bool NegStride = StoreSize == -Stride;
// Handle negative strided loops.
if (NegStride)
StrStart = getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
// or write the memory region we're storing to. This includes the load that
// feeds the stores. Check for an alias by generating the base address and
// checking everything.
Value *StoreBasePtr = Expander.expandCodeFor(
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
// From here on out, conservatively report to the pass manager that we've
// changed the IR, even if we later clean up these added instructions. There
// may be structural differences e.g. in the order of use lists not accounted
// for in just a textual dump of the IR. This is written as a variable, even
// though statically all the places this dominates could be replaced with
// 'true', with the hope that anyone trying to be clever / "more precise" with
// the return value will read this comment, and leave them alone.
Changed = true;
SmallPtrSet<Instruction *, 2> Stores;
Stores.insert(TheStore);
bool IsMemCpy = isa<MemCpyInst>(TheStore);
const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
bool UseMemMove =
mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSize, *AA, Stores);
if (UseMemMove) {
+ // For the memmove case it is not enough to guarantee that the loop does not
+ // access TheStore and TheLoad. Additionally we need to make sure that
+ // TheStore is the only user of TheLoad.
+ if (!TheLoad->hasOneUse())
+ return Changed;
Stores.insert(TheLoad);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
BECount, StoreSize, *AA, Stores)) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
TheStore)
<< ore::NV("Inst", InstRemark) << " in "
<< ore::NV("Function", TheStore->getFunction())
<< " function will not be hoisted: "
<< ore::NV("Reason", "The loop may access store location");
});
return Changed;
}
Stores.erase(TheLoad);
}
const SCEV *LdStart = LoadEv->getStart();
unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();
// Handle negative strided loops.
if (NegStride)
LdStart = getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSize, SE);
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
Value *LoadBasePtr = Expander.expandCodeFor(
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
// If the store is a memcpy instruction, we must check if it will write to
// the load memory locations. So remove it from the ignored stores.
if (IsMemCpy)
Stores.erase(TheStore);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
StoreSize, *AA, Stores)) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
<< ore::NV("Inst", InstRemark) << " in "
<< ore::NV("Function", TheStore->getFunction())
<< " function will not be hoisted: "
<< ore::NV("Reason", "The loop may access load location");
});
return Changed;
}
if (UseMemMove) {
// Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for
// negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
int64_t LoadOff = 0, StoreOff = 0;
const Value *BP1 = llvm::GetPointerBaseWithConstantOffset(
LoadBasePtr->stripPointerCasts(), LoadOff, *DL);
const Value *BP2 = llvm::GetPointerBaseWithConstantOffset(
StoreBasePtr->stripPointerCasts(), StoreOff, *DL);
int64_t LoadSize =
DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8;
if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
return Changed;
if ((!NegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
(NegStride && LoadOff + LoadSize > StoreOff))
return Changed;
}
if (avoidLIRForMultiBlockLoop())
return Changed;
// Okay, everything is safe, we can transform this!
const SCEV *NumBytesS =
getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must necessarily be unordered
// by previous checks.
if (!TheStore->isAtomic() && !TheLoad->isAtomic()) {
if (UseMemMove)
NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr,
LoadAlign, NumBytes);
else
NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr,
LoadAlign, NumBytes);
} else {
// For now don't support unordered atomic memmove.
if (UseMemMove)
return Changed;
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
assert((StoreAlign.hasValue() && LoadAlign.hasValue()) &&
"Expect unordered load/store to have align.");
if (StoreAlign.getValue() < StoreSize || LoadAlign.getValue() < StoreSize)
return Changed;
// If the element.atomic memcpy is not lowered into explicit
// loads/stores later, then it will be lowered into an element-size
// specific lib call. If the lib call doesn't exist for our store size, then
// we shouldn't generate the memcpy.
if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
return Changed;
// Create the call.
// Note that unordered atomic loads/stores are *required* by the spec to
// have an alignment but non-atomic loads/stores may not.
NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(),
NumBytes, StoreSize);
}
NewCall->setDebugLoc(TheStore->getDebugLoc());
if (MSSAU) {
MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
}
LLVM_DEBUG(dbgs() << " Formed new call: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *TheLoad
<< "\n"
<< " from store ptr=" << *StoreEv << " at: " << *TheStore
<< "\n");
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
NewCall->getDebugLoc(), Preheader)
<< "Formed a call to "
<< ore::NV("NewFunction", NewCall->getCalledFunction())
<< "() intrinsic from " << ore::NV("Inst", InstRemark)
<< " instruction in " << ore::NV("Function", TheStore->getFunction())
<< " function";
});
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
if (MSSAU)
MSSAU->removeMemoryAccess(TheStore, true);
deleteDeadInstruction(TheStore);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
if (UseMemMove)
++NumMemMove;
else
++NumMemCpy;
ExpCleaner.markResultUsed();
return true;
}
// When compiling for codesize we avoid idiom recognition for a multi-block loop
// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
//
bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
bool IsLoopMemset) {
if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
<< " : LIR " << (IsMemset ? "Memset" : "Memcpy")
<< " avoided: multi-block top-level loop\n");
return true;
}
}
return false;
}
bool LoopIdiomRecognize::runOnNoncountableLoop() {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName()
<< "] Noncountable Loop %"
<< CurLoop->getHeader()->getName() << "\n");
return recognizePopcount() || recognizeAndInsertFFS() ||
recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
}
/// Check if the given conditional branch is based on a comparison between
/// a variable and zero, and if the variable is non-zero (or zero, when
/// JmpOnZero is true) control flows to the loop entry. If the branch matches
/// this behavior, the variable involved in the comparison is returned. This
/// function will be called to see if the precondition and postcondition of the
/// loop are in desirable form.
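///
/// A minimal sketch of the matched form (illustrative, with JmpOnZero false):
/// \code
///   %cmp = icmp ne i32 %x, 0
///   br i1 %cmp, label %loop.entry, label %exit
/// \endcode
/// in which case %x is returned.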
static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
bool JmpOnZero = false) {
if (!BI || !BI->isConditional())
return nullptr;
ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cond)
return nullptr;
ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!CmpZero || !CmpZero->isZero())
return nullptr;
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
if (JmpOnZero)
std::swap(TrueSucc, FalseSucc);
ICmpInst::Predicate Pred = Cond->getPredicate();
if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
(Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
return Cond->getOperand(0);
return nullptr;
}
// Check if the recurrence variable `VarX` is in the right form to create
// the idiom. Returns the value coerced to a PHINode if so.
static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
BasicBlock *LoopEntry) {
auto *PhiX = dyn_cast<PHINode>(VarX);
if (PhiX && PhiX->getParent() == LoopEntry &&
(PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
return PhiX;
return nullptr;
}
/// Return true iff the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction counting the population bit.
/// 2) \p CntPhi is set to the corresponding phi node.
/// 3) \p Var is set to the value whose population bits are being counted.
///
/// The core idiom we are trying to detect is:
/// \code
/// if (x0 != 0)
/// goto loop-exit // the precondition of the loop
/// cnt0 = init-val;
/// do {
/// x1 = phi (x0, x2);
/// cnt1 = phi(cnt0, cnt2);
///
/// cnt2 = cnt1 + 1;
/// ...
/// x2 = x1 & (x1 - 1);
/// ...
/// } while(x2 != 0);
///
/// loop-exit:
/// \endcode
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
Instruction *&CntInst, PHINode *&CntPhi,
Value *&Var) {
// step 1: Check to see if the loop-back branch matches this pattern:
// "if (a!=0) goto loop-entry".
BasicBlock *LoopEntry;
Instruction *DefX2, *CountInst;
Value *VarX1, *VarX0;
PHINode *PhiX, *CountPhi;
DefX2 = CountInst = nullptr;
VarX1 = VarX0 = nullptr;
PhiX = CountPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
// step 1: Check if the loop-back branch is in desirable form.
{
if (Value *T = matchCondition(
dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
DefX2 = dyn_cast<Instruction>(T);
else
return false;
}
// step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
{
if (!DefX2 || DefX2->getOpcode() != Instruction::And)
return false;
BinaryOperator *SubOneOp;
if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
VarX1 = DefX2->getOperand(1);
else {
VarX1 = DefX2->getOperand(0);
SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
}
if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
return false;
ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
if (!Dec ||
!((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
(SubOneOp->getOpcode() == Instruction::Add &&
Dec->isMinusOne()))) {
return false;
}
}
// step 3: Check the recurrence of variable X
PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
if (!PhiX)
return false;
// step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
{
CountInst = nullptr;
for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
IterE = LoopEntry->end();
Iter != IterE; Iter++) {
Instruction *Inst = &*Iter;
if (Inst->getOpcode() != Instruction::Add)
continue;
ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
if (!Inc || !Inc->isOne())
continue;
PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
if (!Phi)
continue;
// Check if the result of the instruction is live out of the loop.
bool LiveOutLoop = false;
for (User *U : Inst->users()) {
if ((cast<Instruction>(U))->getParent() != LoopEntry) {
LiveOutLoop = true;
break;
}
}
if (LiveOutLoop) {
CountInst = Inst;
CountPhi = Phi;
break;
}
}
if (!CountInst)
return false;
}
// step 5: check if the precondition is in this form:
// "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
{
auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
return false;
CntInst = CountInst;
CntPhi = CountPhi;
Var = T;
}
return true;
}
/// Return true if the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
/// or nullptr if there is no such.
/// 2) \p CntPhi is set to the corresponding phi node
/// or nullptr if there is no such.
/// 3) \p Var is set to the value whose CTLZ could be used.
/// 4) \p DefX is set to the instruction calculating Loop exit condition.
///
/// The core idiom we are trying to detect is:
/// \code
/// if (x0 == 0)
/// goto loop-exit // the precondition of the loop
/// cnt0 = init-val;
/// do {
/// x = phi (x0, x.next); //PhiX
/// cnt = phi(cnt0, cnt.next);
///
/// cnt.next = cnt + 1;
/// ...
/// x.next = x >> 1; // DefX
/// ...
/// } while(x.next != 0);
///
/// loop-exit:
/// \endcode
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
Intrinsic::ID &IntrinID, Value *&InitX,
Instruction *&CntInst, PHINode *&CntPhi,
Instruction *&DefX) {
BasicBlock *LoopEntry;
Value *VarX = nullptr;
DefX = nullptr;
CntInst = nullptr;
CntPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
// step 1: Check if the loop-back branch is in desirable form.
if (Value *T = matchCondition(
dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
DefX = dyn_cast<Instruction>(T);
else
return false;
// step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
if (!DefX || !DefX->isShift())
return false;
IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
Intrinsic::ctlz;
ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
if (!Shft || !Shft->isOne())
return false;
VarX = DefX->getOperand(0);
// step 3: Check the recurrence of variable X
PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
if (!PhiX)
return false;
InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
// Make sure the initial value can't be negative otherwise the ashr in the
// loop might never reach zero which would make the loop infinite.
if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
return false;
// step 4: Find the instruction which counts the iterations: cnt.next = cnt + 1
// or cnt.next = cnt + -1.
// TODO: We can skip the step. If loop trip count is known (CTLZ),
// then all uses of "cnt.next" could be optimized to the trip count
// plus "cnt0". Currently it is not optimized.
// This step could be used to detect POPCNT instruction:
// cnt.next = cnt + (x.next & 1)
for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
IterE = LoopEntry->end();
Iter != IterE; Iter++) {
Instruction *Inst = &*Iter;
if (Inst->getOpcode() != Instruction::Add)
continue;
ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
continue;
PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
if (!Phi)
continue;
CntInst = Inst;
CntPhi = Phi;
break;
}
if (!CntInst)
return false;
return true;
}
/// Recognize a CTLZ or CTTZ idiom in a non-countable loop and convert the loop
/// to a countable one (with a CTLZ / CTTZ based trip count). Returns true if a
/// CTLZ / CTTZ intrinsic is inserted as the new trip count; otherwise, returns
/// false.
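///
/// A sketch of the intended effect (illustrative, for the lshr form with a
/// precondition guaranteeing x0 != 0): the loop body executes exactly
/// bitwidth(x0) - ctlz(x0) times, so the final count is
/// \code
///   cnt0 + (bitwidth(x0) - ctlz(x0))
/// \endcode
/// which transformLoopToCountable can materialize in the preheader.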
bool LoopIdiomRecognize::recognizeAndInsertFFS() {
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
return false;
Intrinsic::ID IntrinID;
Value *InitX;
Instruction *DefX = nullptr;
PHINode *CntPhi = nullptr;
Instruction *CntInst = nullptr;
// Help decide if the transformation is profitable. For the ShiftUntilZero
// idiom, this is always 6.
size_t IdiomCanonicalSize = 6;
if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
CntInst, CntPhi, DefX))
return false;
bool IsCntPhiUsedOutsideLoop = false;
for (User *U : CntPhi->users())
if (!CurLoop->contains(cast<Instruction>(U))) {
IsCntPhiUsedOutsideLoop = true;
break;
}
bool IsCntInstUsedOutsideLoop = false;
for (User *U : CntInst->users())
if (!CurLoop->contains(cast<Instruction>(U))) {
IsCntInstUsedOutsideLoop = true;
break;
}
// If both CntInst and CntPhi are used outside the loop the profitability
// is questionable.
if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
return false;
// For some CPUs the result of the CTLZ(X) intrinsic is undefined
// when X is 0. If we cannot guarantee X != 0, we need to check for this
// when expanding.
bool ZeroCheck = false;
// It is safe to assume the Preheader exists, as it was checked in the
// parent function RunOnLoop.
BasicBlock *PH = CurLoop->getLoopPreheader();
// If we are using the count instruction outside the loop, make sure we
// have a zero check as a precondition. Without the check the loop would run
// one iteration before any check of the input value. This means 0 and 1
// would have identical behavior in the original loop, while the CTLZ-based
// count would distinguish them; the zero check guarantees that case is
// never reached.
if (!IsCntPhiUsedOutsideLoop) {
auto *PreCondBB = PH->getSinglePredecessor();
if (!PreCondBB)
return false;
auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
if (!PreCondBI)
return false;
if (matchCondition(PreCondBI, PH) != InitX)
return false;
ZeroCheck = true;
}
// Check if the CTLZ / CTTZ intrinsic is profitable. Assume it is always
// profitable if we delete the loop. In its canonical form the loop has only
// 6 instructions:
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
// %shr = ashr %n.addr.0, 1
// %tobool = icmp eq %shr, 0
// %inc = add nsw %i.0, 1
// br i1 %tobool
const Value *Args[] = {InitX,
ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
// @llvm.dbg intrinsics don't count as they have no semantic effect.
auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
uint32_t HeaderSize =
std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
InstructionCost Cost =
TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
if (HeaderSize != IdiomCanonicalSize &&
Cost > TargetTransformInfo::TCC_Basic)
return false;
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
DefX->getDebugLoc(), ZeroCheck,
IsCntPhiUsedOutsideLoop);
return true;
}
/// Recognizes a population count idiom in a non-countable loop.
///
/// If detected, transforms the relevant code to issue the popcount intrinsic
/// function call, and returns true; otherwise, returns false.
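/// For illustration only (a hypothetical source-level sketch): the idiom
/// matched here is essentially the classic clear-lowest-set-bit counting
/// loop,
/// \code
///   if (x)                                  // the precondition
///     do { cnt++; x &= x - 1; } while (x);
/// \endcode
/// which the transformation rewrites to use @llvm.ctpop, so that the final
/// cnt becomes cnt0 + popcount(x).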
bool LoopIdiomRecognize::recognizePopcount() {
if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
return false;
// Counting the population is usually done with a few arithmetic
// instructions. Such instructions can be easily "absorbed" by vacant slots
// in a non-compact loop. Therefore, recognizing the popcount idiom only
// makes sense in a compact loop.
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
return false;
BasicBlock *LoopBody = *(CurLoop->block_begin());
if (LoopBody->size() >= 20) {
// The loop is too big, bail out.
return false;
}
// It should have a preheader containing nothing but an unconditional branch.
BasicBlock *PH = CurLoop->getLoopPreheader();
if (!PH || &PH->front() != PH->getTerminator())
return false;
auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
if (!EntryBI || EntryBI->isConditional())
return false;
// It should have a precondition block where the generated popcount intrinsic
// function can be inserted.
auto *PreCondBB = PH->getSinglePredecessor();
if (!PreCondBB)
return false;
auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
if (!PreCondBI || PreCondBI->isUnconditional())
return false;
Instruction *CntInst;
PHINode *CntPhi;
Value *Val;
if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
return false;
transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
return true;
}
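// Helper that emits the population-count intrinsic. As a rough illustration
// (the exact IR depends on the type of Val), for an i32 %val this produces:
//   %0 = call i32 @llvm.ctpop.i32(i32 %val)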
static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
const DebugLoc &DL) {
Value *Ops[] = {Val};
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
return CI;
}
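// Helper that emits the find-first-set style intrinsic (ctlz or cttz). As a
// rough illustration, for IID == Intrinsic::ctlz on an i32 %val with
// ZeroCheck == true this produces:
//   %0 = call i32 @llvm.ctlz.i32(i32 %val, i1 true)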
static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
const DebugLoc &DL, bool ZeroCheck,
Intrinsic::ID IID) {
Value *Ops[] = {Val, IRBuilder.getInt1(ZeroCheck)};
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
return CI;
}
/// Transform the following loop (Using CTLZ, CTTZ is similar):
/// loop:
/// CntPhi = PHI [Cnt0, CntInst]
/// PhiX = PHI [InitX, DefX]
/// CntInst = CntPhi + 1
/// DefX = PhiX >> 1
/// LOOP_BODY
/// Br: loop if (DefX != 0)
/// Use(CntPhi) or Use(CntInst)
///
/// Into:
/// If CntPhi used outside the loop:
/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
/// Count = CountPrev + 1
/// else
/// Count = BitWidth(InitX) - CTLZ(InitX)
/// loop:
/// CntPhi = PHI [Cnt0, CntInst]
/// PhiX = PHI [InitX, DefX]
/// PhiCount = PHI [Count, Dec]
/// CntInst = CntPhi + 1
/// DefX = PhiX >> 1
/// Dec = PhiCount - 1
/// LOOP_BODY
/// Br: loop if (Dec != 0)
/// Use(CountPrev + Cnt0) // Use(CntPhi)
/// or
/// Use(Count + Cnt0) // Use(CntInst)
///
/// If LOOP_BODY is empty the loop will be deleted.
/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
void LoopIdiomRecognize::transformLoopToCountable(
Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
IRBuilder<> Builder(PreheaderBr);
Builder.SetCurrentDebugLocation(DL);
// If there are no uses of CntPhi create:
// Count = BitWidth - CTLZ(InitX);
// NewCount = Count;
// If there are uses of CntPhi create:
// NewCount = BitWidth - CTLZ(InitX >> 1);
// Count = NewCount + 1;
Value *InitXNext;
if (IsCntPhiUsedOutsideLoop) {
if (DefX->getOpcode() == Instruction::AShr)
InitXNext = Builder.CreateAShr(InitX, 1);
else if (DefX->getOpcode() == Instruction::LShr)
InitXNext = Builder.CreateLShr(InitX, 1);
else if (DefX->getOpcode() == Instruction::Shl) // cttz
InitXNext = Builder.CreateShl(InitX, 1);
else
llvm_unreachable("Unexpected opcode!");
} else
InitXNext = InitX;
Value *Count =
createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
Type *CountTy = Count->getType();
Count = Builder.CreateSub(
ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
Value *NewCount = Count;
if (IsCntPhiUsedOutsideLoop)
Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->getType());
Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
// If the counter was being incremented in the loop, add NewCount to the
// counter's initial value, but only if the initial value is not zero.
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
if (!InitConst || !InitConst->isZero())
NewCount = Builder.CreateAdd(NewCount, CntInitVal);
} else {
// If the count was being decremented in the loop, subtract NewCount from
// the counter's initial value.
NewCount = Builder.CreateSub(CntInitVal, NewCount);
}
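// For illustration (hypothetical values): if the counter started at 10 and
// was incremented once per iteration and the computed count is 4, the value
// used outside the loop becomes 10 + 4 = 14; for a decrementing counter it
// becomes 10 - 4 = 6.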
// Step 2: Insert new IV and loop condition:
// loop:
// ...
// PhiCount = PHI [Count, Dec]
// ...
// Dec = PhiCount - 1
// ...
// Br: loop if (Dec != 0)
BasicBlock *Body = *(CurLoop->block_begin());
auto *LbBr = cast<BranchInst>(Body->getTerminator());
ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi", &Body->front());
Builder.SetInsertPoint(LbCond);
Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
TcPhi, ConstantInt::get(CountTy, 1), "tcdec", false, true));
TcPhi->addIncoming(Count, Preheader);
TcPhi->addIncoming(TcDec, Body);
CmpInst::Predicate Pred =
(LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
LbCond->setPredicate(Pred);
LbCond->setOperand(0, TcDec);
LbCond->setOperand(1, ConstantInt::get(CountTy, 0));
// Step 3: All the references to the original counter outside
// the loop are replaced with the NewCount
if (IsCntPhiUsedOutsideLoop)
CntPhi->replaceUsesOutsideBlock(NewCount, Body);
else
CntInst->replaceUsesOutsideBlock(NewCount, Body);
// step 4: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
}
void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
Instruction *CntInst,
PHINode *CntPhi, Value *Var) {
BasicBlock *PreHead = CurLoop->getLoopPreheader();
auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
const DebugLoc &DL = CntInst->getDebugLoc();
// Assuming before transformation, the loop is following:
// if (x) // the precondition
// do { cnt++; x &= x - 1; } while(x);
// Step 1: Insert the ctpop instruction at the end of the precondition block
IRBuilder<> Builder(PreCondBr);
Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
{
PopCnt = createPopcntIntrinsic(Builder, Var, DL);
NewCount = PopCntZext =
Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
if (NewCount != PopCnt)
(cast<Instruction>(NewCount))->setDebugLoc(DL);
// TripCnt is exactly the number of iterations the loop has
TripCnt = NewCount;
// If the population counter's initial value is not zero, insert Add Inst.
Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
if (!InitConst || !InitConst->isZero()) {
NewCount = Builder.CreateAdd(NewCount, CntInitVal);
(cast<Instruction>(NewCount))->setDebugLoc(DL);
}
}
// Step 2: Replace the precondition "if (x == 0) goto loop-exit" with
// "if (NewCount == 0) goto loop-exit". Without this change, the intrinsic
// function would be partially dead code, and downstream passes would drag
// it back from the precondition block to the preheader.
{
ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
Value *Opnd0 = PopCntZext;
Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
if (PreCond->getOperand(0) != Var)
std::swap(Opnd0, Opnd1);
ICmpInst *NewPreCond = cast<ICmpInst>(
Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
PreCondBr->setCondition(NewPreCond);
RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
}
// Step 3: Note that the population count is exactly the trip count of the
// loop in question, which enables us to convert the loop from a
// noncountable loop into a countable one. The benefit is twofold:
//
// - If the loop only counts population, the entire loop becomes dead after
// the transformation. It is a lot easier to prove a countable loop dead
// than to prove a noncountable one. (In some C dialects, an infinite loop
// isn't dead even if it computes nothing useful. In general, DCE needs
// to prove a noncountable loop finite before it can safely delete it.)
//
// - If the loop also performs something else, it remains alive.
// Since it is transformed to countable form, it can be aggressively
// optimized by some optimizations which are in general not applicable
// to a noncountable loop.
//
// After this step, this loop (conceptually) would look like the following:
// newcnt = __builtin_ctpop(x);
// t = newcnt;
// if (x)
// do { cnt++; x &= x-1; t--; } while (t > 0);
BasicBlock *Body = *(CurLoop->block_begin());
{
auto *LbBr = cast<BranchInst>(Body->getTerminator());
ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
Type *Ty = TripCnt->getType();
PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
Builder.SetInsertPoint(LbCond);
Instruction *TcDec = cast<Instruction>(
Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
"tcdec", false, true));
TcPhi->addIncoming(TripCnt, PreHead);
TcPhi->addIncoming(TcDec, Body);
CmpInst::Predicate Pred =
(LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
LbCond->setPredicate(Pred);
LbCond->setOperand(0, TcDec);
LbCond->setOperand(1, ConstantInt::get(Ty, 0));
}
// Step 4: All the references to the original population counter outside
// the loop are replaced with the NewCount -- the value returned from
// __builtin_ctpop().
CntInst->replaceUsesOutsideBlock(NewCount, Body);
// step 5: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
}
/// Match loop-invariant value.
template <typename SubPattern_t> struct match_LoopInvariant {
SubPattern_t SubPattern;
const Loop *L;
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
: SubPattern(SP), L(L) {}
template <typename ITy> bool match(ITy *V) {
return L->isLoopInvariant(V) && SubPattern.match(V);
}
};
/// Matches if the value is loop-invariant.
template <typename Ty>
inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
return match_LoopInvariant<Ty>(M, L);
}
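// Example use (a sketch mirroring the matcher in the bit-test detection
// below): require that the bit mask is a loop-invariant "1 << bitpos"
// computation:
//   match(CmpLHS, m_c_And(m_Value(CurrX),
//                         m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
//                                         CurLoop)));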
/// Return true if the idiom is detected in the loop.
///
/// The core idiom we are trying to detect is:
/// \code
/// entry:
/// <...>
/// %bitmask = shl i32 1, %bitpos
/// br label %loop
///
/// loop:
/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
/// %x.next = shl i32 %x.curr, 1
/// <...>
/// br i1 %x.curr.isbitunset, label %loop, label %end
///
/// end:
/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
/// <...>
/// \endcode
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
Value *&BitMask, Value *&BitPos,
Value *&CurrX, Instruction *&NextX) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" Performing shift-until-bittest idiom detection.\n");
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
return false;
}
BasicBlock *LoopHeaderBB = CurLoop->getHeader();
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
assert(LoopPreheaderBB && "There is always a loop preheader.");
using namespace PatternMatch;
// Step 1: Check if the loop backedge is in desirable form.
ICmpInst::Predicate Pred;
Value *CmpLHS, *CmpRHS;
BasicBlock *TrueBB, *FalseBB;
if (!match(LoopHeaderBB->getTerminator(),
m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
return false;
}
// Step 2: Check if the backedge's condition is in desirable form.
auto MatchVariableBitMask = [&]() {
return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
match(CmpLHS,
m_c_And(m_Value(CurrX),
m_CombineAnd(
m_Value(BitMask),
m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
CurLoop))));
};
auto MatchConstantBitMask = [&]() {
return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
match(CmpLHS, m_And(m_Value(CurrX),
m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
(BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
};
auto MatchDecomposableConstantBitMask = [&]() {
APInt Mask;
return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) &&
ICmpInst::isEquality(Pred) && Mask.isPowerOf2() &&
(BitMask = ConstantInt::get(CurrX->getType(), Mask)) &&
(BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2()));
};
if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
!MatchDecomposableConstantBitMask()) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
return false;
}
// Step 3: Check if the recurrence is in desirable form.
auto *CurrXPN = dyn_cast<PHINode>(CurrX);
if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
return false;
}
BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
NextX =
dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
assert(CurLoop->isLoopInvariant(BaseX) &&
"Expected BaseX to be avaliable in the preheader!");
if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
// FIXME: support right-shift?
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
return false;
}
// Step 4: Check if the backedge's destinations are in desirable form.
assert(ICmpInst::isEquality(Pred) &&
"Should only get equality predicates here.");
// cmp-br is commutative, so canonicalize to a single variant.
if (Pred != ICmpInst::Predicate::ICMP_EQ) {
Pred = ICmpInst::getInversePredicate(Pred);
std::swap(TrueBB, FalseBB);
}
// We expect to exit loop when comparison yields false,
// so when it yields true we should branch back to loop header.
if (TrueBB != LoopHeaderBB) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
return false;
}
// Okay, idiom checks out.
return true;
}
/// Look for the following loop:
/// \code
/// entry:
/// <...>
/// %bitmask = shl i32 1, %bitpos
/// br label %loop
///
/// loop:
/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
/// %x.next = shl i32 %x.curr, 1
/// <...>
/// br i1 %x.curr.isbitunset, label %loop, label %end
///
/// end:
/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
/// <...>
/// \endcode
///
/// And transform it into:
/// \code
/// entry:
/// %bitmask = shl i32 1, %bitpos
/// %lowbitmask = add i32 %bitmask, -1
/// %mask = or i32 %lowbitmask, %bitmask
/// %x.masked = and i32 %x, %mask
/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
/// i1 true)
/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
/// %tripcount = add i32 %backedgetakencount, 1
/// %x.curr = shl i32 %x, %backedgetakencount
/// %x.next = shl i32 %x, %tripcount
/// br label %loop
///
/// loop:
/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
/// %loop.iv.next = add nuw i32 %loop.iv, 1
/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
/// <...>
/// br i1 %loop.ivcheck, label %end, label %loop
///
/// end:
/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
/// <...>
/// \endcode
bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
bool MadeChange = false;
Value *X, *BitMask, *BitPos, *XCurr;
Instruction *XNext;
if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
XNext)) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" shift-until-bittest idiom detection failed.\n");
return MadeChange;
}
LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n");
// Ok, it is the idiom we were looking for; we *could* transform this loop,
// but is it profitable to transform?
BasicBlock *LoopHeaderBB = CurLoop->getHeader();
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
assert(LoopPreheaderBB && "There is always a loop preheader.");
BasicBlock *SuccessorBB = CurLoop->getExitBlock();
assert(SuccessorBB && "There is only a single successor.");
IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
Intrinsic::ID IntrID = Intrinsic::ctlz;
Type *Ty = X->getType();
unsigned Bitwidth = Ty->getScalarSizeInBits();
TargetTransformInfo::TargetCostKind CostKind =
TargetTransformInfo::TCK_SizeAndLatency;
// The rewrite is considered to be unprofitable if and only if the
// intrinsic/shift we'll use are not cheap. Note that we are okay with *just*
// making the loop countable, even if nothing else changes.
IntrinsicCostAttributes Attrs(
IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()});
InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
if (Cost > TargetTransformInfo::TCC_Basic) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" Intrinsic is too costly, not beneficial\n");
return MadeChange;
}
if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
TargetTransformInfo::TCC_Basic) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n");
return MadeChange;
}
// Ok, transform appears worthwhile.
MadeChange = true;
// Step 1: Compute the loop trip count.
Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
BitPos->getName() + ".lowbitmask");
Value *Mask =
Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()},
/*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
Value *XMaskedNumActiveBits = Builder.CreateSub(
ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
XMasked->getName() + ".numactivebits", /*HasNUW=*/true,
/*HasNSW=*/Bitwidth != 2);
Value *XMaskedLeadingOnePos =
Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
XMasked->getName() + ".leadingonepos", /*HasNUW=*/false,
/*HasNSW=*/Bitwidth > 2);
Value *LoopBackedgeTakenCount = Builder.CreateSub(
BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount",
/*HasNUW=*/true, /*HasNSW=*/true);
// We know loop's backedge-taken count, but what's loop's trip count?
// Note that NUW is always safe, while NSW is only safe for bitwidths != 2.
Value *LoopTripCount =
Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
/*HasNSW=*/Bitwidth != 2);
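// For illustration (hypothetical values): with i32 X = 0b10110 and
// BitPos = 6, Mask = 0b1111111 and XMasked = 0b10110, whose leading-one
// position is 4, so the backedge-taken count is 6 - 4 = 2 and the trip count
// is 3: the header runs for X, X << 1 and X << 2, exiting once bit 6 becomes
// set.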
// Step 2: Compute the recurrence's final value without a loop.
// NewX is always safe to compute, because `LoopBackedgeTakenCount`
// will always be smaller than `bitwidth(X)`, i.e. we never get poison.
Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
NewX->takeName(XCurr);
if (auto *I = dyn_cast<Instruction>(NewX))
I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
Value *NewXNext;
// Rewriting XNext is more complicated, however, because `X << LoopTripCount`
// will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
// iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
// that isn't the case, we'll need to emit an alternative, safe IR.
if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
PatternMatch::match(
BitPos, PatternMatch::m_SpecificInt_ICMP(
ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(),
Ty->getScalarSizeInBits() - 1))))
NewXNext = Builder.CreateShl(X, LoopTripCount);
else {
// Otherwise, just additionally shift by one. It's the smallest solution;
// alternatively, we could check that NewX is INT_MIN (or that BitPos is
// bitwidth(x) - 1) and select 0 instead.
NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
}
NewXNext->takeName(XNext);
if (auto *I = dyn_cast<Instruction>(NewXNext))
I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
// Step 3: Adjust the successor basic block to receive the computed
// recurrence's final value instead of the recurrence itself.
XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
// Step 4: Rewrite the loop into a countable form, with canonical IV.
// The new canonical induction variable.
Builder.SetInsertPoint(&LoopHeaderBB->front());
auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
// The induction itself.
// Note that NUW is always safe, while NSW is only safe for bitwidths != 2.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
auto *IVNext =
Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
/*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
// The loop trip count check.
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
IV->addIncoming(IVNext, LoopHeaderBB);
// Step 5: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
// Other passes will take care of actually deleting the loop if possible.
LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n");
++NumShiftUntilBitTest;
return MadeChange;
}
/// Return true if the idiom is detected in the loop.
///
/// The core idiom we are trying to detect is:
/// \code
/// entry:
/// <...>
/// %start = <...>
/// %extraoffset = <...>
/// <...>
/// br label %for.cond
///
/// loop:
/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
/// %iv.next = add i8 %iv, 1
/// <...>
/// br i1 %val.shifted.iszero, label %end, label %loop
///
/// end:
/// %iv.res = phi i8 [ %iv, %loop ] <...>
/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
/// <...>
/// \endcode
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
Instruction *&ValShiftedIsZero,
Intrinsic::ID &IntrinID, Instruction *&IV,
Value *&Start, Value *&Val,
const SCEV *&ExtraOffsetExpr,
bool &InvertedCond) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" Performing shift-until-zero idiom detection.\n");
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
return false;
}
Instruction *ValShifted, *NBits, *IVNext;
Value *ExtraOffset;
BasicBlock *LoopHeaderBB = CurLoop->getHeader();
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
assert(LoopPreheaderBB && "There is always a loop preheader.");
using namespace PatternMatch;
// Step 1: Check if the loop backedge and its condition are in desirable form.
ICmpInst::Predicate Pred;
BasicBlock *TrueBB, *FalseBB;
if (!match(LoopHeaderBB->getTerminator(),
m_Br(m_Instruction(ValShiftedIsZero), m_BasicBlock(TrueBB),
m_BasicBlock(FalseBB))) ||
!match(ValShiftedIsZero,
m_ICmp(Pred, m_Instruction(ValShifted), m_Zero())) ||
!ICmpInst::isEquality(Pred)) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
return false;
}
// Step 2: Check if the comparison's operand is in desirable form.
// FIXME: Val could be a one-input PHI node, which we should look past.
if (!match(ValShifted, m_Shift(m_LoopInvariant(m_Value(Val), CurLoop),
m_Instruction(NBits)))) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad comparisons value computation.\n");
return false;
}
IntrinID = ValShifted->getOpcode() == Instruction::Shl ? Intrinsic::cttz
: Intrinsic::ctlz;
// Step 3: Check if the shift amount is in desirable form.
if (match(NBits, m_c_Add(m_Instruction(IV),
m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
(NBits->hasNoSignedWrap() || NBits->hasNoUnsignedWrap()))
ExtraOffsetExpr = SE->getNegativeSCEV(SE->getSCEV(ExtraOffset));
else if (match(NBits,
m_Sub(m_Instruction(IV),
m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
NBits->hasNoSignedWrap())
ExtraOffsetExpr = SE->getSCEV(ExtraOffset);
else {
IV = NBits;
ExtraOffsetExpr = SE->getZero(NBits->getType());
}
// Step 4: Check if the recurrence is in desirable form.
auto *IVPN = dyn_cast<PHINode>(IV);
if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
return false;
}
Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
if (!IVNext || !match(IVNext, m_Add(m_Specific(IVPN), m_One()))) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
return false;
}
// Step 5: Check if the backedge's destinations are in desirable form.
assert(ICmpInst::isEquality(Pred) &&
"Should only get equality predicates here.");
// cmp-br is commutative, so canonicalize to a single variant.
InvertedCond = Pred != ICmpInst::Predicate::ICMP_EQ;
if (InvertedCond) {
Pred = ICmpInst::getInversePredicate(Pred);
std::swap(TrueBB, FalseBB);
}
// We expect to exit loop when comparison yields true,
// so when it yields false we should branch back to loop header.
if (FalseBB != LoopHeaderBB) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
return false;
}
// The new, countable, loop will certainly only run a known number of
// iterations; it won't be infinite. But the old loop might be infinite
// under certain conditions. For logical shifts, the value will become zero
// after at most bitwidth(%Val) loop iterations. However, for an arithmetic
// right-shift, if the sign bit was set, the value will never become zero,
// and the loop may never finish.
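// For illustration: with an i8 %val of 0x80 the sign bit is set, so
// "ashr i8 %val, %nbits" stays negative (eventually sticking at -1) for
// every in-range shift amount and never reaches zero.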
if (ValShifted->getOpcode() == Instruction::AShr &&
!isMustProgress(CurLoop) && !SE->isKnownNonNegative(SE->getSCEV(Val))) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Can not prove the loop is finite.\n");
return false;
}
// Okay, idiom checks out.
return true;
}
/// Look for the following loop:
/// \code
/// entry:
/// <...>
/// %start = <...>
/// %extraoffset = <...>
/// <...>
/// br label %for.cond
///
/// loop:
/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
/// %iv.next = add i8 %iv, 1
/// <...>
/// br i1 %val.shifted.iszero, label %end, label %loop
///
/// end:
/// %iv.res = phi i8 [ %iv, %loop ] <...>
/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
/// <...>
/// \endcode
///
/// And transform it into:
/// \code
/// entry:
/// <...>
/// %start = <...>
/// %extraoffset = <...>
/// <...>
/// %val.numleadingzeros = call i8 @llvm.ct{l,t}z.i8(i8 %val, i1 0)
/// %val.numactivebits = sub i8 8, %val.numleadingzeros
/// %extraoffset.neg = sub i8 0, %extraoffset
/// %tmp = add i8 %val.numactivebits, %extraoffset.neg
/// %iv.final = call i8 @llvm.smax.i8(i8 %tmp, i8 %start)
/// %loop.tripcount = sub i8 %iv.final, %start
/// br label %loop
///
/// loop:
/// %loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
/// %loop.iv.next = add i8 %loop.iv, 1
/// %loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
/// %iv = add i8 %loop.iv, %start
/// <...>
/// br i1 %loop.ivcheck, label %end, label %loop
///
/// end:
/// %iv.res = phi i8 [ %iv.final, %loop ] <...>
/// <...>
/// \endcode
bool LoopIdiomRecognize::recognizeShiftUntilZero() {
bool MadeChange = false;
Instruction *ValShiftedIsZero;
Intrinsic::ID IntrID;
Instruction *IV;
Value *Start, *Val;
const SCEV *ExtraOffsetExpr;
bool InvertedCond;
if (!detectShiftUntilZeroIdiom(CurLoop, SE, ValShiftedIsZero, IntrID, IV,
Start, Val, ExtraOffsetExpr, InvertedCond)) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" shift-until-zero idiom detection failed.\n");
return MadeChange;
}
LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom detected!\n");
// Ok, it is the idiom we were looking for; we *could* transform this loop,
// but is it profitable to transform?
BasicBlock *LoopHeaderBB = CurLoop->getHeader();
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
assert(LoopPreheaderBB && "There is always a loop preheader.");
BasicBlock *SuccessorBB = CurLoop->getExitBlock();
assert(SuccessorBB && "There is only a single successor.");
IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
Builder.SetCurrentDebugLocation(IV->getDebugLoc());
Type *Ty = Val->getType();
unsigned Bitwidth = Ty->getScalarSizeInBits();
TargetTransformInfo::TargetCostKind CostKind =
TargetTransformInfo::TCK_SizeAndLatency;
// The rewrite is considered to be unprofitable if and only if the
// intrinsic we'll use is not cheap. Note that we are okay with *just*
// making the loop countable, even if nothing else changes.
IntrinsicCostAttributes Attrs(
IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getFalse()});
InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
if (Cost > TargetTransformInfo::TCC_Basic) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" Intrinsic is too costly, not beneficial\n");
return MadeChange;
}
// Ok, transform appears worthwhile.
MadeChange = true;
bool OffsetIsZero = false;
if (auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
OffsetIsZero = ExtraOffsetExprC->isZero();
// Step 1: Compute the loop's final IV value / trip count.
CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
IntrID, Ty, {Val, /*is_zero_undef=*/Builder.getFalse()},
/*FMFSource=*/nullptr, Val->getName() + ".numleadingzeros");
Value *ValNumActiveBits = Builder.CreateSub(
ConstantInt::get(Ty, Ty->getScalarSizeInBits()), ValNumLeadingZeros,
Val->getName() + ".numactivebits", /*HasNUW=*/true,
/*HasNSW=*/Bitwidth != 2);
SCEVExpander Expander(*SE, *DL, "loop-idiom");
Expander.setInsertPoint(&*Builder.GetInsertPoint());
Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
Value *ValNumActiveBitsOffset = Builder.CreateAdd(
ValNumActiveBits, ExtraOffset, ValNumActiveBits->getName() + ".offset",
/*HasNUW=*/OffsetIsZero, /*HasNSW=*/true);
Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
{ValNumActiveBitsOffset, Start},
/*FMFSource=*/nullptr, "iv.final");
auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
/*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
// FIXME: or when the offset was `add nuw`
// We know loop's backedge-taken count, but what's loop's trip count?
Value *LoopTripCount =
Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
/*HasNSW=*/Bitwidth != 2);
// Step 2: Adjust the successor basic block to receive the original
// induction variable's final value instead of the original IV itself.
IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
// Step 3: Rewrite the loop into a countable form, with canonical IV.
// The new canonical induction variable.
Builder.SetInsertPoint(&LoopHeaderBB->front());
auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
// The induction itself.
Builder.SetInsertPoint(LoopHeaderBB->getFirstNonPHI());
auto *CIVNext =
Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",
/*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
// The loop trip count check.
auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
auto *NewIVCheck = CIVCheck;
if (InvertedCond) {
NewIVCheck = Builder.CreateNot(CIVCheck);
NewIVCheck->takeName(ValShiftedIsZero);
}
// The original IV, but rebased to be an offset to the CIV.
auto *IVDePHId = Builder.CreateAdd(CIV, Start, "", /*HasNUW=*/false,
/*HasNSW=*/true); // FIXME: what about NUW?
IVDePHId->takeName(IV);
// The loop terminator.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
CIV->addIncoming(CIVNext, LoopHeaderBB);
// Step 4: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
// Step 5: Try to cleanup the loop's body somewhat.
IV->replaceAllUsesWith(IVDePHId);
IV->eraseFromParent();
ValShiftedIsZero->replaceAllUsesWith(NewIVCheck);
ValShiftedIsZero->eraseFromParent();
// Other passes will take care of actually deleting the loop if possible.
LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom optimized!\n");
++NumShiftUntilZero;
return MadeChange;
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
index 6cf5d9285b90..0096a3d44d85 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
@@ -1,222 +1,222 @@
//===- CoverageExporterLcov.cpp - Code coverage export --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements export of code coverage data to lcov trace file format.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
//
// The trace file code coverage export uses the following format (see also
// https://linux.die.net/man/1/geninfo). Each quoted string appears on its own
// line; the indentation shown here is only for documentation purposes.
//
// - for each source file:
// - "SF:<absolute path to source file>"
// - for each function:
// - "FN:<line number of function start>,<function name>"
// - for each function:
// - "FNDA:<execution count>,<function name>"
// - "FNF:<number of functions found>"
// - "FNH:<number of functions hit>"
// - for each instrumented line:
// - "DA:<line number>,<execution count>[,<checksum>]
// - for each branch:
// - "BRDA:<line number>,<branch pair id>,<branch id>,<count>"
// - "BRF:<number of branches found>"
// - "BRH:<number of branches hit>"
// - "LH:<number of lines with non-zero execution count>"
// - "LF:<number of instrumented lines>"
// - "end_of_record"
//
// If the user is exporting summary information only, then the FN, FNDA, and DA
// lines will not be present.
//
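// As a rough illustration (hypothetical file name and counts), a
// summary-only record therefore has the shape:
//
//   SF:/tmp/example.c
//   FNF:1
//   FNH:1
//   BRF:2
//   BRH:1
//   LF:10
//   LH:8
//   end_of_record
//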
//===----------------------------------------------------------------------===//
#include "CoverageExporterLcov.h"
#include "CoverageReport.h"
using namespace llvm;
namespace {
void renderFunctionSummary(raw_ostream &OS,
const FileCoverageSummary &Summary) {
OS << "FNF:" << Summary.FunctionCoverage.getNumFunctions() << '\n'
<< "FNH:" << Summary.FunctionCoverage.getExecuted() << '\n';
}
void renderFunctions(
raw_ostream &OS,
const iterator_range<coverage::FunctionRecordIterator> &Functions) {
for (const auto &F : Functions) {
auto StartLine = F.CountedRegions.front().LineStart;
OS << "FN:" << StartLine << ',' << F.Name << '\n';
}
for (const auto &F : Functions)
OS << "FNDA:" << F.ExecutionCount << ',' << F.Name << '\n';
}
void renderLineExecutionCounts(raw_ostream &OS,
const coverage::CoverageData &FileCoverage) {
coverage::LineCoverageIterator LCI{FileCoverage, 1};
coverage::LineCoverageIterator LCIEnd = LCI.getEnd();
for (; LCI != LCIEnd; ++LCI) {
const coverage::LineCoverageStats &LCS = *LCI;
if (LCS.isMapped()) {
OS << "DA:" << LCS.getLine() << ',' << LCS.getExecutionCount() << '\n';
}
}
}
std::vector<llvm::coverage::CountedRegion>
collectNestedBranches(const coverage::CoverageMapping &Coverage,
ArrayRef<llvm::coverage::ExpansionRecord> Expansions,
int ViewDepth = 0, int SrcLine = 0) {
std::vector<llvm::coverage::CountedRegion> Branches;
for (const auto &Expansion : Expansions) {
auto ExpansionCoverage = Coverage.getCoverageForExpansion(Expansion);
// If we're at the top level, set the corresponding source line.
if (ViewDepth == 0)
SrcLine = Expansion.Region.LineStart;
// Recursively collect branches from nested expansions.
auto NestedExpansions = ExpansionCoverage.getExpansions();
auto NestedExBranches = collectNestedBranches(Coverage, NestedExpansions,
ViewDepth + 1, SrcLine);
append_range(Branches, NestedExBranches);
// Add branches from this level of expansion.
auto ExBranches = ExpansionCoverage.getBranches();
for (auto B : ExBranches)
if (B.FileID == Expansion.FileID) {
B.LineStart = SrcLine;
Branches.push_back(B);
}
}
return Branches;
}
bool sortLine(llvm::coverage::CountedRegion I,
llvm::coverage::CountedRegion J) {
return (I.LineStart < J.LineStart) ||
((I.LineStart == J.LineStart) && (I.ColumnStart < J.ColumnStart));
}
void renderBranchExecutionCounts(raw_ostream &OS,
const coverage::CoverageMapping &Coverage,
const coverage::CoverageData &FileCoverage) {
std::vector<llvm::coverage::CountedRegion> Branches =
FileCoverage.getBranches();
// Recursively collect branches for all file expansions.
std::vector<llvm::coverage::CountedRegion> ExBranches =
collectNestedBranches(Coverage, FileCoverage.getExpansions());
// Append Expansion Branches to Source Branches.
append_range(Branches, ExBranches);
// Sort branches based on line number to ensure branches corresponding to the
// same source line are counted together.
llvm::sort(Branches, sortLine);
auto NextBranch = Branches.begin();
auto EndBranch = Branches.end();
// Branches with the same source line are enumerated individually
// (BranchIndex) as well as based on True/False pairs (PairIndex).
while (NextBranch != EndBranch) {
unsigned CurrentLine = NextBranch->LineStart;
unsigned PairIndex = 0;
unsigned BranchIndex = 0;
while (NextBranch != EndBranch && CurrentLine == NextBranch->LineStart) {
if (!NextBranch->Folded) {
unsigned BC1 = NextBranch->ExecutionCount;
unsigned BC2 = NextBranch->FalseExecutionCount;
bool BranchNotExecuted = (BC1 == 0 && BC2 == 0);
for (int I = 0; I < 2; I++, BranchIndex++) {
OS << "BRDA:" << CurrentLine << ',' << PairIndex << ','
<< BranchIndex;
if (BranchNotExecuted)
OS << ',' << '-' << '\n';
else
OS << ',' << (I == 0 ? BC1 : BC2) << '\n';
}
PairIndex++;
}
NextBranch++;
}
}
}
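// For illustration (hypothetical counts): two branch conditions on source
// line 10, where the first was taken 5 times true and 0 times false and the
// second never executed, are emitted as:
//   BRDA:10,0,0,5
//   BRDA:10,0,1,0
//   BRDA:10,1,2,-
//   BRDA:10,1,3,-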
void renderLineSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
OS << "LF:" << Summary.LineCoverage.getNumLines() << '\n'
<< "LH:" << Summary.LineCoverage.getCovered() << '\n';
}
void renderBranchSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
OS << "BRF:" << Summary.BranchCoverage.getNumBranches() << '\n'
- << "BFH:" << Summary.BranchCoverage.getCovered() << '\n';
+ << "BRH:" << Summary.BranchCoverage.getCovered() << '\n';
}
void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
const std::string &Filename,
const FileCoverageSummary &FileReport, bool ExportSummaryOnly,
bool SkipFunctions) {
OS << "SF:" << Filename << '\n';
if (!ExportSummaryOnly && !SkipFunctions) {
renderFunctions(OS, Coverage.getCoveredFunctions(Filename));
}
renderFunctionSummary(OS, FileReport);
if (!ExportSummaryOnly) {
// Calculate and render detailed coverage information for the given file.
auto FileCoverage = Coverage.getCoverageForFile(Filename);
renderLineExecutionCounts(OS, FileCoverage);
renderBranchExecutionCounts(OS, Coverage, FileCoverage);
}
renderBranchSummary(OS, FileReport);
renderLineSummary(OS, FileReport);
OS << "end_of_record\n";
}
void renderFiles(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
ArrayRef<std::string> SourceFiles,
ArrayRef<FileCoverageSummary> FileReports,
bool ExportSummaryOnly, bool SkipFunctions) {
for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I)
renderFile(OS, Coverage, SourceFiles[I], FileReports[I], ExportSummaryOnly,
SkipFunctions);
}
} // end anonymous namespace
void CoverageExporterLcov::renderRoot(const CoverageFilters &IgnoreFilters) {
std::vector<std::string> SourceFiles;
for (StringRef SF : Coverage.getUniqueSourceFiles()) {
if (!IgnoreFilters.matchesFilename(SF))
SourceFiles.emplace_back(SF);
}
renderRoot(SourceFiles);
}
void CoverageExporterLcov::renderRoot(ArrayRef<std::string> SourceFiles) {
FileCoverageSummary Totals = FileCoverageSummary("Totals");
auto FileReports = CoverageReport::prepareFileReports(Coverage, Totals,
SourceFiles, Options);
renderFiles(OS, Coverage, SourceFiles, FileReports, Options.ExportSummaryOnly,
Options.SkipFunctions);
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 48ae92f734c7..9d461b08f3f8 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1,2693 +1,2698 @@
//===-- llvm-objdump.cpp - Object file dumping utility for llvm -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This program is a utility that works like binutils "objdump", that is, it
// dumps out a plethora of information about an object file depending on the
// flags.
//
// The flags and output of this program should be near identical to those of
// binutils objdump.
//
//===----------------------------------------------------------------------===//
#include "llvm-objdump.h"
#include "COFFDump.h"
#include "ELFDump.h"
#include "MachODump.h"
#include "ObjdumpOptID.h"
#include "SourcePrinter.h"
#include "WasmDump.h"
#include "XCOFFDump.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/FaultMapParser.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
#include <cstring>
#include <system_error>
#include <unordered_map>
#include <utility>
using namespace llvm;
using namespace llvm::object;
using namespace llvm::objdump;
using namespace llvm::opt;
namespace {
class CommonOptTable : public opt::OptTable {
public:
CommonOptTable(ArrayRef<Info> OptionInfos, const char *Usage,
const char *Description)
: OptTable(OptionInfos), Usage(Usage), Description(Description) {
setGroupedShortOptions(true);
}
void printHelp(StringRef Argv0, bool ShowHidden = false) const {
Argv0 = sys::path::filename(Argv0);
opt::OptTable::printHelp(outs(), (Argv0 + Usage).str().c_str(), Description,
ShowHidden, ShowHidden);
// TODO Replace this with OptTable API once it adds extrahelp support.
outs() << "\nPass @FILE as argument to read options from FILE.\n";
}
private:
const char *Usage;
const char *Description;
};
// ObjdumpOptID is in ObjdumpOptID.h
#define PREFIX(NAME, VALUE) const char *const OBJDUMP_##NAME[] = VALUE;
#include "ObjdumpOpts.inc"
#undef PREFIX
static constexpr opt::OptTable::Info ObjdumpInfoTable[] = {
#define OBJDUMP_nullptr nullptr
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{OBJDUMP_##PREFIX, NAME, HELPTEXT, \
METAVAR, OBJDUMP_##ID, opt::Option::KIND##Class, \
PARAM, FLAGS, OBJDUMP_##GROUP, \
OBJDUMP_##ALIAS, ALIASARGS, VALUES},
#include "ObjdumpOpts.inc"
#undef OPTION
#undef OBJDUMP_nullptr
};
class ObjdumpOptTable : public CommonOptTable {
public:
ObjdumpOptTable()
: CommonOptTable(ObjdumpInfoTable, " [options] <input object files>",
"llvm object file dumper") {}
};
enum OtoolOptID {
OTOOL_INVALID = 0, // This is not an option ID.
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
OTOOL_##ID,
#include "OtoolOpts.inc"
#undef OPTION
};
#define PREFIX(NAME, VALUE) const char *const OTOOL_##NAME[] = VALUE;
#include "OtoolOpts.inc"
#undef PREFIX
static constexpr opt::OptTable::Info OtoolInfoTable[] = {
#define OTOOL_nullptr nullptr
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{OTOOL_##PREFIX, NAME, HELPTEXT, \
METAVAR, OTOOL_##ID, opt::Option::KIND##Class, \
PARAM, FLAGS, OTOOL_##GROUP, \
OTOOL_##ALIAS, ALIASARGS, VALUES},
#include "OtoolOpts.inc"
#undef OPTION
#undef OTOOL_nullptr
};
class OtoolOptTable : public CommonOptTable {
public:
OtoolOptTable()
: CommonOptTable(OtoolInfoTable, " [option...] [file...]",
"Mach-O object file displaying tool") {}
};
} // namespace
#define DEBUG_TYPE "objdump"
static uint64_t AdjustVMA;
static bool AllHeaders;
static std::string ArchName;
bool objdump::ArchiveHeaders;
bool objdump::Demangle;
bool objdump::Disassemble;
bool objdump::DisassembleAll;
bool objdump::SymbolDescription;
static std::vector<std::string> DisassembleSymbols;
static bool DisassembleZeroes;
static std::vector<std::string> DisassemblerOptions;
DIDumpType objdump::DwarfDumpType;
static bool DynamicRelocations;
static bool FaultMapSection;
static bool FileHeaders;
bool objdump::SectionContents;
static std::vector<std::string> InputFilenames;
bool objdump::PrintLines;
static bool MachOOpt;
std::string objdump::MCPU;
std::vector<std::string> objdump::MAttrs;
bool objdump::ShowRawInsn;
bool objdump::LeadingAddr;
static bool RawClangAST;
bool objdump::Relocations;
bool objdump::PrintImmHex;
bool objdump::PrivateHeaders;
std::vector<std::string> objdump::FilterSections;
bool objdump::SectionHeaders;
static bool ShowLMA;
bool objdump::PrintSource;
static uint64_t StartAddress;
static bool HasStartAddressFlag;
static uint64_t StopAddress = UINT64_MAX;
static bool HasStopAddressFlag;
bool objdump::SymbolTable;
static bool SymbolizeOperands;
static bool DynamicSymbolTable;
std::string objdump::TripleName;
bool objdump::UnwindInfo;
static bool Wide;
std::string objdump::Prefix;
uint32_t objdump::PrefixStrip;
DebugVarsFormat objdump::DbgVariables = DVDisabled;
int objdump::DbgIndent = 52;
static StringSet<> DisasmSymbolSet;
StringSet<> objdump::FoundSectionSet;
static StringRef ToolName;
namespace {
struct FilterResult {
// True if the section should not be skipped.
bool Keep;
// True if the index counter should be incremented, even if the section should
// be skipped. For example, sections may be skipped if they are not included
// in the --section flag, but we still want those to count toward the section
// count.
bool IncrementIndex;
};
} // namespace
static FilterResult checkSectionFilter(object::SectionRef S) {
if (FilterSections.empty())
return {/*Keep=*/true, /*IncrementIndex=*/true};
Expected<StringRef> SecNameOrErr = S.getName();
if (!SecNameOrErr) {
consumeError(SecNameOrErr.takeError());
return {/*Keep=*/false, /*IncrementIndex=*/false};
}
StringRef SecName = *SecNameOrErr;
// StringSet does not allow an empty key, so avoid adding sections with
// no name (such as the section with index 0) here.
if (!SecName.empty())
FoundSectionSet.insert(SecName);
// Only show the section if it's in the FilterSections list, but always
// increment so the indexing is stable.
return {/*Keep=*/is_contained(FilterSections, SecName),
/*IncrementIndex=*/true};
}
SectionFilter objdump::ToolSectionFilter(object::ObjectFile const &O,
uint64_t *Idx) {
// Start at UINT64_MAX so that the first index returned after an increment is
// zero (after the unsigned wrap).
if (Idx)
*Idx = UINT64_MAX;
return SectionFilter(
[Idx](object::SectionRef S) {
FilterResult Result = checkSectionFilter(S);
if (Idx != nullptr && Result.IncrementIndex)
*Idx += 1;
return Result.Keep;
},
O);
}
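// Typical use (a sketch): iterate only the sections selected by -j/--section
// while keeping a stable section index across skipped sections:
//   uint64_t Idx;
//   for (const SectionRef &Section : ToolSectionFilter(*Obj, &Idx)) {
//     // ... Idx reflects the section's position within the object file ...
//   }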
std::string objdump::getFileNameForError(const object::Archive::Child &C,
unsigned Index) {
Expected<StringRef> NameOrErr = C.getName();
if (NameOrErr)
return std::string(NameOrErr.get());
// If we have an error getting the name then we print the index of the archive
// member. Since we are already in an error state, we just ignore this error.
consumeError(NameOrErr.takeError());
return "<file index: " + std::to_string(Index) + ">";
}
void objdump::reportWarning(const Twine &Message, StringRef File) {
// Output order between errs() and outs() matters especially for archive
// files where the output is per member object.
outs().flush();
WithColor::warning(errs(), ToolName)
<< "'" << File << "': " << Message << "\n";
}
LLVM_ATTRIBUTE_NORETURN void objdump::reportError(StringRef File,
const Twine &Message) {
outs().flush();
WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n";
exit(1);
}
LLVM_ATTRIBUTE_NORETURN void objdump::reportError(Error E, StringRef FileName,
StringRef ArchiveName,
StringRef ArchitectureName) {
assert(E);
outs().flush();
WithColor::error(errs(), ToolName);
if (ArchiveName != "")
errs() << ArchiveName << "(" << FileName << ")";
else
errs() << "'" << FileName << "'";
if (!ArchitectureName.empty())
errs() << " (for architecture " << ArchitectureName << ")";
errs() << ": ";
logAllUnhandledErrors(std::move(E), errs());
exit(1);
}
static void reportCmdLineWarning(const Twine &Message) {
WithColor::warning(errs(), ToolName) << Message << "\n";
}
LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(const Twine &Message) {
WithColor::error(errs(), ToolName) << Message << "\n";
exit(1);
}
static void warnOnNoMatchForSections() {
SetVector<StringRef> MissingSections;
for (StringRef S : FilterSections) {
if (FoundSectionSet.count(S))
return;
// The user may specify an unnamed section. Don't warn for it.
if (!S.empty())
MissingSections.insert(S);
}
// Warn only if no section in FilterSections is matched.
for (StringRef S : MissingSections)
reportCmdLineWarning("section '" + S +
"' mentioned in a -j/--section option, but not "
"found in any input file");
}
static const Target *getTarget(const ObjectFile *Obj) {
// Figure out the target triple.
Triple TheTriple("unknown-unknown-unknown");
if (TripleName.empty()) {
TheTriple = Obj->makeTriple();
} else {
TheTriple.setTriple(Triple::normalize(TripleName));
auto Arch = Obj->getArch();
if (Arch == Triple::arm || Arch == Triple::armeb)
Obj->setARMSubArch(TheTriple);
}
// Get the target specific parser.
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
Error);
if (!TheTarget)
reportError(Obj->getFileName(), "can't find target: " + Error);
// Update the triple name and return the found target.
TripleName = TheTriple.getTriple();
return TheTarget;
}
bool objdump::isRelocAddressLess(RelocationRef A, RelocationRef B) {
return A.getOffset() < B.getOffset();
}
static Error getRelocationValueString(const RelocationRef &Rel,
SmallVectorImpl<char> &Result) {
const ObjectFile *Obj = Rel.getObject();
if (auto *ELF = dyn_cast<ELFObjectFileBase>(Obj))
return getELFRelocationValueString(ELF, Rel, Result);
if (auto *COFF = dyn_cast<COFFObjectFile>(Obj))
return getCOFFRelocationValueString(COFF, Rel, Result);
if (auto *Wasm = dyn_cast<WasmObjectFile>(Obj))
return getWasmRelocationValueString(Wasm, Rel, Result);
if (auto *MachO = dyn_cast<MachOObjectFile>(Obj))
return getMachORelocationValueString(MachO, Rel, Result);
if (auto *XCOFF = dyn_cast<XCOFFObjectFile>(Obj))
return getXCOFFRelocationValueString(XCOFF, Rel, Result);
llvm_unreachable("unknown object file format");
}
/// Indicates whether this relocation should be hidden when listing
/// relocations, usually because it is the trailing part of a multipart
/// relocation that will be printed as part of the leading relocation.
static bool getHidden(RelocationRef RelRef) {
auto *MachO = dyn_cast<MachOObjectFile>(RelRef.getObject());
if (!MachO)
return false;
unsigned Arch = MachO->getArch();
DataRefImpl Rel = RelRef.getRawDataRefImpl();
uint64_t Type = MachO->getRelocationType(Rel);
// On arches that use the generic relocations, GENERIC_RELOC_PAIR
// is always hidden.
if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc)
return Type == MachO::GENERIC_RELOC_PAIR;
if (Arch == Triple::x86_64) {
// On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
// an X86_64_RELOC_SUBTRACTOR.
if (Type == MachO::X86_64_RELOC_UNSIGNED && Rel.d.a > 0) {
DataRefImpl RelPrev = Rel;
RelPrev.d.a--;
uint64_t PrevType = MachO->getRelocationType(RelPrev);
if (PrevType == MachO::X86_64_RELOC_SUBTRACTOR)
return true;
}
}
return false;
}
namespace {
/// Get the column at which we want to start printing the instruction
/// disassembly, taking into account anything which appears to the left of it.
unsigned getInstStartColumn(const MCSubtargetInfo &STI) {
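// Without raw bytes the mnemonic column is 16. With raw bytes shown, x86
// needs more room (its instructions can be up to 15 bytes long), so use 40
// there and 24 for other targets.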
return !ShowRawInsn ? 16 : STI.getTargetTriple().isX86() ? 40 : 24;
}
static bool isAArch64Elf(const ObjectFile *Obj) {
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
return Elf && Elf->getEMachine() == ELF::EM_AARCH64;
}
static bool isArmElf(const ObjectFile *Obj) {
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
return Elf && Elf->getEMachine() == ELF::EM_ARM;
}
static bool hasMappingSymbols(const ObjectFile *Obj) {
return isArmElf(Obj) || isAArch64Elf(Obj);
}
static void printRelocation(formatted_raw_ostream &OS, StringRef FileName,
const RelocationRef &Rel, uint64_t Address,
bool Is64Bits) {
StringRef Fmt = Is64Bits ? "\t\t%016" PRIx64 ": " : "\t\t\t%08" PRIx64 ": ";
SmallString<16> Name;
SmallString<32> Val;
Rel.getTypeName(Name);
if (Error E = getRelocationValueString(Rel, Val))
reportError(std::move(E), FileName);
OS << format(Fmt.data(), Address) << Name << "\t" << Val;
}
class PrettyPrinter {
public:
virtual ~PrettyPrinter() = default;
virtual void
printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, formatted_raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
LiveVariablePrinter &LVP) {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP);
LVP.printBetweenInsts(OS, false);
size_t Start = OS.tell();
if (LeadingAddr)
OS << format("%8" PRIx64 ":", Address.Address);
if (ShowRawInsn) {
OS << ' ';
dumpBytes(Bytes, OS);
}
// The output of printInst starts with a tab. Print some spaces so that
// the tab has 1 column and advances to the target tab stop.
unsigned TabStop = getInstStartColumn(STI);
unsigned Column = OS.tell() - Start;
OS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8);
if (MI) {
// See MCInstPrinter::printInst. On targets where a PC relative immediate
// is relative to the next instruction and the length of a MCInst is
// difficult to measure (x86), this is the address of the next
// instruction.
uint64_t Addr =
Address.Address + (STI.getTargetTriple().isX86() ? Bytes.size() : 0);
IP.printInst(MI, Addr, "", STI, OS);
} else
OS << "\t<unknown>";
}
};
PrettyPrinter PrettyPrinterInst;
class HexagonPrettyPrinter : public PrettyPrinter {
public:
void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address,
formatted_raw_ostream &OS) {
uint32_t opcode =
(Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | Bytes[0];
if (LeadingAddr)
OS << format("%8" PRIx64 ":", Address);
if (ShowRawInsn) {
OS << "\t";
dumpBytes(Bytes.slice(0, 4), OS);
OS << format("\t%08" PRIx32, opcode);
}
}
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, formatted_raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
LiveVariablePrinter &LVP) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP, "");
if (!MI) {
printLead(Bytes, Address.Address, OS);
OS << " <unknown>";
return;
}
std::string Buffer;
{
raw_string_ostream TempStream(Buffer);
IP.printInst(MI, Address.Address, "", STI, TempStream);
}
StringRef Contents(Buffer);
// Split off bundle attributes
auto PacketBundle = Contents.rsplit('\n');
// Split off first instruction from the rest
auto HeadTail = PacketBundle.first.split('\n');
auto Preamble = " { ";
auto Separator = "";
// Hexagon's packets require relocations to be inline rather than
// clustered at the end of the packet.
std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
auto PrintReloc = [&]() -> void {
while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) {
if (RelCur->getOffset() == Address.Address) {
printRelocation(OS, ObjectFilename, *RelCur, Address.Address, false);
return;
}
++RelCur;
}
};
while (!HeadTail.first.empty()) {
OS << Separator;
Separator = "\n";
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP, "");
printLead(Bytes, Address.Address, OS);
OS << Preamble;
Preamble = " ";
StringRef Inst;
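// The Hexagon printer separates the two sub-instructions of a duplex with
// '\v'; print both halves on one line joined by "; ".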
auto Duplex = HeadTail.first.split('\v');
if (!Duplex.second.empty()) {
OS << Duplex.first;
OS << "; ";
Inst = Duplex.second;
}
else
Inst = HeadTail.first;
OS << Inst;
HeadTail = HeadTail.second.split('\n');
if (HeadTail.first.empty())
OS << " } " << PacketBundle.second;
PrintReloc();
Bytes = Bytes.slice(4);
Address.Address += 4;
}
}
};
HexagonPrettyPrinter HexagonPrettyPrinterInst;
class AMDGCNPrettyPrinter : public PrettyPrinter {
public:
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, formatted_raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
LiveVariablePrinter &LVP) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP);
if (MI) {
SmallString<40> InstStr;
raw_svector_ostream IS(InstStr);
IP.printInst(MI, Address.Address, "", STI, IS);
OS << left_justify(IS.str(), 60);
} else {
// An unrecognized encoding - this is probably data, so represent it
// using the .long directive, or the .byte directive if fewer than 4
// bytes remain.
if (Bytes.size() >= 4) {
OS << format("\t.long 0x%08" PRIx32 " ",
support::endian::read32<support::little>(Bytes.data()));
OS.indent(42);
} else {
OS << format("\t.byte 0x%02" PRIx8, Bytes[0]);
for (unsigned int i = 1; i < Bytes.size(); i++)
OS << format(", 0x%02" PRIx8, Bytes[i]);
OS.indent(55 - (6 * Bytes.size()));
}
}
OS << format("// %012" PRIX64 ":", Address.Address);
if (Bytes.size() >= 4) {
// D should be cast to uint32_t here as it is passed by format to
// snprintf as a vararg.
for (uint32_t D : makeArrayRef(
reinterpret_cast<const support::little32_t *>(Bytes.data()),
Bytes.size() / 4))
OS << format(" %08" PRIX32, D);
} else {
for (unsigned char B : Bytes)
OS << format(" %02" PRIX8, B);
}
if (!Annot.empty())
OS << " // " << Annot;
}
};
AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
class BPFPrettyPrinter : public PrettyPrinter {
public:
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, formatted_raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
LiveVariablePrinter &LVP) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP);
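// BPF encodes instructions in 8-byte units, so print the leading address
// as an instruction index (byte offset / 8) rather than a byte offset.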
if (LeadingAddr)
OS << format("%8" PRId64 ":", Address.Address / 8);
if (ShowRawInsn) {
OS << "\t";
dumpBytes(Bytes, OS);
}
if (MI)
IP.printInst(MI, Address.Address, "", STI, OS);
else
OS << "\t<unknown>";
}
};
BPFPrettyPrinter BPFPrettyPrinterInst;
PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
switch(Triple.getArch()) {
default:
return PrettyPrinterInst;
case Triple::hexagon:
return HexagonPrettyPrinterInst;
case Triple::amdgcn:
return AMDGCNPrettyPrinterInst;
case Triple::bpfel:
case Triple::bpfeb:
return BPFPrettyPrinterInst;
}
}
}
static uint8_t getElfSymbolType(const ObjectFile *Obj, const SymbolRef &Sym) {
assert(Obj->isELF());
if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj))
return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj))
return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj))
return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj))
return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
llvm_unreachable("Unsupported binary format");
}
template <class ELFT> static void
addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
for (auto Symbol : Obj->getDynamicSymbolIterators()) {
uint8_t SymbolType = Symbol.getELFType();
if (SymbolType == ELF::STT_SECTION)
continue;
uint64_t Address = unwrapOrError(Symbol.getAddress(), Obj->getFileName());
// ELFSymbolRef::getAddress() returns the size instead of the value for
// common symbols, which is not desirable for disassembly output. Override it.
if (SymbolType == ELF::STT_COMMON)
Address = unwrapOrError(Obj->getSymbol(Symbol.getRawDataRefImpl()),
Obj->getFileName())
->st_value;
StringRef Name = unwrapOrError(Symbol.getName(), Obj->getFileName());
if (Name.empty())
continue;
section_iterator SecI =
unwrapOrError(Symbol.getSection(), Obj->getFileName());
if (SecI == Obj->section_end())
continue;
AllSymbols[*SecI].emplace_back(Address, Name, SymbolType);
}
}
static void
addDynamicElfSymbols(const ObjectFile *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
assert(Obj->isELF());
if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj))
addDynamicElfSymbols(Elf32LEObj, AllSymbols);
else if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj))
addDynamicElfSymbols(Elf64LEObj, AllSymbols);
else if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj))
addDynamicElfSymbols(Elf32BEObj, AllSymbols);
else if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj))
addDynamicElfSymbols(Elf64BEObj, AllSymbols);
else
llvm_unreachable("Unsupported binary format");
}
static Optional<SectionRef> getWasmCodeSection(const WasmObjectFile *Obj) {
for (auto SecI : Obj->sections()) {
const WasmSection &Section = Obj->getWasmSection(SecI);
if (Section.Type == wasm::WASM_SEC_CODE)
return SecI;
}
return None;
}
static void
addMissingWasmCodeSymbols(const WasmObjectFile *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
Optional<SectionRef> Section = getWasmCodeSection(Obj);
if (!Section)
return;
SectionSymbolsTy &Symbols = AllSymbols[*Section];
std::set<uint64_t> SymbolAddresses;
for (const auto &Sym : Symbols)
SymbolAddresses.insert(Sym.Addr);
for (const wasm::WasmFunction &Function : Obj->functions()) {
uint64_t Address = Function.CodeSectionOffset;
// Only add fallback symbols for functions not already present in the symbol
// table.
if (SymbolAddresses.count(Address))
continue;
// This function has no symbol, so it should have no SymbolName.
assert(Function.SymbolName.empty());
// We use DebugName for the name, though it may be empty if there is no
// "name" custom section, or that section is missing a name for this
// function.
StringRef Name = Function.DebugName;
Symbols.emplace_back(Address, Name, ELF::STT_NOTYPE);
}
}
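// Synthesize "<name>@plt" symbols for entries in the .plt section so that
// calls through the PLT show a meaningful target name in the disassembly.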
static void addPltEntries(const ObjectFile *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols,
StringSaver &Saver) {
Optional<SectionRef> Plt = None;
for (const SectionRef &Section : Obj->sections()) {
Expected<StringRef> SecNameOrErr = Section.getName();
if (!SecNameOrErr) {
consumeError(SecNameOrErr.takeError());
continue;
}
if (*SecNameOrErr == ".plt")
Plt = Section;
}
if (!Plt)
return;
if (auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj)) {
for (auto PltEntry : ElfObj->getPltAddresses()) {
if (PltEntry.first) {
SymbolRef Symbol(*PltEntry.first, ElfObj);
uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
if (Expected<StringRef> NameOrErr = Symbol.getName()) {
if (!NameOrErr->empty())
AllSymbols[*Plt].emplace_back(
PltEntry.second, Saver.save((*NameOrErr + "@plt").str()),
SymbolType);
continue;
} else {
// The warning has been reported in disassembleObject().
consumeError(NameOrErr.takeError());
}
}
reportWarning("PLT entry at 0x" + Twine::utohexstr(PltEntry.second) +
" references an invalid symbol",
Obj->getFileName());
}
}
}
// Normally the disassembly output will skip blocks of zeroes. This function
// returns the number of zero bytes that can be skipped when dumping the
// disassembly of the instructions in Buf.
static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
// Find the number of leading zeroes.
size_t N = 0;
while (N < Buf.size() && !Buf[N])
++N;
// Only skip a block of zero bytes if we see at least 8 of them in a row.
if (N < 8)
return 0;
// We skip zeroes in multiples of 4 because we do not want to truncate an
// instruction if it starts with a zero byte.
return N & ~0x3;
}
// Returns a map from sections to their relocations.
static std::map<SectionRef, std::vector<RelocationRef>>
getRelocsMap(object::ObjectFile const &Obj) {
std::map<SectionRef, std::vector<RelocationRef>> Ret;
uint64_t I = (uint64_t)-1;
for (SectionRef Sec : Obj.sections()) {
++I;
Expected<section_iterator> RelocatedOrErr = Sec.getRelocatedSection();
if (!RelocatedOrErr)
reportError(Obj.getFileName(),
"section (" + Twine(I) +
"): failed to get a relocated section: " +
toString(RelocatedOrErr.takeError()));
section_iterator Relocated = *RelocatedOrErr;
if (Relocated == Obj.section_end() || !checkSectionFilter(*Relocated).Keep)
continue;
std::vector<RelocationRef> &V = Ret[*Relocated];
append_range(V, Sec.relocations());
// Sort relocations by address.
llvm::stable_sort(V, isRelocAddressLess);
}
return Ret;
}
// Used for --adjust-vma to check if address should be adjusted by the
// specified value for a given section.
// For ELF we do not adjust non-allocatable sections like debug ones,
// because they are not loadable.
// TODO: implement for other file formats.
static bool shouldAdjustVA(const SectionRef &Section) {
const ObjectFile *Obj = Section.getObject();
if (Obj->isELF())
return ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC;
return false;
}
typedef std::pair<uint64_t, char> MappingSymbolPair;
static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols,
uint64_t Address) {
auto It =
partition_point(MappingSymbols, [Address](const MappingSymbolPair &Val) {
return Val.first <= Address;
});
// Return zero for any address before the first mapping symbol; this means
// we should use the default disassembly mode, depending on the target.
if (It == MappingSymbols.begin())
return '\x00';
return (It - 1)->second;
}
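// Print one data item at SectionAddr + Index as a .word/.short/.byte
// directive (the widest that fits before End) and return the number of
// bytes consumed.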
static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index,
uint64_t End, const ObjectFile *Obj,
ArrayRef<uint8_t> Bytes,
ArrayRef<MappingSymbolPair> MappingSymbols,
raw_ostream &OS) {
support::endianness Endian =
Obj->isLittleEndian() ? support::little : support::big;
OS << format("%8" PRIx64 ":\t", SectionAddr + Index);
if (Index + 4 <= End) {
dumpBytes(Bytes.slice(Index, 4), OS);
OS << "\t.word\t"
<< format_hex(support::endian::read32(Bytes.data() + Index, Endian),
10);
return 4;
}
if (Index + 2 <= End) {
dumpBytes(Bytes.slice(Index, 2), OS);
OS << "\t\t.short\t"
<< format_hex(support::endian::read16(Bytes.data() + Index, Endian),
6);
return 2;
}
dumpBytes(Bytes.slice(Index, 1), OS);
OS << "\t\t.byte\t" << format_hex(Bytes[0], 4);
return 1;
}
static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
ArrayRef<uint8_t> Bytes) {
// Print out data up to 8 bytes at a time in hex and ASCII.
uint8_t AsciiData[9] = {'\0'};
uint8_t Byte;
int NumBytes = 0;
for (; Index < End; ++Index) {
if (NumBytes == 0)
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
Byte = Bytes.slice(Index)[0];
outs() << format(" %02x", Byte);
AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
uint8_t IndentOffset = 0;
NumBytes++;
if (Index == End - 1 || NumBytes > 8) {
// Pad the line when fewer than 8 bytes were printed:
// 3 columns per missing byte (2 hex digits plus the separating space).
IndentOffset = 3 * (8 - NumBytes);
for (int Excess = NumBytes; Excess < 8; Excess++)
AsciiData[Excess] = '\0';
NumBytes = 8;
}
if (NumBytes == 8) {
AsciiData[8] = '\0';
outs() << std::string(IndentOffset, ' ') << " ";
outs() << reinterpret_cast<char *>(AsciiData);
outs() << '\n';
NumBytes = 0;
}
}
}
SymbolInfoTy objdump::createSymbolInfo(const ObjectFile *Obj,
const SymbolRef &Symbol) {
const StringRef FileName = Obj->getFileName();
const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
if (Obj->isXCOFF() && SymbolDescription) {
const auto *XCOFFObj = cast<XCOFFObjectFile>(Obj);
DataRefImpl SymbolDRI = Symbol.getRawDataRefImpl();
const uint32_t SymbolIndex = XCOFFObj->getSymbolIndex(SymbolDRI.p);
Optional<XCOFF::StorageMappingClass> Smc =
getXCOFFSymbolCsectSMC(XCOFFObj, Symbol);
return SymbolInfoTy(Addr, Name, Smc, SymbolIndex,
isLabel(XCOFFObj, Symbol));
} else
return SymbolInfoTy(Addr, Name,
Obj->isELF() ? getElfSymbolType(Obj, Symbol)
: (uint8_t)ELF::STT_NOTYPE);
}
static SymbolInfoTy createDummySymbolInfo(const ObjectFile *Obj,
const uint64_t Addr, StringRef &Name,
uint8_t Type) {
if (Obj->isXCOFF() && SymbolDescription)
return SymbolInfoTy(Addr, Name, None, None, false);
else
return SymbolInfoTy(Addr, Name, Type);
}
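// Scan the symbol's byte range, evaluate every branch target, and assign a
// local label ("L0", "L1", ...) to each in-range target; these labels are
// printed later when --symbolize-operands is enabled.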
static void
collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
MCDisassembler *DisAsm, MCInstPrinter *IP,
const MCSubtargetInfo *STI, uint64_t SectionAddr,
uint64_t Start, uint64_t End,
std::unordered_map<uint64_t, std::string> &Labels) {
// So far only supports X86.
if (!STI->getTargetTriple().isX86())
return;
Labels.clear();
unsigned LabelCount = 0;
Start += SectionAddr;
End += SectionAddr;
uint64_t Index = Start;
while (Index < End) {
// Disassemble a real instruction and record function-local branch labels.
MCInst Inst;
uint64_t Size;
bool Disassembled = DisAsm->getInstruction(
Inst, Size, Bytes.slice(Index - SectionAddr), Index, nulls());
if (Size == 0)
Size = 1;
if (Disassembled && MIA) {
uint64_t Target;
bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
if (TargetKnown && (Target >= Start && Target < End) &&
!Labels.count(Target))
Labels[Target] = ("L" + Twine(LabelCount++)).str();
}
Index += Size;
}
}
// Create an MCSymbolizer for the target and add it to the MCDisassembler.
// This is currently only used on AMDGPU, and assumes the format of the
// void * argument passed to AMDGPU's createMCSymbolizer.
static void addSymbolizer(
MCContext &Ctx, const Target *Target, StringRef TripleName,
MCDisassembler *DisAsm, uint64_t SectionAddr, ArrayRef<uint8_t> Bytes,
SectionSymbolsTy &Symbols,
std::vector<std::unique_ptr<std::string>> &SynthesizedLabelNames) {
std::unique_ptr<MCRelocationInfo> RelInfo(
Target->createMCRelocationInfo(TripleName, Ctx));
if (!RelInfo)
return;
std::unique_ptr<MCSymbolizer> Symbolizer(Target->createMCSymbolizer(
TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
MCSymbolizer *SymbolizerPtr = &*Symbolizer;
DisAsm->setSymbolizer(std::move(Symbolizer));
if (!SymbolizeOperands)
return;
// Synthesize labels referenced by branch instructions by
// disassembling, discarding the output, and collecting the referenced
// addresses from the symbolizer.
for (size_t Index = 0; Index != Bytes.size();) {
MCInst Inst;
uint64_t Size;
DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), SectionAddr + Index,
nulls());
if (Size == 0)
Size = 1;
Index += Size;
}
ArrayRef<uint64_t> LabelAddrsRef = SymbolizerPtr->getReferencedAddresses();
// Copy and sort to remove duplicates.
std::vector<uint64_t> LabelAddrs;
LabelAddrs.insert(LabelAddrs.end(), LabelAddrsRef.begin(),
LabelAddrsRef.end());
llvm::sort(LabelAddrs);
LabelAddrs.resize(std::unique(LabelAddrs.begin(), LabelAddrs.end()) -
LabelAddrs.begin());
// Add the labels.
for (unsigned LabelNum = 0; LabelNum != LabelAddrs.size(); ++LabelNum) {
auto Name = std::make_unique<std::string>();
*Name = (Twine("L") + Twine(LabelNum)).str();
SynthesizedLabelNames.push_back(std::move(Name));
Symbols.push_back(SymbolInfoTy(
LabelAddrs[LabelNum], *SynthesizedLabelNames.back(), ELF::STT_NOTYPE));
}
llvm::stable_sort(Symbols);
// Recreate the symbolizer with the new symbols list.
RelInfo.reset(Target->createMCRelocationInfo(TripleName, Ctx));
Symbolizer.reset(Target->createMCSymbolizer(
TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
DisAsm->setSymbolizer(std::move(Symbolizer));
}
static StringRef getSegmentName(const MachOObjectFile *MachO,
const SectionRef &Section) {
if (MachO) {
DataRefImpl DR = Section.getRawDataRefImpl();
StringRef SegmentName = MachO->getSectionFinalSegmentName(DR);
return SegmentName;
}
return "";
}
static void emitPostInstructionInfo(formatted_raw_ostream &FOS,
const MCAsmInfo &MAI,
const MCSubtargetInfo &STI,
StringRef Comments,
LiveVariablePrinter &LVP) {
do {
if (!Comments.empty()) {
// Emit a line of comments.
StringRef Comment;
std::tie(Comment, Comments) = Comments.split('\n');
// MAI.getCommentColumn() assumes that instructions are printed at the
// position of 8, while getInstStartColumn() returns the actual position.
unsigned CommentColumn =
MAI.getCommentColumn() - 8 + getInstStartColumn(STI);
FOS.PadToColumn(CommentColumn);
FOS << MAI.getCommentString() << ' ' << Comment;
}
LVP.printAfterInst(FOS);
FOS << '\n';
} while (!Comments.empty());
FOS.flush();
}
static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
MCDisassembler *SecondaryDisAsm,
const MCInstrAnalysis *MIA, MCInstPrinter *IP,
const MCSubtargetInfo *PrimarySTI,
const MCSubtargetInfo *SecondarySTI,
PrettyPrinter &PIP,
SourcePrinter &SP, bool InlineRelocs) {
const MCSubtargetInfo *STI = PrimarySTI;
MCDisassembler *DisAsm = PrimaryDisAsm;
bool PrimaryIsThumb = false;
if (isArmElf(Obj))
PrimaryIsThumb = STI->checkFeatures("+thumb-mode");
std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
if (InlineRelocs)
RelocMap = getRelocsMap(*Obj);
bool Is64Bits = Obj->getBytesInAddress() > 4;
// Create a mapping from virtual address to symbol name. This is used to
// pretty print the symbols while disassembling.
std::map<SectionRef, SectionSymbolsTy> AllSymbols;
SectionSymbolsTy AbsoluteSymbols;
const StringRef FileName = Obj->getFileName();
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj);
for (const SymbolRef &Symbol : Obj->symbols()) {
Expected<StringRef> NameOrErr = Symbol.getName();
if (!NameOrErr) {
reportWarning(toString(NameOrErr.takeError()), FileName);
continue;
}
if (NameOrErr->empty() && !(Obj->isXCOFF() && SymbolDescription))
continue;
if (Obj->isELF() && getElfSymbolType(Obj, Symbol) == ELF::STT_SECTION)
continue;
if (MachO) {
// __mh_(execute|dylib|dylinker|bundle|preload|object)_header are special
// symbols that support MachO header introspection. They do not bind to
// code locations and are irrelevant for disassembly.
if (NameOrErr->startswith("__mh_") && NameOrErr->endswith("_header"))
continue;
// Don't ask a Mach-O STAB symbol for its section unless you know that
// STAB symbol's section field refers to a valid section index. Otherwise
// querying the symbol may fail while trying to load a section that does
// not exist.
DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
uint8_t NType = (MachO->is64Bit() ?
MachO->getSymbol64TableEntry(SymDRI).n_type:
MachO->getSymbolTableEntry(SymDRI).n_type);
if (NType & MachO::N_STAB)
continue;
}
section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
if (SecI != Obj->section_end())
AllSymbols[*SecI].push_back(createSymbolInfo(Obj, Symbol));
else
AbsoluteSymbols.push_back(createSymbolInfo(Obj, Symbol));
}
if (AllSymbols.empty() && Obj->isELF())
addDynamicElfSymbols(Obj, AllSymbols);
if (Obj->isWasm())
addMissingWasmCodeSymbols(cast<WasmObjectFile>(Obj), AllSymbols);
BumpPtrAllocator A;
StringSaver Saver(A);
addPltEntries(Obj, AllSymbols, Saver);
// Create a mapping from virtual address to section. An empty section can
// share an address with a non-empty section. Sort such empty sections to
// be before same-addressed non-empty sections so that symbol lookups
// prefer the non-empty section.
std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses;
for (SectionRef Sec : Obj->sections())
SectionAddresses.emplace_back(Sec.getAddress(), Sec);
llvm::stable_sort(SectionAddresses, [](const auto &LHS, const auto &RHS) {
if (LHS.first != RHS.first)
return LHS.first < RHS.first;
return LHS.second.getSize() < RHS.second.getSize();
});
// Linked executables (.exe and .dll files) typically don't include a real
// symbol table but they might contain an export table.
if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj)) {
for (const auto &ExportEntry : COFFObj->export_directories()) {
StringRef Name;
if (Error E = ExportEntry.getSymbolName(Name))
reportError(std::move(E), Obj->getFileName());
if (Name.empty())
continue;
uint32_t RVA;
if (Error E = ExportEntry.getExportRVA(RVA))
reportError(std::move(E), Obj->getFileName());
uint64_t VA = COFFObj->getImageBase() + RVA;
auto Sec = partition_point(
SectionAddresses, [VA](const std::pair<uint64_t, SectionRef> &O) {
return O.first <= VA;
});
if (Sec != SectionAddresses.begin()) {
--Sec;
AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE);
} else
AbsoluteSymbols.emplace_back(VA, Name, ELF::STT_NOTYPE);
}
}
// Sort all the symbols; this allows us to use a simple binary search to
// find a symbol near an address. Multiple symbols can have the same
// address, so use a stable sort to stabilize the output.
StringSet<> FoundDisasmSymbolSet;
for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
llvm::stable_sort(SecSyms.second);
llvm::stable_sort(AbsoluteSymbols);
std::unique_ptr<DWARFContext> DICtx;
LiveVariablePrinter LVP(*Ctx.getRegisterInfo(), *STI);
if (DbgVariables != DVDisabled) {
DICtx = DWARFContext::create(*Obj);
for (const std::unique_ptr<DWARFUnit> &CU : DICtx->compile_units())
LVP.addCompileUnit(CU->getUnitDIE(false));
}
LLVM_DEBUG(LVP.dump());
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
if (FilterSections.empty() && !DisassembleAll &&
(!Section.isText() || Section.isVirtual()))
continue;
uint64_t SectionAddr = Section.getAddress();
uint64_t SectSize = Section.getSize();
if (!SectSize)
continue;
// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];
std::vector<MappingSymbolPair> MappingSymbols;
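// Collect ELF mapping symbols: $a marks ARM code, $t Thumb code, $x A64
// code and $d data; store them as offsets relative to the section start.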
if (hasMappingSymbols(Obj)) {
for (const auto &Symb : Symbols) {
uint64_t Address = Symb.Addr;
StringRef Name = Symb.Name;
if (Name.startswith("$d"))
MappingSymbols.emplace_back(Address - SectionAddr, 'd');
if (Name.startswith("$x"))
MappingSymbols.emplace_back(Address - SectionAddr, 'x');
if (Name.startswith("$a"))
MappingSymbols.emplace_back(Address - SectionAddr, 'a');
if (Name.startswith("$t"))
MappingSymbols.emplace_back(Address - SectionAddr, 't');
}
}
llvm::sort(MappingSymbols);
ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
unwrapOrError(Section.getContents(), Obj->getFileName()));
std::vector<std::unique_ptr<std::string>> SynthesizedLabelNames;
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
addSymbolizer(Ctx, TheTarget, TripleName, DisAsm, SectionAddr, Bytes,
Symbols, SynthesizedLabelNames);
}
StringRef SegmentName = getSegmentName(MachO, Section);
StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName());
// If the section has no symbol at the start, just insert a dummy one.
if (Symbols.empty() || Symbols[0].Addr != 0) {
Symbols.insert(Symbols.begin(),
createDummySymbolInfo(Obj, SectionAddr, SectionName,
Section.isText() ? ELF::STT_FUNC
: ELF::STT_OBJECT));
}
SmallString<40> Comments;
raw_svector_ostream CommentStream(Comments);
uint64_t VMAAdjustment = 0;
if (shouldAdjustVA(Section))
VMAAdjustment = AdjustVMA;
+ // In executable and shared objects, r_offset holds a virtual address.
+ // Subtract SectionAddr from the r_offset field of a relocation to get
+ // the section offset.
+ uint64_t RelAdjustment = Obj->isRelocatableObject() ? 0 : SectionAddr;
uint64_t Size;
uint64_t Index;
bool PrintedSection = false;
std::vector<RelocationRef> Rels = RelocMap[Section];
std::vector<RelocationRef>::const_iterator RelCur = Rels.begin();
std::vector<RelocationRef>::const_iterator RelEnd = Rels.end();
// Disassemble symbol by symbol.
for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
std::string SymbolName = Symbols[SI].Name.str();
if (Demangle)
SymbolName = demangle(SymbolName);
// Skip if --disassemble-symbols is not empty and the symbol is not in
// the list.
if (!DisasmSymbolSet.empty() && !DisasmSymbolSet.count(SymbolName))
continue;
uint64_t Start = Symbols[SI].Addr;
if (Start < SectionAddr || StopAddress <= Start)
continue;
else
FoundDisasmSymbolSet.insert(SymbolName);
// The end is the section end, the beginning of the next symbol, or
// --stop-address.
uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress);
if (SI + 1 < SE)
End = std::min(End, Symbols[SI + 1].Addr);
if (Start >= End || End <= StartAddress)
continue;
Start -= SectionAddr;
End -= SectionAddr;
if (!PrintedSection) {
PrintedSection = true;
outs() << "\nDisassembly of section ";
if (!SegmentName.empty())
outs() << SegmentName << ",";
outs() << SectionName << ":\n";
}
outs() << '\n';
if (LeadingAddr)
outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
SectionAddr + Start + VMAAdjustment);
if (Obj->isXCOFF() && SymbolDescription) {
outs() << getXCOFFSymbolDescription(Symbols[SI], SymbolName) << ":\n";
} else
outs() << '<' << SymbolName << ">:\n";
// Don't print raw contents of a virtual section. A virtual section
// doesn't have any contents in the file.
if (Section.isVirtual()) {
outs() << "...\n";
continue;
}
auto Status = DisAsm->onSymbolStart(Symbols[SI], Size,
Bytes.slice(Start, End - Start),
SectionAddr + Start, CommentStream);
// To have round trippable disassembly, we fall back to decoding the
// remaining bytes as instructions.
//
// If there is a failure, we disassemble the failed region as bytes before
// falling back. The target is expected to print nothing in this case.
//
// If there is Success or SoftFail, i.e. no 'real' failure, we advance by
// Size bytes before falling back.
// So if the entire symbol is 'eaten' by the target:
// Start += Size // Now Start = End and we will never decode it as
// // instructions
//
// Right now, most targets return None, i.e. they decline to handle the
// symbol specially. But WebAssembly decodes preludes for some symbols.
//
if (Status.hasValue()) {
if (Status.getValue() == MCDisassembler::Fail) {
outs() << "// Error in decoding " << SymbolName
<< " : Decoding failed region as bytes.\n";
for (uint64_t I = 0; I < Size; ++I) {
outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
<< "\n";
}
}
} else {
Size = 0;
}
Start += Size;
Index = Start;
if (SectionAddr < StartAddress)
Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
// If there is a data/common symbol inside an ELF text section and we are
// only disassembling text (applicable to all architectures), we are in a
// situation where we must print the data and not disassemble it.
if (Obj->isELF() && !DisassembleAll && Section.isText()) {
uint8_t SymTy = Symbols[SI].Type;
if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) {
dumpELFData(SectionAddr, Index, End, Bytes);
Index = End;
}
}
bool CheckARMELFData = hasMappingSymbols(Obj) &&
Symbols[SI].Type != ELF::STT_OBJECT &&
!DisassembleAll;
bool DumpARMELFData = false;
formatted_raw_ostream FOS(outs());
std::unordered_map<uint64_t, std::string> AllLabels;
if (SymbolizeOperands)
collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI,
SectionAddr, Index, End, AllLabels);
while (Index < End) {
// ARM and AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the mapping symbols to understand what
// we need to dump. If the data marker is within a function, the data
// is printed as .word/.short/.byte directives.
if (CheckARMELFData) {
char Kind = getMappingSymbolKind(MappingSymbols, Index);
DumpARMELFData = Kind == 'd';
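// '$a' marks ARM code and '$t' marks Thumb code; pick the disassembler
// that matches the current mapping symbol.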
if (SecondarySTI) {
if (Kind == 'a') {
STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI;
DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm;
} else if (Kind == 't') {
STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI;
DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm;
}
}
}
if (DumpARMELFData) {
Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
MappingSymbols, FOS);
} else {
// When -z or --disassemble-zeroes is given we always disassemble
// them. Otherwise we might want to skip zero bytes we see.
if (!DisassembleZeroes) {
uint64_t MaxOffset = End - Index;
// For --reloc: print zero blocks patched by relocations, so that
// relocations can be shown in the dump.
if (RelCur != RelEnd)
- MaxOffset = RelCur->getOffset() - Index;
+ MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index,
+ MaxOffset);
if (size_t N =
countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
FOS << "\t\t..." << '\n';
Index += N;
continue;
}
}
// Print a local label if there is one.
auto Iter = AllLabels.find(SectionAddr + Index);
if (Iter != AllLabels.end())
FOS << "<" << Iter->second << ">:\n";
// Disassemble a real instruction, or data when --disassemble-all is
// provided.
MCInst Inst;
bool Disassembled =
DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
SectionAddr + Index, CommentStream);
if (Size == 0)
Size = 1;
LVP.update({Index, Section.getIndex()},
{Index + Size, Section.getIndex()}, Index + Size != End);
IP->setCommentStream(CommentStream);
PIP.printInst(
*IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
{SectionAddr + Index + VMAAdjustment, Section.getIndex()}, FOS,
"", *STI, &SP, Obj->getFileName(), &Rels, LVP);
IP->setCommentStream(llvm::nulls());
// If disassembly has failed, avoid analysing invalid/incomplete
// instruction information. Otherwise, try to resolve the target
// address (jump target or memory operand address) and print it on the
// right of the instruction.
if (Disassembled && MIA) {
// Branch targets are printed just after the instructions.
llvm::raw_ostream *TargetOS = &FOS;
uint64_t Target;
bool PrintTarget =
MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target);
if (!PrintTarget)
if (Optional<uint64_t> MaybeTarget =
MIA->evaluateMemoryOperandAddress(
Inst, SectionAddr + Index, Size)) {
Target = *MaybeTarget;
PrintTarget = true;
// Do not print real address when symbolizing.
if (!SymbolizeOperands) {
// Memory operand addresses are printed as comments.
TargetOS = &CommentStream;
*TargetOS << "0x" << Twine::utohexstr(Target);
}
}
if (PrintTarget) {
// In a relocatable object, the target's section must reside in
// the same section as the call instruction or it is accessed
// through a relocation.
//
// In a non-relocatable object, the target may be in any section.
// In that case, locate the section(s) containing the target
// address and find the symbol in one of those, if possible.
//
// N.B. We don't walk the relocations in the relocatable case yet.
std::vector<const SectionSymbolsTy *> TargetSectionSymbols;
if (!Obj->isRelocatableObject()) {
auto It = llvm::partition_point(
SectionAddresses,
[=](const std::pair<uint64_t, SectionRef> &O) {
return O.first <= Target;
});
uint64_t TargetSecAddr = 0;
while (It != SectionAddresses.begin()) {
--It;
if (TargetSecAddr == 0)
TargetSecAddr = It->first;
if (It->first != TargetSecAddr)
break;
TargetSectionSymbols.push_back(&AllSymbols[It->second]);
}
} else {
TargetSectionSymbols.push_back(&Symbols);
}
TargetSectionSymbols.push_back(&AbsoluteSymbols);
// Find the last symbol in the first candidate section whose
// offset is less than or equal to the target. If there are no
// such symbols, try in the next section and so on, before finally
// using the nearest preceding absolute symbol (if any), if there
// are no other valid symbols.
const SymbolInfoTy *TargetSym = nullptr;
for (const SectionSymbolsTy *TargetSymbols :
TargetSectionSymbols) {
auto It = llvm::partition_point(
*TargetSymbols,
[=](const SymbolInfoTy &O) { return O.Addr <= Target; });
if (It != TargetSymbols->begin()) {
TargetSym = &*(It - 1);
break;
}
}
// Print the label corresponding to the target, if there is one.
bool LabelAvailable = AllLabels.count(Target);
if (TargetSym != nullptr) {
uint64_t TargetAddress = TargetSym->Addr;
uint64_t Disp = Target - TargetAddress;
std::string TargetName = TargetSym->Name.str();
if (Demangle)
TargetName = demangle(TargetName);
*TargetOS << " <";
if (!Disp) {
// Always print the binary symbol precisely corresponding to
// the target address.
*TargetOS << TargetName;
} else if (!LabelAvailable) {
// Always print the binary symbol plus an offset if there's no
// local label corresponding to the target address.
*TargetOS << TargetName << "+0x" << Twine::utohexstr(Disp);
} else {
*TargetOS << AllLabels[Target];
}
*TargetOS << ">";
} else if (LabelAvailable) {
*TargetOS << " <" << AllLabels[Target] << ">";
}
// By convention, each record in the comment stream should be
// terminated.
if (TargetOS == &CommentStream)
*TargetOS << "\n";
}
}
}
assert(Ctx.getAsmInfo());
emitPostInstructionInfo(FOS, *Ctx.getAsmInfo(), *STI,
CommentStream.str(), LVP);
Comments.clear();
// Hexagon handles relocations in its pretty printer.
if (Obj->getArch() != Triple::hexagon) {
// Print relocation for instruction and data.
while (RelCur != RelEnd) {
- uint64_t Offset = RelCur->getOffset();
+ uint64_t Offset = RelCur->getOffset() - RelAdjustment;
// If this relocation is hidden, skip it.
if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
++RelCur;
continue;
}
// Stop when RelCur's offset is past the disassembled
// instruction/data. Note that it's possible the disassembled data
// is not the complete data: we might see the relocation printed in
// the middle of the data, but this matches the binutils objdump
// output.
if (Offset >= Index + Size)
break;
// When --adjust-vma is used, update the address printed.
if (RelCur->getSymbol() != Obj->symbol_end()) {
Expected<section_iterator> SymSI =
RelCur->getSymbol()->getSection();
if (SymSI && *SymSI != Obj->section_end() &&
shouldAdjustVA(**SymSI))
Offset += AdjustVMA;
}
printRelocation(FOS, Obj->getFileName(), *RelCur,
SectionAddr + Offset, Is64Bits);
LVP.printAfterOtherLine(FOS, true);
++RelCur;
}
}
Index += Size;
}
}
}
StringSet<> MissingDisasmSymbolSet =
set_difference(DisasmSymbolSet, FoundDisasmSymbolSet);
for (StringRef Sym : MissingDisasmSymbolSet.keys())
reportWarning("failed to disassemble missing symbol " + Sym, FileName);
}
static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
const Target *TheTarget = getTarget(Obj);
// Package up features to be passed to target/subtarget
SubtargetFeatures Features = Obj->getFeatures();
if (!MAttrs.empty())
for (unsigned I = 0; I != MAttrs.size(); ++I)
Features.AddFeature(MAttrs[I]);
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
if (!MRI)
reportError(Obj->getFileName(),
"no register info for target " + TripleName);
// Set up disassembler.
MCTargetOptions MCOptions;
std::unique_ptr<const MCAsmInfo> AsmInfo(
TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
if (!AsmInfo)
reportError(Obj->getFileName(),
"no assembly info for target " + TripleName);
if (MCPU.empty())
MCPU = Obj->tryGetCPUName().getValueOr("").str();
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
if (!STI)
reportError(Obj->getFileName(),
"no subtarget info for target " + TripleName);
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII)
reportError(Obj->getFileName(),
"no instruction info for target " + TripleName);
MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
// FIXME: for now initialize MCObjectFileInfo with default values
std::unique_ptr<MCObjectFileInfo> MOFI(
TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
Ctx.setObjectFileInfo(MOFI.get());
std::unique_ptr<MCDisassembler> DisAsm(
TheTarget->createMCDisassembler(*STI, Ctx));
if (!DisAsm)
reportError(Obj->getFileName(), "no disassembler for target " + TripleName);
// If we have an ARM object file, we need a second disassembler, because
// ARM CPUs have two different instruction sets: ARM mode and Thumb mode.
// We use mapping symbols to switch between the two disassemblers, where
// appropriate.
std::unique_ptr<MCDisassembler> SecondaryDisAsm;
std::unique_ptr<const MCSubtargetInfo> SecondarySTI;
if (isArmElf(Obj) && !STI->checkFeatures("+mclass")) {
if (STI->checkFeatures("+thumb-mode"))
Features.AddFeature("-thumb-mode");
else
Features.AddFeature("+thumb-mode");
SecondarySTI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU,
Features.getString()));
SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx));
}
std::unique_ptr<const MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
if (!IP)
reportError(Obj->getFileName(),
"no instruction printer for target " + TripleName);
IP->setPrintImmHex(PrintImmHex);
IP->setPrintBranchImmAsAddress(true);
IP->setSymbolizeOperands(SymbolizeOperands);
IP->setMCInstrAnalysis(MIA.get());
PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
SourcePrinter SP(Obj, TheTarget->getName());
for (StringRef Opt : DisassemblerOptions)
if (!IP->applyTargetSpecificCLOption(Opt))
reportError(Obj->getFileName(),
"Unrecognized disassembler option: " + Opt);
disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(),
MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP,
SP, InlineRelocs);
}
void objdump::printRelocations(const ObjectFile *Obj) {
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 :
"%08" PRIx64;
// Regular objdump doesn't print relocations in non-relocatable object
// files.
if (!Obj->isRelocatableObject())
return;
// Build a mapping from relocation target to a vector of relocation
// sections. Usually, there is only one relocation section for
// each relocated section.
MapVector<SectionRef, std::vector<SectionRef>> SecToRelSec;
uint64_t Ndx;
for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) {
if (Section.relocation_begin() == Section.relocation_end())
continue;
Expected<section_iterator> SecOrErr = Section.getRelocatedSection();
if (!SecOrErr)
reportError(Obj->getFileName(),
"section (" + Twine(Ndx) +
"): unable to get a relocation target: " +
toString(SecOrErr.takeError()));
SecToRelSec[**SecOrErr].push_back(Section);
}
for (std::pair<SectionRef, std::vector<SectionRef>> &P : SecToRelSec) {
StringRef SecName = unwrapOrError(P.first.getName(), Obj->getFileName());
outs() << "\nRELOCATION RECORDS FOR [" << SecName << "]:\n";
uint32_t OffsetPadding = (Obj->getBytesInAddress() > 4 ? 16 : 8);
uint32_t TypePadding = 24;
outs() << left_justify("OFFSET", OffsetPadding) << " "
<< left_justify("TYPE", TypePadding) << " "
<< "VALUE\n";
for (SectionRef Section : P.second) {
for (const RelocationRef &Reloc : Section.relocations()) {
uint64_t Address = Reloc.getOffset();
SmallString<32> RelocName;
SmallString<32> ValueStr;
if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
continue;
Reloc.getTypeName(RelocName);
if (Error E = getRelocationValueString(Reloc, ValueStr))
reportError(std::move(E), Obj->getFileName());
outs() << format(Fmt.data(), Address) << " "
<< left_justify(RelocName, TypePadding) << " " << ValueStr
<< "\n";
}
}
}
}
void objdump::printDynamicRelocations(const ObjectFile *Obj) {
// For the moment, this option is for ELF only
if (!Obj->isELF())
return;
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
if (!Elf || Elf->getEType() != ELF::ET_DYN) {
reportError(Obj->getFileName(), "not a dynamic object");
return;
}
std::vector<SectionRef> DynRelSec = Obj->dynamic_relocation_sections();
if (DynRelSec.empty())
return;
outs() << "DYNAMIC RELOCATION RECORDS\n";
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
for (const SectionRef &Section : DynRelSec)
for (const RelocationRef &Reloc : Section.relocations()) {
uint64_t Address = Reloc.getOffset();
SmallString<32> RelocName;
SmallString<32> ValueStr;
Reloc.getTypeName(RelocName);
if (Error E = getRelocationValueString(Reloc, ValueStr))
reportError(std::move(E), Obj->getFileName());
outs() << format(Fmt.data(), Address) << " " << RelocName << " "
<< ValueStr << "\n";
}
}
// Returns true if we need to show the LMA column when dumping section
// headers. We show it only when the platform is ELF and either at least one
// section's VMA differs from its LMA or the --show-lma flag is used.
static bool shouldDisplayLMA(const ObjectFile *Obj) {
if (!Obj->isELF())
return false;
for (const SectionRef &S : ToolSectionFilter(*Obj))
if (S.getAddress() != getELFSectionLMA(S))
return true;
return ShowLMA;
}
static size_t getMaxSectionNameWidth(const ObjectFile *Obj) {
// Default column width for names is 13 even if no names are that long.
size_t MaxWidth = 13;
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
MaxWidth = std::max(MaxWidth, Name.size());
}
return MaxWidth;
}
void objdump::printSectionHeaders(const ObjectFile *Obj) {
size_t NameWidth = getMaxSectionNameWidth(Obj);
size_t AddressWidth = 2 * Obj->getBytesInAddress();
bool HasLMAColumn = shouldDisplayLMA(Obj);
outs() << "\nSections:\n";
if (HasLMAColumn)
outs() << "Idx " << left_justify("Name", NameWidth) << " Size "
<< left_justify("VMA", AddressWidth) << " "
<< left_justify("LMA", AddressWidth) << " Type\n";
else
outs() << "Idx " << left_justify("Name", NameWidth) << " Size "
<< left_justify("VMA", AddressWidth) << " Type\n";
uint64_t Idx;
for (const SectionRef &Section : ToolSectionFilter(*Obj, &Idx)) {
StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
uint64_t VMA = Section.getAddress();
if (shouldAdjustVA(Section))
VMA += AdjustVMA;
uint64_t Size = Section.getSize();
std::string Type = Section.isText() ? "TEXT" : "";
if (Section.isData())
Type += Type.empty() ? "DATA" : ", DATA";
if (Section.isBSS())
Type += Type.empty() ? "BSS" : ", BSS";
if (Section.isDebugSection())
Type += Type.empty() ? "DEBUG" : ", DEBUG";
if (HasLMAColumn)
outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth,
Name.str().c_str(), Size)
<< format_hex_no_prefix(VMA, AddressWidth) << " "
<< format_hex_no_prefix(getELFSectionLMA(Section), AddressWidth)
<< " " << Type << "\n";
else
outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth,
Name.str().c_str(), Size)
<< format_hex_no_prefix(VMA, AddressWidth) << " " << Type << "\n";
}
}
void objdump::printSectionContents(const ObjectFile *Obj) {
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj);
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
uint64_t BaseAddr = Section.getAddress();
uint64_t Size = Section.getSize();
if (!Size)
continue;
outs() << "Contents of section ";
StringRef SegmentName = getSegmentName(MachO, Section);
if (!SegmentName.empty())
outs() << SegmentName << ",";
outs() << Name << ":\n";
if (Section.isBSS()) {
outs() << format("<skipping contents of bss section at [%04" PRIx64
", %04" PRIx64 ")>\n",
BaseAddr, BaseAddr + Size);
continue;
}
StringRef Contents = unwrapOrError(Section.getContents(), Obj->getFileName());
// Dump out the content as hex and printable ascii characters.
for (std::size_t Addr = 0, End = Contents.size(); Addr < End; Addr += 16) {
outs() << format(" %04" PRIx64 " ", BaseAddr + Addr);
// Dump line of hex.
for (std::size_t I = 0; I < 16; ++I) {
if (I != 0 && I % 4 == 0)
outs() << ' ';
if (Addr + I < End)
outs() << hexdigit((Contents[Addr + I] >> 4) & 0xF, true)
<< hexdigit(Contents[Addr + I] & 0xF, true);
else
outs() << " ";
}
// Print ascii.
outs() << " ";
for (std::size_t I = 0; I < 16 && Addr + I < End; ++I) {
if (isPrint(static_cast<unsigned char>(Contents[Addr + I]) & 0xFF))
outs() << Contents[Addr + I];
else
outs() << ".";
}
outs() << "\n";
}
}
}
void objdump::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic) {
if (O->isCOFF() && !DumpDynamic) {
outs() << "\nSYMBOL TABLE:\n";
printCOFFSymbolTable(cast<const COFFObjectFile>(O));
return;
}
const StringRef FileName = O->getFileName();
if (!DumpDynamic) {
outs() << "\nSYMBOL TABLE:\n";
for (auto I = O->symbol_begin(); I != O->symbol_end(); ++I)
printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic);
return;
}
outs() << "\nDYNAMIC SYMBOL TABLE:\n";
if (!O->isELF()) {
reportWarning(
"this operation is not currently supported for this file format",
FileName);
return;
}
const ELFObjectFileBase *ELF = cast<const ELFObjectFileBase>(O);
for (auto I = ELF->getDynamicSymbolIterators().begin();
I != ELF->getDynamicSymbolIterators().end(); ++I)
printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic);
}
void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
StringRef FileName, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic) {
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(O);
uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName,
ArchitectureName);
if ((Address < StartAddress) || (Address > StopAddress))
return;
SymbolRef::Type Type =
unwrapOrError(Symbol.getType(), FileName, ArchiveName, ArchitectureName);
uint32_t Flags =
unwrapOrError(Symbol.getFlags(), FileName, ArchiveName, ArchitectureName);
// Don't ask a Mach-O STAB symbol for its section unless you know that
// STAB symbol's section field refers to a valid section index. Otherwise
// querying the symbol may fail while trying to load a section that does
// not exist.
bool IsSTAB = false;
if (MachO) {
DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
uint8_t NType =
(MachO->is64Bit() ? MachO->getSymbol64TableEntry(SymDRI).n_type
: MachO->getSymbolTableEntry(SymDRI).n_type);
if (NType & MachO::N_STAB)
IsSTAB = true;
}
section_iterator Section = IsSTAB
? O->section_end()
: unwrapOrError(Symbol.getSection(), FileName,
ArchiveName, ArchitectureName);
StringRef Name;
if (Type == SymbolRef::ST_Debug && Section != O->section_end()) {
if (Expected<StringRef> NameOrErr = Section->getName())
Name = *NameOrErr;
else
consumeError(NameOrErr.takeError());
} else {
Name = unwrapOrError(Symbol.getName(), FileName, ArchiveName,
ArchitectureName);
}
bool Global = Flags & SymbolRef::SF_Global;
bool Weak = Flags & SymbolRef::SF_Weak;
bool Absolute = Flags & SymbolRef::SF_Absolute;
bool Common = Flags & SymbolRef::SF_Common;
bool Hidden = Flags & SymbolRef::SF_Hidden;
char GlobLoc = ' ';
if ((Section != O->section_end() || Absolute) && !Weak)
GlobLoc = Global ? 'g' : 'l';
char IFunc = ' ';
if (O->isELF()) {
if (ELFSymbolRef(Symbol).getELFType() == ELF::STT_GNU_IFUNC)
IFunc = 'i';
if (ELFSymbolRef(Symbol).getBinding() == ELF::STB_GNU_UNIQUE)
GlobLoc = 'u';
}
char Debug = ' ';
if (DumpDynamic)
Debug = 'D';
else if (Type == SymbolRef::ST_Debug || Type == SymbolRef::ST_File)
Debug = 'd';
char FileFunc = ' ';
if (Type == SymbolRef::ST_File)
FileFunc = 'f';
else if (Type == SymbolRef::ST_Function)
FileFunc = 'F';
else if (Type == SymbolRef::ST_Data)
FileFunc = 'O';
const char *Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
outs() << format(Fmt, Address) << " "
<< GlobLoc // Local -> 'l', Global -> 'g', Neither -> ' '
<< (Weak ? 'w' : ' ') // Weak?
<< ' ' // Constructor. Not supported yet.
<< ' ' // Warning. Not supported yet.
<< IFunc // Indirect reference to another symbol.
<< Debug // Debugging (d) or dynamic (D) symbol.
<< FileFunc // Name of function (F), file (f) or object (O).
<< ' ';
if (Absolute) {
outs() << "*ABS*";
} else if (Common) {
outs() << "*COM*";
} else if (Section == O->section_end()) {
outs() << "*UND*";
} else {
StringRef SegmentName = getSegmentName(MachO, *Section);
if (!SegmentName.empty())
outs() << SegmentName << ",";
StringRef SectionName = unwrapOrError(Section->getName(), FileName);
outs() << SectionName;
}
if (Common || O->isELF()) {
uint64_t Val =
Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize();
outs() << '\t' << format(Fmt, Val);
}
if (O->isELF()) {
uint8_t Other = ELFSymbolRef(Symbol).getOther();
switch (Other) {
case ELF::STV_DEFAULT:
break;
case ELF::STV_INTERNAL:
outs() << " .internal";
break;
case ELF::STV_HIDDEN:
outs() << " .hidden";
break;
case ELF::STV_PROTECTED:
outs() << " .protected";
break;
default:
outs() << format(" 0x%02x", Other);
break;
}
} else if (Hidden) {
outs() << " .hidden";
}
if (Demangle)
outs() << ' ' << demangle(std::string(Name)) << '\n';
else
outs() << ' ' << Name << '\n';
}
static void printUnwindInfo(const ObjectFile *O) {
outs() << "Unwind info:\n\n";
if (const COFFObjectFile *Coff = dyn_cast<COFFObjectFile>(O))
printCOFFUnwindInfo(Coff);
else if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(O))
printMachOUnwindInfo(MachO);
else
// TODO: Extract DWARF dump tool to objdump.
WithColor::error(errs(), ToolName)
<< "This operation is only currently supported "
"for COFF and MachO object files.\n";
}
/// Dump the raw contents of the __clangast section so the output can be piped
/// into llvm-bcanalyzer.
static void printRawClangAST(const ObjectFile *Obj) {
if (outs().is_displayed()) {
WithColor::error(errs(), ToolName)
<< "The -raw-clang-ast option will dump the raw binary contents of "
"the clang ast section.\n"
"Please redirect the output to a file or another program such as "
"llvm-bcanalyzer.\n";
return;
}
StringRef ClangASTSectionName("__clangast");
if (Obj->isCOFF()) {
ClangASTSectionName = "clangast";
}
Optional<object::SectionRef> ClangASTSection;
for (auto Sec : ToolSectionFilter(*Obj)) {
StringRef Name;
if (Expected<StringRef> NameOrErr = Sec.getName())
Name = *NameOrErr;
else
consumeError(NameOrErr.takeError());
if (Name == ClangASTSectionName) {
ClangASTSection = Sec;
break;
}
}
if (!ClangASTSection)
return;
StringRef ClangASTContents = unwrapOrError(
ClangASTSection.getValue().getContents(), Obj->getFileName());
outs().write(ClangASTContents.data(), ClangASTContents.size());
}
static void printFaultMaps(const ObjectFile *Obj) {
StringRef FaultMapSectionName;
if (Obj->isELF()) {
FaultMapSectionName = ".llvm_faultmaps";
} else if (Obj->isMachO()) {
FaultMapSectionName = "__llvm_faultmaps";
} else {
WithColor::error(errs(), ToolName)
<< "This operation is only currently supported "
"for ELF and Mach-O executable files.\n";
return;
}
Optional<object::SectionRef> FaultMapSection;
for (auto Sec : ToolSectionFilter(*Obj)) {
StringRef Name;
if (Expected<StringRef> NameOrErr = Sec.getName())
Name = *NameOrErr;
else
consumeError(NameOrErr.takeError());
if (Name == FaultMapSectionName) {
FaultMapSection = Sec;
break;
}
}
outs() << "FaultMap table:\n";
if (!FaultMapSection.hasValue()) {
outs() << "<not found>\n";
return;
}
StringRef FaultMapContents =
unwrapOrError(FaultMapSection.getValue().getContents(), Obj->getFileName());
FaultMapParser FMP(FaultMapContents.bytes_begin(),
FaultMapContents.bytes_end());
outs() << FMP;
}
static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
if (O->isELF()) {
printELFFileHeader(O);
printELFDynamicSection(O);
printELFSymbolVersionInfo(O);
return;
}
if (O->isCOFF())
return printCOFFFileHeader(O);
if (O->isWasm())
return printWasmFileHeader(O);
if (O->isMachO()) {
printMachOFileHeader(O);
if (!OnlyFirst)
printMachOLoadCommands(O);
return;
}
reportError(O->getFileName(), "Invalid/Unsupported object file format");
}
static void printFileHeaders(const ObjectFile *O) {
if (!O->isELF() && !O->isCOFF())
reportError(O->getFileName(), "Invalid/Unsupported object file format");
Triple::ArchType AT = O->getArch();
outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n";
uint64_t Address = unwrapOrError(O->getStartAddress(), O->getFileName());
StringRef Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
outs() << "start address: "
<< "0x" << format(Fmt.data(), Address) << "\n";
}
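// Example of the output produced above (values are assumed):
//   architecture: x86_64
//   start address: 0x0000000000401000
// The 16-digit form is used for 64-bit objects (getBytesInAddress() > 4);
// 32-bit objects get the 8-digit "%08" PRIx64 format instead.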
static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
Expected<sys::fs::perms> ModeOrErr = C.getAccessMode();
if (!ModeOrErr) {
WithColor::error(errs(), ToolName) << "ill-formed archive entry.\n";
consumeError(ModeOrErr.takeError());
return;
}
sys::fs::perms Mode = ModeOrErr.get();
outs() << ((Mode & sys::fs::owner_read) ? "r" : "-");
outs() << ((Mode & sys::fs::owner_write) ? "w" : "-");
outs() << ((Mode & sys::fs::owner_exe) ? "x" : "-");
outs() << ((Mode & sys::fs::group_read) ? "r" : "-");
outs() << ((Mode & sys::fs::group_write) ? "w" : "-");
outs() << ((Mode & sys::fs::group_exe) ? "x" : "-");
outs() << ((Mode & sys::fs::others_read) ? "r" : "-");
outs() << ((Mode & sys::fs::others_write) ? "w" : "-");
outs() << ((Mode & sys::fs::others_exe) ? "x" : "-");
outs() << " ";
outs() << format("%d/%d %6" PRId64 " ", unwrapOrError(C.getUID(), Filename),
unwrapOrError(C.getGID(), Filename),
unwrapOrError(C.getRawSize(), Filename));
StringRef RawLastModified = C.getRawLastModified();
unsigned Seconds;
if (RawLastModified.getAsInteger(10, Seconds))
outs() << "(date: \"" << RawLastModified
<< "\" contains non-decimal chars) ";
else {
// Since ctime(3) returns a 26-character string of the form:
// "Sun Sep 16 01:03:52 1973\n\0"
// just print 24 characters.
time_t t = Seconds;
outs() << format("%.24s ", ctime(&t));
}
StringRef Name = "";
Expected<StringRef> NameOrErr = C.getName();
if (!NameOrErr) {
consumeError(NameOrErr.takeError());
Name = unwrapOrError(C.getRawName(), Filename);
} else {
Name = NameOrErr.get();
}
outs() << Name << "\n";
}
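// Example of an archive-header line as printed above (field values are
// assumed, not real output):
//   rw-r--r-- 1000/1000   2864 Sun Sep 16 01:03:52 1973 foo.o
// i.e. rwx permission bits, UID/GID, the raw size right-aligned in six
// columns, the 24-character ctime() date, then the member name.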
// For ELF only now.
static bool shouldWarnForInvalidStartStopAddress(ObjectFile *Obj) {
if (const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj)) {
if (Elf->getEType() != ELF::ET_REL)
return true;
}
return false;
}
static void checkForInvalidStartStopAddress(ObjectFile *Obj,
uint64_t Start, uint64_t Stop) {
if (!shouldWarnForInvalidStartStopAddress(Obj))
return;
for (const SectionRef &Section : Obj->sections())
if (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC) {
uint64_t BaseAddr = Section.getAddress();
uint64_t Size = Section.getSize();
if ((Start < BaseAddr + Size) && Stop > BaseAddr)
return;
}
if (!HasStartAddressFlag)
reportWarning("no section has address less than 0x" +
Twine::utohexstr(Stop) + " specified by --stop-address",
Obj->getFileName());
else if (!HasStopAddressFlag)
reportWarning("no section has address greater than or equal to 0x" +
Twine::utohexstr(Start) + " specified by --start-address",
Obj->getFileName());
else
reportWarning("no section overlaps the range [0x" +
Twine::utohexstr(Start) + ",0x" + Twine::utohexstr(Stop) +
") specified by --start-address/--stop-address",
Obj->getFileName());
}
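// A minimal sketch (hypothetical helper, not part of the tool) of the
// half-open overlap test used above: a section [BaseAddr, BaseAddr + Size)
// overlaps the requested range [Start, Stop) exactly when each interval
// begins before the other one ends.
static bool rangesOverlap(uint64_t Start, uint64_t Stop, uint64_t BaseAddr,
uint64_t Size) {
return Start < BaseAddr + Size && Stop > BaseAddr;
}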
static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
const Archive::Child *C = nullptr) {
// Avoid other output when using a raw option.
if (!RawClangAST) {
outs() << '\n';
if (A)
outs() << A->getFileName() << "(" << O->getFileName() << ")";
else
outs() << O->getFileName();
outs() << ":\tfile format " << O->getFileFormatName().lower() << "\n";
}
if (HasStartAddressFlag || HasStopAddressFlag)
checkForInvalidStartStopAddress(O, StartAddress, StopAddress);
// Note: the order here matches GNU objdump for compatibility.
StringRef ArchiveName = A ? A->getFileName() : "";
if (ArchiveHeaders && !MachOOpt && C)
printArchiveChild(ArchiveName, *C);
if (FileHeaders)
printFileHeaders(O);
if (PrivateHeaders || FirstPrivateHeader)
printPrivateFileHeaders(O, FirstPrivateHeader);
if (SectionHeaders)
printSectionHeaders(O);
if (SymbolTable)
printSymbolTable(O, ArchiveName);
if (DynamicSymbolTable)
printSymbolTable(O, ArchiveName, /*ArchitectureName=*/"",
/*DumpDynamic=*/true);
if (DwarfDumpType != DIDT_Null) {
std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O);
// Dump the complete DWARF structure.
DIDumpOptions DumpOpts;
DumpOpts.DumpType = DwarfDumpType;
DICtx->dump(outs(), DumpOpts);
}
if (Relocations && !Disassemble)
printRelocations(O);
if (DynamicRelocations)
printDynamicRelocations(O);
if (SectionContents)
printSectionContents(O);
if (Disassemble)
disassembleObject(O, Relocations);
if (UnwindInfo)
printUnwindInfo(O);
// Mach-O specific options:
if (ExportsTrie)
printExportsTrie(O);
if (Rebase)
printRebaseTable(O);
if (Bind)
printBindTable(O);
if (LazyBind)
printLazyBindTable(O);
if (WeakBind)
printWeakBindTable(O);
// Other special sections:
if (RawClangAST)
printRawClangAST(O);
if (FaultMapSection)
printFaultMaps(O);
}
static void dumpObject(const COFFImportFile *I, const Archive *A,
const Archive::Child *C = nullptr) {
StringRef ArchiveName = A ? A->getFileName() : "";
// Avoid other output when using a raw option.
if (!RawClangAST)
outs() << '\n'
<< ArchiveName << "(" << I->getFileName() << ")"
<< ":\tfile format COFF-import-file"
<< "\n\n";
if (ArchiveHeaders && !MachOOpt && C)
printArchiveChild(ArchiveName, *C);
if (SymbolTable)
printCOFFSymbolTable(I);
}
/// Dump each object file in \a A.
static void dumpArchive(const Archive *A) {
Error Err = Error::success();
unsigned I = -1;
for (auto &C : A->children(Err)) {
++I;
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
reportError(std::move(E), getFileNameForError(C, I), A->getFileName());
continue;
}
if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
dumpObject(O, A, &C);
else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
dumpObject(I, A, &C);
else
reportError(errorCodeToError(object_error::invalid_file_type),
A->getFileName());
}
if (Err)
reportError(std::move(Err), A->getFileName());
}
/// Open file and figure out how to dump it.
static void dumpInput(StringRef file) {
// If we are using the Mach-O specific object file parser, then let it parse
// the file and process the command line options. So the -arch flags can
// be used to select specific slices, etc.
if (MachOOpt) {
parseInputMachO(file);
return;
}
// Attempt to open the binary.
OwningBinary<Binary> OBinary = unwrapOrError(createBinary(file), file);
Binary &Binary = *OBinary.getBinary();
if (Archive *A = dyn_cast<Archive>(&Binary))
dumpArchive(A);
else if (ObjectFile *O = dyn_cast<ObjectFile>(&Binary))
dumpObject(O);
else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Binary))
parseInputMachO(UB);
else
reportError(errorCodeToError(object_error::invalid_file_type), file);
}
template <typename T>
static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID,
T &Value) {
if (const opt::Arg *A = InputArgs.getLastArg(ID)) {
StringRef V(A->getValue());
if (!llvm::to_integer(V, Value, 0)) {
reportCmdLineError(A->getSpelling() +
": expected a non-negative integer, but got '" + V +
"'");
}
}
}
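// Note: to_integer() is called with radix 0, so the standard prefixes are
// auto-detected; e.g. (assumed invocations) --start-address=0x400000 and
// --start-address=4194304 parse to the same value.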
static std::vector<std::string>
commaSeparatedValues(const llvm::opt::InputArgList &InputArgs, int ID) {
std::vector<std::string> Values;
for (StringRef Value : InputArgs.getAllArgValues(ID)) {
llvm::SmallVector<StringRef, 2> SplitValues;
llvm::SplitString(Value, SplitValues, ",");
for (StringRef SplitValue : SplitValues)
Values.push_back(SplitValue.str());
}
return Values;
}
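// Illustrative expansion (the options shown are assumed inputs): values from
// repeated occurrences are flattened after comma-splitting, so
//   --disassemble-symbols=main,foo --disassemble-symbols=bar
// produces {"main", "foo", "bar"}.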
static void parseOtoolOptions(const llvm::opt::InputArgList &InputArgs) {
MachOOpt = true;
FullLeadingAddr = true;
PrintImmHex = true;
ArchName = InputArgs.getLastArgValue(OTOOL_arch).str();
LinkOptHints = InputArgs.hasArg(OTOOL_C);
if (InputArgs.hasArg(OTOOL_d))
FilterSections.push_back("__DATA,__data");
DylibId = InputArgs.hasArg(OTOOL_D);
UniversalHeaders = InputArgs.hasArg(OTOOL_f);
DataInCode = InputArgs.hasArg(OTOOL_G);
FirstPrivateHeader = InputArgs.hasArg(OTOOL_h);
IndirectSymbols = InputArgs.hasArg(OTOOL_I);
ShowRawInsn = InputArgs.hasArg(OTOOL_j);
PrivateHeaders = InputArgs.hasArg(OTOOL_l);
DylibsUsed = InputArgs.hasArg(OTOOL_L);
MCPU = InputArgs.getLastArgValue(OTOOL_mcpu_EQ).str();
ObjcMetaData = InputArgs.hasArg(OTOOL_o);
DisSymName = InputArgs.getLastArgValue(OTOOL_p).str();
InfoPlist = InputArgs.hasArg(OTOOL_P);
Relocations = InputArgs.hasArg(OTOOL_r);
if (const Arg *A = InputArgs.getLastArg(OTOOL_s)) {
auto Filter = (A->getValue(0) + StringRef(",") + A->getValue(1)).str();
FilterSections.push_back(Filter);
}
if (InputArgs.hasArg(OTOOL_t))
FilterSections.push_back("__TEXT,__text");
Verbose = InputArgs.hasArg(OTOOL_v) || InputArgs.hasArg(OTOOL_V) ||
InputArgs.hasArg(OTOOL_o);
SymbolicOperands = InputArgs.hasArg(OTOOL_V);
if (InputArgs.hasArg(OTOOL_x))
FilterSections.push_back(",__text");
LeadingAddr = LeadingHeaders = !InputArgs.hasArg(OTOOL_X);
InputFilenames = InputArgs.getAllArgValues(OTOOL_INPUT);
if (InputFilenames.empty())
reportCmdLineError("no input file");
for (const Arg *A : InputArgs) {
const Option &O = A->getOption();
if (O.getGroup().isValid() && O.getGroup().getID() == OTOOL_grp_obsolete) {
reportCmdLineWarning(O.getPrefixedName() +
" is obsolete and not implemented");
}
}
}
static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
parseIntArg(InputArgs, OBJDUMP_adjust_vma_EQ, AdjustVMA);
AllHeaders = InputArgs.hasArg(OBJDUMP_all_headers);
ArchName = InputArgs.getLastArgValue(OBJDUMP_arch_name_EQ).str();
ArchiveHeaders = InputArgs.hasArg(OBJDUMP_archive_headers);
Demangle = InputArgs.hasArg(OBJDUMP_demangle);
Disassemble = InputArgs.hasArg(OBJDUMP_disassemble);
DisassembleAll = InputArgs.hasArg(OBJDUMP_disassemble_all);
SymbolDescription = InputArgs.hasArg(OBJDUMP_symbol_description);
DisassembleSymbols =
commaSeparatedValues(InputArgs, OBJDUMP_disassemble_symbols_EQ);
DisassembleZeroes = InputArgs.hasArg(OBJDUMP_disassemble_zeroes);
if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_dwarf_EQ)) {
DwarfDumpType =
StringSwitch<DIDumpType>(A->getValue()).Case("frames", DIDT_DebugFrame);
}
DynamicRelocations = InputArgs.hasArg(OBJDUMP_dynamic_reloc);
FaultMapSection = InputArgs.hasArg(OBJDUMP_fault_map_section);
FileHeaders = InputArgs.hasArg(OBJDUMP_file_headers);
SectionContents = InputArgs.hasArg(OBJDUMP_full_contents);
PrintLines = InputArgs.hasArg(OBJDUMP_line_numbers);
InputFilenames = InputArgs.getAllArgValues(OBJDUMP_INPUT);
MachOOpt = InputArgs.hasArg(OBJDUMP_macho);
MCPU = InputArgs.getLastArgValue(OBJDUMP_mcpu_EQ).str();
MAttrs = commaSeparatedValues(InputArgs, OBJDUMP_mattr_EQ);
ShowRawInsn = !InputArgs.hasArg(OBJDUMP_no_show_raw_insn);
LeadingAddr = !InputArgs.hasArg(OBJDUMP_no_leading_addr);
RawClangAST = InputArgs.hasArg(OBJDUMP_raw_clang_ast);
Relocations = InputArgs.hasArg(OBJDUMP_reloc);
PrintImmHex =
InputArgs.hasFlag(OBJDUMP_print_imm_hex, OBJDUMP_no_print_imm_hex, false);
PrivateHeaders = InputArgs.hasArg(OBJDUMP_private_headers);
FilterSections = InputArgs.getAllArgValues(OBJDUMP_section_EQ);
SectionHeaders = InputArgs.hasArg(OBJDUMP_section_headers);
ShowLMA = InputArgs.hasArg(OBJDUMP_show_lma);
PrintSource = InputArgs.hasArg(OBJDUMP_source);
parseIntArg(InputArgs, OBJDUMP_start_address_EQ, StartAddress);
HasStartAddressFlag = InputArgs.hasArg(OBJDUMP_start_address_EQ);
parseIntArg(InputArgs, OBJDUMP_stop_address_EQ, StopAddress);
HasStopAddressFlag = InputArgs.hasArg(OBJDUMP_stop_address_EQ);
SymbolTable = InputArgs.hasArg(OBJDUMP_syms);
SymbolizeOperands = InputArgs.hasArg(OBJDUMP_symbolize_operands);
DynamicSymbolTable = InputArgs.hasArg(OBJDUMP_dynamic_syms);
TripleName = InputArgs.getLastArgValue(OBJDUMP_triple_EQ).str();
UnwindInfo = InputArgs.hasArg(OBJDUMP_unwind_info);
Wide = InputArgs.hasArg(OBJDUMP_wide);
Prefix = InputArgs.getLastArgValue(OBJDUMP_prefix).str();
parseIntArg(InputArgs, OBJDUMP_prefix_strip, PrefixStrip);
if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_debug_vars_EQ)) {
DbgVariables = StringSwitch<DebugVarsFormat>(A->getValue())
.Case("ascii", DVASCII)
.Case("unicode", DVUnicode);
}
parseIntArg(InputArgs, OBJDUMP_debug_vars_indent_EQ, DbgIndent);
parseMachOOptions(InputArgs);
// Parse -M (--disassembler-options) and deprecated
// --x86-asm-syntax={att,intel}.
//
// Note, for x86, the asm dialect (AssemblerDialect) is initialized when the
// MCAsmInfo is constructed. MCInstPrinter::applyTargetSpecificCLOption is
// called too late. For now we have to use the internal cl::opt option.
const char *AsmSyntax = nullptr;
for (const auto *A : InputArgs.filtered(OBJDUMP_disassembler_options_EQ,
OBJDUMP_x86_asm_syntax_att,
OBJDUMP_x86_asm_syntax_intel)) {
switch (A->getOption().getID()) {
case OBJDUMP_x86_asm_syntax_att:
AsmSyntax = "--x86-asm-syntax=att";
continue;
case OBJDUMP_x86_asm_syntax_intel:
AsmSyntax = "--x86-asm-syntax=intel";
continue;
}
SmallVector<StringRef, 2> Values;
llvm::SplitString(A->getValue(), Values, ",");
for (StringRef V : Values) {
if (V == "att")
AsmSyntax = "--x86-asm-syntax=att";
else if (V == "intel")
AsmSyntax = "--x86-asm-syntax=intel";
else
DisassemblerOptions.push_back(V.str());
}
}
if (AsmSyntax) {
const char *Argv[] = {"llvm-objdump", AsmSyntax};
llvm::cl::ParseCommandLineOptions(2, Argv);
}
// objdump defaults to a.out if no filenames are specified.
if (InputFilenames.empty())
InputFilenames.push_back("a.out");
}
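// Sketch of the -M handling above (command lines are assumed examples):
//   llvm-objdump -d -M intel a.out    selects "--x86-asm-syntax=intel"
//   llvm-objdump -d -M att,foo a.out  selects "--x86-asm-syntax=att" and
//                                     pushes "foo" into DisassemblerOptions,
// where "foo" stands for any target-specific disassembler option.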
int main(int argc, char **argv) {
using namespace llvm;
InitLLVM X(argc, argv);
ToolName = argv[0];
std::unique_ptr<CommonOptTable> T;
OptSpecifier Unknown, HelpFlag, HelpHiddenFlag, VersionFlag;
StringRef Stem = sys::path::stem(ToolName);
auto Is = [=](StringRef Tool) {
// We need to recognize the following filenames:
//
// llvm-objdump -> objdump
// llvm-otool-10.exe -> otool
// powerpc64-unknown-freebsd13-objdump -> objdump
auto I = Stem.rfind_insensitive(Tool);
return I != StringRef::npos &&
(I + Tool.size() == Stem.size() || !isAlnum(Stem[I + Tool.size()]));
};
if (Is("otool")) {
T = std::make_unique<OtoolOptTable>();
Unknown = OTOOL_UNKNOWN;
HelpFlag = OTOOL_help;
HelpHiddenFlag = OTOOL_help_hidden;
VersionFlag = OTOOL_version;
} else {
T = std::make_unique<ObjdumpOptTable>();
Unknown = OBJDUMP_UNKNOWN;
HelpFlag = OBJDUMP_help;
HelpHiddenFlag = OBJDUMP_help_hidden;
VersionFlag = OBJDUMP_version;
}
BumpPtrAllocator A;
StringSaver Saver(A);
opt::InputArgList InputArgs =
T->parseArgs(argc, argv, Unknown, Saver,
[&](StringRef Msg) { reportCmdLineError(Msg); });
if (InputArgs.size() == 0 || InputArgs.hasArg(HelpFlag)) {
T->printHelp(ToolName);
return 0;
}
if (InputArgs.hasArg(HelpHiddenFlag)) {
T->printHelp(ToolName, /*show_hidden=*/true);
return 0;
}
// Initialize targets and assembly printers/parsers.
InitializeAllTargetInfos();
InitializeAllTargetMCs();
InitializeAllDisassemblers();
if (InputArgs.hasArg(VersionFlag)) {
cl::PrintVersionMessage();
if (!Is("otool")) {
outs() << '\n';
TargetRegistry::printRegisteredTargetsForVersion(outs());
}
return 0;
}
if (Is("otool"))
parseOtoolOptions(InputArgs);
else
parseObjdumpOptions(InputArgs);
if (StartAddress >= StopAddress)
reportCmdLineError("start address should be less than stop address");
// Removes trailing separators from prefix.
while (!Prefix.empty() && sys::path::is_separator(Prefix.back()))
Prefix.pop_back();
if (AllHeaders)
ArchiveHeaders = FileHeaders = PrivateHeaders = Relocations =
SectionHeaders = SymbolTable = true;
if (DisassembleAll || PrintSource || PrintLines ||
!DisassembleSymbols.empty())
Disassemble = true;
if (!ArchiveHeaders && !Disassemble && DwarfDumpType == DIDT_Null &&
!DynamicRelocations && !FileHeaders && !PrivateHeaders && !RawClangAST &&
!Relocations && !SectionHeaders && !SectionContents && !SymbolTable &&
!DynamicSymbolTable && !UnwindInfo && !FaultMapSection &&
!(MachOOpt &&
(Bind || DataInCode || DylibId || DylibsUsed || ExportsTrie ||
FirstPrivateHeader || FunctionStarts || IndirectSymbols || InfoPlist ||
LazyBind || LinkOptHints || ObjcMetaData || Rebase || Rpaths ||
UniversalHeaders || WeakBind || !FilterSections.empty()))) {
T->printHelp(ToolName);
return 2;
}
DisasmSymbolSet.insert(DisassembleSymbols.begin(), DisassembleSymbols.end());
llvm::for_each(InputFilenames, dumpInput);
warnOnNoMatchForSections();
return EXIT_SUCCESS;
}
diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc
index c7ff897b27b6..b12c539ee764 100644
--- a/lib/clang/include/VCSVersion.inc
+++ b/lib/clang/include/VCSVersion.inc
@@ -1,14 +1,14 @@
// $FreeBSD$
-#define LLVM_REVISION "llvmorg-13.0.0-rc1-97-g23ba3732246a"
+#define LLVM_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"
-#define CLANG_REVISION "llvmorg-13.0.0-rc1-97-g23ba3732246a"
+#define CLANG_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define CLANG_REPOSITORY "git@github.com:llvm/llvm-project.git"
// <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
-#define LLD_REVISION "llvmorg-13.0.0-rc1-97-g23ba3732246a-1200014"
+#define LLD_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5-1200014"
#define LLD_REPOSITORY "FreeBSD"
-#define LLDB_REVISION "llvmorg-13.0.0-rc1-97-g23ba3732246a"
+#define LLDB_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define LLDB_REPOSITORY "git@github.com:llvm/llvm-project.git"
diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h
index 1257fe7db9e9..ef7b3966a338 100644
--- a/lib/clang/include/llvm/Support/VCSRevision.h
+++ b/lib/clang/include/llvm/Support/VCSRevision.h
@@ -1,3 +1,3 @@
/* $FreeBSD$ */
-#define LLVM_REVISION "llvmorg-13.0.0-rc1-97-g23ba3732246a"
+#define LLVM_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"
